Index: usr.sbin/bhyve/Makefile
===================================================================
--- usr.sbin/bhyve/Makefile
+++ usr.sbin/bhyve/Makefile
@@ -19,6 +19,7 @@
 	audio.c			\
 	bhyvegc.c		\
 	bhyverun.c		\
+	block_backends.c	\
 	block_if.c		\
 	bootrom.c		\
 	console.c		\
Index: usr.sbin/bhyve/block_backends.h
===================================================================
--- usr.sbin/bhyve/block_backends.h
+++ usr.sbin/bhyve/block_backends.h
@@ -25,7 +25,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: head/usr.sbin/bhyve/block_if.h 347033 2019-05-02 22:46:37Z jhb $
+ * $FreeBSD$
  */
 
 /*
@@ -35,12 +35,16 @@
  * another thread.
  */
 
-#ifndef _BLOCK_IF_H_
-#define _BLOCK_IF_H_
+#ifndef _BLOCK_BACKENDS_H_
+#define _BLOCK_BACKENDS_H_
 
+#include 
 #include 
 #include 
 
+/* Opaque type representing a block device backend. */
+typedef struct block_backend block_backend_t;
+
 /*
  * BLOCKIF_IOV_MAX is the maximum number of scatter/gather entries in
  * a single request.  BLOCKIF_RING_MAX is the maxmimum number of
@@ -49,6 +53,26 @@
 #define BLOCKIF_IOV_MAX		128	/* not practical to be IOV_MAX */
 #define	BLOCKIF_RING_MAX	128
 
+#define BLOCKIF_SIG		0xb109b109
+
+#define BLOCKIF_NUMTHR		8
+#define BLOCKIF_MAXREQ		(BLOCKIF_RING_MAX + BLOCKIF_NUMTHR)
+
+enum blockop {
+	BOP_READ,
+	BOP_WRITE,
+	BOP_FLUSH,
+	BOP_DELETE
+};
+
+enum blockstat {
+	BST_FREE,
+	BST_BLOCK,
+	BST_PEND,
+	BST_BUSY,
+	BST_DONE
+};
+
 struct blockif_req {
 	int		br_iovcnt;
 	off_t		br_offset;
@@ -58,21 +82,78 @@
 	struct iovec	br_iov[BLOCKIF_IOV_MAX];
 };
 
-struct blockif_ctxt;
-struct blockif_ctxt *blockif_open(const char *optstr, const char *ident);
-off_t	blockif_size(struct blockif_ctxt *bc);
-void	blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h,
-	    uint8_t *s);
-int	blockif_sectsz(struct blockif_ctxt *bc);
-void	blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off);
-int	blockif_queuesz(struct blockif_ctxt *bc);
-int	blockif_is_ro(struct blockif_ctxt *bc);
-int	blockif_candelete(struct blockif_ctxt *bc);
-int	blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq);
-int	blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq);
-int	blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq);
-int	blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq);
-int	blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq);
-int	blockif_close(struct blockif_ctxt *bc);
+/* Interface between block device frontends and backends. */
+int	blockbe_open(block_backend_t **ret, const char *optstr,
+	    const char *pci_ident);
+off_t	blockbe_size(block_backend_t *be);
+void	blockbe_chs(block_backend_t *be, uint16_t *c, uint8_t *h,
+	    uint8_t *s);
+int	blockbe_sectsz(block_backend_t *be);
+void	blockbe_psectsz(block_backend_t *be, int *size, int *off);
+int	blockbe_queuesz(block_backend_t *be);
+int	blockbe_is_ro(block_backend_t *be);
+int	blockbe_candelete(block_backend_t *be);
+int	blockbe_read(block_backend_t *be, struct blockif_req *breq);
+int	blockbe_write(block_backend_t *be, struct blockif_req *breq);
+int	blockbe_flush(block_backend_t *be, struct blockif_req *breq);
+int	blockbe_delete(block_backend_t *be, struct blockif_req *breq);
+int	blockbe_cancel(block_backend_t *be, struct blockif_req *breq);
+int	blockbe_close(block_backend_t *be);
 
-#endif /* _BLOCK_IF_H_ */
+/*
+ * Each block device backend registers a set of function pointers that are
+ * used to implement the block backends API.
+ */
+struct block_backend {
+	const char *prefix;	/* prefix matching this backend */
+
+	/*
+	 * Routines used to initialize and cleanup the resources needed
+	 * by a backend. The cleanup function is used internally,
+	 * and should not be called by the frontend.
+	 */
+	void	(*init)(void);
+
+	void	(*cleanup)(block_backend_t **be);
+
+	int	(*open)(block_backend_t **be, const char *optstr,
+		    const char *pci_ident, const char *be_ident);
+
+	off_t	(*size)(block_backend_t *be);
+
+	void	(*chs)(block_backend_t *be, uint16_t *c, uint8_t *h,
+		    uint8_t *s);
+
+	int	(*sectsz)(block_backend_t *be);
+
+	void	(*psectsz)(block_backend_t *be, int *size, int *off);
+
+	int	(*queuesz)(block_backend_t *be);
+
+	int	(*is_ro)(block_backend_t *be);
+
+	int	(*candelete)(block_backend_t *be);
+
+	int	(*read)(block_backend_t *be, struct blockif_req *breq);
+
+	int	(*write)(block_backend_t *be, struct blockif_req *breq);
+
+	int	(*flush)(block_backend_t *be, struct blockif_req *breq);
+
+	int	(*delete)(block_backend_t *be, struct blockif_req *breq);
+
+	int	(*cancel)(block_backend_t *be, struct blockif_req *breq);
+
+	int	(*close)(block_backend_t *be);
+
+	struct pci_vtblk_softc *sc;
+	struct blockif_ctxt *bc;
+
+	/* Size of backend-specific private data. */
+	size_t priv_size;
+
+	/* Room for backend-specific data. */
+	char opaque[0];
+};
+
+#endif	/* _BLOCK_BACKENDS_H_ */
Index: usr.sbin/bhyve/block_backends.c
===================================================================
--- usr.sbin/bhyve/block_backends.c
+++ usr.sbin/bhyve/block_backends.c
@@ -58,794 +58,154 @@
 #include 
 
+#include 
+
 #include "bhyverun.h"
 #include "debug.h"
 #include "mevent.h"
-#include "block_if.h"
+#include "block_backends.h"
 
-#define BLOCKIF_SIG	0xb109b109
+SET_DECLARE(block_backend_set, block_backend_t);
 
-#define BLOCKIF_NUMTHR	8
-#define BLOCKIF_MAXREQ	(BLOCKIF_RING_MAX + BLOCKIF_NUMTHR)
-
-enum blockop {
-	BOP_READ,
-	BOP_WRITE,
-	BOP_FLUSH,
-	BOP_DELETE
-};
-
-enum blockstat {
-	BST_FREE,
-	BST_BLOCK,
-	BST_PEND,
-	BST_BUSY,
-	BST_DONE
-};
-
-struct blockif_elem {
-	TAILQ_ENTRY(blockif_elem) be_link;
-	struct blockif_req	*be_req;
-	enum blockop		be_op;
-	enum blockstat		be_status;
-	pthread_t		be_tid;
-	off_t			be_block;
-};
-
-struct blockif_ctxt {
-	int			bc_magic;
-	int			bc_fd;
-	int			bc_ischr;
-	int			bc_isgeom;
-	int			bc_candelete;
-	int			bc_rdonly;
-	off_t			bc_size;
-	int			bc_sectsz;
-	int			bc_psectsz;
-	int			bc_psectoff;
-	int			bc_closing;
-	pthread_t		bc_btid[BLOCKIF_NUMTHR];
-	pthread_mutex_t		bc_mtx;
-	pthread_cond_t		bc_cond;
-
-	/* Request elements and free/pending/busy queues */
-	TAILQ_HEAD(, blockif_elem) bc_freeq;
-	TAILQ_HEAD(, blockif_elem) bc_pendq;
-	TAILQ_HEAD(, blockif_elem) bc_busyq;
-	struct blockif_elem	bc_reqs[BLOCKIF_MAXREQ];
-};
-
-static pthread_once_t blockif_once = PTHREAD_ONCE_INIT;
-
-struct blockif_sig_elem {
-	pthread_mutex_t		bse_mtx;
-	pthread_cond_t		bse_cond;
-	int			bse_pending;
-	struct blockif_sig_elem	*bse_next;
-};
-
-static struct blockif_sig_elem *blockif_bse_head;
-
-static int
-blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
-		enum blockop op)
+/*
+ * Initialize a backend and attach to the frontend.
+ * This is called during frontend initialization.
+ * @pbe is a pointer to the backend to be initialized
+ * @devname is the backend-name as supplied on the command line,
+ * e.g.
-s 2:0,frontend-name,backend-name[,other-args] + * @cb is the receive callback supplied by the frontend, + * and it is invoked in the event loop when a receive + * event is generated in the hypervisor, + * @param is a pointer to the frontend, and normally used as + * the argument for the callback. + */ +int +blockbe_open(block_backend_t **ret, const char *optstr, + const char *pci_ident); { - struct blockif_elem *be, *tbe; - off_t off; - int i; + block_backend_t **bbe = NULL; + int err; + *ret = NULL; - be = TAILQ_FIRST(&bc->bc_freeq); - assert(be != NULL); - assert(be->be_status == BST_FREE); - TAILQ_REMOVE(&bc->bc_freeq, be, be_link); - be->be_req = breq; - be->be_op = op; - switch (op) { - case BOP_READ: - case BOP_WRITE: - case BOP_DELETE: - off = breq->br_offset; - for (i = 0; i < breq->br_iovcnt; i++) - off += breq->br_iov[i].iov_len; - break; - default: - off = OFF_MAX; - } - be->be_block = off; - TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { - if (tbe->be_block == breq->br_offset) - break; - } - if (tbe == NULL) { - TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { - if (tbe->be_block == breq->br_offset) - break; - } - } - if (tbe == NULL) - be->be_status = BST_PEND; - else - be->be_status = BST_BLOCK; - TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); - return (be->be_status == BST_PEND); -} - -static int -blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) -{ - struct blockif_elem *be; - - TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { - if (be->be_status == BST_PEND) - break; - assert(be->be_status == BST_BLOCK); - } - if (be == NULL) - return (0); - TAILQ_REMOVE(&bc->bc_pendq, be, be_link); - be->be_status = BST_BUSY; - be->be_tid = t; - TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); - *bep = be; - return (1); -} - -static void -blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) -{ - struct blockif_elem *tbe; - - if (be->be_status == BST_DONE || be->be_status == BST_BUSY) - TAILQ_REMOVE(&bc->bc_busyq, be, be_link); - else - TAILQ_REMOVE(&bc->bc_pendq, be, be_link); - TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { - if (tbe->be_req->br_offset == be->be_block) - tbe->be_status = BST_PEND; - } - be->be_tid = 0; - be->be_status = BST_FREE; - be->be_req = NULL; - TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); -} - -static void -blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) -{ - struct blockif_req *br; - off_t arg[2]; - ssize_t clen, len, off, boff, voff; - int i, err; - - br = be->be_req; - if (br->br_iovcnt <= 1) - buf = NULL; - err = 0; - switch (be->be_op) { - case BOP_READ: - if (buf == NULL) { - if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, - br->br_offset)) < 0) - err = errno; - else - br->br_resid -= len; - break; - } - i = 0; - off = voff = 0; - while (br->br_resid > 0) { - len = MIN(br->br_resid, MAXPHYS); - if (pread(bc->bc_fd, buf, len, br->br_offset + - off) < 0) { - err = errno; - break; - } - boff = 0; - do { - clen = MIN(len - boff, br->br_iov[i].iov_len - - voff); - memcpy(br->br_iov[i].iov_base + voff, - buf + boff, clen); - if (clen < br->br_iov[i].iov_len - voff) - voff += clen; - else { - i++; - voff = 0; - } - boff += clen; - } while (boff < len); - off += len; - br->br_resid -= len; - } - break; - case BOP_WRITE: - if (bc->bc_rdonly) { - err = EROFS; - break; - } - if (buf == NULL) { - if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, - br->br_offset)) < 0) - err = errno; - else - br->br_resid -= len; - break; - } - i = 0; - off = voff = 0; - while (br->br_resid > 0) { - len = 
MIN(br->br_resid, MAXPHYS); - boff = 0; - do { - clen = MIN(len - boff, br->br_iov[i].iov_len - - voff); - memcpy(buf + boff, - br->br_iov[i].iov_base + voff, clen); - if (clen < br->br_iov[i].iov_len - voff) - voff += clen; - else { - i++; - voff = 0; - } - boff += clen; - } while (boff < len); - if (pwrite(bc->bc_fd, buf, len, br->br_offset + - off) < 0) { - err = errno; - break; - } - off += len; - br->br_resid -= len; - } - break; - case BOP_FLUSH: - if (bc->bc_ischr) { - if (ioctl(bc->bc_fd, DIOCGFLUSH)) - err = errno; - } else if (fsync(bc->bc_fd)) - err = errno; - break; - case BOP_DELETE: - if (!bc->bc_candelete) - err = EOPNOTSUPP; - else if (bc->bc_rdonly) - err = EROFS; - else if (bc->bc_ischr) { - arg[0] = br->br_offset; - arg[1] = br->br_resid; - if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) - err = errno; - else - br->br_resid = 0; - } else - err = EOPNOTSUPP; - break; - default: - err = EINVAL; - break; - } - - be->be_status = BST_DONE; - - (*br->br_callback)(br, err); -} - -static void * -blockif_thr(void *arg) -{ - struct blockif_ctxt *bc; - struct blockif_elem *be; - pthread_t t; - uint8_t *buf; - - bc = arg; - if (bc->bc_isgeom) - buf = malloc(MAXPHYS); - else - buf = NULL; - t = pthread_self(); - - pthread_mutex_lock(&bc->bc_mtx); - for (;;) { - while (blockif_dequeue(bc, t, &be)) { - pthread_mutex_unlock(&bc->bc_mtx); - blockif_proc(bc, be, buf); - pthread_mutex_lock(&bc->bc_mtx); - blockif_complete(bc, be); - } - /* Check ctxt status here to see if exit requested */ - if (bc->bc_closing) - break; - pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); - } - pthread_mutex_unlock(&bc->bc_mtx); - - if (buf) - free(buf); - pthread_exit(NULL); - return (NULL); -} - -static void -blockif_sigcont_handler(int signal, enum ev_type type, void *arg) -{ - struct blockif_sig_elem *bse; - - for (;;) { - /* - * Process the entire list even if not intended for - * this thread. - */ - do { - bse = blockif_bse_head; - if (bse == NULL) - return; - } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, - (uintptr_t)bse, - (uintptr_t)bse->bse_next)); - - pthread_mutex_lock(&bse->bse_mtx); - bse->bse_pending = 0; - pthread_cond_signal(&bse->bse_cond); - pthread_mutex_unlock(&bse->bse_mtx); - } -} - -static void -blockif_init(void) -{ - mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); - (void) signal(SIGCONT, SIG_IGN); -} - -struct blockif_ctxt * -blockif_open(const char *optstr, const char *ident) -{ - char tname[MAXCOMLEN + 1]; - char name[MAXPATHLEN]; - char *nopt, *xopts, *cp; - struct blockif_ctxt *bc; - struct stat sbuf; - struct diocgattr_arg arg; - off_t size, psectsz, psectoff; - int extra, fd, i, sectsz; - int nocache, sync, ro, candelete, geom, ssopt, pssopt; -#ifndef WITHOUT_CAPSICUM - cap_rights_t rights; - cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE }; -#endif - - pthread_once(&blockif_once, blockif_init); - - fd = -1; - ssopt = 0; - nocache = 0; - sync = 0; - ro = 0; - + /* + * Find the block device backend that matches the user-provided + * device name. block_backend_set is built using a linker set. + */ + SET_FOREACH(bbe, block_backend_set) { + + /* + * How do we find the appropriate open for each backend? + * We should itterate over all block_open() functions until one + * returns true as an indication that it accepts the give descriptor + * in opstr + */ /* - * The first element in the optstring is always a pathname. 
- * Optional elements follow + * Local access has a pattern like: + * 3:0,virtio-blk,file/somewhere/guest.img + * 3:0,virtio-blk,/dev/xxxx + * The file part should be in optstr so we check if optstr exists as a file + * in which case it would be access to the local filesystem. + * This is handled by the blk-local backend. + * If this does not match then other backends in the block_backend_set have + * their open() called. The first one returning 0 is a match and is used + * with the specification in optstr + * */ - nopt = xopts = strdup(optstr); - while (xopts != NULL) { - cp = strsep(&xopts, ","); - if (cp == nopt) /* file or device pathname */ - continue; - else if (!strcmp(cp, "nocache")) - nocache = 1; - else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) - sync = 1; - else if (!strcmp(cp, "ro")) - ro = 1; - else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) - ; - else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) - pssopt = ssopt; - else { - EPRINTLN("Invalid device option \"%s\"", cp); - goto err; - } - } - - extra = 0; - if (nocache) - extra |= O_DIRECT; - if (sync) - extra |= O_SYNC; - - fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); - if (fd < 0 && !ro) { - /* Attempt a r/w fail with a r/o open */ - fd = open(nopt, O_RDONLY | extra); - ro = 1; - } - - if (fd < 0) { - warn("Could not open backing file: %s", nopt); - goto err; - } - - if (fstat(fd, &sbuf) < 0) { - warn("Could not stat backing file %s", nopt); - goto err; + if ((err = (*bbe)->open(ret, optstr, pci_ident, be_ident)) == 0) { + break; } - -#ifndef WITHOUT_CAPSICUM - cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK, - CAP_WRITE); - if (ro) - cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE); - - if (caph_rights_limit(fd, &rights) == -1) - errx(EX_OSERR, "Unable to apply rights for sandbox"); -#endif - - /* - * Deal with raw devices - */ - size = sbuf.st_size; - sectsz = DEV_BSIZE; - psectsz = psectoff = 0; - candelete = geom = 0; - if (S_ISCHR(sbuf.st_mode)) { - if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || - ioctl(fd, DIOCGSECTORSIZE, §sz)) { - perror("Could not fetch dev blk/sector size"); - goto err; - } - assert(size != 0); - assert(sectsz != 0); - if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) - ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); - strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); - arg.len = sizeof(arg.value.i); - if (ioctl(fd, DIOCGATTR, &arg) == 0) - candelete = arg.value.i; - if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) - geom = 1; - } else - psectsz = sbuf.st_blksize; - -#ifndef WITHOUT_CAPSICUM - if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) - errx(EX_OSERR, "Unable to apply rights for sandbox"); -#endif - - if (ssopt != 0) { - if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || - ssopt > pssopt) { - EPRINTLN("Invalid sector size %d/%d", - ssopt, pssopt); - goto err; - } - - /* - * Some backend drivers (e.g. cd0, ada0) require that the I/O - * size be a multiple of the device's sector size. - * - * Validate that the emulated sector size complies with this - * requirement. 
- */ - if (S_ISCHR(sbuf.st_mode)) { - if (ssopt < sectsz || (ssopt % sectsz) != 0) { - EPRINTLN("Sector size %d incompatible " - "with underlying device sector size %d", - ssopt, sectsz); - goto err; - } - } - - sectsz = ssopt; - psectsz = pssopt; - psectoff = 0; - } - - bc = calloc(1, sizeof(struct blockif_ctxt)); - if (bc == NULL) { - perror("calloc"); - goto err; - } - - bc->bc_magic = BLOCKIF_SIG; - bc->bc_fd = fd; - bc->bc_ischr = S_ISCHR(sbuf.st_mode); - bc->bc_isgeom = geom; - bc->bc_candelete = candelete; - bc->bc_rdonly = ro; - bc->bc_size = size; - bc->bc_sectsz = sectsz; - bc->bc_psectsz = psectsz; - bc->bc_psectoff = psectoff; - pthread_mutex_init(&bc->bc_mtx, NULL); - pthread_cond_init(&bc->bc_cond, NULL); - TAILQ_INIT(&bc->bc_freeq); - TAILQ_INIT(&bc->bc_pendq); - TAILQ_INIT(&bc->bc_busyq); - for (i = 0; i < BLOCKIF_MAXREQ; i++) { - bc->bc_reqs[i].be_status = BST_FREE; - TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); - } - - for (i = 0; i < BLOCKIF_NUMTHR; i++) { - pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); - snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); - pthread_set_name_np(bc->bc_btid[i], tname); - } - - return (bc); -err: - if (fd >= 0) - close(fd); - free(nopt); - return (NULL); + } + return err; } -static int -blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, - enum blockop op) +off_t +blockbe_size(block_backend_t *be) { - int err; - - err = 0; - - pthread_mutex_lock(&bc->bc_mtx); - if (!TAILQ_EMPTY(&bc->bc_freeq)) { - /* - * Enqueue and inform the block i/o thread - * that there is work available - */ - if (blockif_enqueue(bc, breq, op)) - pthread_cond_signal(&bc->bc_cond); - } else { - /* - * Callers are not allowed to enqueue more than - * the specified blockif queue limit. Return an - * error to indicate that the queue length has been - * exceeded. - */ - err = E2BIG; - } - pthread_mutex_unlock(&bc->bc_mtx); - - return (err); + assert(be != NULL); + return (be->size(be)); } -int -blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) +void +blockbe_chs(block_backend_t *be, uint16_t *c, uint8_t *h, + uint8_t *s) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_READ)); + assert(be != NULL); + (be->chs(be, c, h, s)); } int -blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) +blockbe_sectsz(block_backend_t *be) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_WRITE)); + assert(be != NULL); + return (be->sectsz(be)); } -int -blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) +void +blockbe_psectsz(block_backend_t *be, int *size, int *off) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_FLUSH)); + assert(be != NULL); + return (be->psectsz(be, size, off)); } int -blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) +blockbe_queuesz(block_backend_t *be) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_DELETE)); + assert(be != NULL); + return (be->queuesz(be)); } int -blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) +blockbe_is_ro(block_backend_t *be) { - struct blockif_elem *be; - - assert(bc->bc_magic == BLOCKIF_SIG); - - pthread_mutex_lock(&bc->bc_mtx); - /* - * Check pending requests. - */ - TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { - if (be->be_req == breq) - break; - } - if (be != NULL) { - /* - * Found it. 
- */ - blockif_complete(bc, be); - pthread_mutex_unlock(&bc->bc_mtx); - - return (0); - } - - /* - * Check in-flight requests. - */ - TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { - if (be->be_req == breq) - break; - } - if (be == NULL) { - /* - * Didn't find it. - */ - pthread_mutex_unlock(&bc->bc_mtx); - return (EINVAL); - } - - /* - * Interrupt the processing thread to force it return - * prematurely via it's normal callback path. - */ - while (be->be_status == BST_BUSY) { - struct blockif_sig_elem bse, *old_head; - - pthread_mutex_init(&bse.bse_mtx, NULL); - pthread_cond_init(&bse.bse_cond, NULL); - - bse.bse_pending = 1; - - do { - old_head = blockif_bse_head; - bse.bse_next = old_head; - } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, - (uintptr_t)old_head, - (uintptr_t)&bse)); - - pthread_kill(be->be_tid, SIGCONT); - - pthread_mutex_lock(&bse.bse_mtx); - while (bse.bse_pending) - pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); - pthread_mutex_unlock(&bse.bse_mtx); - } - - pthread_mutex_unlock(&bc->bc_mtx); - - /* - * The processing thread has been interrupted. Since it's not - * clear if the callback has been invoked yet, return EBUSY. - */ - return (EBUSY); + assert(be != NULL); + return (be->is_ro(be)); } int -blockif_close(struct blockif_ctxt *bc) +blockbe_candelete(block_backend_t *be) { - void *jval; - int i; - - assert(bc->bc_magic == BLOCKIF_SIG); - - /* - * Stop the block i/o thread - */ - pthread_mutex_lock(&bc->bc_mtx); - bc->bc_closing = 1; - pthread_mutex_unlock(&bc->bc_mtx); - pthread_cond_broadcast(&bc->bc_cond); - for (i = 0; i < BLOCKIF_NUMTHR; i++) - pthread_join(bc->bc_btid[i], &jval); - - /* XXX Cancel queued i/o's ??? */ - - /* - * Release resources - */ - bc->bc_magic = 0; - close(bc->bc_fd); - free(bc); - - return (0); + assert(be != NULL); + return (be->candelete(be)); } -/* - * Return virtual C/H/S values for a given block. Use the algorithm - * outlined in the VHD specification to calculate values. 
- */ -void -blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) +int +blockbe_read(block_backend_t *be, struct blockif_req *breq) { - off_t sectors; /* total sectors of the block dev */ - off_t hcyl; /* cylinders times heads */ - uint16_t secpt; /* sectors per track */ - uint8_t heads; - - assert(bc->bc_magic == BLOCKIF_SIG); - - sectors = bc->bc_size / bc->bc_sectsz; - - /* Clamp the size to the largest possible with CHS */ - if (sectors > 65535UL*16*255) - sectors = 65535UL*16*255; - - if (sectors >= 65536UL*16*63) { - secpt = 255; - heads = 16; - hcyl = sectors / secpt; - } else { - secpt = 17; - hcyl = sectors / secpt; - heads = (hcyl + 1023) / 1024; - - if (heads < 4) - heads = 4; - - if (hcyl >= (heads * 1024) || heads > 16) { - secpt = 31; - heads = 16; - hcyl = sectors / secpt; - } - if (hcyl >= (heads * 1024)) { - secpt = 63; - heads = 16; - hcyl = sectors / secpt; - } - } - - *c = hcyl / heads; - *h = heads; - *s = secpt; + assert(be != NULL); + return (be->read(be, breq)); } -/* - * Accessors - */ -off_t -blockif_size(struct blockif_ctxt *bc) +int +blockbe_write(block_backend_t *be, struct blockif_req *breq) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (bc->bc_size); + assert(be != NULL); + return (be->write(be, breq)); } int -blockif_sectsz(struct blockif_ctxt *bc) +blockbe_flush(block_backend_t *be, struct blockif_req *breq) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (bc->bc_sectsz); + assert(be != NULL); + return (be->flush(be, breq)); } -void -blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) -{ - - assert(bc->bc_magic == BLOCKIF_SIG); - *size = bc->bc_psectsz; - *off = bc->bc_psectoff; -} - int -blockif_queuesz(struct blockif_ctxt *bc) +blockbe_delete(block_backend_t *be, struct blockif_req *breq) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (BLOCKIF_MAXREQ - 1); + assert(be != NULL); + return (be->delete(be, breq)); } int -blockif_is_ro(struct blockif_ctxt *bc) +blockbe_cancel(block_backend_t *be, struct blockif_req *breq) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (bc->bc_rdonly); + assert(be != NULL); + return (be->cancel(be, breq)); } int -blockif_candelete(struct blockif_ctxt *bc) +blockbe_close(block_backend_t *be) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (bc->bc_candelete); + assert(be != NULL); + return (be->close(be)); } Index: usr.sbin/bhyve/block_if.h =================================================================== --- usr.sbin/bhyve/block_if.h +++ usr.sbin/bhyve/block_if.h @@ -38,41 +38,45 @@ #ifndef _BLOCK_IF_H_ #define _BLOCK_IF_H_ +#include #include #include -/* - * BLOCKIF_IOV_MAX is the maximum number of scatter/gather entries in - * a single request. BLOCKIF_RING_MAX is the maxmimum number of - * pending requests that can be queued. 
- */ -#define BLOCKIF_IOV_MAX 128 /* not practical to be IOV_MAX */ -#define BLOCKIF_RING_MAX 128 +#include "block_backends.h" -struct blockif_req { - int br_iovcnt; - off_t br_offset; - ssize_t br_resid; - void (*br_callback)(struct blockif_req *req, int err); - void *br_param; - struct iovec br_iov[BLOCKIF_IOV_MAX]; +struct locblk_elem { + TAILQ_ENTRY(locblk_elem) be_link; + struct blockif_req *be_req; + enum blockop be_op; + enum blockstat be_status; + pthread_t be_tid; + off_t be_block; }; +typedef struct locblk_elem locblk_elem_t; -struct blockif_ctxt; -struct blockif_ctxt *blockif_open(const char *optstr, const char *ident); -off_t blockif_size(struct blockif_ctxt *bc); -void blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, - uint8_t *s); -int blockif_sectsz(struct blockif_ctxt *bc); -void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off); -int blockif_queuesz(struct blockif_ctxt *bc); -int blockif_is_ro(struct blockif_ctxt *bc); -int blockif_candelete(struct blockif_ctxt *bc); -int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq); -int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq); -int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq); -int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq); -int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq); -int blockif_close(struct blockif_ctxt *bc); +struct locblk_ctxt { + int bc_magic; + int bc_fd; + int bc_ischr; + int bc_isgeom; + int bc_candelete; + int bc_rdonly; + off_t bc_size; + int bc_sectsz; + int bc_psectsz; + int bc_psectoff; + int bc_closing; + pthread_t bc_btid[BLOCKIF_NUMTHR]; + pthread_mutex_t bc_mtx; + pthread_cond_t bc_cond; + /* Request elements and free/pending/busy queues */ + TAILQ_HEAD(, locblk_elem) bc_freeq; + TAILQ_HEAD(, locblk_elem) bc_pendq; + TAILQ_HEAD(, locblk_elem) bc_busyq; + + /* */ + struct locblk_elem bc_reqs[BLOCKIF_MAXREQ]; +}; +typedef struct locblk_ctxt locblk_ctxt_t; #endif /* _BLOCK_IF_H_ */ Index: usr.sbin/bhyve/block_if.c =================================================================== --- usr.sbin/bhyve/block_if.c +++ usr.sbin/bhyve/block_if.c @@ -58,79 +58,31 @@ #include +#include + #include "bhyverun.h" #include "debug.h" #include "mevent.h" #include "block_if.h" -#define BLOCKIF_SIG 0xb109b109 +SET_DECLARE(block_backend_set, block_backend_t); -#define BLOCKIF_NUMTHR 8 -#define BLOCKIF_MAXREQ (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR) +static pthread_once_t locblk_once = PTHREAD_ONCE_INIT; -enum blockop { - BOP_READ, - BOP_WRITE, - BOP_FLUSH, - BOP_DELETE -}; - -enum blockstat { - BST_FREE, - BST_BLOCK, - BST_PEND, - BST_BUSY, - BST_DONE -}; - -struct blockif_elem { - TAILQ_ENTRY(blockif_elem) be_link; - struct blockif_req *be_req; - enum blockop be_op; - enum blockstat be_status; - pthread_t be_tid; - off_t be_block; -}; - -struct blockif_ctxt { - int bc_magic; - int bc_fd; - int bc_ischr; - int bc_isgeom; - int bc_candelete; - int bc_rdonly; - off_t bc_size; - int bc_sectsz; - int bc_psectsz; - int bc_psectoff; - int bc_closing; - pthread_t bc_btid[BLOCKIF_NUMTHR]; - pthread_mutex_t bc_mtx; - pthread_cond_t bc_cond; - - /* Request elements and free/pending/busy queues */ - TAILQ_HEAD(, blockif_elem) bc_freeq; - TAILQ_HEAD(, blockif_elem) bc_pendq; - TAILQ_HEAD(, blockif_elem) bc_busyq; - struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; -}; - -static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; - -struct blockif_sig_elem { +struct locblk_sig_elem { pthread_mutex_t bse_mtx; pthread_cond_t 
bse_cond; int bse_pending; - struct blockif_sig_elem *bse_next; + struct locblk_sig_elem *bse_next; }; -static struct blockif_sig_elem *blockif_bse_head; +static struct locblk_sig_elem *locblk_bse_head; static int -blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, +locblk_enqueue(struct locblk_ctxt *bc, struct blockif_req *breq, enum blockop op) { - struct blockif_elem *be, *tbe; + struct locblk_elem *be, *tbe; off_t off; int i; @@ -171,9 +123,9 @@ } static int -blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) +locblk_dequeue(struct locblk_ctxt *bc, pthread_t t, struct locblk_elem **bep) { - struct blockif_elem *be; + struct locblk_elem *be; TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { if (be->be_status == BST_PEND) @@ -191,9 +143,9 @@ } static void -blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) +locblk_complete(struct locblk_ctxt *bc, struct locblk_elem *be) { - struct blockif_elem *tbe; + struct locblk_elem *tbe; if (be->be_status == BST_DONE || be->be_status == BST_BUSY) TAILQ_REMOVE(&bc->bc_busyq, be, be_link); @@ -210,7 +162,7 @@ } static void -blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) +locblk_proc(struct locblk_ctxt *bc, struct locblk_elem *be, uint8_t *buf) { struct blockif_req *br; off_t arg[2]; @@ -331,10 +283,10 @@ } static void * -blockif_thr(void *arg) +locblk_thr(void *arg) { - struct blockif_ctxt *bc; - struct blockif_elem *be; + struct locblk_ctxt *bc; + struct locblk_elem *be; pthread_t t; uint8_t *buf; @@ -347,11 +299,11 @@ pthread_mutex_lock(&bc->bc_mtx); for (;;) { - while (blockif_dequeue(bc, t, &be)) { + while (locblk_dequeue(bc, t, &be)) { pthread_mutex_unlock(&bc->bc_mtx); - blockif_proc(bc, be, buf); + locblk_proc(bc, be, buf); pthread_mutex_lock(&bc->bc_mtx); - blockif_complete(bc, be); + locblk_complete(bc, be); } /* Check ctxt status here to see if exit requested */ if (bc->bc_closing) @@ -367,9 +319,9 @@ } static void -blockif_sigcont_handler(int signal, enum ev_type type, void *arg) +locblk_sigcont_handler(int signal, enum ev_type type, void *arg) { - struct blockif_sig_elem *bse; + struct locblk_sig_elem *bse; for (;;) { /* @@ -377,10 +329,10 @@ * this thread. */ do { - bse = blockif_bse_head; + bse = locblk_bse_head; if (bse == NULL) return; - } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, + } while (!atomic_cmpset_ptr((uintptr_t *)&locblk_bse_head, (uintptr_t)bse, (uintptr_t)bse->bse_next)); @@ -391,20 +343,63 @@ } } +static int +locblk_request(struct locblk_ctxt *bc, struct blockif_req *breq, + enum blockop op) +{ + int err = 0; + + pthread_mutex_lock(&bc->bc_mtx); + if (!TAILQ_EMPTY(&bc->bc_freeq)) { + /* + * Enqueue and inform the block i/o thread + * that there is work available + */ + if (locblk_enqueue(bc, breq, op)) + pthread_cond_signal(&bc->bc_cond); + } else { + /* + * Callers are not allowed to enqueue more than + * the specified blockif queue limit. Return an + * error to indicate that the queue length has been + * exceeded. + */ + err = E2BIG; + } + pthread_mutex_unlock(&bc->bc_mtx); + + return (err); +} + +/* + * Routines for access blockdevices on the local system. 
+ * be it either a file, or a block device + */ static void -blockif_init(void) +locblk_init(void) { - mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); + mevent_add(SIGCONT, EVF_SIGNAL, locblk_sigcont_handler, NULL); (void) signal(SIGCONT, SIG_IGN); } -struct blockif_ctxt * -blockif_open(const char *optstr, const char *ident) +static void +locblk_cleanup(block_backend_t **be) +{ /* empty block + * currently no cleanup required. + */ +} + +static block_backend_t locblk_backend; + +static int +locblk_open(block_backend_t **be, const char *optstr, const char *pci_ident, + const char *be_ident) { + char tname[MAXCOMLEN + 1]; char name[MAXPATHLEN]; char *nopt, *xopts, *cp; - struct blockif_ctxt *bc; + struct locblk_ctxt *bc; struct stat sbuf; struct diocgattr_arg arg; off_t size, psectsz, psectoff; @@ -415,7 +410,7 @@ cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE }; #endif - pthread_once(&blockif_once, blockif_init); + pthread_once(&locblk_once, locblk_init); fd = -1; ssopt = 0; @@ -454,6 +449,14 @@ if (sync) extra |= O_SYNC; + /* + * device and option parsing completed correctly so set the + * backend return value to this backend. + * Anything that returns an error below is due to errors in + * handling of the device. But this is the correct backend. + */ + *be = &locblk_backend; + fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); if (fd < 0 && !ro) { /* Attempt a r/w fail with a r/o open */ @@ -462,14 +465,14 @@ } if (fd < 0) { - warn("Could not open backing file: %s", nopt); + warn("blk-local: Could not open backing file: %s", nopt); goto err; } - if (fstat(fd, &sbuf) < 0) { + if (fstat(fd, &sbuf) < 0) { warn("Could not stat backing file %s", nopt); goto err; - } + } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK, @@ -481,10 +484,10 @@ errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif - /* + /* * Deal with raw devices */ - size = sbuf.st_size; + size = sbuf.st_size; sectsz = DEV_BSIZE; psectsz = psectoff = 0; candelete = geom = 0; @@ -541,11 +544,13 @@ psectoff = 0; } - bc = calloc(1, sizeof(struct blockif_ctxt)); + bc = calloc(1, sizeof(struct locblk_ctxt)); if (bc == NULL) { perror("calloc"); goto err; } + /* And update the backend descriptor for this */ + (*be)->bc = (struct blockif_ctxt*)bc; bc->bc_magic = BLOCKIF_SIG; bc->bc_fd = fd; @@ -568,101 +573,76 @@ } for (i = 0; i < BLOCKIF_NUMTHR; i++) { - pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); - snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); + pthread_create(&bc->bc_btid[i], NULL, locblk_thr, bc); + snprintf(tname, sizeof(tname), "blk-%s-%d", pci_ident, i); pthread_set_name_np(bc->bc_btid[i], tname); } - return (bc); + return (0); err: if (fd >= 0) close(fd); free(nopt); - return (NULL); + return (-1); } -static int -blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, - enum blockop op) -{ - int err; - err = 0; - - pthread_mutex_lock(&bc->bc_mtx); - if (!TAILQ_EMPTY(&bc->bc_freeq)) { - /* - * Enqueue and inform the block i/o thread - * that there is work available - */ - if (blockif_enqueue(bc, breq, op)) - pthread_cond_signal(&bc->bc_cond); - } else { - /* - * Callers are not allowed to enqueue more than - * the specified blockif queue limit. Return an - * error to indicate that the queue length has been - * exceeded. 
- */ - err = E2BIG; - } - pthread_mutex_unlock(&bc->bc_mtx); - - return (err); -} - int -blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) +locblk_read(block_backend_t *be, struct blockif_req *breq) { - + locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc); assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_READ)); + return (locblk_request(bc, breq, BOP_READ)); } int -blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) +locblk_write(block_backend_t *be, struct blockif_req *breq) { + locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc); assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_WRITE)); + return (locblk_request(bc, breq, BOP_WRITE)); } int -blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) +locblk_flush(block_backend_t *be, struct blockif_req *breq) { + locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc); assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_FLUSH)); + return (locblk_request(bc, breq, BOP_FLUSH)); } int -blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) +locblk_delete(block_backend_t *be, struct blockif_req *breq) { + locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc); assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_DELETE)); + return (locblk_request(bc, breq, BOP_DELETE)); } int -blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) +locblk_cancel(block_backend_t *be, struct blockif_req *breq) { - struct blockif_elem *be; + struct locblk_elem *belem; + locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc); assert(bc->bc_magic == BLOCKIF_SIG); pthread_mutex_lock(&bc->bc_mtx); /* * Check pending requests. */ - TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { - if (be->be_req == breq) + TAILQ_FOREACH(belem, &bc->bc_pendq, be_link) { + if (belem->be_req == breq) break; } if (be != NULL) { /* * Found it. */ - blockif_complete(bc, be); + locblk_complete(bc, belem); pthread_mutex_unlock(&bc->bc_mtx); return (0); @@ -671,11 +651,11 @@ /* * Check in-flight requests. */ - TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { - if (be->be_req == breq) + TAILQ_FOREACH(belem, &bc->bc_busyq, be_link) { + if (belem->be_req == breq) break; } - if (be == NULL) { + if (belem == NULL) { /* * Didn't find it. */ @@ -687,8 +667,8 @@ * Interrupt the processing thread to force it return * prematurely via it's normal callback path. */ - while (be->be_status == BST_BUSY) { - struct blockif_sig_elem bse, *old_head; + while (belem->be_status == BST_BUSY) { + struct locblk_sig_elem bse, *old_head; pthread_mutex_init(&bse.bse_mtx, NULL); pthread_cond_init(&bse.bse_cond, NULL); @@ -696,13 +676,13 @@ bse.bse_pending = 1; do { - old_head = blockif_bse_head; + old_head = locblk_bse_head; bse.bse_next = old_head; - } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, + } while (!atomic_cmpset_ptr((uintptr_t *)&locblk_bse_head, (uintptr_t)old_head, (uintptr_t)&bse)); - pthread_kill(be->be_tid, SIGCONT); + pthread_kill(belem->be_tid, SIGCONT); pthread_mutex_lock(&bse.bse_mtx); while (bse.bse_pending) @@ -720,11 +700,12 @@ } int -blockif_close(struct blockif_ctxt *bc) +locblk_close(block_backend_t *be) { void *jval; int i; + locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc); assert(bc->bc_magic == BLOCKIF_SIG); /* @@ -754,13 +735,14 @@ * outlined in the VHD specification to calculate values. 
*/ void -blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) +locblk_chs(block_backend_t *be, uint16_t *c, uint8_t *h, uint8_t *s) { off_t sectors; /* total sectors of the block dev */ off_t hcyl; /* cylinders times heads */ uint16_t secpt; /* sectors per track */ uint8_t heads; + locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc); assert(bc->bc_magic == BLOCKIF_SIG); sectors = bc->bc_size / bc->bc_sectsz; @@ -802,50 +784,73 @@ * Accessors */ off_t -blockif_size(struct blockif_ctxt *bc) +locblk_size(block_backend_t *be) { - + locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc); assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_size); } int -blockif_sectsz(struct blockif_ctxt *bc) +locblk_sectsz(block_backend_t *be) { - + locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc); assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_sectsz); } void -blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) +locblk_psectsz(block_backend_t *be, int *size, int *off) { - + locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc); assert(bc->bc_magic == BLOCKIF_SIG); *size = bc->bc_psectsz; *off = bc->bc_psectoff; } int -blockif_queuesz(struct blockif_ctxt *bc) +locblk_queuesz(block_backend_t *be) { - + locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc); assert(bc->bc_magic == BLOCKIF_SIG); return (BLOCKIF_MAXREQ - 1); } int -blockif_is_ro(struct blockif_ctxt *bc) +locblk_is_ro(block_backend_t *be) { - + locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc); assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_rdonly); } int -blockif_candelete(struct blockif_ctxt *bc) +locblk_candelete(block_backend_t *be) { - + locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc); assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_candelete); } + +static +block_backend_t locblk_backend = { + .prefix = "blk-local", + .init = locblk_init, + .cleanup = locblk_cleanup, + .open = locblk_open, + .size = locblk_size, + .chs = locblk_chs, + .sectsz = locblk_sectsz, + .psectsz = locblk_psectsz, + .queuesz = locblk_queuesz, + .is_ro = locblk_is_ro, + .candelete = locblk_candelete, + .read = locblk_read, + .write = locblk_write, + .flush = locblk_flush, + .delete = locblk_delete, + .cancel = locblk_cancel, + .close = locblk_close, + .priv_size = 0, +}; +DATA_SET(block_backend_set, locblk_backend); Index: usr.sbin/bhyve/pci_ahci.c =================================================================== --- usr.sbin/bhyve/pci_ahci.c +++ usr.sbin/bhyve/pci_ahci.c @@ -134,6 +134,7 @@ }; struct ahci_port { + block_backend_t *be; struct blockif_ctxt *bctx; struct pci_ahci_softc *pr_sc; uint8_t *cmd_lst; @@ -492,7 +493,7 @@ /* * Try to cancel the outstanding blockif request. */ - error = blockif_cancel(p->bctx, &aior->io_req); + error = blockbe_cancel(p->be, &aior->io_req); if (error != 0) continue; @@ -639,7 +640,7 @@ /* If we got limited by IOV length, round I/O down to sector size. 
*/ if (j == BLOCKIF_IOV_MAX) { - extra = todo % blockif_sectsz(p->bctx); + extra = todo % blockbe_sectsz(p->be); todo -= extra; assert(todo > 0); while (extra > 0) { @@ -712,8 +713,8 @@ if (!len) len = 256; } - lba *= blockif_sectsz(p->bctx); - len *= blockif_sectsz(p->bctx); + lba *= blockbe_sectsz(p->be); + len *= blockbe_sectsz(p->be); /* Pull request off free list */ aior = STAILQ_FIRST(&p->iofhd); @@ -738,9 +739,9 @@ ahci_write_fis_d2h_ncq(p, slot); if (readop) - err = blockif_read(p->bctx, breq); + err = blockbe_read(p->be, breq); else - err = blockif_write(p->bctx, breq); + err = blockbe_write(p->be, breq); assert(err == 0); } @@ -774,7 +775,7 @@ */ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); - err = blockif_flush(p->bctx, breq); + err = blockbe_flush(p->be, breq); assert(err == 0); } @@ -872,8 +873,8 @@ aior->more = (len != done); breq = &aior->io_req; - breq->br_offset = elba * blockif_sectsz(p->bctx); - breq->br_resid = elen * blockif_sectsz(p->bctx); + breq->br_offset = elba * blockbe_sectsz(p->be); + breq->br_resid = elen * blockbe_sectsz(p->be); /* * Mark this command in-flight. @@ -888,7 +889,7 @@ if (ncq && first) ahci_write_fis_d2h_ncq(p, slot); - err = blockif_delete(p->bctx, breq); + err = blockbe_delete(p->be, breq); assert(err == 0); } @@ -957,7 +958,7 @@ memcpy(buf8, p->err_cfis, sizeof(p->err_cfis)); ahci_checksum(buf8, sizeof(buf)); } else if (cfis[4] == 0x13) { /* SATA NCQ Send and Receive Log */ - if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) { + if (blockbe_candelete(p->be) && !blockbe_is_ro(p->be)) { buf[0x00] = 1; /* SFQ DSM supported */ buf[0x01] = 1; /* SFQ DSM TRIM supported */ } @@ -989,12 +990,12 @@ uint16_t cyl; uint8_t sech, heads; - ro = blockif_is_ro(p->bctx); - candelete = blockif_candelete(p->bctx); - sectsz = blockif_sectsz(p->bctx); - sectors = blockif_size(p->bctx) / sectsz; - blockif_chs(p->bctx, &cyl, &heads, &sech); - blockif_psectsz(p->bctx, &psectsz, &psectoff); + ro = blockbe_is_ro(p->be); + candelete = blockbe_candelete(p->be); + sectsz = blockbe_sectsz(p->be); + sectors = blockbe_size(p->be) / sectsz; + blockbe_chs(p->be, &cyl, &heads, &sech); + blockbe_psectsz(p->be, &psectsz, &psectoff); memset(buf, 0, sizeof(buf)); buf[0] = 0x0040; buf[1] = cyl; @@ -1181,7 +1182,7 @@ uint8_t buf[8]; uint64_t sectors; - sectors = blockif_size(p->bctx) / 2048; + sectors = blockbe_size(p->be) / 2048; be32enc(buf, sectors - 1); be32enc(buf + 4, 2048); cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; @@ -1241,7 +1242,7 @@ *bp++ = 0x14; *bp++ = 0xaa; *bp++ = 0; - sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx); + sectors = blockbe_size(p->be) / blockbe_sectsz(p->be); sectors >>= 2; if (msf) { *bp++ = 0; @@ -1317,7 +1318,7 @@ *bp++ = 0; *bp++ = 0; *bp++ = 0; - sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx); + sectors = blockbe_size(p->be) / blockbe_sectsz(p->be); sectors >>= 2; if (msf) { *bp++ = 0; @@ -1430,7 +1431,7 @@ /* Stuff request onto busy list. 
*/ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); - err = blockif_read(p->bctx, breq); + err = blockbe_read(p->be, breq); assert(err == 0); } @@ -2020,7 +2021,7 @@ struct ahci_ioreq *vr; int i; - pr->ioqsz = blockif_queuesz(pr->bctx); + pr->ioqsz = blockbe_queuesz(pr->be); pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq)); STAILQ_INIT(&pr->iofhd); @@ -2358,12 +2359,19 @@ */ snprintf(bident, sizeof(bident), "%d:%d:%d", pi->pi_slot, pi->pi_func, p); - bctxt = blockif_open(opts, bident); - if (bctxt == NULL) { + + ret = blockbe_open(&(sc->port[p].be),opts, bident); + if (ret != 0) { sc->ports = p; ret = 1; goto open_fail; } + + bctxt = calloc(1, sizeof(struct locblk_ctxt)); + if (bctxt == NULL) { + perror("calloc"); + goto open_fail; + } sc->port[p].bctx = bctxt; sc->port[p].pr_sc = sc; sc->port[p].port = p; @@ -2424,7 +2432,7 @@ if (ret) { for (p = 0; p < sc->ports; p++) { if (sc->port[p].bctx != NULL) - blockif_close(sc->port[p].bctx); + blockbe_close(sc->port[p].be); } free(sc); } Index: usr.sbin/bhyve/pci_nvme.c =================================================================== --- usr.sbin/bhyve/pci_nvme.c +++ usr.sbin/bhyve/pci_nvme.c @@ -169,6 +169,7 @@ }; struct pci_nvme_blockstore { + block_backend_t *be; enum nvme_storage_type type; void *ctx; uint64_t size; @@ -490,7 +491,7 @@ } } else sc->submit_queues = calloc(sc->num_squeues + 1, - sizeof(struct nvme_submission_queue)); + sizeof(struct nvme_submission_queue)); if (sc->compl_queues != NULL) { for (int i = 0; i < sc->num_cqueues + 1; i++) { @@ -505,7 +506,7 @@ } } else { sc->compl_queues = calloc(sc->num_cqueues + 1, - sizeof(struct nvme_completion_queue)); + sizeof(struct nvme_completion_queue)); for (int i = 0; i < sc->num_cqueues + 1; i++) pthread_mutex_init(&sc->compl_queues[i].mtx, NULL); @@ -530,7 +531,7 @@ asqs = (sc->regs.aqa & NVME_AQA_REG_ASQS_MASK) + 1; sc->submit_queues[0].size = asqs; sc->submit_queues[0].qbase = vm_map_gpa(ctx, sc->regs.asq, - sizeof(struct nvme_command) * asqs); + sizeof(struct nvme_command) * asqs); DPRINTF(("%s mapping Admin-SQ guest 0x%lx, host: %p", __func__, sc->regs.asq, sc->submit_queues[0].qbase)); @@ -1171,10 +1172,10 @@ req->io_req.br_callback = pci_nvme_io_partial; if (!do_write) - err = blockif_read(sc->nvstore.ctx, + err = blockbe_read(sc->nvstore.be, &req->io_req); else - err = blockif_write(sc->nvstore.ctx, + err = blockbe_write(sc->nvstore.be, &req->io_req); /* wait until req completes before cont */ @@ -1517,10 +1518,10 @@ err = 0; switch (cmd->opc) { case NVME_OPC_READ: - err = blockif_read(sc->nvstore.ctx, &req->io_req); + err = blockbe_read(sc->nvstore.be, &req->io_req); break; case NVME_OPC_WRITE: - err = blockif_write(sc->nvstore.ctx, &req->io_req); + err = blockbe_write(sc->nvstore.be, &req->io_req); break; default: WPRINTF(("%s unhandled io command 0x%x", @@ -1842,10 +1843,10 @@ static int pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *opts) { - char bident[sizeof("XX:X:X")]; + char pci_ident[sizeof("XX:X:X")]; char *uopt, *xopts, *config; uint32_t sectsz; - int optidx; + int optidx, res; sc->max_queues = NVME_QUEUES; sc->max_qentries = NVME_MAX_QENTRIES; @@ -1897,16 +1898,21 @@ } else if (!strcmp("eui64", xopts)) { sc->nvstore.eui64 = htobe64(strtoull(config, NULL, 0)); } else if (optidx == 0) { - snprintf(bident, sizeof(bident), "%d:%d", + snprintf(pci_ident, sizeof(pci_ident), "%d:%d", sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); - sc->nvstore.ctx = blockif_open(xopts, bident); - if (sc->nvstore.ctx == NULL) { + res = blockbe_open(&(sc->nvstore.be), xopts, 
pci_ident);
+			if (res != 0) {
 				perror("Could not open backing file");
 				free(uopt);
 				return (-1);
 			}
+			sc->nvstore.ctx = calloc(1, sizeof(struct locblk_ctxt));
+			if (sc->nvstore.ctx == NULL) {
+				perror("calloc");
+				return (-1);
+			}
 			sc->nvstore.type = NVME_STOR_BLOCKIF;
-			sc->nvstore.size = blockif_size(sc->nvstore.ctx);
+			sc->nvstore.size = blockbe_size(sc->nvstore.be);
 		} else {
 			EPRINTLN("Invalid option %s", xopts);
 			free(uopt);
@@ -1924,7 +1930,7 @@
 	if (sectsz == 512 || sectsz == 4096 || sectsz == 8192)
 		sc->nvstore.sectsz = sectsz;
 	else if (sc->nvstore.type != NVME_STOR_RAM)
-		sc->nvstore.sectsz = blockif_sectsz(sc->nvstore.ctx);
+		sc->nvstore.sectsz = blockbe_sectsz(sc->nvstore.be);
 	for (sc->nvstore.sectsz_bits = 9;
 	     (1 << sc->nvstore.sectsz_bits) < sc->nvstore.sectsz;
 	     sc->nvstore.sectsz_bits++);
Index: usr.sbin/bhyve/pci_virtio_block.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_block.c
+++ usr.sbin/bhyve/pci_virtio_block.c
@@ -123,7 +123,7 @@
 /*
  * Debug printf
  */
-static int pci_vtblk_debug;
+static int pci_vtblk_debug = 0;
 #define DPRINTF(params) if (pci_vtblk_debug) PRINTLN params
 #define WPRINTF(params) PRINTLN params
 
@@ -140,6 +140,7 @@
 struct pci_vtblk_softc {
 	struct virtio_softc vbsc_vs;
 	pthread_mutex_t vsc_mtx;
+	block_backend_t *vbsc_be;
 	struct vqueue_info vbsc_vq;
 	struct vtblk_config vbsc_cfg;
 	struct blockif_ctxt *bc;
@@ -259,14 +260,14 @@
 
 	switch (type) {
 	case VBH_OP_READ:
-		err = blockif_read(sc->bc, &io->io_req);
+		err = blockbe_read(sc->vbsc_be, &io->io_req);
 		break;
 	case VBH_OP_WRITE:
-		err = blockif_write(sc->bc, &io->io_req);
+		err = blockbe_write(sc->vbsc_be, &io->io_req);
 		break;
 	case VBH_OP_FLUSH:
 	case VBH_OP_FLUSH_OUT:
-		err = blockif_flush(sc->bc, &io->io_req);
+		err = blockbe_flush(sc->vbsc_be, &io->io_req);
 		break;
 	case VBH_OP_IDENT:
 		/* Assume a single buffer */
@@ -295,35 +296,36 @@
 static int
 pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
 {
-	char bident[sizeof("XX:X:X")];
-	struct blockif_ctxt *bctxt;
+	char pci_ident[sizeof("XX:X:X")];
+	char be_ident[16];
 	MD5_CTX mdctx;
 	u_char digest[16];
 	struct pci_vtblk_softc *sc;
 	off_t size;
-	int i, sectsz, sts, sto;
+	int i, res, sectsz, sts, sto;
 
 	if (opts == NULL) {
 		WPRINTF(("virtio-block: backing device required"));
 		return (1);
 	}
-
-	/*
+	sc = calloc(1, sizeof(struct pci_vtblk_softc));
+
+	/*
 	 * The supplied backing file has to exist
 	 */
-	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
-	bctxt = blockif_open(opts, bident);
-	if (bctxt == NULL) {
-		perror("Could not open backing file");
+	snprintf(pci_ident, sizeof(pci_ident), "%d:%d", pi->pi_slot, pi->pi_func);
+	snprintf(be_ident, sizeof(be_ident), "%s", "blk-local");
+	res = blockbe_open(&sc->vbsc_be, opts, pci_ident);
+	if (res != 0) {
+		perror("virtio-block: Could not open backing file");
+		free(sc);
 		return (1);
 	}
-	size = blockif_size(bctxt);
-	sectsz = blockif_sectsz(bctxt);
-	blockif_psectsz(bctxt, &sts, &sto);
-
-	sc = calloc(1, sizeof(struct pci_vtblk_softc));
-	sc->bc = bctxt;
+	size = blockbe_size(sc->vbsc_be);
+	sectsz = blockbe_sectsz(sc->vbsc_be);
+	blockbe_psectsz(sc->vbsc_be, &sts, &sto);
+
 	for (i = 0; i < VTBLK_RINGSZ; i++) {
 		struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
 		io->io_req.br_callback = pci_vtblk_done;
@@ -388,7 +390,7 @@
 	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
 
 	if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
-		blockif_close(sc->bc);
+		blockbe_close(sc->vbsc_be);
 		free(sc);
 		return (1);
 	}
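
For reviewers trying to picture how the linker-set registration is meant to be consumed, here is a minimal sketch of what an additional backend could look like. It only assumes the block_backend_t structure, the blockif_req layout and the block_backend_set registration introduced by this patch; the "blk-null" name, the null:// option syntax and the fixed geometry are invented purely for illustration, and only a handful of methods are filled in.

/*
 * Hypothetical "blk-null" backend: a read-only, zero-filled disk.
 * Illustrative sketch only -- not part of this patch.
 */
#include <sys/param.h>
#include <sys/linker_set.h>

#include <errno.h>
#include <string.h>

#include "block_backends.h"

#define	NULLBLK_SIZE	(16 * 1024 * 1024)	/* arbitrary 16 MiB */
#define	NULLBLK_SECTSZ	512

static block_backend_t nullblk_backend;

/* Claim the device only if the option string uses the made-up null:// scheme. */
static int
nullblk_open(block_backend_t **be, const char *optstr, const char *pci_ident,
    const char *be_ident)
{
	if (strncmp(optstr, "null://", 7) != 0)
		return (-1);	/* not ours; blockbe_open() tries the next backend */
	*be = &nullblk_backend;
	return (0);
}

static off_t
nullblk_size(block_backend_t *be)
{
	return (NULLBLK_SIZE);
}

static int
nullblk_sectsz(block_backend_t *be)
{
	return (NULLBLK_SECTSZ);
}

static int
nullblk_is_ro(block_backend_t *be)
{
	return (1);
}

static int
nullblk_read(block_backend_t *be, struct blockif_req *br)
{
	int i;

	/* Complete synchronously: the guest reads zero-filled sectors. */
	for (i = 0; i < br->br_iovcnt; i++)
		memset(br->br_iov[i].iov_base, 0, br->br_iov[i].iov_len);
	br->br_resid = 0;
	(*br->br_callback)(br, 0);
	return (0);
}

static int
nullblk_write(block_backend_t *be, struct blockif_req *br)
{
	/* Read-only device: fail writes through the normal completion path. */
	(*br->br_callback)(br, EROFS);
	return (0);
}

static int
nullblk_close(block_backend_t *be)
{
	return (0);
}

static block_backend_t nullblk_backend = {
	.prefix		= "blk-null",
	.open		= nullblk_open,
	.size		= nullblk_size,
	.sectsz		= nullblk_sectsz,
	.is_ro		= nullblk_is_ro,
	.read		= nullblk_read,
	.write		= nullblk_write,
	.close		= nullblk_close,
	.priv_size	= 0,
};
DATA_SET(block_backend_set, nullblk_backend);

The methods left NULL here (chs, psectsz, queuesz, candelete, flush, delete, cancel, init, cleanup) would have to be provided before a frontend could actually use such a backend, since the blockbe_*() wrappers call through the function pointers unconditionally.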
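
And this is roughly how a frontend drives the new blockbe_*() wrappers after this change, mirroring pci_virtio_block.c. The helper name and completion callback below are illustrative only; the key point is that blockbe_open() selects the backend from the option string and that blockbe_read() completes asynchronously through br_callback.

#include <stdio.h>
#include <stdlib.h>

#include "block_backends.h"

/* Completion callback: runs in a backend worker thread. */
static void
example_done(struct blockif_req *br, int err)
{
	printf("I/O finished: error=%d resid=%zd\n", err, br->br_resid);
}

/* Hypothetical helper: open a backing store and queue a read of sector 0. */
static int
example_read_first_sector(const char *optstr, const char *pci_ident)
{
	block_backend_t *be = NULL;
	struct blockif_req *br;
	int error, sectsz;

	/* Walks block_backend_set and picks the first backend that accepts optstr. */
	error = blockbe_open(&be, optstr, pci_ident);
	if (error != 0)
		return (error);

	sectsz = blockbe_sectsz(be);
	br = calloc(1, sizeof(*br));
	br->br_iov[0].iov_base = malloc(sectsz);
	br->br_iov[0].iov_len = sectsz;
	br->br_iovcnt = 1;
	br->br_offset = 0;
	br->br_resid = sectsz;
	br->br_callback = example_done;

	/*
	 * Asynchronous: br and its buffer must stay valid until example_done()
	 * runs, and blockbe_close(be) may only be called once all outstanding
	 * requests have completed.
	 */
	return (blockbe_read(be, br));
}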