Index: usr.sbin/bhyve/Makefile =================================================================== --- usr.sbin/bhyve/Makefile +++ usr.sbin/bhyve/Makefile @@ -20,6 +20,7 @@ bhyvegc.c \ bhyverun.c \ block_if.c \ + block_local.c \ bootrom.c \ console.c \ consport.c \ Index: usr.sbin/bhyve/block_if.h =================================================================== --- usr.sbin/bhyve/block_if.h +++ usr.sbin/bhyve/block_if.h @@ -38,6 +38,9 @@ #ifndef _BLOCK_IF_H_ #define _BLOCK_IF_H_ +#include + +#include #include #include @@ -49,6 +52,26 @@ #define BLOCKIF_IOV_MAX 128 /* not practical to be IOV_MAX */ #define BLOCKIF_RING_MAX 128 +#define BLOCKIF_SIG 0xb109b109 + +#define BLOCKIF_NUMTHR 8 +#define BLOCKIF_MAXREQ (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR) + +enum blockop { + BOP_READ, + BOP_WRITE, + BOP_FLUSH, + BOP_DELETE +}; + +enum blockstat { + BST_FREE, + BST_BLOCK, + BST_PEND, + BST_BUSY, + BST_DONE +}; + struct blockif_req { int br_iovcnt; off_t br_offset; @@ -57,12 +80,50 @@ void *br_param; struct iovec br_iov[BLOCKIF_IOV_MAX]; }; +typedef struct blockif_req blockif_req_t; -struct blockif_ctxt; -struct blockif_ctxt *blockif_open(const char *optstr, const char *ident); +struct blockif_elem { + TAILQ_ENTRY(blockif_elem) be_link; + struct blockif_req *be_req; + enum blockop be_op; + enum blockstat be_status; + pthread_t be_tid; + off_t be_block; +}; +typedef struct blockif_elem blockif_elem_t; + +/* Opaque type representing a block device backend. 
*/ +typedef struct block_backend block_backend_t; + +struct blockif_ctxt { + int bc_magic; + int bc_fd; + int bc_ischr; + int bc_isgeom; + int bc_candelete; + int bc_rdonly; + off_t bc_size; + int bc_sectsz; + int bc_psectsz; + int bc_psectoff; + int bc_closing; + pthread_t bc_btid[BLOCKIF_NUMTHR]; + pthread_mutex_t bc_mtx; + pthread_cond_t bc_cond; + + block_backend_t *be; + + /* Request elements and free/pending/busy queues */ + TAILQ_HEAD(, blockif_elem) bc_freeq; + TAILQ_HEAD(, blockif_elem) bc_pendq; + TAILQ_HEAD(, blockif_elem) bc_busyq; + struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; +}; +typedef struct blockif_ctxt blockif_ctxt_t; + +blockif_ctxt_t *blockif_open(const char *optstr, const char *ident); off_t blockif_size(struct blockif_ctxt *bc); -void blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, - uint8_t *s); +void blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s); int blockif_sectsz(struct blockif_ctxt *bc); void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off); int blockif_queuesz(struct blockif_ctxt *bc); @@ -74,5 +135,55 @@ int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_close(struct blockif_ctxt *bc); + +/* + * Each block device backend registers a set of function pointers that are + * used to implement the block backends API. + */ +struct block_backend { + const char *bb_prefix; /* identifier used to set the thread info */ + const char *bb_scheme; /* identifier used to parse the option string */ + /* should terminate in a ':' */ + /* + * Routines used to initialize and cleanup the resources needed + * by a backend. The init and cleanup functions are used internally, + * and should not be called by the frontend. 
+ */ + void (*bb_init)(void); + + void (*bb_cleanup)(blockif_ctxt_t *bc); + + blockif_ctxt_t * (*bb_open)(const char *optstr, const char *ident); + + off_t (*bb_size)(blockif_ctxt_t *bc); + + void (*bb_chs)(blockif_ctxt_t *bc, uint16_t *c, uint8_t *h, + uint8_t *s); + + int (*bb_sectsz)(blockif_ctxt_t *bc); + + void (*bb_psectsz)(blockif_ctxt_t *bc, int *size, int *off); + + int (*bb_queuesz)(blockif_ctxt_t *bc); + + int (*bb_is_ro)(blockif_ctxt_t *bc); + + int (*bb_candelete)(blockif_ctxt_t *bc); + + int (*bb_read)(blockif_ctxt_t *bc, blockif_req_t *breq); + + int (*bb_write)(blockif_ctxt_t *bc, blockif_req_t *breq); + + int (*bb_flush)(blockif_ctxt_t *bc, blockif_req_t *breq); + + int (*bb_delete)(blockif_ctxt_t *bc, blockif_req_t *breq); + + int (*bb_cancel)(blockif_ctxt_t *bc, blockif_req_t *breq); + + int (*bb_close)(blockif_ctxt_t *bc); + + /* Room for backend-specific data. */ + char *bb_opaque; +}; #endif /* _BLOCK_IF_H_ */ Index: usr.sbin/bhyve/block_if.c =================================================================== --- usr.sbin/bhyve/block_if.c +++ usr.sbin/bhyve/block_if.c @@ -32,820 +32,142 @@ __FBSDID("$FreeBSD$"); #include -#ifndef WITHOUT_CAPSICUM -#include -#endif -#include -#include -#include -#include -#include - -#include -#ifndef WITHOUT_CAPSICUM -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include -#include +#include -#include "bhyverun.h" -#include "debug.h" -#include "mevent.h" #include "block_if.h" -#define BLOCKIF_SIG 0xb109b109 +SET_DECLARE(block_backend_set, block_backend_t); -#define BLOCKIF_NUMTHR 8 -#define BLOCKIF_MAXREQ (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR) - -enum blockop { - BOP_READ, - BOP_WRITE, - BOP_FLUSH, - BOP_DELETE -}; - -enum blockstat { - BST_FREE, - BST_BLOCK, - BST_PEND, - BST_BUSY, - BST_DONE -}; - -struct blockif_elem { - TAILQ_ENTRY(blockif_elem) be_link; - struct blockif_req *be_req; - enum blockop be_op; - enum blockstat be_status; - 
pthread_t be_tid; - off_t be_block; -}; - -struct blockif_ctxt { - int bc_magic; - int bc_fd; - int bc_ischr; - int bc_isgeom; - int bc_candelete; - int bc_rdonly; - off_t bc_size; - int bc_sectsz; - int bc_psectsz; - int bc_psectoff; - int bc_closing; - pthread_t bc_btid[BLOCKIF_NUMTHR]; - pthread_mutex_t bc_mtx; - pthread_cond_t bc_cond; - - /* Request elements and free/pending/busy queues */ - TAILQ_HEAD(, blockif_elem) bc_freeq; - TAILQ_HEAD(, blockif_elem) bc_pendq; - TAILQ_HEAD(, blockif_elem) bc_busyq; - struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; -}; - -static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; - -struct blockif_sig_elem { - pthread_mutex_t bse_mtx; - pthread_cond_t bse_cond; - int bse_pending; - struct blockif_sig_elem *bse_next; -}; - -static struct blockif_sig_elem *blockif_bse_head; - -static int -blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, - enum blockop op) -{ - struct blockif_elem *be, *tbe; - off_t off; - int i; - - be = TAILQ_FIRST(&bc->bc_freeq); - assert(be != NULL); - assert(be->be_status == BST_FREE); - TAILQ_REMOVE(&bc->bc_freeq, be, be_link); - be->be_req = breq; - be->be_op = op; - switch (op) { - case BOP_READ: - case BOP_WRITE: - case BOP_DELETE: - off = breq->br_offset; - for (i = 0; i < breq->br_iovcnt; i++) - off += breq->br_iov[i].iov_len; - break; - default: - off = OFF_MAX; - } - be->be_block = off; - TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { - if (tbe->be_block == breq->br_offset) - break; - } - if (tbe == NULL) { - TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { - if (tbe->be_block == breq->br_offset) - break; - } - } - if (tbe == NULL) - be->be_status = BST_PEND; - else - be->be_status = BST_BLOCK; - TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); - return (be->be_status == BST_PEND); -} - -static int -blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) -{ - struct blockif_elem *be; - - TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { - if (be->be_status == BST_PEND) - 
break; - assert(be->be_status == BST_BLOCK); - } - if (be == NULL) - return (0); - TAILQ_REMOVE(&bc->bc_pendq, be, be_link); - be->be_status = BST_BUSY; - be->be_tid = t; - TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); - *bep = be; - return (1); -} - -static void -blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) -{ - struct blockif_elem *tbe; - - if (be->be_status == BST_DONE || be->be_status == BST_BUSY) - TAILQ_REMOVE(&bc->bc_busyq, be, be_link); - else - TAILQ_REMOVE(&bc->bc_pendq, be, be_link); - TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { - if (tbe->be_req->br_offset == be->be_block) - tbe->be_status = BST_PEND; - } - be->be_tid = 0; - be->be_status = BST_FREE; - be->be_req = NULL; - TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); -} - -static void -blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) -{ - struct blockif_req *br; - off_t arg[2]; - ssize_t clen, len, off, boff, voff; - int i, err; - - br = be->be_req; - if (br->br_iovcnt <= 1) - buf = NULL; - err = 0; - switch (be->be_op) { - case BOP_READ: - if (buf == NULL) { - if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, - br->br_offset)) < 0) - err = errno; - else - br->br_resid -= len; - break; - } - i = 0; - off = voff = 0; - while (br->br_resid > 0) { - len = MIN(br->br_resid, MAXPHYS); - if (pread(bc->bc_fd, buf, len, br->br_offset + - off) < 0) { - err = errno; - break; - } - boff = 0; - do { - clen = MIN(len - boff, br->br_iov[i].iov_len - - voff); - memcpy(br->br_iov[i].iov_base + voff, - buf + boff, clen); - if (clen < br->br_iov[i].iov_len - voff) - voff += clen; - else { - i++; - voff = 0; - } - boff += clen; - } while (boff < len); - off += len; - br->br_resid -= len; - } - break; - case BOP_WRITE: - if (bc->bc_rdonly) { - err = EROFS; - break; - } - if (buf == NULL) { - if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, - br->br_offset)) < 0) - err = errno; - else - br->br_resid -= len; - break; - } - i = 0; - off = voff = 0; - while 
(br->br_resid > 0) { - len = MIN(br->br_resid, MAXPHYS); - boff = 0; - do { - clen = MIN(len - boff, br->br_iov[i].iov_len - - voff); - memcpy(buf + boff, - br->br_iov[i].iov_base + voff, clen); - if (clen < br->br_iov[i].iov_len - voff) - voff += clen; - else { - i++; - voff = 0; - } - boff += clen; - } while (boff < len); - if (pwrite(bc->bc_fd, buf, len, br->br_offset + - off) < 0) { - err = errno; - break; - } - off += len; - br->br_resid -= len; - } - break; - case BOP_FLUSH: - if (bc->bc_ischr) { - if (ioctl(bc->bc_fd, DIOCGFLUSH)) - err = errno; - } else if (fsync(bc->bc_fd)) - err = errno; - break; - case BOP_DELETE: - if (!bc->bc_candelete) - err = EOPNOTSUPP; - else if (bc->bc_rdonly) - err = EROFS; - else if (bc->bc_ischr) { - arg[0] = br->br_offset; - arg[1] = br->br_resid; - if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) - err = errno; - else - br->br_resid = 0; - } else - err = EOPNOTSUPP; - break; - default: - err = EINVAL; - break; - } - - be->be_status = BST_DONE; - - (*br->br_callback)(br, err); -} - -static void * -blockif_thr(void *arg) -{ - struct blockif_ctxt *bc; - struct blockif_elem *be; - pthread_t t; - uint8_t *buf; - - bc = arg; - if (bc->bc_isgeom) - buf = malloc(MAXPHYS); - else - buf = NULL; - t = pthread_self(); - - pthread_mutex_lock(&bc->bc_mtx); - for (;;) { - while (blockif_dequeue(bc, t, &be)) { - pthread_mutex_unlock(&bc->bc_mtx); - blockif_proc(bc, be, buf); - pthread_mutex_lock(&bc->bc_mtx); - blockif_complete(bc, be); - } - /* Check ctxt status here to see if exit requested */ - if (bc->bc_closing) - break; - pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); - } - pthread_mutex_unlock(&bc->bc_mtx); - - if (buf) - free(buf); - pthread_exit(NULL); - return (NULL); -} - -static void -blockif_sigcont_handler(int signal, enum ev_type type, void *arg) -{ - struct blockif_sig_elem *bse; - - for (;;) { - /* - * Process the entire list even if not intended for - * this thread. 
- */ - do { - bse = blockif_bse_head; - if (bse == NULL) - return; - } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, - (uintptr_t)bse, - (uintptr_t)bse->bse_next)); - - pthread_mutex_lock(&bse->bse_mtx); - bse->bse_pending = 0; - pthread_cond_signal(&bse->bse_cond); - pthread_mutex_unlock(&bse->bse_mtx); - } -} - -static void -blockif_init(void) -{ - mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); - (void) signal(SIGCONT, SIG_IGN); -} - -struct blockif_ctxt * +blockif_ctxt_t * blockif_open(const char *optstr, const char *ident) { - char tname[MAXCOMLEN + 1]; - char name[MAXPATHLEN]; - char *nopt, *xopts, *cp; - struct blockif_ctxt *bc; - struct stat sbuf; - struct diocgattr_arg arg; - off_t size, psectsz, psectoff; - int extra, fd, i, sectsz; - int nocache, sync, ro, candelete, geom, ssopt, pssopt; -#ifndef WITHOUT_CAPSICUM - cap_rights_t rights; - cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE }; -#endif + block_backend_t **bbe = NULL; + blockif_ctxt_t *ret = NULL; - pthread_once(&blockif_once, blockif_init); - - fd = -1; - ssopt = 0; - nocache = 0; - sync = 0; - ro = 0; - /* - * The first element in the optstring is always a pathname. - * Optional elements follow + * Find the block device backend that matches the user-provided + * device name. block_backend_set is built using a linker set. */ - nopt = xopts = strdup(optstr); - while (xopts != NULL) { - cp = strsep(&xopts, ","); - if (cp == nopt) /* file or device pathname */ - continue; - else if (!strcmp(cp, "nocache")) - nocache = 1; - else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) - sync = 1; - else if (!strcmp(cp, "ro")) - ro = 1; - else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) - ; - else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) - pssopt = ssopt; - else { - EPRINTLN("Invalid device option \"%s\"", cp); - goto err; - } - } - - extra = 0; - if (nocache) - extra |= O_DIRECT; - if (sync) - extra |= O_SYNC; - - fd = open(nopt, (ro ? 
O_RDONLY : O_RDWR) | extra); - if (fd < 0 && !ro) { - /* Attempt a r/w fail with a r/o open */ - fd = open(nopt, O_RDONLY | extra); - ro = 1; - } - - if (fd < 0) { - warn("Could not open backing file: %s", nopt); - goto err; - } - - if (fstat(fd, &sbuf) < 0) { - warn("Could not stat backing file %s", nopt); - goto err; - } - -#ifndef WITHOUT_CAPSICUM - cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK, - CAP_WRITE); - if (ro) - cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE); - - if (caph_rights_limit(fd, &rights) == -1) - errx(EX_OSERR, "Unable to apply rights for sandbox"); -#endif - - /* - * Deal with raw devices - */ - size = sbuf.st_size; - sectsz = DEV_BSIZE; - psectsz = psectoff = 0; - candelete = geom = 0; - if (S_ISCHR(sbuf.st_mode)) { - if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || - ioctl(fd, DIOCGSECTORSIZE, &sectsz)) { - perror("Could not fetch dev blk/sector size"); - goto err; - } - assert(size != 0); - assert(sectsz != 0); - if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) - ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); - strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); - arg.len = sizeof(arg.value.i); - if (ioctl(fd, DIOCGATTR, &arg) == 0) - candelete = arg.value.i; - if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) - geom = 1; - } else - psectsz = sbuf.st_blksize; - -#ifndef WITHOUT_CAPSICUM - if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) - errx(EX_OSERR, "Unable to apply rights for sandbox"); -#endif - - if (ssopt != 0) { - if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || - ssopt > pssopt) { - EPRINTLN("Invalid sector size %d/%d", - ssopt, pssopt); - goto err; - } - + SET_FOREACH(bbe, block_backend_set) { /* - * Some backend drivers (e.g. cd0, ada0) require that the I/O - * size be a multiple of the device's sector size. - * - * Validate that the emulated sector size complies with this - * requirement. + * How do we find the appropriate open for each backend? 
+ * We iterate over all block*_open() functions registered until one + * returns true as an indication that it accepts the given descriptor + * in optstr */ - if (S_ISCHR(sbuf.st_mode)) { - if (ssopt < sectsz || (ssopt % sectsz) != 0) { - EPRINTLN("Sector size %d incompatible " - "with underlying device sector size %d", - ssopt, sectsz); - goto err; - } + /* + * Local access has a pattern like: + * 3:0,virtio-blk,file/somewhere/guest.img + * 3:0,virtio-blk,/dev/xxxx + * Or new style: + * 3:0,virtio-blk,file:file/somewhere/guest.img + * 3:0,virtio-blk,file:/dev/xxxxx + * Requesting a RADOS block device should look something like: + * 3:0,virtio-blk,rbd:pool/image,option_1,option_2=val,.... + * For local filesystem references in optstr shall exist. + * This is handled by the blk-local backend. + * If this does not match then other backends in the block_backend_set have + * their bb_open() called. The first one returning a non-NULL backend pointer + * is a match and is used with the specification in optstr + */ + if (strstr(optstr, (*bbe)->bb_scheme) != NULL) { + ret = (*bbe)->bb_open(optstr, ident); + /* fill in the backend that is used to open this request */ + ret->be = *bbe; + return (ret); } - - sectsz = ssopt; - psectsz = pssopt; - psectoff = 0; + } - - bc = calloc(1, sizeof(struct blockif_ctxt)); - if (bc == NULL) { - perror("calloc"); - goto err; - } - - bc->bc_magic = BLOCKIF_SIG; - bc->bc_fd = fd; - bc->bc_ischr = S_ISCHR(sbuf.st_mode); - bc->bc_isgeom = geom; - bc->bc_candelete = candelete; - bc->bc_rdonly = ro; - bc->bc_size = size; - bc->bc_sectsz = sectsz; - bc->bc_psectsz = psectsz; - bc->bc_psectoff = psectoff; - pthread_mutex_init(&bc->bc_mtx, NULL); - pthread_cond_init(&bc->bc_cond, NULL); - TAILQ_INIT(&bc->bc_freeq); - TAILQ_INIT(&bc->bc_pendq); - TAILQ_INIT(&bc->bc_busyq); - for (i = 0; i < BLOCKIF_MAXREQ; i++) { - bc->bc_reqs[i].be_status = BST_FREE; - TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); - } - - for (i = 0; i < 
BLOCKIF_NUMTHR; i++) { - pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); - snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); - pthread_set_name_np(bc->bc_btid[i], tname); - } - - return (bc); -err: - if (fd >= 0) - close(fd); - free(nopt); + /* final attempt, try the legacy style local reference */ + if (ret == NULL) { + char newoptstr[MAXPATHLEN] = "file:"; + strcat(newoptstr, optstr); + if (( ret = (*bbe)->bb_open(newoptstr, ident)) != NULL) { + /* fill in the backend that is used to open this request */ + ret->be = *bbe; + return (ret); + } + } return (NULL); } -static int -blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, - enum blockop op) -{ - int err; - - err = 0; - - pthread_mutex_lock(&bc->bc_mtx); - if (!TAILQ_EMPTY(&bc->bc_freeq)) { - /* - * Enqueue and inform the block i/o thread - * that there is work available - */ - if (blockif_enqueue(bc, breq, op)) - pthread_cond_signal(&bc->bc_cond); - } else { - /* - * Callers are not allowed to enqueue more than - * the specified blockif queue limit. Return an - * error to indicate that the queue length has been - * exceeded. 
- */ - err = E2BIG; - } - pthread_mutex_unlock(&bc->bc_mtx); - - return (err); -} - int -blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) +blockif_read(blockif_ctxt_t *bc, blockif_req_t *breq) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_READ)); + return ((bc->be)->bb_read(bc, breq)); } int -blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) +blockif_write(blockif_ctxt_t *bc, blockif_req_t *breq) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_WRITE)); + return ((bc->be)->bb_write(bc, breq)); } int -blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) +blockif_flush(blockif_ctxt_t *bc, blockif_req_t *breq) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_FLUSH)); + return ((bc->be)->bb_flush(bc, breq)); } int -blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) +blockif_delete(blockif_ctxt_t *bc, blockif_req_t *breq) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_DELETE)); + return ((bc->be)->bb_delete(bc, breq)); } int -blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) +blockif_cancel(blockif_ctxt_t *bc, blockif_req_t *breq) { - struct blockif_elem *be; - - assert(bc->bc_magic == BLOCKIF_SIG); - - pthread_mutex_lock(&bc->bc_mtx); - /* - * Check pending requests. - */ - TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { - if (be->be_req == breq) - break; - } - if (be != NULL) { - /* - * Found it. - */ - blockif_complete(bc, be); - pthread_mutex_unlock(&bc->bc_mtx); - - return (0); - } - - /* - * Check in-flight requests. - */ - TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { - if (be->be_req == breq) - break; - } - if (be == NULL) { - /* - * Didn't find it. - */ - pthread_mutex_unlock(&bc->bc_mtx); - return (EINVAL); - } - - /* - * Interrupt the processing thread to force it return - * prematurely via it's normal callback path. 
- */ - while (be->be_status == BST_BUSY) { - struct blockif_sig_elem bse, *old_head; - - pthread_mutex_init(&bse.bse_mtx, NULL); - pthread_cond_init(&bse.bse_cond, NULL); - - bse.bse_pending = 1; - - do { - old_head = blockif_bse_head; - bse.bse_next = old_head; - } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, - (uintptr_t)old_head, - (uintptr_t)&bse)); - - pthread_kill(be->be_tid, SIGCONT); - - pthread_mutex_lock(&bse.bse_mtx); - while (bse.bse_pending) - pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); - pthread_mutex_unlock(&bse.bse_mtx); - } - - pthread_mutex_unlock(&bc->bc_mtx); - - /* - * The processing thread has been interrupted. Since it's not - * clear if the callback has been invoked yet, return EBUSY. - */ - return (EBUSY); + return ((bc->be)->bb_cancel(bc, breq)); } int -blockif_close(struct blockif_ctxt *bc) +blockif_close(blockif_ctxt_t *bc) { - void *jval; - int i; - - assert(bc->bc_magic == BLOCKIF_SIG); - - /* - * Stop the block i/o thread - */ - pthread_mutex_lock(&bc->bc_mtx); - bc->bc_closing = 1; - pthread_mutex_unlock(&bc->bc_mtx); - pthread_cond_broadcast(&bc->bc_cond); - for (i = 0; i < BLOCKIF_NUMTHR; i++) - pthread_join(bc->bc_btid[i], &jval); - - /* XXX Cancel queued i/o's ??? */ - - /* - * Release resources - */ - bc->bc_magic = 0; - close(bc->bc_fd); - free(bc); - - return (0); + return ((bc->be)->bb_close(bc)); } -/* - * Return virtual C/H/S values for a given block. Use the algorithm - * outlined in the VHD specification to calculate values. 
- */ void -blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) +blockif_chs(blockif_ctxt_t *bc, uint16_t *c, uint8_t *h, uint8_t *s) { - off_t sectors; /* total sectors of the block dev */ - off_t hcyl; /* cylinders times heads */ - uint16_t secpt; /* sectors per track */ - uint8_t heads; - - assert(bc->bc_magic == BLOCKIF_SIG); - - sectors = bc->bc_size / bc->bc_sectsz; - - /* Clamp the size to the largest possible with CHS */ - if (sectors > 65535UL*16*255) - sectors = 65535UL*16*255; - - if (sectors >= 65536UL*16*63) { - secpt = 255; - heads = 16; - hcyl = sectors / secpt; - } else { - secpt = 17; - hcyl = sectors / secpt; - heads = (hcyl + 1023) / 1024; - - if (heads < 4) - heads = 4; - - if (hcyl >= (heads * 1024) || heads > 16) { - secpt = 31; - heads = 16; - hcyl = sectors / secpt; - } - if (hcyl >= (heads * 1024)) { - secpt = 63; - heads = 16; - hcyl = sectors / secpt; - } - } - - *c = hcyl / heads; - *h = heads; - *s = secpt; + (bc->be)->bb_chs(bc, c, h, s); } -/* - * Accessors - */ off_t -blockif_size(struct blockif_ctxt *bc) +blockif_size(blockif_ctxt_t *bc) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (bc->bc_size); + return ((bc->be)->bb_size(bc)); } int -blockif_sectsz(struct blockif_ctxt *bc) +blockif_sectsz(blockif_ctxt_t *bc) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (bc->bc_sectsz); + return ((bc->be)->bb_sectsz(bc)); } void -blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) +blockif_psectsz(blockif_ctxt_t *bc, int *size, int *off) { - - assert(bc->bc_magic == BLOCKIF_SIG); - *size = bc->bc_psectsz; - *off = bc->bc_psectoff; + (bc->be)->bb_psectsz(bc, size, off); } int -blockif_queuesz(struct blockif_ctxt *bc) +blockif_queuesz(blockif_ctxt_t *bc) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (BLOCKIF_MAXREQ - 1); + return ((bc->be)->bb_queuesz(bc)); } int -blockif_is_ro(struct blockif_ctxt *bc) +blockif_is_ro(blockif_ctxt_t *bc) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return 
(bc->bc_rdonly); + return ((bc->be)->bb_is_ro(bc)); } int -blockif_candelete(struct blockif_ctxt *bc) +blockif_candelete(blockif_ctxt_t *bc) { - - assert(bc->bc_magic == BLOCKIF_SIG); - return (bc->bc_candelete); + return ((bc->be)->bb_candelete(bc)); } Index: usr.sbin/bhyve/block_local.c =================================================================== --- usr.sbin/bhyve/block_local.c +++ usr.sbin/bhyve/block_local.c @@ -25,17 +25,16 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: head/usr.sbin/bhyve/block_if.c 356523 2020-01-08 22:55:22Z vmaffione $ + * $FreeBSD: head/usr.sbin/bhyve/block_local.c 356523 2020-01-08 22:55:22Z vmaffione $ */ #include -__FBSDID("$FreeBSD: head/usr.sbin/bhyve/block_if.c 356523 2020-01-08 22:55:22Z vmaffione $"); +__FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif -#include #include #include #include @@ -63,71 +62,21 @@ #include "mevent.h" #include "block_if.h" -#define BLOCKIF_SIG 0xb109b109 +#include -#define BLOCKIF_NUMTHR 8 -#define BLOCKIF_MAXREQ (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR) +static pthread_once_t blocklocal_once = PTHREAD_ONCE_INIT; -enum blockop { - BOP_READ, - BOP_WRITE, - BOP_FLUSH, - BOP_DELETE -}; - -enum blockstat { - BST_FREE, - BST_BLOCK, - BST_PEND, - BST_BUSY, - BST_DONE -}; - -struct blockif_elem { - TAILQ_ENTRY(blockif_elem) be_link; - struct blockif_req *be_req; - enum blockop be_op; - enum blockstat be_status; - pthread_t be_tid; - off_t be_block; -}; - -struct blockif_ctxt { - int bc_magic; - int bc_fd; - int bc_ischr; - int bc_isgeom; - int bc_candelete; - int bc_rdonly; - off_t bc_size; - int bc_sectsz; - int bc_psectsz; - int bc_psectoff; - int bc_closing; - pthread_t bc_btid[BLOCKIF_NUMTHR]; - pthread_mutex_t bc_mtx; - pthread_cond_t bc_cond; - - /* Request elements and free/pending/busy queues */ - TAILQ_HEAD(, blockif_elem) bc_freeq; - TAILQ_HEAD(, blockif_elem) bc_pendq; - TAILQ_HEAD(, blockif_elem) bc_busyq; - struct 
blockif_elem bc_reqs[BLOCKIF_MAXREQ]; -}; - -static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; - -struct blockif_sig_elem { +struct blocklocal_sig_elem { pthread_mutex_t bse_mtx; pthread_cond_t bse_cond; int bse_pending; - struct blockif_sig_elem *bse_next; + struct blocklocal_sig_elem *bse_next; }; -static struct blockif_sig_elem *blockif_bse_head; +static struct blocklocal_sig_elem *blocklocal_bse_head; static int -blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, +blocklocal_enqueue(blockif_ctxt_t *bc, blockif_req_t *breq, enum blockop op) { struct blockif_elem *be, *tbe; @@ -171,7 +120,7 @@ } static int -blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) +blocklocal_dequeue(blockif_ctxt_t *bc, pthread_t t, struct blockif_elem **bep) { struct blockif_elem *be; @@ -191,7 +140,7 @@ } static void -blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) +blocklocal_complete(blockif_ctxt_t *bc, struct blockif_elem *be) { struct blockif_elem *tbe; @@ -210,9 +159,9 @@ } static void -blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) +blocklocal_proc(blockif_ctxt_t *bc, struct blockif_elem *be, uint8_t *buf) { - struct blockif_req *br; + blockif_req_t *br; off_t arg[2]; ssize_t clen, len, off, boff, voff; int i, err; @@ -331,9 +280,9 @@ } static void * -blockif_thr(void *arg) +blocklocal_thr(void *arg) { - struct blockif_ctxt *bc; + blockif_ctxt_t *bc; struct blockif_elem *be; pthread_t t; uint8_t *buf; @@ -347,11 +296,11 @@ pthread_mutex_lock(&bc->bc_mtx); for (;;) { - while (blockif_dequeue(bc, t, &be)) { + while (blocklocal_dequeue(bc, t, &be)) { pthread_mutex_unlock(&bc->bc_mtx); - blockif_proc(bc, be, buf); + blocklocal_proc(bc, be, buf); pthread_mutex_lock(&bc->bc_mtx); - blockif_complete(bc, be); + blocklocal_complete(bc, be); } /* Check ctxt status here to see if exit requested */ if (bc->bc_closing) @@ -367,9 +316,9 @@ } static void -blockif_sigcont_handler(int signal, 
enum ev_type type, void *arg) +blocklocal_sigcont_handler(int signal, enum ev_type type, void *arg) { - struct blockif_sig_elem *bse; + struct blocklocal_sig_elem *bse; for (;;) { /* @@ -377,10 +326,10 @@ * this thread. */ do { - bse = blockif_bse_head; + bse = blocklocal_bse_head; if (bse == NULL) return; - } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, + } while (!atomic_cmpset_ptr((uintptr_t *)&blocklocal_bse_head, (uintptr_t)bse, (uintptr_t)bse->bse_next)); @@ -391,20 +340,29 @@ } } -static void -blockif_init(void) +void +blocklocal_init(void) { - mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); + mevent_add(SIGCONT, EVF_SIGNAL, blocklocal_sigcont_handler, NULL); (void) signal(SIGCONT, SIG_IGN); } -struct blockif_ctxt * -blockif_open(const char *optstr, const char *ident) +void +blocklocal_cleanup(blockif_ctxt_t *bc) +{ /* empty block + * currently no cleanup required. + */ +} + + + +blockif_ctxt_t * +blocklocal_open(const char *optstr, const char *ident) { char tname[MAXCOMLEN + 1]; char name[MAXPATHLEN]; char *nopt, *xopts, *cp; - struct blockif_ctxt *bc; + blockif_ctxt_t *bc; struct stat sbuf; struct diocgattr_arg arg; off_t size, psectsz, psectoff; @@ -415,7 +373,7 @@ cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE }; #endif - pthread_once(&blockif_once, blockif_init); + pthread_once(&blocklocal_once, blocklocal_init); fd = -1; ssopt = 0; @@ -541,7 +499,7 @@ psectoff = 0; } - bc = calloc(1, sizeof(struct blockif_ctxt)); + bc = calloc(1, sizeof(blockif_ctxt_t)); if (bc == NULL) { perror("calloc"); goto err; @@ -568,7 +526,7 @@ } for (i = 0; i < BLOCKIF_NUMTHR; i++) { - pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); + pthread_create(&bc->bc_btid[i], NULL, blocklocal_thr, bc); snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); pthread_set_name_np(bc->bc_btid[i], tname); } @@ -582,7 +540,7 @@ } static int -blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, +blockif_request(blockif_ctxt_t *bc, blockif_req_t 
*breq, enum blockop op) { int err; @@ -595,7 +553,7 @@ * Enqueue and inform the block i/o thread * that there is work available */ - if (blockif_enqueue(bc, breq, op)) + if (blocklocal_enqueue(bc, breq, op)) pthread_cond_signal(&bc->bc_cond); } else { /* @@ -612,7 +570,7 @@ } int -blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) +blocklocal_read(blockif_ctxt_t *bc, blockif_req_t *breq) { assert(bc->bc_magic == BLOCKIF_SIG); @@ -620,7 +578,7 @@ } int -blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) +blocklocal_write(blockif_ctxt_t *bc, blockif_req_t *breq) { assert(bc->bc_magic == BLOCKIF_SIG); @@ -628,7 +586,7 @@ } int -blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) +blocklocal_flush(blockif_ctxt_t *bc, blockif_req_t *breq) { assert(bc->bc_magic == BLOCKIF_SIG); @@ -636,7 +594,7 @@ } int -blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) +blocklocal_delete(blockif_ctxt_t *bc, blockif_req_t *breq) { assert(bc->bc_magic == BLOCKIF_SIG); @@ -644,7 +602,7 @@ } int -blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) +blocklocal_cancel(blockif_ctxt_t *bc, blockif_req_t *breq) { struct blockif_elem *be; @@ -662,7 +620,7 @@ /* * Found it. */ - blockif_complete(bc, be); + blocklocal_complete(bc, be); pthread_mutex_unlock(&bc->bc_mtx); return (0); @@ -688,7 +646,7 @@ * prematurely via it's normal callback path. 
*/ while (be->be_status == BST_BUSY) { - struct blockif_sig_elem bse, *old_head; + struct blocklocal_sig_elem bse, *old_head; pthread_mutex_init(&bse.bse_mtx, NULL); pthread_cond_init(&bse.bse_cond, NULL); @@ -696,9 +654,9 @@ bse.bse_pending = 1; do { - old_head = blockif_bse_head; + old_head = blocklocal_bse_head; bse.bse_next = old_head; - } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, + } while (!atomic_cmpset_ptr((uintptr_t *)&blocklocal_bse_head, (uintptr_t)old_head, (uintptr_t)&bse)); @@ -720,7 +678,7 @@ } int -blockif_close(struct blockif_ctxt *bc) +blocklocal_close(blockif_ctxt_t *bc) { void *jval; int i; @@ -754,7 +712,7 @@ * outlined in the VHD specification to calculate values. */ void -blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) +blocklocal_chs(blockif_ctxt_t *bc, uint16_t *c, uint8_t *h, uint8_t *s) { off_t sectors; /* total sectors of the block dev */ off_t hcyl; /* cylinders times heads */ @@ -802,7 +760,7 @@ * Accessors */ off_t -blockif_size(struct blockif_ctxt *bc) +blocklocal_size(blockif_ctxt_t *bc) { assert(bc->bc_magic == BLOCKIF_SIG); @@ -810,7 +768,7 @@ } int -blockif_sectsz(struct blockif_ctxt *bc) +blocklocal_sectsz(blockif_ctxt_t *bc) { assert(bc->bc_magic == BLOCKIF_SIG); @@ -818,7 +776,7 @@ } void -blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) +blocklocal_psectsz(blockif_ctxt_t *bc, int *size, int *off) { assert(bc->bc_magic == BLOCKIF_SIG); @@ -827,7 +785,7 @@ } int -blockif_queuesz(struct blockif_ctxt *bc) +blocklocal_queuesz(blockif_ctxt_t *bc) { assert(bc->bc_magic == BLOCKIF_SIG); @@ -835,7 +793,7 @@ } int -blockif_is_ro(struct blockif_ctxt *bc) +blocklocal_is_ro(blockif_ctxt_t *bc) { assert(bc->bc_magic == BLOCKIF_SIG); @@ -843,9 +801,32 @@ } int -blockif_candelete(struct blockif_ctxt *bc) +blocklocal_candelete(blockif_ctxt_t *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_candelete); } + +static +block_backend_t blocklocal_backend = { + .bb_prefix = 
"blk-local", + .bb_scheme = "file:", + .bb_init = blocklocal_init, + .bb_cleanup = blocklocal_cleanup, + .bb_open = blocklocal_open, + .bb_size = blocklocal_size, + .bb_chs = blocklocal_chs, + .bb_sectsz = blocklocal_sectsz, + .bb_psectsz = blocklocal_psectsz, + .bb_queuesz = blocklocal_queuesz, + .bb_is_ro = blocklocal_is_ro, + .bb_candelete = blocklocal_candelete, + .bb_read = blocklocal_read, + .bb_write = blocklocal_write, + .bb_flush = blocklocal_flush, + .bb_delete = blocklocal_delete, + .bb_cancel = blocklocal_cancel, + .bb_close = blocklocal_close, +}; +DATA_SET(block_backend_set, blocklocal_backend);