Page MenuHomeFreeBSD

D23010.id67203.diff
No OneTemporary

D23010.id67203.diff

Index: usr.sbin/bhyve/Makefile
===================================================================
--- usr.sbin/bhyve/Makefile
+++ usr.sbin/bhyve/Makefile
@@ -19,6 +19,7 @@
audio.c \
bhyvegc.c \
bhyverun.c \
+ block_backends.c \
block_if.c \
bootrom.c \
console.c \
Index: usr.sbin/bhyve/block_backends.h
===================================================================
--- usr.sbin/bhyve/block_backends.h
+++ usr.sbin/bhyve/block_backends.h
@@ -25,7 +25,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/usr.sbin/bhyve/block_if.h 347033 2019-05-02 22:46:37Z jhb $
+ * $FreeBSD$
*/
/*
@@ -35,12 +35,16 @@
* another thread.
*/
-#ifndef _BLOCK_IF_H_
-#define _BLOCK_IF_H_
+#ifndef _BLOCK_BACKENDS_H_
+#define _BLOCK_BACKENDS_H_
+#include <sys/queue.h>
#include <sys/uio.h>
#include <sys/unistd.h>
+/* Opaque type representing a block device backend. */
+typedef struct block_backend block_backend_t;
+
/*
* BLOCKIF_IOV_MAX is the maximum number of scatter/gather entries in
* a single request. BLOCKIF_RING_MAX is the maxmimum number of
@@ -49,6 +53,26 @@
#define BLOCKIF_IOV_MAX 128 /* not practical to be IOV_MAX */
#define BLOCKIF_RING_MAX 128
+#define BLOCKIF_SIG 0xb109b109
+
+#define BLOCKIF_NUMTHR 8
+#define BLOCKIF_MAXREQ (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR)
+
+enum blockop {
+ BOP_READ,
+ BOP_WRITE,
+ BOP_FLUSH,
+ BOP_DELETE
+};
+
+enum blockstat {
+ BST_FREE,
+ BST_BLOCK,
+ BST_PEND,
+ BST_BUSY,
+ BST_DONE
+};
+
struct blockif_req {
int br_iovcnt;
off_t br_offset;
@@ -58,21 +82,78 @@
struct iovec br_iov[BLOCKIF_IOV_MAX];
};
-struct blockif_ctxt;
-struct blockif_ctxt *blockif_open(const char *optstr, const char *ident);
-off_t blockif_size(struct blockif_ctxt *bc);
-void blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h,
- uint8_t *s);
-int blockif_sectsz(struct blockif_ctxt *bc);
-void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off);
-int blockif_queuesz(struct blockif_ctxt *bc);
-int blockif_is_ro(struct blockif_ctxt *bc);
-int blockif_candelete(struct blockif_ctxt *bc);
-int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq);
-int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq);
-int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq);
-int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq);
-int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq);
-int blockif_close(struct blockif_ctxt *bc);
+/* Interface between block device frontends and backends. */
+int blockbe_open(block_backend_t **ret, const char *optstr,
+ const char *pci_ident);
+off_t blockbe_size(block_backend_t *be);
+void blockbe_chs(block_backend_t *be, uint16_t *c, uint8_t *h,
+ uint8_t *s);
+int blockbe_sectsz(block_backend_t *be);
+void blockbe_psectsz(block_backend_t *be, int *size, int *off);
+int blockbe_queuesz(block_backend_t *be);
+int blockbe_is_ro(block_backend_t *be);
+int blockbe_candelete(block_backend_t *be);
+int blockbe_read(block_backend_t *be, struct blockif_req *breq);
+int blockbe_write(block_backend_t *be, struct blockif_req *breq);
+int blockbe_flush(block_backend_t *be, struct blockif_req *breq);
+int blockbe_delete(block_backend_t *be, struct blockif_req *breq);
+int blockbe_cancel(block_backend_t *be, struct blockif_req *breq);
+int blockbe_close(block_backend_t *be);
-#endif /* _BLOCK_IF_H_ */
+/*
+ * Each block device backend registers a set of function pointers that are
+ * used to implement the block backends API.
+ */
+struct block_backend {
+ const char *prefix; /* prefix matching this backend */
+
+ /*
+ * Routines used to initialize and cleanup the resources needed
+ * by a backend. The cleanup function is used internally,
+ * and should not be called by the frontend.
+ */
+ void (*init)(void);
+
+ void (*cleanup)(block_backend_t **be);
+
+ /*
+ * Called by blockbe_open() on each registered backend in turn; a
+ * return value of 0 means the backend accepts optstr and ends the
+ * search.
+ */
+ int (*open)(block_backend_t **be, const char *optstr,
+ const char *pci_ident, const char *be_ident);
+
+ /*
+ * The remaining hooks are each invoked by the blockbe_*() wrapper of
+ * the same name, which asserts that be is non-NULL and forwards its
+ * arguments unchanged.
+ */
+ off_t (*size)(block_backend_t *be);
+
+ void (*chs)(block_backend_t *be, uint16_t *c, uint8_t *h,
+ uint8_t *s);
+
+ int (*sectsz)(block_backend_t *be);
+
+ void (*psectsz)(block_backend_t *be, int *size, int *off);
+
+ int (*queuesz)(block_backend_t *be);
+
+ int (*is_ro)(block_backend_t *be);
+
+ int (*candelete)(block_backend_t *be);
+
+ int (*read)(block_backend_t *be, struct blockif_req *breq);
+
+ int (*write)(block_backend_t *be, struct blockif_req *breq);
+
+ int (*flush)(block_backend_t *be, struct blockif_req *breq);
+
+ int (*delete)(block_backend_t *be, struct blockif_req *breq);
+
+ int (*cancel)(block_backend_t *be, struct blockif_req *breq);
+
+ int (*close)(block_backend_t *be);
+
+ /*
+ * NOTE(review): presumably the owning frontend state and the backend
+ * context; neither is referenced in the visible code -- confirm
+ * that they belong in the generic backend descriptor.
+ */
+ struct pci_vtblk_softc *sc;
+ struct blockif_ctxt *bc;
+
+ /* Size of backend-specific private data. */
+ size_t priv_size;
+
+ /*
+ * Room for backend-specific data. Declared as a C99 flexible array
+ * member, so it must remain the last member of the structure.
+ */
+ char opaque[];
+};
+
+#endif /* _BLOCK_BACKENDS_H_ */
Index: usr.sbin/bhyve/block_backends.c
===================================================================
--- usr.sbin/bhyve/block_backends.c
+++ usr.sbin/bhyve/block_backends.c
@@ -58,794 +58,154 @@
#include <machine/atomic.h>
+#include <sys/linker_set.h>
+
#include "bhyverun.h"
#include "debug.h"
#include "mevent.h"
-#include "block_if.h"
+#include "block_backends.h"
-#define BLOCKIF_SIG 0xb109b109
+SET_DECLARE(block_backend_set, block_backend_t);
-#define BLOCKIF_NUMTHR 8
-#define BLOCKIF_MAXREQ (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR)
-
-enum blockop {
- BOP_READ,
- BOP_WRITE,
- BOP_FLUSH,
- BOP_DELETE
-};
-
-enum blockstat {
- BST_FREE,
- BST_BLOCK,
- BST_PEND,
- BST_BUSY,
- BST_DONE
-};
-
-struct blockif_elem {
- TAILQ_ENTRY(blockif_elem) be_link;
- struct blockif_req *be_req;
- enum blockop be_op;
- enum blockstat be_status;
- pthread_t be_tid;
- off_t be_block;
-};
-
-struct blockif_ctxt {
- int bc_magic;
- int bc_fd;
- int bc_ischr;
- int bc_isgeom;
- int bc_candelete;
- int bc_rdonly;
- off_t bc_size;
- int bc_sectsz;
- int bc_psectsz;
- int bc_psectoff;
- int bc_closing;
- pthread_t bc_btid[BLOCKIF_NUMTHR];
- pthread_mutex_t bc_mtx;
- pthread_cond_t bc_cond;
-
- /* Request elements and free/pending/busy queues */
- TAILQ_HEAD(, blockif_elem) bc_freeq;
- TAILQ_HEAD(, blockif_elem) bc_pendq;
- TAILQ_HEAD(, blockif_elem) bc_busyq;
- struct blockif_elem bc_reqs[BLOCKIF_MAXREQ];
-};
-
-static pthread_once_t blockif_once = PTHREAD_ONCE_INIT;
-
-struct blockif_sig_elem {
- pthread_mutex_t bse_mtx;
- pthread_cond_t bse_cond;
- int bse_pending;
- struct blockif_sig_elem *bse_next;
-};
-
-static struct blockif_sig_elem *blockif_bse_head;
-
-static int
-blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
- enum blockop op)
+/*
+ * Find the backend that accepts a device specification and open it.
+ * This walks the block_backend_set linker set and calls each backend's
+ * open() hook in turn; the first hook that returns 0 ends the search.
+ * @ret is set to NULL before the search starts; every open() hook is
+ * handed this same pointer.
+ * @optstr is the backend part of the device specification,
+ * e.g. /dev/xxxx in 3:0,virtio-blk,/dev/xxxx
+ * @pci_ident is passed through unchanged to the open() hooks.
+ * Returns the result of the last open() hook that was tried, or -1 if
+ * no backend is registered at all.
+ *
+ * NOTE(review): the open() call in the loop below also passes be_ident,
+ * which is not declared in this function -- confirm where it should come
+ * from.
+ */
+int
+blockbe_open(block_backend_t **ret, const char *optstr,
+ const char *pci_ident)
{
- struct blockif_elem *be, *tbe;
- off_t off;
- int i;
+ block_backend_t **bbe = NULL;
+ int err = -1; /* returned as-is when the linker set is empty */
+ *ret = NULL;
- be = TAILQ_FIRST(&bc->bc_freeq);
- assert(be != NULL);
- assert(be->be_status == BST_FREE);
- TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
- be->be_req = breq;
- be->be_op = op;
- switch (op) {
- case BOP_READ:
- case BOP_WRITE:
- case BOP_DELETE:
- off = breq->br_offset;
- for (i = 0; i < breq->br_iovcnt; i++)
- off += breq->br_iov[i].iov_len;
- break;
- default:
- off = OFF_MAX;
- }
- be->be_block = off;
- TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
- if (tbe->be_block == breq->br_offset)
- break;
- }
- if (tbe == NULL) {
- TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) {
- if (tbe->be_block == breq->br_offset)
- break;
- }
- }
- if (tbe == NULL)
- be->be_status = BST_PEND;
- else
- be->be_status = BST_BLOCK;
- TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link);
- return (be->be_status == BST_PEND);
-}
-
-static int
-blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep)
-{
- struct blockif_elem *be;
-
- TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
- if (be->be_status == BST_PEND)
- break;
- assert(be->be_status == BST_BLOCK);
- }
- if (be == NULL)
- return (0);
- TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
- be->be_status = BST_BUSY;
- be->be_tid = t;
- TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link);
- *bep = be;
- return (1);
-}
-
-static void
-blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
-{
- struct blockif_elem *tbe;
-
- if (be->be_status == BST_DONE || be->be_status == BST_BUSY)
- TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
- else
- TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
- TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
- if (tbe->be_req->br_offset == be->be_block)
- tbe->be_status = BST_PEND;
- }
- be->be_tid = 0;
- be->be_status = BST_FREE;
- be->be_req = NULL;
- TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
-}
-
-static void
-blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
-{
- struct blockif_req *br;
- off_t arg[2];
- ssize_t clen, len, off, boff, voff;
- int i, err;
-
- br = be->be_req;
- if (br->br_iovcnt <= 1)
- buf = NULL;
- err = 0;
- switch (be->be_op) {
- case BOP_READ:
- if (buf == NULL) {
- if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
- br->br_offset)) < 0)
- err = errno;
- else
- br->br_resid -= len;
- break;
- }
- i = 0;
- off = voff = 0;
- while (br->br_resid > 0) {
- len = MIN(br->br_resid, MAXPHYS);
- if (pread(bc->bc_fd, buf, len, br->br_offset +
- off) < 0) {
- err = errno;
- break;
- }
- boff = 0;
- do {
- clen = MIN(len - boff, br->br_iov[i].iov_len -
- voff);
- memcpy(br->br_iov[i].iov_base + voff,
- buf + boff, clen);
- if (clen < br->br_iov[i].iov_len - voff)
- voff += clen;
- else {
- i++;
- voff = 0;
- }
- boff += clen;
- } while (boff < len);
- off += len;
- br->br_resid -= len;
- }
- break;
- case BOP_WRITE:
- if (bc->bc_rdonly) {
- err = EROFS;
- break;
- }
- if (buf == NULL) {
- if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
- br->br_offset)) < 0)
- err = errno;
- else
- br->br_resid -= len;
- break;
- }
- i = 0;
- off = voff = 0;
- while (br->br_resid > 0) {
- len = MIN(br->br_resid, MAXPHYS);
- boff = 0;
- do {
- clen = MIN(len - boff, br->br_iov[i].iov_len -
- voff);
- memcpy(buf + boff,
- br->br_iov[i].iov_base + voff, clen);
- if (clen < br->br_iov[i].iov_len - voff)
- voff += clen;
- else {
- i++;
- voff = 0;
- }
- boff += clen;
- } while (boff < len);
- if (pwrite(bc->bc_fd, buf, len, br->br_offset +
- off) < 0) {
- err = errno;
- break;
- }
- off += len;
- br->br_resid -= len;
- }
- break;
- case BOP_FLUSH:
- if (bc->bc_ischr) {
- if (ioctl(bc->bc_fd, DIOCGFLUSH))
- err = errno;
- } else if (fsync(bc->bc_fd))
- err = errno;
- break;
- case BOP_DELETE:
- if (!bc->bc_candelete)
- err = EOPNOTSUPP;
- else if (bc->bc_rdonly)
- err = EROFS;
- else if (bc->bc_ischr) {
- arg[0] = br->br_offset;
- arg[1] = br->br_resid;
- if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
- err = errno;
- else
- br->br_resid = 0;
- } else
- err = EOPNOTSUPP;
- break;
- default:
- err = EINVAL;
- break;
- }
-
- be->be_status = BST_DONE;
-
- (*br->br_callback)(br, err);
-}
-
-static void *
-blockif_thr(void *arg)
-{
- struct blockif_ctxt *bc;
- struct blockif_elem *be;
- pthread_t t;
- uint8_t *buf;
-
- bc = arg;
- if (bc->bc_isgeom)
- buf = malloc(MAXPHYS);
- else
- buf = NULL;
- t = pthread_self();
-
- pthread_mutex_lock(&bc->bc_mtx);
- for (;;) {
- while (blockif_dequeue(bc, t, &be)) {
- pthread_mutex_unlock(&bc->bc_mtx);
- blockif_proc(bc, be, buf);
- pthread_mutex_lock(&bc->bc_mtx);
- blockif_complete(bc, be);
- }
- /* Check ctxt status here to see if exit requested */
- if (bc->bc_closing)
- break;
- pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
- }
- pthread_mutex_unlock(&bc->bc_mtx);
-
- if (buf)
- free(buf);
- pthread_exit(NULL);
- return (NULL);
-}
-
-static void
-blockif_sigcont_handler(int signal, enum ev_type type, void *arg)
-{
- struct blockif_sig_elem *bse;
-
- for (;;) {
- /*
- * Process the entire list even if not intended for
- * this thread.
- */
- do {
- bse = blockif_bse_head;
- if (bse == NULL)
- return;
- } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
- (uintptr_t)bse,
- (uintptr_t)bse->bse_next));
-
- pthread_mutex_lock(&bse->bse_mtx);
- bse->bse_pending = 0;
- pthread_cond_signal(&bse->bse_cond);
- pthread_mutex_unlock(&bse->bse_mtx);
- }
-}
-
-static void
-blockif_init(void)
-{
- mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL);
- (void) signal(SIGCONT, SIG_IGN);
-}
-
-struct blockif_ctxt *
-blockif_open(const char *optstr, const char *ident)
-{
- char tname[MAXCOMLEN + 1];
- char name[MAXPATHLEN];
- char *nopt, *xopts, *cp;
- struct blockif_ctxt *bc;
- struct stat sbuf;
- struct diocgattr_arg arg;
- off_t size, psectsz, psectoff;
- int extra, fd, i, sectsz;
- int nocache, sync, ro, candelete, geom, ssopt, pssopt;
-#ifndef WITHOUT_CAPSICUM
- cap_rights_t rights;
- cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE };
-#endif
-
- pthread_once(&blockif_once, blockif_init);
-
- fd = -1;
- ssopt = 0;
- nocache = 0;
- sync = 0;
- ro = 0;
-
+ /*
+ * Find the block device backend that matches the user-provided
+ * device name. block_backend_set is built using a linker set.
+ */
+ SET_FOREACH(bbe, block_backend_set) {
+
+ /*
+ * How do we find the appropriate open for each backend?
+ * We should iterate over the open() functions of all backends until
+ * one returns 0 as an indication that it accepts the descriptor given
+ * in optstr.
+ */
/*
- * The first element in the optstring is always a pathname.
- * Optional elements follow
+ * Local access has a pattern like:
+ * 3:0,virtio-blk,file/somewhere/guest.img
+ * 3:0,virtio-blk,/dev/xxxx
+ * The file part should be in optstr so we check if optstr exists as a file
+ * in which case it would be access to the local filesystem.
+ * This is handled by the blk-local backend.
+ * If this does not match then other backends in the block_backend_set have
+ * their open() called. The first one returning 0 is a match and is used
+ * with the specification in optstr
+ *
*/
- nopt = xopts = strdup(optstr);
- while (xopts != NULL) {
- cp = strsep(&xopts, ",");
- if (cp == nopt) /* file or device pathname */
- continue;
- else if (!strcmp(cp, "nocache"))
- nocache = 1;
- else if (!strcmp(cp, "sync") || !strcmp(cp, "direct"))
- sync = 1;
- else if (!strcmp(cp, "ro"))
- ro = 1;
- else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2)
- ;
- else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1)
- pssopt = ssopt;
- else {
- EPRINTLN("Invalid device option \"%s\"", cp);
- goto err;
- }
- }
-
- extra = 0;
- if (nocache)
- extra |= O_DIRECT;
- if (sync)
- extra |= O_SYNC;
-
- fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra);
- if (fd < 0 && !ro) {
- /* Attempt a r/w fail with a r/o open */
- fd = open(nopt, O_RDONLY | extra);
- ro = 1;
- }
-
- if (fd < 0) {
- warn("Could not open backing file: %s", nopt);
- goto err;
- }
-
- if (fstat(fd, &sbuf) < 0) {
- warn("Could not stat backing file %s", nopt);
- goto err;
+ if ((err = (*bbe)->open(ret, optstr, pci_ident, be_ident)) == 0) {
+ break;
}
-
-#ifndef WITHOUT_CAPSICUM
- cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK,
- CAP_WRITE);
- if (ro)
- cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE);
-
- if (caph_rights_limit(fd, &rights) == -1)
- errx(EX_OSERR, "Unable to apply rights for sandbox");
-#endif
-
- /*
- * Deal with raw devices
- */
- size = sbuf.st_size;
- sectsz = DEV_BSIZE;
- psectsz = psectoff = 0;
- candelete = geom = 0;
- if (S_ISCHR(sbuf.st_mode)) {
- if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
- ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
- perror("Could not fetch dev blk/sector size");
- goto err;
- }
- assert(size != 0);
- assert(sectsz != 0);
- if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0)
- ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff);
- strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
- arg.len = sizeof(arg.value.i);
- if (ioctl(fd, DIOCGATTR, &arg) == 0)
- candelete = arg.value.i;
- if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0)
- geom = 1;
- } else
- psectsz = sbuf.st_blksize;
-
-#ifndef WITHOUT_CAPSICUM
- if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1)
- errx(EX_OSERR, "Unable to apply rights for sandbox");
-#endif
-
- if (ssopt != 0) {
- if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 ||
- ssopt > pssopt) {
- EPRINTLN("Invalid sector size %d/%d",
- ssopt, pssopt);
- goto err;
- }
-
- /*
- * Some backend drivers (e.g. cd0, ada0) require that the I/O
- * size be a multiple of the device's sector size.
- *
- * Validate that the emulated sector size complies with this
- * requirement.
- */
- if (S_ISCHR(sbuf.st_mode)) {
- if (ssopt < sectsz || (ssopt % sectsz) != 0) {
- EPRINTLN("Sector size %d incompatible "
- "with underlying device sector size %d",
- ssopt, sectsz);
- goto err;
- }
- }
-
- sectsz = ssopt;
- psectsz = pssopt;
- psectoff = 0;
- }
-
- bc = calloc(1, sizeof(struct blockif_ctxt));
- if (bc == NULL) {
- perror("calloc");
- goto err;
- }
-
- bc->bc_magic = BLOCKIF_SIG;
- bc->bc_fd = fd;
- bc->bc_ischr = S_ISCHR(sbuf.st_mode);
- bc->bc_isgeom = geom;
- bc->bc_candelete = candelete;
- bc->bc_rdonly = ro;
- bc->bc_size = size;
- bc->bc_sectsz = sectsz;
- bc->bc_psectsz = psectsz;
- bc->bc_psectoff = psectoff;
- pthread_mutex_init(&bc->bc_mtx, NULL);
- pthread_cond_init(&bc->bc_cond, NULL);
- TAILQ_INIT(&bc->bc_freeq);
- TAILQ_INIT(&bc->bc_pendq);
- TAILQ_INIT(&bc->bc_busyq);
- for (i = 0; i < BLOCKIF_MAXREQ; i++) {
- bc->bc_reqs[i].be_status = BST_FREE;
- TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
- }
-
- for (i = 0; i < BLOCKIF_NUMTHR; i++) {
- pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc);
- snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i);
- pthread_set_name_np(bc->bc_btid[i], tname);
- }
-
- return (bc);
-err:
- if (fd >= 0)
- close(fd);
- free(nopt);
- return (NULL);
+ }
+ return err;
}
-static int
-blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
- enum blockop op)
+off_t
+blockbe_size(block_backend_t *be)
{
- int err;
-
- err = 0;
-
- pthread_mutex_lock(&bc->bc_mtx);
- if (!TAILQ_EMPTY(&bc->bc_freeq)) {
- /*
- * Enqueue and inform the block i/o thread
- * that there is work available
- */
- if (blockif_enqueue(bc, breq, op))
- pthread_cond_signal(&bc->bc_cond);
- } else {
- /*
- * Callers are not allowed to enqueue more than
- * the specified blockif queue limit. Return an
- * error to indicate that the queue length has been
- * exceeded.
- */
- err = E2BIG;
- }
- pthread_mutex_unlock(&bc->bc_mtx);
-
- return (err);
+ assert(be != NULL);
+ return (be->size(be));
}
-int
-blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
+/*
+ * Forward to the backend's chs() hook, handing the c, h and s pointers
+ * through unchanged. be must not be NULL.
+ */
+void
+blockbe_chs(block_backend_t *be, uint16_t *c, uint8_t *h,
+ uint8_t *s)
{
-
- assert(bc->bc_magic == BLOCKIF_SIG);
- return (blockif_request(bc, breq, BOP_READ));
+ assert(be != NULL);
+ be->chs(be, c, h, s);
}
int
-blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
+blockbe_sectsz(block_backend_t *be)
{
-
- assert(bc->bc_magic == BLOCKIF_SIG);
- return (blockif_request(bc, breq, BOP_WRITE));
+ assert(be != NULL);
+ return (be->sectsz(be));
}
-int
-blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
+/*
+ * Forward to the backend's psectsz() hook, handing the size and off
+ * pointers through unchanged. be must not be NULL.
+ * The hook returns void, so its call must not be used as the operand of
+ * a return statement.
+ */
+void
+blockbe_psectsz(block_backend_t *be, int *size, int *off)
{
-
- assert(bc->bc_magic == BLOCKIF_SIG);
- return (blockif_request(bc, breq, BOP_FLUSH));
+ assert(be != NULL);
+ be->psectsz(be, size, off);
}
int
-blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
+blockbe_queuesz(block_backend_t *be)
{
-
- assert(bc->bc_magic == BLOCKIF_SIG);
- return (blockif_request(bc, breq, BOP_DELETE));
+ assert(be != NULL);
+ return (be->queuesz(be));
}
int
-blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
+blockbe_is_ro(block_backend_t *be)
{
- struct blockif_elem *be;
-
- assert(bc->bc_magic == BLOCKIF_SIG);
-
- pthread_mutex_lock(&bc->bc_mtx);
- /*
- * Check pending requests.
- */
- TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
- if (be->be_req == breq)
- break;
- }
- if (be != NULL) {
- /*
- * Found it.
- */
- blockif_complete(bc, be);
- pthread_mutex_unlock(&bc->bc_mtx);
-
- return (0);
- }
-
- /*
- * Check in-flight requests.
- */
- TAILQ_FOREACH(be, &bc->bc_busyq, be_link) {
- if (be->be_req == breq)
- break;
- }
- if (be == NULL) {
- /*
- * Didn't find it.
- */
- pthread_mutex_unlock(&bc->bc_mtx);
- return (EINVAL);
- }
-
- /*
- * Interrupt the processing thread to force it return
- * prematurely via it's normal callback path.
- */
- while (be->be_status == BST_BUSY) {
- struct blockif_sig_elem bse, *old_head;
-
- pthread_mutex_init(&bse.bse_mtx, NULL);
- pthread_cond_init(&bse.bse_cond, NULL);
-
- bse.bse_pending = 1;
-
- do {
- old_head = blockif_bse_head;
- bse.bse_next = old_head;
- } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
- (uintptr_t)old_head,
- (uintptr_t)&bse));
-
- pthread_kill(be->be_tid, SIGCONT);
-
- pthread_mutex_lock(&bse.bse_mtx);
- while (bse.bse_pending)
- pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx);
- pthread_mutex_unlock(&bse.bse_mtx);
- }
-
- pthread_mutex_unlock(&bc->bc_mtx);
-
- /*
- * The processing thread has been interrupted. Since it's not
- * clear if the callback has been invoked yet, return EBUSY.
- */
- return (EBUSY);
+ assert(be != NULL);
+ return (be->is_ro(be));
}
int
-blockif_close(struct blockif_ctxt *bc)
+blockbe_candelete(block_backend_t *be)
{
- void *jval;
- int i;
-
- assert(bc->bc_magic == BLOCKIF_SIG);
-
- /*
- * Stop the block i/o thread
- */
- pthread_mutex_lock(&bc->bc_mtx);
- bc->bc_closing = 1;
- pthread_mutex_unlock(&bc->bc_mtx);
- pthread_cond_broadcast(&bc->bc_cond);
- for (i = 0; i < BLOCKIF_NUMTHR; i++)
- pthread_join(bc->bc_btid[i], &jval);
-
- /* XXX Cancel queued i/o's ??? */
-
- /*
- * Release resources
- */
- bc->bc_magic = 0;
- close(bc->bc_fd);
- free(bc);
-
- return (0);
+ assert(be != NULL);
+ return (be->candelete(be));
}
-/*
- * Return virtual C/H/S values for a given block. Use the algorithm
- * outlined in the VHD specification to calculate values.
- */
-void
-blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
+int
+blockbe_read(block_backend_t *be, struct blockif_req *breq)
{
- off_t sectors; /* total sectors of the block dev */
- off_t hcyl; /* cylinders times heads */
- uint16_t secpt; /* sectors per track */
- uint8_t heads;
-
- assert(bc->bc_magic == BLOCKIF_SIG);
-
- sectors = bc->bc_size / bc->bc_sectsz;
-
- /* Clamp the size to the largest possible with CHS */
- if (sectors > 65535UL*16*255)
- sectors = 65535UL*16*255;
-
- if (sectors >= 65536UL*16*63) {
- secpt = 255;
- heads = 16;
- hcyl = sectors / secpt;
- } else {
- secpt = 17;
- hcyl = sectors / secpt;
- heads = (hcyl + 1023) / 1024;
-
- if (heads < 4)
- heads = 4;
-
- if (hcyl >= (heads * 1024) || heads > 16) {
- secpt = 31;
- heads = 16;
- hcyl = sectors / secpt;
- }
- if (hcyl >= (heads * 1024)) {
- secpt = 63;
- heads = 16;
- hcyl = sectors / secpt;
- }
- }
-
- *c = hcyl / heads;
- *h = heads;
- *s = secpt;
+ assert(be != NULL);
+ return (be->read(be, breq));
}
-/*
- * Accessors
- */
-off_t
-blockif_size(struct blockif_ctxt *bc)
+int
+blockbe_write(block_backend_t *be, struct blockif_req *breq)
{
-
- assert(bc->bc_magic == BLOCKIF_SIG);
- return (bc->bc_size);
+ assert(be != NULL);
+ return (be->write(be, breq));
}
int
-blockif_sectsz(struct blockif_ctxt *bc)
+blockbe_flush(block_backend_t *be, struct blockif_req *breq)
{
-
- assert(bc->bc_magic == BLOCKIF_SIG);
- return (bc->bc_sectsz);
+ assert(be != NULL);
+ return (be->flush(be, breq));
}
-void
-blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
-{
-
- assert(bc->bc_magic == BLOCKIF_SIG);
- *size = bc->bc_psectsz;
- *off = bc->bc_psectoff;
-}
-
int
-blockif_queuesz(struct blockif_ctxt *bc)
+blockbe_delete(block_backend_t *be, struct blockif_req *breq)
{
-
- assert(bc->bc_magic == BLOCKIF_SIG);
- return (BLOCKIF_MAXREQ - 1);
+ assert(be != NULL);
+ return (be->delete(be, breq));
}
int
-blockif_is_ro(struct blockif_ctxt *bc)
+blockbe_cancel(block_backend_t *be, struct blockif_req *breq)
{
-
- assert(bc->bc_magic == BLOCKIF_SIG);
- return (bc->bc_rdonly);
+ assert(be != NULL);
+ return (be->cancel(be, breq));
}
int
-blockif_candelete(struct blockif_ctxt *bc)
+blockbe_close(block_backend_t *be)
{
-
- assert(bc->bc_magic == BLOCKIF_SIG);
- return (bc->bc_candelete);
+ assert(be != NULL);
+ return (be->close(be));
}
Index: usr.sbin/bhyve/block_if.h
===================================================================
--- usr.sbin/bhyve/block_if.h
+++ usr.sbin/bhyve/block_if.h
@@ -38,41 +38,45 @@
#ifndef _BLOCK_IF_H_
#define _BLOCK_IF_H_
+#include <sys/queue.h>
#include <sys/uio.h>
#include <sys/unistd.h>
-/*
- * BLOCKIF_IOV_MAX is the maximum number of scatter/gather entries in
- * a single request. BLOCKIF_RING_MAX is the maxmimum number of
- * pending requests that can be queued.
- */
-#define BLOCKIF_IOV_MAX 128 /* not practical to be IOV_MAX */
-#define BLOCKIF_RING_MAX 128
+#include "block_backends.h"
-struct blockif_req {
- int br_iovcnt;
- off_t br_offset;
- ssize_t br_resid;
- void (*br_callback)(struct blockif_req *req, int err);
- void *br_param;
- struct iovec br_iov[BLOCKIF_IOV_MAX];
+struct locblk_elem {
+ TAILQ_ENTRY(locblk_elem) be_link;
+ struct blockif_req *be_req;
+ enum blockop be_op;
+ enum blockstat be_status;
+ pthread_t be_tid;
+ off_t be_block;
};
+typedef struct locblk_elem locblk_elem_t;
-struct blockif_ctxt;
-struct blockif_ctxt *blockif_open(const char *optstr, const char *ident);
-off_t blockif_size(struct blockif_ctxt *bc);
-void blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h,
- uint8_t *s);
-int blockif_sectsz(struct blockif_ctxt *bc);
-void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off);
-int blockif_queuesz(struct blockif_ctxt *bc);
-int blockif_is_ro(struct blockif_ctxt *bc);
-int blockif_candelete(struct blockif_ctxt *bc);
-int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq);
-int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq);
-int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq);
-int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq);
-int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq);
-int blockif_close(struct blockif_ctxt *bc);
+struct locblk_ctxt {
+ int bc_magic;
+ int bc_fd;
+ int bc_ischr;
+ int bc_isgeom;
+ int bc_candelete;
+ int bc_rdonly;
+ off_t bc_size;
+ int bc_sectsz;
+ int bc_psectsz;
+ int bc_psectoff;
+ int bc_closing;
+ pthread_t bc_btid[BLOCKIF_NUMTHR];
+ pthread_mutex_t bc_mtx;
+ pthread_cond_t bc_cond;
+ /* Request elements and free/pending/busy queues */
+ TAILQ_HEAD(, locblk_elem) bc_freeq;
+ TAILQ_HEAD(, locblk_elem) bc_pendq;
+ TAILQ_HEAD(, locblk_elem) bc_busyq;
+
+ /* Storage for the request elements. */
+ struct locblk_elem bc_reqs[BLOCKIF_MAXREQ];
+};
+typedef struct locblk_ctxt locblk_ctxt_t;
#endif /* _BLOCK_IF_H_ */
Index: usr.sbin/bhyve/block_if.c
===================================================================
--- usr.sbin/bhyve/block_if.c
+++ usr.sbin/bhyve/block_if.c
@@ -58,79 +58,31 @@
#include <machine/atomic.h>
+#include <sys/linker_set.h>
+
#include "bhyverun.h"
#include "debug.h"
#include "mevent.h"
#include "block_if.h"
-#define BLOCKIF_SIG 0xb109b109
+SET_DECLARE(block_backend_set, block_backend_t);
-#define BLOCKIF_NUMTHR 8
-#define BLOCKIF_MAXREQ (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR)
+static pthread_once_t locblk_once = PTHREAD_ONCE_INIT;
-enum blockop {
- BOP_READ,
- BOP_WRITE,
- BOP_FLUSH,
- BOP_DELETE
-};
-
-enum blockstat {
- BST_FREE,
- BST_BLOCK,
- BST_PEND,
- BST_BUSY,
- BST_DONE
-};
-
-struct blockif_elem {
- TAILQ_ENTRY(blockif_elem) be_link;
- struct blockif_req *be_req;
- enum blockop be_op;
- enum blockstat be_status;
- pthread_t be_tid;
- off_t be_block;
-};
-
-struct blockif_ctxt {
- int bc_magic;
- int bc_fd;
- int bc_ischr;
- int bc_isgeom;
- int bc_candelete;
- int bc_rdonly;
- off_t bc_size;
- int bc_sectsz;
- int bc_psectsz;
- int bc_psectoff;
- int bc_closing;
- pthread_t bc_btid[BLOCKIF_NUMTHR];
- pthread_mutex_t bc_mtx;
- pthread_cond_t bc_cond;
-
- /* Request elements and free/pending/busy queues */
- TAILQ_HEAD(, blockif_elem) bc_freeq;
- TAILQ_HEAD(, blockif_elem) bc_pendq;
- TAILQ_HEAD(, blockif_elem) bc_busyq;
- struct blockif_elem bc_reqs[BLOCKIF_MAXREQ];
-};
-
-static pthread_once_t blockif_once = PTHREAD_ONCE_INIT;
-
-struct blockif_sig_elem {
+struct locblk_sig_elem {
pthread_mutex_t bse_mtx;
pthread_cond_t bse_cond;
int bse_pending;
- struct blockif_sig_elem *bse_next;
+ struct locblk_sig_elem *bse_next;
};
-static struct blockif_sig_elem *blockif_bse_head;
+static struct locblk_sig_elem *locblk_bse_head;
static int
-blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
+locblk_enqueue(struct locblk_ctxt *bc, struct blockif_req *breq,
enum blockop op)
{
- struct blockif_elem *be, *tbe;
+ struct locblk_elem *be, *tbe;
off_t off;
int i;
@@ -171,9 +123,9 @@
}
static int
-blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep)
+locblk_dequeue(struct locblk_ctxt *bc, pthread_t t, struct locblk_elem **bep)
{
- struct blockif_elem *be;
+ struct locblk_elem *be;
TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
if (be->be_status == BST_PEND)
@@ -191,9 +143,9 @@
}
static void
-blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
+locblk_complete(struct locblk_ctxt *bc, struct locblk_elem *be)
{
- struct blockif_elem *tbe;
+ struct locblk_elem *tbe;
if (be->be_status == BST_DONE || be->be_status == BST_BUSY)
TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
@@ -210,7 +162,7 @@
}
static void
-blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
+locblk_proc(struct locblk_ctxt *bc, struct locblk_elem *be, uint8_t *buf)
{
struct blockif_req *br;
off_t arg[2];
@@ -331,10 +283,10 @@
}
static void *
-blockif_thr(void *arg)
+locblk_thr(void *arg)
{
- struct blockif_ctxt *bc;
- struct blockif_elem *be;
+ struct locblk_ctxt *bc;
+ struct locblk_elem *be;
pthread_t t;
uint8_t *buf;
@@ -347,11 +299,11 @@
pthread_mutex_lock(&bc->bc_mtx);
for (;;) {
- while (blockif_dequeue(bc, t, &be)) {
+ while (locblk_dequeue(bc, t, &be)) {
pthread_mutex_unlock(&bc->bc_mtx);
- blockif_proc(bc, be, buf);
+ locblk_proc(bc, be, buf);
pthread_mutex_lock(&bc->bc_mtx);
- blockif_complete(bc, be);
+ locblk_complete(bc, be);
}
/* Check ctxt status here to see if exit requested */
if (bc->bc_closing)
@@ -367,9 +319,9 @@
}
static void
-blockif_sigcont_handler(int signal, enum ev_type type, void *arg)
+locblk_sigcont_handler(int signal, enum ev_type type, void *arg)
{
- struct blockif_sig_elem *bse;
+ struct locblk_sig_elem *bse;
for (;;) {
/*
@@ -377,10 +329,10 @@
* this thread.
*/
do {
- bse = blockif_bse_head;
+ bse = locblk_bse_head;
if (bse == NULL)
return;
- } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
+ } while (!atomic_cmpset_ptr((uintptr_t *)&locblk_bse_head,
(uintptr_t)bse,
(uintptr_t)bse->bse_next));
@@ -391,20 +343,63 @@
}
}
+/*
+ * Submit a request of kind op on the local block context bc.
+ * Runs with bc_mtx held for the duration of the queue manipulation.
+ * Returns 0 when the request was queued, or E2BIG when no free request
+ * element is left.
+ */
+static int
+locblk_request(struct locblk_ctxt *bc, struct blockif_req *breq,
+ enum blockop op)
+{
+ int rc;
+
+ pthread_mutex_lock(&bc->bc_mtx);
+ if (TAILQ_EMPTY(&bc->bc_freeq)) {
+ /*
+ * Callers are not allowed to enqueue more than
+ * the specified blockif queue limit. Return an
+ * error to indicate that the queue length has been
+ * exceeded.
+ */
+ rc = E2BIG;
+ } else {
+ rc = 0;
+ /*
+ * Enqueue and inform the block i/o thread
+ * that there is work available
+ */
+ if (locblk_enqueue(bc, breq, op))
+ pthread_cond_signal(&bc->bc_cond);
+ }
+ pthread_mutex_unlock(&bc->bc_mtx);
+
+ return (rc);
+}
+
+/*
+ * Routines for accessing block devices on the local system,
+ * be it either a file or a block device.
+ */
static void
-blockif_init(void)
+locblk_init(void)
{
- mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL);
+ mevent_add(SIGCONT, EVF_SIGNAL, locblk_sigcont_handler, NULL);
(void) signal(SIGCONT, SIG_IGN);
}
-struct blockif_ctxt *
-blockif_open(const char *optstr, const char *ident)
+/* Cleanup hook of the blk-local backend; the 'be' argument is not used. */
+static void
+locblk_cleanup(block_backend_t **be)
+{
+ /* Intentionally empty: currently no cleanup required. */
+}
+
+static block_backend_t locblk_backend;
+
+static int
+locblk_open(block_backend_t **be, const char *optstr, const char *pci_ident,
+ const char *be_ident)
{
+
char tname[MAXCOMLEN + 1];
char name[MAXPATHLEN];
char *nopt, *xopts, *cp;
- struct blockif_ctxt *bc;
+ struct locblk_ctxt *bc;
struct stat sbuf;
struct diocgattr_arg arg;
off_t size, psectsz, psectoff;
@@ -415,7 +410,7 @@
cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE };
#endif
- pthread_once(&blockif_once, blockif_init);
+ pthread_once(&locblk_once, locblk_init);
fd = -1;
ssopt = 0;
@@ -454,6 +449,14 @@
if (sync)
extra |= O_SYNC;
+ /*
+ * device and option parsing completed correctly so set the
+ * backend return value to this backend.
+ * Anything that returns an error below is due to errors in
+ * handling of the device. But this is the correct backend.
+ */
+ *be = &locblk_backend;
+
fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra);
if (fd < 0 && !ro) {
/* Attempt a r/w fail with a r/o open */
@@ -462,14 +465,14 @@
}
if (fd < 0) {
- warn("Could not open backing file: %s", nopt);
+ warn("blk-local: Could not open backing file: %s", nopt);
goto err;
}
- if (fstat(fd, &sbuf) < 0) {
+ if (fstat(fd, &sbuf) < 0) {
warn("Could not stat backing file %s", nopt);
goto err;
- }
+ }
#ifndef WITHOUT_CAPSICUM
cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK,
@@ -481,10 +484,10 @@
errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif
- /*
+ /*
* Deal with raw devices
*/
- size = sbuf.st_size;
+ size = sbuf.st_size;
sectsz = DEV_BSIZE;
psectsz = psectoff = 0;
candelete = geom = 0;
@@ -541,11 +544,13 @@
psectoff = 0;
}
- bc = calloc(1, sizeof(struct blockif_ctxt));
+ bc = calloc(1, sizeof(struct locblk_ctxt));
if (bc == NULL) {
perror("calloc");
goto err;
}
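+ /* Store this context in the backend descriptor. NOTE(review): *be was set to the single static locblk_backend above, so each open appears to overwrite the same bc -- confirm. */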
+ (*be)->bc = (struct blockif_ctxt*)bc;
bc->bc_magic = BLOCKIF_SIG;
bc->bc_fd = fd;
@@ -568,101 +573,76 @@
}
for (i = 0; i < BLOCKIF_NUMTHR; i++) {
- pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc);
- snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i);
+ pthread_create(&bc->bc_btid[i], NULL, locblk_thr, bc);
+ snprintf(tname, sizeof(tname), "blk-%s-%d", pci_ident, i);
pthread_set_name_np(bc->bc_btid[i], tname);
}
- return (bc);
+ return (0);
err:
if (fd >= 0)
close(fd);
free(nopt);
- return (NULL);
+ return (-1);
}
-static int
-blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
- enum blockop op)
-{
- int err;
- err = 0;
-
- pthread_mutex_lock(&bc->bc_mtx);
- if (!TAILQ_EMPTY(&bc->bc_freeq)) {
- /*
- * Enqueue and inform the block i/o thread
- * that there is work available
- */
- if (blockif_enqueue(bc, breq, op))
- pthread_cond_signal(&bc->bc_cond);
- } else {
- /*
- * Callers are not allowed to enqueue more than
- * the specified blockif queue limit. Return an
- * error to indicate that the queue length has been
- * exceeded.
- */
- err = E2BIG;
- }
- pthread_mutex_unlock(&bc->bc_mtx);
-
- return (err);
-}
-
int
-blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
+locblk_read(block_backend_t *be, struct blockif_req *breq)
{
-
+ locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc);
assert(bc->bc_magic == BLOCKIF_SIG);
- return (blockif_request(bc, breq, BOP_READ));
+ return (locblk_request(bc, breq, BOP_READ));
}
int
-blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
+locblk_write(block_backend_t *be, struct blockif_req *breq)
{
+ locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc);
assert(bc->bc_magic == BLOCKIF_SIG);
- return (blockif_request(bc, breq, BOP_WRITE));
+ return (locblk_request(bc, breq, BOP_WRITE));
}
int
-blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
+locblk_flush(block_backend_t *be, struct blockif_req *breq)
{
+ locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc);
assert(bc->bc_magic == BLOCKIF_SIG);
- return (blockif_request(bc, breq, BOP_FLUSH));
+ return (locblk_request(bc, breq, BOP_FLUSH));
}
int
-blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
+locblk_delete(block_backend_t *be, struct blockif_req *breq)
{
+ locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc);
assert(bc->bc_magic == BLOCKIF_SIG);
- return (blockif_request(bc, breq, BOP_DELETE));
+ return (locblk_request(bc, breq, BOP_DELETE));
}
int
-blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
+locblk_cancel(block_backend_t *be, struct blockif_req *breq)
{
- struct blockif_elem *be;
+ struct locblk_elem *belem;
+ locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc);
assert(bc->bc_magic == BLOCKIF_SIG);
pthread_mutex_lock(&bc->bc_mtx);
/*
* Check pending requests.
*/
- TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
- if (be->be_req == breq)
+ TAILQ_FOREACH(belem, &bc->bc_pendq, be_link) {
+ if (belem->be_req == breq)
break;
}
- if (be != NULL) {
+ if (belem != NULL) {
/*
* Found it.
*/
- blockif_complete(bc, be);
+ locblk_complete(bc, belem);
pthread_mutex_unlock(&bc->bc_mtx);
return (0);
@@ -671,11 +651,11 @@
/*
* Check in-flight requests.
*/
- TAILQ_FOREACH(be, &bc->bc_busyq, be_link) {
- if (be->be_req == breq)
+ TAILQ_FOREACH(belem, &bc->bc_busyq, be_link) {
+ if (belem->be_req == breq)
break;
}
- if (be == NULL) {
+ if (belem == NULL) {
/*
* Didn't find it.
*/
@@ -687,8 +667,8 @@
* Interrupt the processing thread to force it return
* prematurely via it's normal callback path.
*/
- while (be->be_status == BST_BUSY) {
- struct blockif_sig_elem bse, *old_head;
+ while (belem->be_status == BST_BUSY) {
+ struct locblk_sig_elem bse, *old_head;
pthread_mutex_init(&bse.bse_mtx, NULL);
pthread_cond_init(&bse.bse_cond, NULL);
@@ -696,13 +676,13 @@
bse.bse_pending = 1;
do {
- old_head = blockif_bse_head;
+ old_head = locblk_bse_head;
bse.bse_next = old_head;
- } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
+ } while (!atomic_cmpset_ptr((uintptr_t *)&locblk_bse_head,
(uintptr_t)old_head,
(uintptr_t)&bse));
- pthread_kill(be->be_tid, SIGCONT);
+ pthread_kill(belem->be_tid, SIGCONT);
pthread_mutex_lock(&bse.bse_mtx);
while (bse.bse_pending)
@@ -720,11 +700,12 @@
}
int
-blockif_close(struct blockif_ctxt *bc)
+locblk_close(block_backend_t *be)
{
void *jval;
int i;
+ locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc);
assert(bc->bc_magic == BLOCKIF_SIG);
/*
@@ -754,13 +735,14 @@
* outlined in the VHD specification to calculate values.
*/
void
-blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
+locblk_chs(block_backend_t *be, uint16_t *c, uint8_t *h, uint8_t *s)
{
off_t sectors; /* total sectors of the block dev */
off_t hcyl; /* cylinders times heads */
uint16_t secpt; /* sectors per track */
uint8_t heads;
+ locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc);
assert(bc->bc_magic == BLOCKIF_SIG);
sectors = bc->bc_size / bc->bc_sectsz;
@@ -802,50 +784,73 @@
* Accessors
*/
off_t
-blockif_size(struct blockif_ctxt *bc)
+locblk_size(block_backend_t *be)
{
-
+ locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc);
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_size);
}
int
-blockif_sectsz(struct blockif_ctxt *bc)
+locblk_sectsz(block_backend_t *be)
{
-
+ locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc);
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_sectsz);
}
void
-blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
+locblk_psectsz(block_backend_t *be, int *size, int *off)
{
-
+ locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc);
assert(bc->bc_magic == BLOCKIF_SIG);
*size = bc->bc_psectsz;
*off = bc->bc_psectoff;
}
int
-blockif_queuesz(struct blockif_ctxt *bc)
+locblk_queuesz(block_backend_t *be)
{
-
+ locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc);
assert(bc->bc_magic == BLOCKIF_SIG);
return (BLOCKIF_MAXREQ - 1);
}
int
-blockif_is_ro(struct blockif_ctxt *bc)
+locblk_is_ro(block_backend_t *be)
{
-
+ locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc);
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_rdonly);
}
int
-blockif_candelete(struct blockif_ctxt *bc)
+locblk_candelete(block_backend_t *be)
{
-
+ locblk_ctxt_t *bc = (locblk_ctxt_t *)(be->bc);
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_candelete);
}
+
+static
+block_backend_t locblk_backend = {
+ .prefix = "blk-local",
+ .init = locblk_init,
+ .cleanup = locblk_cleanup,
+ .open = locblk_open,
+ .size = locblk_size,
+ .chs = locblk_chs,
+ .sectsz = locblk_sectsz,
+ .psectsz = locblk_psectsz,
+ .queuesz = locblk_queuesz,
+ .is_ro = locblk_is_ro,
+ .candelete = locblk_candelete,
+ .read = locblk_read,
+ .write = locblk_write,
+ .flush = locblk_flush,
+ .delete = locblk_delete,
+ .cancel = locblk_cancel,
+ .close = locblk_close,
+ .priv_size = 0,
+};
+DATA_SET(block_backend_set, locblk_backend);
Index: usr.sbin/bhyve/pci_ahci.c
===================================================================
--- usr.sbin/bhyve/pci_ahci.c
+++ usr.sbin/bhyve/pci_ahci.c
@@ -134,6 +134,7 @@
};
struct ahci_port {
+ block_backend_t *be;
struct blockif_ctxt *bctx;
struct pci_ahci_softc *pr_sc;
uint8_t *cmd_lst;
@@ -492,7 +493,7 @@
/*
* Try to cancel the outstanding blockif request.
*/
- error = blockif_cancel(p->bctx, &aior->io_req);
+ error = blockbe_cancel(p->be, &aior->io_req);
if (error != 0)
continue;
@@ -639,7 +640,7 @@
/* If we got limited by IOV length, round I/O down to sector size. */
if (j == BLOCKIF_IOV_MAX) {
- extra = todo % blockif_sectsz(p->bctx);
+ extra = todo % blockbe_sectsz(p->be);
todo -= extra;
assert(todo > 0);
while (extra > 0) {
@@ -712,8 +713,8 @@
if (!len)
len = 256;
}
- lba *= blockif_sectsz(p->bctx);
- len *= blockif_sectsz(p->bctx);
+ lba *= blockbe_sectsz(p->be);
+ len *= blockbe_sectsz(p->be);
/* Pull request off free list */
aior = STAILQ_FIRST(&p->iofhd);
@@ -738,9 +739,9 @@
ahci_write_fis_d2h_ncq(p, slot);
if (readop)
- err = blockif_read(p->bctx, breq);
+ err = blockbe_read(p->be, breq);
else
- err = blockif_write(p->bctx, breq);
+ err = blockbe_write(p->be, breq);
assert(err == 0);
}
@@ -774,7 +775,7 @@
*/
TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
- err = blockif_flush(p->bctx, breq);
+ err = blockbe_flush(p->be, breq);
assert(err == 0);
}
@@ -872,8 +873,8 @@
aior->more = (len != done);
breq = &aior->io_req;
- breq->br_offset = elba * blockif_sectsz(p->bctx);
- breq->br_resid = elen * blockif_sectsz(p->bctx);
+ breq->br_offset = elba * blockbe_sectsz(p->be);
+ breq->br_resid = elen * blockbe_sectsz(p->be);
/*
* Mark this command in-flight.
@@ -888,7 +889,7 @@
if (ncq && first)
ahci_write_fis_d2h_ncq(p, slot);
- err = blockif_delete(p->bctx, breq);
+ err = blockbe_delete(p->be, breq);
assert(err == 0);
}
@@ -957,7 +958,7 @@
memcpy(buf8, p->err_cfis, sizeof(p->err_cfis));
ahci_checksum(buf8, sizeof(buf));
} else if (cfis[4] == 0x13) { /* SATA NCQ Send and Receive Log */
- if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) {
+ if (blockbe_candelete(p->be) && !blockbe_is_ro(p->be)) {
buf[0x00] = 1; /* SFQ DSM supported */
buf[0x01] = 1; /* SFQ DSM TRIM supported */
}
@@ -989,12 +990,12 @@
uint16_t cyl;
uint8_t sech, heads;
- ro = blockif_is_ro(p->bctx);
- candelete = blockif_candelete(p->bctx);
- sectsz = blockif_sectsz(p->bctx);
- sectors = blockif_size(p->bctx) / sectsz;
- blockif_chs(p->bctx, &cyl, &heads, &sech);
- blockif_psectsz(p->bctx, &psectsz, &psectoff);
+ ro = blockbe_is_ro(p->be);
+ candelete = blockbe_candelete(p->be);
+ sectsz = blockbe_sectsz(p->be);
+ sectors = blockbe_size(p->be) / sectsz;
+ blockbe_chs(p->be, &cyl, &heads, &sech);
+ blockbe_psectsz(p->be, &psectsz, &psectoff);
memset(buf, 0, sizeof(buf));
buf[0] = 0x0040;
buf[1] = cyl;
@@ -1181,7 +1182,7 @@
uint8_t buf[8];
uint64_t sectors;
- sectors = blockif_size(p->bctx) / 2048;
+ sectors = blockbe_size(p->be) / 2048;
be32enc(buf, sectors - 1);
be32enc(buf + 4, 2048);
cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
@@ -1241,7 +1242,7 @@
*bp++ = 0x14;
*bp++ = 0xaa;
*bp++ = 0;
- sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
+ sectors = blockbe_size(p->be) / blockbe_sectsz(p->be);
sectors >>= 2;
if (msf) {
*bp++ = 0;
@@ -1317,7 +1318,7 @@
*bp++ = 0;
*bp++ = 0;
*bp++ = 0;
- sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
+ sectors = blockbe_size(p->be) / blockbe_sectsz(p->be);
sectors >>= 2;
if (msf) {
*bp++ = 0;
@@ -1430,7 +1431,7 @@
/* Stuff request onto busy list. */
TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
- err = blockif_read(p->bctx, breq);
+ err = blockbe_read(p->be, breq);
assert(err == 0);
}
@@ -2020,7 +2021,7 @@
struct ahci_ioreq *vr;
int i;
- pr->ioqsz = blockif_queuesz(pr->bctx);
+ pr->ioqsz = blockbe_queuesz(pr->be);
pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
STAILQ_INIT(&pr->iofhd);
@@ -2358,12 +2359,19 @@
*/
snprintf(bident, sizeof(bident), "%d:%d:%d", pi->pi_slot,
pi->pi_func, p);
- bctxt = blockif_open(opts, bident);
- if (bctxt == NULL) {
+
+ ret = blockbe_open(&(sc->port[p].be),opts, bident);
+ if (ret != 0) {
sc->ports = p;
ret = 1;
goto open_fail;
}
+
+ bctxt = calloc(1, sizeof(struct locblk_ctxt));
+ if (bctxt == NULL) {
+ perror("calloc");
+ goto open_fail;
+ }
sc->port[p].bctx = bctxt;
sc->port[p].pr_sc = sc;
sc->port[p].port = p;
@@ -2424,7 +2432,7 @@
if (ret) {
for (p = 0; p < sc->ports; p++) {
if (sc->port[p].bctx != NULL)
- blockif_close(sc->port[p].bctx);
+ blockbe_close(sc->port[p].be);
}
free(sc);
}
Index: usr.sbin/bhyve/pci_nvme.c
===================================================================
--- usr.sbin/bhyve/pci_nvme.c
+++ usr.sbin/bhyve/pci_nvme.c
@@ -169,6 +169,7 @@
};
struct pci_nvme_blockstore {
+ block_backend_t *be;
enum nvme_storage_type type;
void *ctx;
uint64_t size;
@@ -490,7 +491,7 @@
}
} else
sc->submit_queues = calloc(sc->num_squeues + 1,
- sizeof(struct nvme_submission_queue));
+ sizeof(struct nvme_submission_queue));
if (sc->compl_queues != NULL) {
for (int i = 0; i < sc->num_cqueues + 1; i++) {
@@ -505,7 +506,7 @@
}
} else {
sc->compl_queues = calloc(sc->num_cqueues + 1,
- sizeof(struct nvme_completion_queue));
+ sizeof(struct nvme_completion_queue));
for (int i = 0; i < sc->num_cqueues + 1; i++)
pthread_mutex_init(&sc->compl_queues[i].mtx, NULL);
@@ -530,7 +531,7 @@
asqs = (sc->regs.aqa & NVME_AQA_REG_ASQS_MASK) + 1;
sc->submit_queues[0].size = asqs;
sc->submit_queues[0].qbase = vm_map_gpa(ctx, sc->regs.asq,
- sizeof(struct nvme_command) * asqs);
+ sizeof(struct nvme_command) * asqs);
DPRINTF(("%s mapping Admin-SQ guest 0x%lx, host: %p",
__func__, sc->regs.asq, sc->submit_queues[0].qbase));
@@ -1171,10 +1172,10 @@
req->io_req.br_callback = pci_nvme_io_partial;
if (!do_write)
- err = blockif_read(sc->nvstore.ctx,
+ err = blockbe_read(sc->nvstore.be,
&req->io_req);
else
- err = blockif_write(sc->nvstore.ctx,
+ err = blockbe_write(sc->nvstore.be,
&req->io_req);
/* wait until req completes before cont */
@@ -1517,10 +1518,10 @@
err = 0;
switch (cmd->opc) {
case NVME_OPC_READ:
- err = blockif_read(sc->nvstore.ctx, &req->io_req);
+ err = blockbe_read(sc->nvstore.be, &req->io_req);
break;
case NVME_OPC_WRITE:
- err = blockif_write(sc->nvstore.ctx, &req->io_req);
+ err = blockbe_write(sc->nvstore.be, &req->io_req);
break;
default:
WPRINTF(("%s unhandled io command 0x%x",
@@ -1842,10 +1843,10 @@
static int
pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *opts)
{
- char bident[sizeof("XX:X:X")];
+ char pci_ident[sizeof("XX:X:X")];
char *uopt, *xopts, *config;
uint32_t sectsz;
- int optidx;
+ int optidx, res;
sc->max_queues = NVME_QUEUES;
sc->max_qentries = NVME_MAX_QENTRIES;
@@ -1897,16 +1898,21 @@
} else if (!strcmp("eui64", xopts)) {
sc->nvstore.eui64 = htobe64(strtoull(config, NULL, 0));
} else if (optidx == 0) {
- snprintf(bident, sizeof(bident), "%d:%d",
+ snprintf(pci_ident, sizeof(pci_ident), "%d:%d",
sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
- sc->nvstore.ctx = blockif_open(xopts, bident);
- if (sc->nvstore.ctx == NULL) {
+ res = blockbe_open(&(sc->nvstore.be), xopts, pci_ident);
+ if (res != 0) {
perror("Could not open backing file");
free(uopt);
return (-1);
}
+ sc->nvstore.ctx = calloc(1, sizeof(struct locblk_ctxt));
+ if ( sc->nvstore.ctx == NULL) {
+ perror("calloc");
+ return(-1);
+ }
sc->nvstore.type = NVME_STOR_BLOCKIF;
- sc->nvstore.size = blockif_size(sc->nvstore.ctx);
+ sc->nvstore.size = blockbe_size(sc->nvstore.be);
} else {
EPRINTLN("Invalid option %s", xopts);
free(uopt);
@@ -1924,7 +1930,7 @@
if (sectsz == 512 || sectsz == 4096 || sectsz == 8192)
sc->nvstore.sectsz = sectsz;
else if (sc->nvstore.type != NVME_STOR_RAM)
- sc->nvstore.sectsz = blockif_sectsz(sc->nvstore.ctx);
+ sc->nvstore.sectsz = blockbe_sectsz(sc->nvstore.be);
for (sc->nvstore.sectsz_bits = 9;
(1 << sc->nvstore.sectsz_bits) < sc->nvstore.sectsz;
sc->nvstore.sectsz_bits++);
Index: usr.sbin/bhyve/pci_virtio_block.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_block.c
+++ usr.sbin/bhyve/pci_virtio_block.c
@@ -123,7 +123,7 @@
/*
* Debug printf
*/
-static int pci_vtblk_debug;
+static int pci_vtblk_debug = 0 ;
#define DPRINTF(params) if (pci_vtblk_debug) PRINTLN params
#define WPRINTF(params) PRINTLN params
@@ -140,6 +140,7 @@
struct pci_vtblk_softc {
struct virtio_softc vbsc_vs;
pthread_mutex_t vsc_mtx;
+ block_backend_t *vbsc_be;
struct vqueue_info vbsc_vq;
struct vtblk_config vbsc_cfg;
struct blockif_ctxt *bc;
@@ -259,14 +260,14 @@
switch (type) {
case VBH_OP_READ:
- err = blockif_read(sc->bc, &io->io_req);
+ err = blockbe_read(sc->vbsc_be, &io->io_req);
break;
case VBH_OP_WRITE:
- err = blockif_write(sc->bc, &io->io_req);
+ err = blockbe_write(sc->vbsc_be, &io->io_req);
break;
case VBH_OP_FLUSH:
case VBH_OP_FLUSH_OUT:
- err = blockif_flush(sc->bc, &io->io_req);
+ err = blockbe_flush(sc->vbsc_be, &io->io_req);
break;
case VBH_OP_IDENT:
/* Assume a single buffer */
@@ -295,35 +296,36 @@
static int
pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
- char bident[sizeof("XX:X:X")];
- struct blockif_ctxt *bctxt;
+ char pci_ident[sizeof("XX:X:X")];
+ char be_ident[16];
MD5_CTX mdctx;
u_char digest[16];
struct pci_vtblk_softc *sc;
off_t size;
- int i, sectsz, sts, sto;
+ int i, res, sectsz, sts, sto;
if (opts == NULL) {
WPRINTF(("virtio-block: backing device required"));
return (1);
}
-
- /*
+ sc = calloc(1, sizeof(struct pci_vtblk_softc));
+
+ /*
* The supplied backing file has to exist
*/
- snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
- bctxt = blockif_open(opts, bident);
- if (bctxt == NULL) {
- perror("Could not open backing file");
+ snprintf(pci_ident, sizeof(pci_ident), "%d:%d", pi->pi_slot, pi->pi_func);
+ snprintf(be_ident, sizeof(be_ident), "%s", "blk-local");
+ res = blockbe_open(&sc->vbsc_be, opts, pci_ident);
+ if (res != 0) {
+ perror("virtio_block:_Could not open backing file");
+ free(sc);
return (1);
}
- size = blockif_size(bctxt);
- sectsz = blockif_sectsz(bctxt);
- blockif_psectsz(bctxt, &sts, &sto);
-
- sc = calloc(1, sizeof(struct pci_vtblk_softc));
- sc->bc = bctxt;
+ size = blockbe_size(sc->vbsc_be);
+ sectsz = blockbe_sectsz(sc->vbsc_be);
+ blockbe_psectsz(sc->vbsc_be, &sts, &sto);
+
for (i = 0; i < VTBLK_RINGSZ; i++) {
struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
io->io_req.br_callback = pci_vtblk_done;
@@ -388,7 +390,7 @@
pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
- blockif_close(sc->bc);
+ blockbe_close(sc->vbsc_be);
free(sc);
return (1);
}

File Metadata

Mime Type
text/plain
Expires
Tue, Feb 17, 2:06 AM (11 h, 56 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28797160
Default Alt Text
D23010.id67203.diff (53 KB)

Event Timeline