diff --git a/usr.sbin/bhyve/iov.h b/usr.sbin/bhyve/iov.h --- a/usr.sbin/bhyve/iov.h +++ b/usr.sbin/bhyve/iov.h @@ -31,12 +31,12 @@ #ifndef _IOV_H_ #define _IOV_H_ -void seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, - int *niov2, size_t seek); -void truncate_iov(struct iovec *iov, int *niov, size_t length); -size_t count_iov(const struct iovec *iov, int niov); -ssize_t iov_to_buf(const struct iovec *iov, int niov, void **buf); -ssize_t buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, - int niov, size_t seek); +#include + +void split_iov(struct iovec *, int *, struct iovec **, int *, size_t); +size_t count_iov(const struct iovec *, int); +bool check_iov_len(const struct iovec *, int, size_t); +ssize_t iov_to_buf(const struct iovec *, int, void **); +ssize_t buf_to_iov(const void *, size_t, const struct iovec *, int); #endif /* _IOV_H_ */ diff --git a/usr.sbin/bhyve/iov.c b/usr.sbin/bhyve/iov.c --- a/usr.sbin/bhyve/iov.c +++ b/usr.sbin/bhyve/iov.c @@ -32,38 +32,67 @@ #include #include +#include #include #include #include "iov.h" +/* + * Given an array of iovecs iov1[niov1] and an offset, truncate iov1 at offset + * and return the address and count of the remaining iovecs in iov2 and niov2. + * If the offset lands before the end of an iovec, that iovec is split and iov2 + * gains one entry, so the array needs one spare slot after its last iovec. + */ void -seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, int *niov2, - size_t seek) +split_iov(struct iovec *iov1, int *niov1, struct iovec **iov2, int *niov2, + size_t offset) { size_t remainder = 0; - size_t left = seek; int i, j; - for (i = 0; i < niov1; i++) { - size_t toseek = MIN(left, iov1[i].iov_len); - left -= toseek; - - if (toseek == iov1[i].iov_len) + /* Seek to the requested offset and truncate the final iovec. */ + for (i = 0; i < *niov1; i++) { + if (offset > iov1[i].iov_len) { + /* + * We're seeking past this iovec. Adjust the offset and + * move on. 
+ */ + offset -= iov1[i].iov_len; continue; - - if (left == 0) { - remainder = toseek; - break; } - } - for (j = i; j < niov1; j++) { - iov2[j - i].iov_base = (char *)iov1[j].iov_base + remainder; - iov2[j - i].iov_len = iov1[j].iov_len - remainder; - remainder = 0; + /* + * We're seeking within this iovec. Calculate the remainder, + * truncate at offset, and update iov2, niov2, iov1, and niov1. + */ + remainder = iov1[i].iov_len - offset; + iov1[i].iov_len = offset; + + *iov2 = &iov1[i + 1]; /* iov2 starts after the final iov1 */ + *niov2 = *niov1 - i - 1; /* remaining iovecs in iov2 */ + *niov1 = i + 1; /* iovecs counted so far */ + break; } - *niov2 = j - i; + /* + * Check for the (unlikely, ideally) case where there is a remainder + * from the final iovec, and deal with it if necessary. + */ + if (remainder > 0) { + /* + * Make room for a new iovec covering the remainder by moving + * all following iovecs up. It is the caller's responsibility + * that there is enough spare space for this extra iovec. + */ + for (j = *niov2; j > 0; j--) + (*iov2)[j] = (*iov2)[j - 1]; + + /* Fill in the iovec covering the remainder. 
*/ + (*iov2)[0].iov_len = remainder; + (*iov2)[0].iov_base = + (char *)iov1[*niov1 - 1].iov_base + offset; + (*niov2)++; /* count the iovec inserted for the remainder */ + } } size_t @@ -78,22 +107,19 @@ return (total); } -void -truncate_iov(struct iovec *iov, int *niov, size_t length) +bool +check_iov_len(const struct iovec *iov, int niov, size_t len) { - size_t done = 0; + size_t total = 0; int i; - for (i = 0; i < *niov; i++) { - size_t toseek = MIN(length - done, iov[i].iov_len); - done += toseek; - - if (toseek <= iov[i].iov_len) { - iov[i].iov_len = toseek; - *niov = i + 1; - return; - } + for (i = 0; i < niov; i++) { + total += iov[i].iov_len; + if (total >= len) + return (true); } + + return (false); } ssize_t @@ -116,31 +142,16 @@ } ssize_t -buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, int niov, - size_t seek) +buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, int niov) { - struct iovec *diov; size_t off = 0, len; int i; - if (seek > 0) { - int ndiov; - - diov = malloc(sizeof(struct iovec) * niov); - seek_iov(iov, niov, diov, &ndiov, seek); - iov = diov; - niov = ndiov; - } - for (i = 0; i < niov && off < buflen; i++) { len = MIN(iov[i].iov_len, buflen - off); memcpy(iov[i].iov_base, (const uint8_t *)buf + off, len); off += len; } - if (seek > 0) - free(diov); - return ((ssize_t)off); } - diff --git a/usr.sbin/bhyve/net_backends.c b/usr.sbin/bhyve/net_backends.c --- a/usr.sbin/bhyve/net_backends.c +++ b/usr.sbin/bhyve/net_backends.c @@ -197,7 +197,7 @@ * we read it from there. */ ret = buf_to_iov(priv->bbuf, priv->bbuflen, - iov, iovcnt, 0); + iov, iovcnt); /* Mark the bounce buffer as empty. 
*/ priv->bbuflen = 0; diff --git a/usr.sbin/bhyve/pci_virtio_scsi.c b/usr.sbin/bhyve/pci_virtio_scsi.c --- a/usr.sbin/bhyve/pci_virtio_scsi.c +++ b/usr.sbin/bhyve/pci_virtio_scsi.c @@ -120,9 +120,10 @@ struct pci_vtscsi_request { struct pci_vtscsi_queue * vsr_queue; - struct iovec vsr_iov_in[VTSCSI_MAXSEG]; + struct iovec vsr_iov[VTSCSI_MAXSEG + 2]; + struct iovec * vsr_iov_in; + struct iovec * vsr_iov_out; int vsr_niov_in; - struct iovec vsr_iov_out[VTSCSI_MAXSEG]; int vsr_niov_out; uint32_t vsr_idx; STAILQ_ENTRY(pci_vtscsi_request) vsr_link; @@ -230,10 +231,10 @@ static int pci_vtscsi_cfgread(void *, int, int, uint32_t *); static int pci_vtscsi_cfgwrite(void *, int, int, uint32_t); static inline int pci_vtscsi_get_lun(uint8_t *); -static int pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t); -static int pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *, +static void pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t); +static void pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *, struct pci_vtscsi_ctrl_tmf *); -static int pci_vtscsi_an_handle(struct pci_vtscsi_softc *, +static void pci_vtscsi_an_handle(struct pci_vtscsi_softc *, struct pci_vtscsi_ctrl_an *); static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *, struct iovec *, int, struct iovec *, int); @@ -353,7 +354,7 @@ return (((lun[2] << 8) | lun[3]) & 0x3fff); } -static int +static void pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf, size_t bufsize) { @@ -363,7 +364,7 @@ if (bufsize < sizeof(uint32_t)) { WPRINTF("ignoring truncated control request"); - return (0); + return; } type = *(uint32_t *)buf; @@ -371,25 +372,21 @@ if (type == VIRTIO_SCSI_T_TMF) { if (bufsize != sizeof(*tmf)) { WPRINTF("ignoring tmf request with size %zu", bufsize); - return (0); + return; } tmf = (struct pci_vtscsi_ctrl_tmf *)buf; - return (pci_vtscsi_tmf_handle(sc, tmf)); - } - - if (type == VIRTIO_SCSI_T_AN_QUERY) { + pci_vtscsi_tmf_handle(sc, tmf); + } else if (type == 
VIRTIO_SCSI_T_AN_QUERY) { if (bufsize != sizeof(*an)) { WPRINTF("ignoring AN request with size %zu", bufsize); - return (0); + return; } an = (struct pci_vtscsi_ctrl_an *)buf; - return (pci_vtscsi_an_handle(sc, an)); + pci_vtscsi_an_handle(sc, an); } - - return (0); } -static int +static void pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc, struct pci_vtscsi_ctrl_tmf *tmf) { @@ -454,14 +451,12 @@ tmf->response = io->taskio.task_status; ctl_scsi_free_io(io); - return (1); } -static int +static void pci_vtscsi_an_handle(struct pci_vtscsi_softc *sc __unused, struct pci_vtscsi_ctrl_an *an __unused) { - return (0); } static int @@ -471,29 +466,58 @@ struct pci_vtscsi_softc *sc = q->vsq_sc; struct pci_vtscsi_req_cmd_rd *cmd_rd = NULL; struct pci_vtscsi_req_cmd_wr *cmd_wr; - struct iovec data_iov_in[VTSCSI_MAXSEG], data_iov_out[VTSCSI_MAXSEG]; + struct iovec *data_iov_in, *data_iov_out; union ctl_io *io; int data_niov_in, data_niov_out; void *ext_data_ptr = NULL; uint32_t ext_data_len = 0, ext_sg_entries = 0; int err, nxferred; - if (count_iov(iov_out, niov_out) < VTSCSI_OUT_HEADER_LEN(sc)) { + /* + * Make sure we got at least enough space for the VirtIO-SCSI + * command headers. If not, return this request immediately. + */ + if (check_iov_len(iov_out, niov_out, + VTSCSI_OUT_HEADER_LEN(q->vsq_sc)) == false) { WPRINTF("ignoring request with insufficient output"); return (0); } - if (count_iov(iov_in, niov_in) < VTSCSI_IN_HEADER_LEN(sc)) { + + if (check_iov_len(iov_in, niov_in, + VTSCSI_IN_HEADER_LEN(q->vsq_sc)) == false) { WPRINTF("ignoring request with incomplete header"); return (0); } - seek_iov(iov_in, niov_in, data_iov_in, &data_niov_in, - VTSCSI_IN_HEADER_LEN(sc)); - seek_iov(iov_out, niov_out, data_iov_out, &data_niov_out, - VTSCSI_OUT_HEADER_LEN(sc)); + /* + * We have to split the iovec array in a header and data portion each + * for input and output. 
+ * + * We need to start with the output section (at the end) in case the + * iovec covering the final part of the output header needs splitting, + * in which case split_iov() will move all remaining iovecs up by one + * to make room for a new iovec covering the first part of the output + * data portion. + */ + split_iov(iov_out, &niov_out, &data_iov_out, &data_niov_out, + VTSCSI_OUT_HEADER_LEN(q->vsq_sc)); + + /* + * Similarly, to not overwrite the first iovec of the output section, + * the 2nd call to split_iov() to split the input section must actually + * cover the entire iovec array (both input and the already split output + * sections). + */ + niov_in += niov_out + data_niov_out; + + split_iov(iov_in, &niov_in, &data_iov_in, &data_niov_in, + VTSCSI_IN_HEADER_LEN(q->vsq_sc)); + + /* + * And of course we now have to adjust data_niov_in accordingly. + */ + data_niov_in -= niov_out + data_niov_out; - truncate_iov(iov_in, &niov_in, VTSCSI_IN_HEADER_LEN(sc)); - truncate_iov(iov_out, &niov_out, VTSCSI_OUT_HEADER_LEN(sc)); iov_to_buf(iov_in, niov_in, (void **)&cmd_rd); cmd_wr = calloc(1, VTSCSI_OUT_HEADER_LEN(sc)); @@ -564,7 +588,7 @@ cmd_wr->sense_len); } - buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out, 0); + buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out); nxferred = VTSCSI_OUT_HEADER_LEN(sc) + io->scsiio.ext_data_filled; free(cmd_rd); free(cmd_wr); @@ -580,7 +604,7 @@ struct vi_req req; void *buf = NULL; size_t bufsize; - int iolen, n; + int n; sc = vsc; @@ -589,14 +613,13 @@ assert(n >= 1 && n <= VTSCSI_MAXSEG); bufsize = iov_to_buf(iov, n, &buf); - iolen = pci_vtscsi_control_handle(sc, buf, bufsize); - buf_to_iov((uint8_t *)buf + bufsize - iolen, iolen, iov, n, - bufsize - iolen); + pci_vtscsi_control_handle(sc, buf, bufsize); + buf_to_iov((uint8_t *)buf, bufsize, iov, n); /* * Release this chain and handle more */ - vq_relchain(vq, req.idx, iolen); + vq_relchain(vq, req.idx, bufsize); } vq_endchains(vq, 1); /* Generate 
interrupt if appropriate. */ free(buf); @@ -614,7 +637,6 @@ struct pci_vtscsi_softc *sc; struct pci_vtscsi_queue *q; struct pci_vtscsi_request *req; - struct iovec iov[VTSCSI_MAXSEG]; struct vi_req vireq; int n; @@ -622,18 +644,17 @@ q = &sc->vss_queues[vq->vq_num - 2]; while (vq_has_descs(vq)) { - n = vq_getchain(vq, iov, VTSCSI_MAXSEG, &vireq); + req = calloc(1, sizeof(struct pci_vtscsi_request)); + + n = vq_getchain(vq, req->vsr_iov, VTSCSI_MAXSEG, &vireq); assert(n >= 1 && n <= VTSCSI_MAXSEG); - req = calloc(1, sizeof(struct pci_vtscsi_request)); req->vsr_idx = vireq.idx; req->vsr_queue = q; + req->vsr_iov_in = &req->vsr_iov[0]; req->vsr_niov_in = vireq.readable; + req->vsr_iov_out = &req->vsr_iov[vireq.readable]; req->vsr_niov_out = vireq.writable; - memcpy(req->vsr_iov_in, iov, - req->vsr_niov_in * sizeof(struct iovec)); - memcpy(req->vsr_iov_out, iov + vireq.readable, - req->vsr_niov_out * sizeof(struct iovec)); pthread_mutex_lock(&q->vsq_mtx); STAILQ_INSERT_TAIL(&q->vsq_requests, req, vsr_link);