diff --git a/usr.sbin/bhyve/iov.h b/usr.sbin/bhyve/iov.h --- a/usr.sbin/bhyve/iov.h +++ b/usr.sbin/bhyve/iov.h @@ -31,12 +31,12 @@ #ifndef _IOV_H_ #define _IOV_H_ -void seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, - int *niov2, size_t seek); -void truncate_iov(struct iovec *iov, int *niov, size_t length); -size_t count_iov(const struct iovec *iov, int niov); -ssize_t iov_to_buf(const struct iovec *iov, int niov, void **buf); -ssize_t buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, - int niov, size_t seek); +#include <stdbool.h> + +struct iovec *split_iov(struct iovec *, int *, size_t, int *); +size_t count_iov(const struct iovec *, int); +bool check_iov_len(const struct iovec *, int, size_t); +ssize_t iov_to_buf(const struct iovec *, int, void **); +ssize_t buf_to_iov(const void *, size_t, const struct iovec *, int); #endif /* _IOV_H_ */ diff --git a/usr.sbin/bhyve/iov.c b/usr.sbin/bhyve/iov.c --- a/usr.sbin/bhyve/iov.c +++ b/usr.sbin/bhyve/iov.c @@ -32,38 +32,72 @@ #include #include +#include #include #include #include "iov.h" -void -seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, int *niov2, - size_t seek) +/* + * Given an array of iovecs iov, the number of valid iovecs niov, and an + * offset, truncate iov at offset. If necessary, split the final iovec, + * moving the remaining iovecs up by one in iov. Return a pointer to the + * iovec beginning at offset, and the total number of remaining iovecs. + * + * The caller must take care that iov contains enough space for at least + * niov+1 iovecs so the remainder of the iovec array may be moved up by one.
+ */ +struct iovec * +split_iov(struct iovec *iov, int *niov, size_t offset, int *niov_rem) { size_t remainder = 0; - size_t left = seek; - int i, j; - - for (i = 0; i < niov1; i++) { - size_t toseek = MIN(left, iov1[i].iov_len); - left -= toseek; + struct iovec *iov_rem; + int i; - if (toseek == iov1[i].iov_len) - continue; + /* Seek to the requested offset and truncate the final iovec. */ + for (i = 0; i < *niov && offset > iov[i].iov_len; i++) { + /* + * We're seeking past this iovec. Adjust the offset and move on. + */ + offset -= iov[i].iov_len; + } - if (left == 0) { - remainder = toseek; - break; - } + /* We've reached the end of the array without reaching the offset. */ + if (i == *niov) { + *niov_rem = 0; + return (NULL); } - for (j = i; j < niov1; j++) { - iov2[j - i].iov_base = (char *)iov1[j].iov_base + remainder; - iov2[j - i].iov_len = iov1[j].iov_len - remainder; - remainder = 0; + /* + * We found the iovec covering offset. Calculate the remainder, + * truncate at offset, and get iov_rem. + */ + remainder = iov[i].iov_len - offset; + iov[i].iov_len = offset; + *niov_rem = *niov - i - 1; + *niov = i + 1; + iov_rem = &iov[*niov]; + + /* + * Check for the (unlikely, ideally) case where there is a remainder + * from the final iovec before offset, and deal with it if necessary. + */ + if (remainder > 0) { + /* + * Make room for a new iovec covering the remainder by moving + * all following iovecs up. It is the caller's responsibility + * that there is enough spare space for this extra iovec. + */ + for (struct iovec *tmp = &iov_rem[*niov_rem]; + tmp != iov_rem; + tmp[0] = tmp[-1], tmp--); + + /* Fill in the first iovec covering the remainder. 
*/ + iov_rem[0].iov_len = remainder; + iov_rem[0].iov_base = (char *)iov[i].iov_base + offset; + (*niov_rem)++; } - *niov2 = j - i; + return (iov_rem); } size_t @@ -78,22 +112,19 @@ return (total); } -void -truncate_iov(struct iovec *iov, int *niov, size_t length) +bool +check_iov_len(const struct iovec *iov, int niov, size_t len) { - size_t done = 0; + size_t total = 0; int i; - for (i = 0; i < *niov; i++) { - size_t toseek = MIN(length - done, iov[i].iov_len); - done += toseek; - - if (toseek <= iov[i].iov_len) { - iov[i].iov_len = toseek; - *niov = i + 1; - return; - } + for (i = 0; i < niov; i++) { + total += iov[i].iov_len; + if (total >= len) + return (true); } + + return (false); } ssize_t @@ -116,31 +147,16 @@ } ssize_t -buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, int niov, - size_t seek) +buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, int niov) { - struct iovec *diov; size_t off = 0, len; int i; - if (seek > 0) { - int ndiov; - - diov = malloc(sizeof(struct iovec) * niov); - seek_iov(iov, niov, diov, &ndiov, seek); - iov = diov; - niov = ndiov; - } - for (i = 0; i < niov && off < buflen; i++) { len = MIN(iov[i].iov_len, buflen - off); memcpy(iov[i].iov_base, (const uint8_t *)buf + off, len); off += len; } - if (seek > 0) - free(diov); - return ((ssize_t)off); } - diff --git a/usr.sbin/bhyve/net_backends.c b/usr.sbin/bhyve/net_backends.c --- a/usr.sbin/bhyve/net_backends.c +++ b/usr.sbin/bhyve/net_backends.c @@ -197,7 +197,7 @@ * we read it from there. */ ret = buf_to_iov(priv->bbuf, priv->bbuflen, - iov, iovcnt, 0); + iov, iovcnt); /* Mark the bounce buffer as empty. 
*/ priv->bbuflen = 0; diff --git a/usr.sbin/bhyve/pci_virtio_scsi.c b/usr.sbin/bhyve/pci_virtio_scsi.c --- a/usr.sbin/bhyve/pci_virtio_scsi.c +++ b/usr.sbin/bhyve/pci_virtio_scsi.c @@ -120,9 +120,10 @@ struct pci_vtscsi_request { struct pci_vtscsi_queue * vsr_queue; - struct iovec vsr_iov_in[VTSCSI_MAXSEG]; + struct iovec vsr_iov[VTSCSI_MAXSEG + 2]; + struct iovec * vsr_iov_in; + struct iovec * vsr_iov_out; int vsr_niov_in; - struct iovec vsr_iov_out[VTSCSI_MAXSEG]; int vsr_niov_out; uint32_t vsr_idx; STAILQ_ENTRY(pci_vtscsi_request) vsr_link; @@ -230,10 +231,10 @@ static int pci_vtscsi_cfgread(void *, int, int, uint32_t *); static int pci_vtscsi_cfgwrite(void *, int, int, uint32_t); static inline int pci_vtscsi_get_lun(uint8_t *); -static int pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t); -static int pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *, +static void pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t); +static void pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *, struct pci_vtscsi_ctrl_tmf *); -static int pci_vtscsi_an_handle(struct pci_vtscsi_softc *, +static void pci_vtscsi_an_handle(struct pci_vtscsi_softc *, struct pci_vtscsi_ctrl_an *); static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *, struct iovec *, int, struct iovec *, int); @@ -353,7 +354,7 @@ return (((lun[2] << 8) | lun[3]) & 0x3fff); } -static int +static void pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf, size_t bufsize) { @@ -363,7 +364,7 @@ if (bufsize < sizeof(uint32_t)) { WPRINTF("ignoring truncated control request"); - return (0); + return; } type = *(uint32_t *)buf; @@ -371,25 +372,21 @@ if (type == VIRTIO_SCSI_T_TMF) { if (bufsize != sizeof(*tmf)) { WPRINTF("ignoring tmf request with size %zu", bufsize); - return (0); + return; } tmf = (struct pci_vtscsi_ctrl_tmf *)buf; - return (pci_vtscsi_tmf_handle(sc, tmf)); - } - - if (type == VIRTIO_SCSI_T_AN_QUERY) { + pci_vtscsi_tmf_handle(sc, tmf); + } else if (type == 
VIRTIO_SCSI_T_AN_QUERY) { if (bufsize != sizeof(*an)) { WPRINTF("ignoring AN request with size %zu", bufsize); - return (0); + return; } an = (struct pci_vtscsi_ctrl_an *)buf; - return (pci_vtscsi_an_handle(sc, an)); + pci_vtscsi_an_handle(sc, an); } - - return (0); } -static int +static void pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc, struct pci_vtscsi_ctrl_tmf *tmf) { @@ -454,14 +451,12 @@ tmf->response = io->taskio.task_status; ctl_scsi_free_io(io); - return (1); } -static int +static void pci_vtscsi_an_handle(struct pci_vtscsi_softc *sc __unused, struct pci_vtscsi_ctrl_an *an __unused) { - return (0); } static int @@ -471,29 +466,58 @@ struct pci_vtscsi_softc *sc = q->vsq_sc; struct pci_vtscsi_req_cmd_rd *cmd_rd = NULL; struct pci_vtscsi_req_cmd_wr *cmd_wr; - struct iovec data_iov_in[VTSCSI_MAXSEG], data_iov_out[VTSCSI_MAXSEG]; + struct iovec *data_iov_in, *data_iov_out; union ctl_io *io; int data_niov_in, data_niov_out; void *ext_data_ptr = NULL; uint32_t ext_data_len = 0, ext_sg_entries = 0; int err, nxferred; - if (count_iov(iov_out, niov_out) < VTSCSI_OUT_HEADER_LEN(sc)) { + /* + * Make sure we got at least enough space for the VirtIO-SCSI + * command headers. If not, return this request immediately. + */ + if (check_iov_len(iov_out, niov_out, + VTSCSI_OUT_HEADER_LEN(q->vsq_sc)) == false) { WPRINTF("ignoring request with insufficient output"); return (0); } - if (count_iov(iov_in, niov_in) < VTSCSI_IN_HEADER_LEN(sc)) { + + if (check_iov_len(iov_in, niov_in, + VTSCSI_IN_HEADER_LEN(q->vsq_sc)) == false) { WPRINTF("ignoring request with incomplete header"); return (0); } - seek_iov(iov_in, niov_in, data_iov_in, &data_niov_in, - VTSCSI_IN_HEADER_LEN(sc)); - seek_iov(iov_out, niov_out, data_iov_out, &data_niov_out, - VTSCSI_OUT_HEADER_LEN(sc)); + /* + * We have to split the iovec array in a header and data portion each + * for input and output. 
+ * + * We need to start with the output section (at the end of iov) in case + * the iovec covering the final part of the output header needs to be + * split, in which case split_iov() will move all remaining iovecs up + * by one to make room for a new iovec covering the first part of the + * output data portion. + */ + data_iov_out = split_iov(iov_out, &niov_out, + VTSCSI_OUT_HEADER_LEN(q->vsq_sc), &data_niov_out); + + /* + * Similarly, to not overwrite the first iovec of the output section, + * the 2nd call to split_iov() to split the input section must actually + * cover the entire iovec array (both input and the already split output + * sections). + */ + niov_in += niov_out + data_niov_out; + + data_iov_in = split_iov(iov_in, &niov_in, + VTSCSI_IN_HEADER_LEN(q->vsq_sc), &data_niov_in); + + /* + * And of course we now have to adjust data_niov_in accordingly. + */ + data_niov_in -= niov_out + data_niov_out; - truncate_iov(iov_in, &niov_in, VTSCSI_IN_HEADER_LEN(sc)); - truncate_iov(iov_out, &niov_out, VTSCSI_OUT_HEADER_LEN(sc)); iov_to_buf(iov_in, niov_in, (void **)&cmd_rd); cmd_wr = calloc(1, VTSCSI_OUT_HEADER_LEN(sc)); @@ -564,7 +588,7 @@ cmd_wr->sense_len); } - buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out, 0); + buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out); nxferred = VTSCSI_OUT_HEADER_LEN(sc) + io->scsiio.ext_data_filled; free(cmd_rd); free(cmd_wr); @@ -580,7 +604,7 @@ struct vi_req req; void *buf = NULL; size_t bufsize; - int iolen, n; + int n; sc = vsc; @@ -589,14 +613,13 @@ assert(n >= 1 && n <= VTSCSI_MAXSEG); bufsize = iov_to_buf(iov, n, &buf); - iolen = pci_vtscsi_control_handle(sc, buf, bufsize); - buf_to_iov((uint8_t *)buf + bufsize - iolen, iolen, iov, n, - bufsize - iolen); + pci_vtscsi_control_handle(sc, buf, bufsize); + buf_to_iov((uint8_t *)buf, bufsize, iov, n); /* * Release this chain and handle more */ - vq_relchain(vq, req.idx, iolen); + vq_relchain(vq, req.idx, bufsize); } vq_endchains(vq, 1); /*
Generate interrupt if appropriate. */ free(buf); @@ -614,7 +637,6 @@ struct pci_vtscsi_softc *sc; struct pci_vtscsi_queue *q; struct pci_vtscsi_request *req; - struct iovec iov[VTSCSI_MAXSEG]; struct vi_req vireq; int n; @@ -622,18 +644,17 @@ q = &sc->vss_queues[vq->vq_num - 2]; while (vq_has_descs(vq)) { - n = vq_getchain(vq, iov, VTSCSI_MAXSEG, &vireq); + req = calloc(1, sizeof(struct pci_vtscsi_request)); + + n = vq_getchain(vq, req->vsr_iov, VTSCSI_MAXSEG, &vireq); assert(n >= 1 && n <= VTSCSI_MAXSEG); - req = calloc(1, sizeof(struct pci_vtscsi_request)); req->vsr_idx = vireq.idx; req->vsr_queue = q; + req->vsr_iov_in = &req->vsr_iov[0]; req->vsr_niov_in = vireq.readable; + req->vsr_iov_out = &req->vsr_iov[vireq.readable]; req->vsr_niov_out = vireq.writable; - memcpy(req->vsr_iov_in, iov, - req->vsr_niov_in * sizeof(struct iovec)); - memcpy(req->vsr_iov_out, iov + vireq.readable, - req->vsr_niov_out * sizeof(struct iovec)); pthread_mutex_lock(&q->vsq_mtx); STAILQ_INSERT_TAIL(&q->vsq_requests, req, vsr_link);