diff --git a/usr.sbin/bhyve/iov.h b/usr.sbin/bhyve/iov.h --- a/usr.sbin/bhyve/iov.h +++ b/usr.sbin/bhyve/iov.h @@ -3,6 +3,7 @@ * * Copyright (c) 2016 Jakub Klama . * Copyright (c) 2018 Alexander Motin + * Copyright (c) 2026 Hans Rosenfeld * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,12 +32,15 @@ #ifndef _IOV_H_ #define _IOV_H_ -void seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, - int *niov2, size_t seek); -void truncate_iov(struct iovec *iov, int *niov, size_t length); -size_t count_iov(const struct iovec *iov, int niov); -ssize_t iov_to_buf(const struct iovec *iov, int niov, void **buf); -ssize_t buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, - int niov, size_t seek); +#include + +/* Number of additional iovecs required for split_iov() */ +#define SPLIT_IOV_ADDL_IOV 2 + +struct iovec *split_iov(struct iovec *, size_t *, size_t, size_t *); +size_t count_iov(const struct iovec *, size_t); +bool check_iov_len(const struct iovec *, size_t, size_t); +ssize_t iov_to_buf(const struct iovec *, size_t, void **); +ssize_t buf_to_iov(const void *, size_t, const struct iovec *, size_t); #endif /* _IOV_H_ */ diff --git a/usr.sbin/bhyve/iov.c b/usr.sbin/bhyve/iov.c --- a/usr.sbin/bhyve/iov.c +++ b/usr.sbin/bhyve/iov.c @@ -3,6 +3,7 @@ * * Copyright (c) 2016 Jakub Klama . * Copyright (c) 2018 Alexander Motin + * Copyright (c) 2026 Hans Rosenfeld * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,70 +33,88 @@ #include #include +#include +#include +#include #include #include #include "iov.h" -void -seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, int *niov2, - size_t seek) -{ - size_t remainder = 0; - size_t left = seek; - int i, j; - - for (i = 0; i < niov1; i++) { - size_t toseek = MIN(left, iov1[i].iov_len); - left -= toseek; - if (toseek == iov1[i].iov_len) - continue; +/* + * Given an iovec and an offset, split the iovec into two at the offset and + * return a pointer to the beginning of the second iovec. + * + * The caller is responsible for providing an extra iovec in the array for the + * split. That is, there should be space for *niov1 + 1 iovecs in the array. + */ +struct iovec * +split_iov(struct iovec *iov, size_t *niov1, size_t offset, size_t *niov2) +{ + size_t count, resid; - if (left == 0) { - remainder = toseek; + /* Find the iovec entry that contains the offset. */ + resid = offset; + for (count = 0; count < *niov1; count++) { + if (resid < iov[count].iov_len) break; - } + resid -= iov[count].iov_len; } - for (j = i; j < niov1; j++) { - iov2[j - i].iov_base = (char *)iov1[j].iov_base + remainder; - iov2[j - i].iov_len = iov1[j].iov_len - remainder; - remainder = 0; + if (resid == 0 || count == *niov1) { + /* Easy case, we don't have to split an iovec entry. */ + *niov2 = *niov1 - count; + *niov1 = count; + if (*niov2 == 0) + return (NULL); + return (&iov[count]); } - *niov2 = j - i; + /* The entry iov[count] needs to be split. */ + *niov1 = count + 1; + *niov2 = *niov1 - count; + memmove(&iov[count + 1], &iov[count], sizeof(struct iovec) * (*niov2)); + iov[count].iov_len = resid; + iov[count + 1].iov_base = (char *)iov[count].iov_base + resid; + iov[count + 1].iov_len -= resid; + return (&iov[count + 1]); } size_t -count_iov(const struct iovec *iov, int niov) +count_iov(const struct iovec *iov, size_t niov) { size_t total = 0; - int i; + size_t i; - for (i = 0; i < niov; i++) + for (i = 0; i < niov; i++) { + assert(total <= SIZE_MAX - iov[i].iov_len); total += iov[i].iov_len; + } return (total); } -void -truncate_iov(struct iovec *iov, int *niov, size_t length) +bool +check_iov_len(const struct iovec *iov, size_t niov, size_t len) { - int i; + size_t total = 0; + size_t i; - for (i = 0; i < *niov && length > 0; i++) { - if (length < iov[i].iov_len) - iov[i].iov_len = length; - length -= iov[i].iov_len; + for (i = 0; i < niov; i++) { + assert(total <= SIZE_MAX - iov[i].iov_len); + total += iov[i].iov_len; + if (total >= len) + return (true); } - *niov = i; + + return (false); } ssize_t -iov_to_buf(const struct iovec *iov, int niov, void **buf) +iov_to_buf(const struct iovec *iov, size_t niov, void **buf) { size_t ptr, total; - int i; + size_t i; total = count_iov(iov, niov); *buf = realloc(*buf, total); @@ -111,21 +130,10 @@ } ssize_t -buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, int niov, - size_t seek) +buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, size_t niov) { - struct iovec *diov; size_t off = 0, len; - int i; - - if (seek > 0) { - int ndiov; - - diov = malloc(sizeof(struct iovec) * niov); - seek_iov(iov, niov, diov, &ndiov, seek); - iov = diov; - niov = ndiov; - } + size_t i; for (i = 0; i < niov && off < buflen; i++) { len = MIN(iov[i].iov_len, buflen - off); @@ -133,9 +141,5 @@ off += len; } - if (seek > 0) - free(diov); - return ((ssize_t)off); } - diff --git a/usr.sbin/bhyve/net_backends.c b/usr.sbin/bhyve/net_backends.c --- a/usr.sbin/bhyve/net_backends.c +++ b/usr.sbin/bhyve/net_backends.c @@ -198,7 +198,7 @@ * we read it from there. */ ret = buf_to_iov(priv->bbuf, priv->bbuflen, - iov, iovcnt, 0); + iov, iovcnt); /* Mark the bounce buffer as empty. */ priv->bbuflen = 0; diff --git a/usr.sbin/bhyve/pci_virtio_scsi.c b/usr.sbin/bhyve/pci_virtio_scsi.c --- a/usr.sbin/bhyve/pci_virtio_scsi.c +++ b/usr.sbin/bhyve/pci_virtio_scsi.c @@ -3,6 +3,7 @@ * * Copyright (c) 2016 Jakub Klama . * Copyright (c) 2018 Marcelo Araujo . + * Copyright (c) 2026 Hans Rosenfeld * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -120,10 +121,11 @@ struct pci_vtscsi_request { struct pci_vtscsi_queue * vsr_queue; - struct iovec vsr_iov_in[VTSCSI_MAXSEG]; - int vsr_niov_in; - struct iovec vsr_iov_out[VTSCSI_MAXSEG]; - int vsr_niov_out; + struct iovec vsr_iov[VTSCSI_MAXSEG + SPLIT_IOV_ADDL_IOV]; + struct iovec * vsr_iov_in; + struct iovec * vsr_iov_out; + size_t vsr_niov_in; + size_t vsr_niov_out; uint32_t vsr_idx; STAILQ_ENTRY(pci_vtscsi_request) vsr_link; }; @@ -230,13 +232,13 @@ static int pci_vtscsi_cfgread(void *, int, int, uint32_t *); static int pci_vtscsi_cfgwrite(void *, int, int, uint32_t); static inline int pci_vtscsi_get_lun(uint8_t *); -static int pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t); -static int pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *, +static void pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t); +static void pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *, struct pci_vtscsi_ctrl_tmf *); -static int pci_vtscsi_an_handle(struct pci_vtscsi_softc *, +static void pci_vtscsi_an_handle(struct pci_vtscsi_softc *, struct pci_vtscsi_ctrl_an *); static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *, struct iovec *, - int, struct iovec *, int); + size_t, struct iovec *, size_t); static void pci_vtscsi_controlq_notify(void *, struct vqueue_info *); static void pci_vtscsi_eventq_notify(void *, struct vqueue_info *); static void pci_vtscsi_requestq_notify(void *, struct vqueue_info *); @@ -353,7 +355,7 @@ return (((lun[2] << 8) | lun[3]) & 0x3fff); } -static int +static void pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf, size_t bufsize) { @@ -363,7 +365,7 @@ if (bufsize < sizeof(uint32_t)) { WPRINTF("ignoring truncated control request"); - return (0); + return; } type = *(uint32_t *)buf; @@ -371,25 +373,21 @@ if (type == VIRTIO_SCSI_T_TMF) { if (bufsize != sizeof(*tmf)) { WPRINTF("ignoring tmf request with size %zu", bufsize); - return (0); + return; } tmf = (struct pci_vtscsi_ctrl_tmf *)buf; - return (pci_vtscsi_tmf_handle(sc, tmf)); - } - - if (type == VIRTIO_SCSI_T_AN_QUERY) { + pci_vtscsi_tmf_handle(sc, tmf); + } else if (type == VIRTIO_SCSI_T_AN_QUERY) { if (bufsize != sizeof(*an)) { WPRINTF("ignoring AN request with size %zu", bufsize); - return (0); + return; } an = (struct pci_vtscsi_ctrl_an *)buf; - return (pci_vtscsi_an_handle(sc, an)); + pci_vtscsi_an_handle(sc, an); } - - return (0); } -static int +static void pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc, struct pci_vtscsi_ctrl_tmf *tmf) { @@ -454,46 +452,73 @@ tmf->response = io->taskio.task_status; ctl_scsi_free_io(io); - return (1); } -static int +static void pci_vtscsi_an_handle(struct pci_vtscsi_softc *sc __unused, struct pci_vtscsi_ctrl_an *an __unused) { - return (0); } static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, - int niov_in, struct iovec *iov_out, int niov_out) + size_t niov_in, struct iovec *iov_out, size_t niov_out) { struct pci_vtscsi_softc *sc = q->vsq_sc; struct pci_vtscsi_req_cmd_rd *cmd_rd = NULL; struct pci_vtscsi_req_cmd_wr *cmd_wr; - struct iovec data_iov_in[VTSCSI_MAXSEG], data_iov_out[VTSCSI_MAXSEG]; + struct iovec *data_iov_in, *data_iov_out; union ctl_io *io; - int data_niov_in, data_niov_out; + size_t data_niov_in, data_niov_out; void *ext_data_ptr = NULL; uint32_t ext_data_len = 0, ext_sg_entries = 0; int err, nxferred; - if (count_iov(iov_out, niov_out) < VTSCSI_OUT_HEADER_LEN(sc)) { + /* + * Make sure we got at least enough space for the VirtIO-SCSI + * command headers. If not, return this request immediately. + */ + if (check_iov_len(iov_out, niov_out, + VTSCSI_OUT_HEADER_LEN(q->vsq_sc)) == false) { WPRINTF("ignoring request with insufficient output"); return (0); } - if (count_iov(iov_in, niov_in) < VTSCSI_IN_HEADER_LEN(sc)) { + + if (check_iov_len(iov_in, niov_in, + VTSCSI_IN_HEADER_LEN(q->vsq_sc)) == false) { WPRINTF("ignoring request with incomplete header"); return (0); } - seek_iov(iov_in, niov_in, data_iov_in, &data_niov_in, - VTSCSI_IN_HEADER_LEN(sc)); - seek_iov(iov_out, niov_out, data_iov_out, &data_niov_out, - VTSCSI_OUT_HEADER_LEN(sc)); + /* + * We have to split the iovec array into a header and data portion each + * for input and output. + * + * We need to start with the output section (at the end of iov) in case + * the iovec covering the final part of the output header needs to be + * split, in which case split_iov() will move all reamaining iovecs up + * by one to make room for a new iovec covering the first part of the + * output data portion. + */ + data_iov_out = split_iov(iov_out, &niov_out, + VTSCSI_OUT_HEADER_LEN(q->vsq_sc), &data_niov_out); + + /* + * Similarly, to not overwrite the first iovec of the output section, + * the 2nd call to split_iov() to split the input section must actually + * cover the entire iovec array (both input and the already split output + * sections). + */ + niov_in += niov_out + data_niov_out; + + data_iov_in = split_iov(iov_in, &niov_in, + VTSCSI_IN_HEADER_LEN(q->vsq_sc), &data_niov_in); + + /* + * And of course we now have to adjust data_niov_in accordingly. + */ + data_niov_in -= niov_out + data_niov_out; - truncate_iov(iov_in, &niov_in, VTSCSI_IN_HEADER_LEN(sc)); - truncate_iov(iov_out, &niov_out, VTSCSI_OUT_HEADER_LEN(sc)); iov_to_buf(iov_in, niov_in, (void **)&cmd_rd); cmd_wr = calloc(1, VTSCSI_OUT_HEADER_LEN(sc)); @@ -564,7 +589,7 @@ cmd_wr->sense_len); } - buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out, 0); + buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out); nxferred = VTSCSI_OUT_HEADER_LEN(sc) + io->scsiio.ext_data_filled; free(cmd_rd); free(cmd_wr); @@ -580,7 +605,7 @@ struct vi_req req; void *buf = NULL; size_t bufsize; - int iolen, n; + int n; sc = vsc; @@ -589,14 +614,13 @@ assert(n >= 1 && n <= VTSCSI_MAXSEG); bufsize = iov_to_buf(iov, n, &buf); - iolen = pci_vtscsi_control_handle(sc, buf, bufsize); - buf_to_iov((uint8_t *)buf + bufsize - iolen, iolen, iov, n, - bufsize - iolen); + pci_vtscsi_control_handle(sc, buf, bufsize); + buf_to_iov((uint8_t *)buf, bufsize, iov, n); /* * Release this chain and handle more */ - vq_relchain(vq, req.idx, iolen); + vq_relchain(vq, req.idx, bufsize); } vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ free(buf); @@ -614,7 +638,6 @@ struct pci_vtscsi_softc *sc; struct pci_vtscsi_queue *q; struct pci_vtscsi_request *req; - struct iovec iov[VTSCSI_MAXSEG]; struct vi_req vireq; int n; @@ -622,18 +645,17 @@ q = &sc->vss_queues[vq->vq_num - 2]; while (vq_has_descs(vq)) { - n = vq_getchain(vq, iov, VTSCSI_MAXSEG, &vireq); + req = calloc(1, sizeof(struct pci_vtscsi_request)); + + n = vq_getchain(vq, req->vsr_iov, VTSCSI_MAXSEG, &vireq); assert(n >= 1 && n <= VTSCSI_MAXSEG); - req = calloc(1, sizeof(struct pci_vtscsi_request)); req->vsr_idx = vireq.idx; req->vsr_queue = q; + req->vsr_iov_in = &req->vsr_iov[0]; req->vsr_niov_in = vireq.readable; + req->vsr_iov_out = &req->vsr_iov[vireq.readable]; req->vsr_niov_out = vireq.writable; - memcpy(req->vsr_iov_in, iov, - req->vsr_niov_in * sizeof(struct iovec)); - memcpy(req->vsr_iov_out, iov + vireq.readable, - req->vsr_niov_out * sizeof(struct iovec)); pthread_mutex_lock(&q->vsq_mtx); STAILQ_INSERT_TAIL(&q->vsq_requests, req, vsr_link);