Index: head/usr.sbin/bhyve/net_backends.c
===================================================================
--- head/usr.sbin/bhyve/net_backends.c
+++ head/usr.sbin/bhyve/net_backends.c
@@ -328,7 +328,8 @@
 #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
 		VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
 		VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
-		VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
+		VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO | \
+		VIRTIO_NET_F_MRG_RXBUF)
 
 struct netmap_priv {
 	char ifname[IFNAMSIZ];

Index: head/usr.sbin/bhyve/pci_virtio_console.c
===================================================================
--- head/usr.sbin/bhyve/pci_virtio_console.c
+++ head/usr.sbin/bhyve/pci_virtio_console.c
@@ -423,7 +423,7 @@
 
 	len = readv(sock->vss_conn_fd, &iov, n);
 	if (len == 0 || (len < 0 && errno == EWOULDBLOCK)) {
-		vq_retchain(vq);
+		vq_retchains(vq, 1);
 		vq_endchains(vq, 0);
 		if (len == 0)
 			goto close;

Index: head/usr.sbin/bhyve/pci_virtio_net.c
===================================================================
--- head/usr.sbin/bhyve/pci_virtio_net.c
+++ head/usr.sbin/bhyve/pci_virtio_net.c
@@ -58,11 +58,14 @@
 #include "virtio.h"
 #include "net_utils.h"
 #include "net_backends.h"
+#include "iov.h"
 
 #define VTNET_RINGSZ	1024
 
 #define VTNET_MAXSEGS	256
 
+#define VTNET_MAX_PKT_LEN	(65536 + 64)
+
 #define VTNET_S_HOSTCAPS      \
   ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
     VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)
@@ -170,59 +173,119 @@
 	pthread_mutex_unlock(&sc->rx_mtx);
 }
 
+struct virtio_mrg_rxbuf_info {
+	uint16_t idx;
+	uint16_t pad;
+	uint32_t len;
+};
+
 static void
 pci_vtnet_rx(struct pci_vtnet_softc *sc)
 {
+	struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS];
 	struct iovec iov[VTNET_MAXSEGS + 1];
 	struct vqueue_info *vq;
-	int len, n;
-	uint16_t idx;
+	uint32_t cur_iov_bytes;
+	struct iovec *cur_iov;
+	uint16_t cur_iov_len;
+	uint32_t ulen;
+	int n_chains;
+	int len;
 
 	vq = &sc->vsc_queues[VTNET_RXQ];
 	for (;;) {
 		/*
-		 * Check for available rx buffers.
+		 * Get a descriptor chain to store the next ingress
+		 * packet. In case of mergeable rx buffers, get as
+		 * many chains as necessary in order to make room
+		 * for a maximum sized LRO packet.
 		 */
-		if (!vq_has_descs(vq)) {
-			/* No rx buffers. Enable RX kicks and double check. */
-			vq_kick_enable(vq);
-			if (!vq_has_descs(vq)) {
+		cur_iov_bytes = 0;
+		cur_iov_len = 0;
+		cur_iov = iov;
+		n_chains = 0;
+		do {
+			int n = vq_getchain(vq, &info[n_chains].idx, cur_iov,
+			    VTNET_MAXSEGS - cur_iov_len, NULL);
+
+			if (n == 0) {
 				/*
-				 * Still no buffers. Interrupt if needed
-				 * (including for NOTIFY_ON_EMPTY), and
-				 * disable the backend until the next kick.
+				 * No rx buffers. Enable RX kicks and double
+				 * check.
 				 */
-				vq_endchains(vq, /*used_all_avail=*/1);
-				netbe_rx_disable(sc->vsc_be);
-				return;
+				vq_kick_enable(vq);
+				if (!vq_has_descs(vq)) {
+					/*
+					 * Still no buffers. Return the unused
+					 * chains (if any), interrupt if needed
+					 * (including for NOTIFY_ON_EMPTY), and
+					 * disable the backend until the next
+					 * kick.
+					 */
+					vq_retchains(vq, n_chains);
+					vq_endchains(vq, /*used_all_avail=*/1);
+					netbe_rx_disable(sc->vsc_be);
+					return;
+				}
+
+				/* More rx buffers found, so keep going. */
+				vq_kick_disable(vq);
+				continue;
 			}
+			assert(n >= 1 && cur_iov_len + n <= VTNET_MAXSEGS);
+			cur_iov_len += n;
+			if (!sc->rx_merge) {
+				n_chains = 1;
+				break;
+			}
+			info[n_chains].len = (uint32_t)count_iov(cur_iov, n);
+			cur_iov_bytes += info[n_chains].len;
+			cur_iov += n;
+			n_chains++;
+		} while (cur_iov_bytes < VTNET_MAX_PKT_LEN &&
+		    cur_iov_len < VTNET_MAXSEGS);
 
-			/* More rx buffers found, so keep going. */
-			vq_kick_disable(vq);
-		}
+		len = netbe_recv(sc->vsc_be, iov, cur_iov_len);
 
-		/*
-		 * Get descriptor chain.
-		 */
-		n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
-		assert(n >= 1 && n <= VTNET_MAXSEGS);
-
-		len = netbe_recv(sc->vsc_be, iov, n);
-
 		if (len <= 0) {
			/*
			 * No more packets (len == 0), or backend errored
			 * (err < 0). Return unused available buffers
			 * and stop.
			 */
-			vq_retchain(vq);
+			vq_retchains(vq, n_chains);
 			/* Interrupt if needed/appropriate and stop. */
 			vq_endchains(vq, /*used_all_avail=*/0);
 			return;
 		}
 
-		/* Publish the info to the guest */
-		vq_relchain(vq, idx, (uint32_t)len);
+		ulen = (uint32_t)len; /* avoid too many casts below */
+
+		/* Publish the used buffers to the guest. */
+		if (!sc->rx_merge) {
+			vq_relchain(vq, info[0].idx, ulen);
+		} else {
+			struct virtio_net_rxhdr *hdr = iov[0].iov_base;
+			uint32_t iolen;
+			int i = 0;
+
+			assert(iov[0].iov_len >= sizeof(*hdr));
+
+			do {
+				iolen = info[i].len;
+				if (iolen > ulen) {
+					iolen = ulen;
+				}
+				vq_relchain_prepare(vq, info[i].idx, iolen);
+				ulen -= iolen;
+				i++;
+				assert(i <= n_chains);
+			} while (ulen > 0);
+
+			hdr->vrh_bufs = i;
+			vq_relchain_publish(vq);
+			vq_retchains(vq, n_chains - i);
+		}
 	}
 }
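Note on the rx path above: count_iov(), pulled in through the new "iov.h" include, is presumably a small helper that sums the iov_len fields of an iovec array, which is how the loop tracks the byte capacity of each collected chain. The protocol-visible piece is hdr->vrh_bufs: when VIRTIO_NET_F_MRG_RXBUF is negotiated, the net header written into the first buffer ends with a "num_buffers" field that tells the guest how many descriptor chains make up this one packet. A sketch of the assumed header layout, modeled on the virtio spec's struct virtio_net_hdr_mrg_rxbuf (the actual virtio_net_rxhdr in pci_virtio_net.c may differ in detail):

	#include <stdint.h>

	/* Assumed layout; field names follow bhyve's "vrh_" convention. */
	struct virtio_net_rxhdr {
		uint8_t		vrh_flags;
		uint8_t		vrh_gso_type;
		uint16_t	vrh_hdr_len;
		uint16_t	vrh_gso_size;
		uint16_t	vrh_csum_start;
		uint16_t	vrh_csum_offset;
		uint16_t	vrh_bufs;  /* "num_buffers": chains in this packet */
	} __attribute__((packed));

Only the header in the first chain carries a meaningful vrh_bufs; the guest reads it to learn how many used-ring entries belong to the packet, which is why the code asserts that iov[0].iov_len can hold the whole header.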
Index: head/usr.sbin/bhyve/virtio.h
===================================================================
--- head/usr.sbin/bhyve/virtio.h
+++ head/usr.sbin/bhyve/virtio.h
@@ -392,6 +392,7 @@
 
 	uint16_t vq_flags;	/* flags (see above) */
 	uint16_t vq_last_avail;	/* a recent value of vq_avail->va_idx */
+	uint16_t vq_next_used;	/* index of the next used slot to be filled */
 	uint16_t vq_save_used;	/* saved vq_used->vu_idx; see vq_endchains */
 	uint16_t vq_msix_idx;	/* MSI-X index, or VIRTIO_MSI_NO_VECTOR */
 
@@ -479,7 +480,10 @@
 int	vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
 		    struct iovec *iov, int n_iov, uint16_t *flags);
-void	vq_retchain(struct vqueue_info *vq);
+void	vq_retchains(struct vqueue_info *vq, uint16_t n_chains);
+void	vq_relchain_prepare(struct vqueue_info *vq, uint16_t idx,
+			    uint32_t iolen);
+void	vq_relchain_publish(struct vqueue_info *vq);
 void	vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen);
 void	vq_endchains(struct vqueue_info *vq, int used_all_avail);
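The prepare/publish split declared above exists so a device can stage several used-ring entries and then make them all visible to the guest at once, with a single release fence and index update. A hypothetical caller, sketched here for illustration only (return_merged_packet is not part of the patch; the names mirror pci_vtnet_rx above and assume the declarations from virtio.h):

	/* Return one packet spread over up to n_chains guest chains. */
	static void
	return_merged_packet(struct vqueue_info *vq,
	    const struct virtio_mrg_rxbuf_info *info, int n_chains,
	    uint32_t len)
	{
		int i;

		for (i = 0; i < n_chains && len > 0; i++) {
			uint32_t iolen = info[i].len < len ? info[i].len : len;

			vq_relchain_prepare(vq, info[i].idx, iolen);
			len -= iolen;
		}
		vq_relchain_publish(vq);	/* one fenced vu_idx update */
		vq_retchains(vq, n_chains - i);	/* hand back unused chains */
	}

Publishing once, after all entries are written, keeps the guest from observing a partially returned packet: otherwise it could consume intermediate chains before vrh_bufs and the remaining used entries are in place.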
Index: head/usr.sbin/bhyve/virtio.c
===================================================================
--- head/usr.sbin/bhyve/virtio.c
+++ head/usr.sbin/bhyve/virtio.c
@@ -102,6 +102,7 @@
 	for (vq = vs->vs_queues, i = 0; i < nvq; vq++, i++) {
 		vq->vq_flags = 0;
 		vq->vq_last_avail = 0;
+		vq->vq_next_used = 0;
 		vq->vq_save_used = 0;
 		vq->vq_pfn = 0;
 		vq->vq_msix_idx = VIRTIO_MSI_NO_VECTOR;
@@ -199,6 +200,7 @@
 	/* Mark queue as allocated, and start at 0 when we use it. */
 	vq->vq_flags = VQ_ALLOC;
 	vq->vq_last_avail = 0;
+	vq->vq_next_used = 0;
 	vq->vq_save_used = 0;
 }
 
@@ -279,7 +281,7 @@
 	 * the guest has written are valid (including all their
 	 * vd_next fields and vd_flags).
 	 *
-	 * Compute (last_avail - va_idx) in integers mod 2**16. This is
+	 * Compute (va_idx - last_avail) in integers mod 2**16. This is
 	 * the number of descriptors the device has made available
 	 * since the last time we updated vq->vq_last_avail.
 	 *
@@ -382,38 +384,30 @@
 }
 
 /*
- * Return the currently-first request chain back to the available queue.
+ * Return the first n_chains request chains back to the available queue.
  *
- * (This chain is the one you handled when you called vq_getchain()
+ * (These chains are the ones you handled when you called vq_getchain()
  * and used its positive return value.)
 */
 void
-vq_retchain(struct vqueue_info *vq)
+vq_retchains(struct vqueue_info *vq, uint16_t n_chains)
 {
 
-	vq->vq_last_avail--;
+	vq->vq_last_avail -= n_chains;
 }
 
-/*
- * Return specified request chain to the guest, setting its I/O length
- * to the provided value.
- *
- * (This chain is the one you handled when you called vq_getchain()
- * and used its positive return value.)
- */
 void
-vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
+vq_relchain_prepare(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
 {
-	uint16_t uidx, mask;
 	volatile struct vring_used *vuh;
 	volatile struct virtio_used *vue;
+	uint16_t mask;
 
 	/*
 	 * Notes:
 	 *  - mask is N-1 where N is a power of 2 so computes x % N
 	 *  - vuh points to the "used" data shared with guest
 	 *  - vue points to the "used" ring entry we want to update
-	 *  - head is the same value we compute in vq_iovecs().
 	 *
 	 * (I apologize for the two fields named vu_idx; the
 	 * virtio spec calls the one that vue points to, "id"...)
 	 */
@@ -421,18 +415,35 @@
 	mask = vq->vq_qsize - 1;
 	vuh = vq->vq_used;
-	uidx = vuh->vu_idx;
-	vue = &vuh->vu_ring[uidx++ & mask];
+	vue = &vuh->vu_ring[vq->vq_next_used++ & mask];
 	vue->vu_idx = idx;
 	vue->vu_tlen = iolen;
+}
 
+void
+vq_relchain_publish(struct vqueue_info *vq)
+{
 	/*
 	 * Ensure the used descriptor is visible before updating the index.
 	 * This is necessary on ISAs with memory ordering less strict than x86
 	 * (and even on x86 to act as a compiler barrier).
 	 */
 	atomic_thread_fence_rel();
-	vuh->vu_idx = uidx;
+	vq->vq_used->vu_idx = vq->vq_next_used;
+}
+
+/*
+ * Return specified request chain to the guest, setting its I/O length
+ * to the provided value.
+ *
+ * (This chain is the one you handled when you called vq_getchain()
+ * and used its positive return value.)
+ */
+void
+vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
+{
+	vq_relchain_prepare(vq, idx, iolen);
+	vq_relchain_publish(vq);
 }
 
 /*
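The corrected comment above (compute (va_idx - last_avail), not the reverse) and the new free-running vq_next_used counter rest on the same fact: unsigned 16-bit arithmetic is arithmetic mod 2**16, so free-running ring indices subtract correctly even across wraparound and only need masking with qsize - 1 when used to index the ring. A standalone demonstration (not from the patch):

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint16_t last_avail = 65534;	/* device's cached avail index */
		uint16_t va_idx = 2;		/* guest published 4 more chains */

		/* Subtraction of uint16_t values is already mod 2**16. */
		uint16_t ndescs = va_idx - last_avail;

		printf("%u\n", ndescs);		/* prints 4, despite the wrap */
		return (0);
	}

This is also why vq_relchain_prepare() can simply post-increment vq_next_used without ever resetting it: the counter is only reduced modulo the ring size at the moment it indexes vu_ring.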