Index: usr.sbin/bhyve/pci_virtio_net.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_net.c
+++ usr.sbin/bhyve/pci_virtio_net.c
@@ -392,85 +392,106 @@
 }

 static __inline int
-pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt, int iovsize)
 {
-	int r, i;
-	int len = 0;
+	char *buf;
+	int i;
+	int frag_size;
+	int iov_off;
+	int len;
+	int nm_off;
+	int nm_buf_size;

-	for (r = nmd->cur_tx_ring; ; ) {
-		struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r);
-		uint32_t cur, idx;
-		char *buf;
+	struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, nmd->cur_tx_ring);

-		if (nm_ring_empty(ring)) {
-			r++;
-			if (r > nmd->last_tx_ring)
-				r = nmd->first_tx_ring;
-			if (r == nmd->cur_tx_ring)
-				break;
-			continue;
+	if ((nm_ring_space(ring) * ring->nr_buf_size) < iovsize) {
+		/*
+		 * No more avail space in TX ring, try to flush it.
+		 */
+		ioctl(nmd->fd, NIOCTXSYNC, NULL);
+		return (0);
+	}
+
+	i = ring->cur;
+	buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
+	iov_off = 0;
+	len = iovsize;
+	nm_buf_size = ring->nr_buf_size;
+	nm_off = 0;
+
+	while (iovsize) {
+
+		if (unlikely(iov_off == iov->iov_len)) {
+			iov++;
+			iov_off = 0;
 		}
-		cur = ring->cur;
-		idx = ring->slot[cur].buf_idx;
-		buf = NETMAP_BUF(ring, idx);
-		for (i = 0; i < iovcnt; i++) {
-			if (len + iov[i].iov_len > 2048)
-				break;
-			memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len);
-			len += iov[i].iov_len;
+		if (unlikely(nm_off == nm_buf_size)) {
+			ring->slot[i].flags = NS_MOREFRAG;
+			i = nm_ring_next(ring, i);
+			buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
+			nm_off = 0;
 		}
-		ring->slot[cur].len = len;
-		ring->head = ring->cur = nm_ring_next(ring, cur);
-		nmd->cur_tx_ring = r;
-		ioctl(nmd->fd, NIOCTXSYNC, NULL);
-		break;
+
+		frag_size = MIN(nm_buf_size - nm_off, iov->iov_len - iov_off);
+		memcpy(buf + nm_off, iov->iov_base + iov_off, frag_size);
+
+		iovsize -= frag_size;
+		iov_off += frag_size;
+		nm_off += frag_size;
+
+		ring->slot[i].len = nm_off;
 	}
+	/* The last slot must not have NS_MOREFRAG set. */
+	ring->slot[i].flags &= ~NS_MOREFRAG;
+	ring->head = ring->cur = nm_ring_next(ring, i);
+	ioctl(nmd->fd, NIOCTXSYNC, NULL);
+
 	return (len);
 }

 static __inline int
-pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt, int iovsize)
 {
-	int len = 0;
-	int i = 0;
-	int r;
+	char *buf;
+	int i;
+	int iov_off;
+	int frag_size;
+	int len;
+	int nm_off;

-	for (r = nmd->cur_rx_ring; ; ) {
-		struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r);
-		uint32_t cur, idx;
-		char *buf;
-		size_t left;
+	struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);

-		if (nm_ring_empty(ring)) {
-			r++;
-			if (r > nmd->last_rx_ring)
-				r = nmd->first_rx_ring;
-			if (r == nmd->cur_rx_ring)
-				break;
-			continue;
+	i = r->head;
+	buf = NETMAP_BUF(r, r->slot[i].buf_idx);
+	iov_off = 0;
+	nm_off = 0;
+	len = iovsize;
+
+	while (iovsize) {
+
+		if (unlikely(iov_off == iov->iov_len)) {
+			iov++;
+			iov_off = 0;
 		}
-		cur = ring->cur;
-		idx = ring->slot[cur].buf_idx;
-		buf = NETMAP_BUF(ring, idx);
-		left = ring->slot[cur].len;
-		for (i = 0; i < iovcnt && left > 0; i++) {
-			if (iov[i].iov_len > left)
-				iov[i].iov_len = left;
-			memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len);
-			len += iov[i].iov_len;
-			left -= iov[i].iov_len;
+		if (unlikely(nm_off == r->slot[i].len)) {
+			i = nm_ring_next(r, i);
+			buf = NETMAP_BUF(r, r->slot[i].buf_idx);
+			nm_off = 0;
 		}
-		ring->head = ring->cur = nm_ring_next(ring, cur);
-		nmd->cur_rx_ring = r;
-		ioctl(nmd->fd, NIOCRXSYNC, NULL);
-		break;
+
+		frag_size = MIN(r->slot[i].len - nm_off, iov->iov_len - iov_off);
+		memcpy(iov->iov_base + iov_off, buf + nm_off, frag_size);
+
+		iovsize -= frag_size;
+		iov_off += frag_size;
+		nm_off += frag_size;
 	}
-	for (; i < iovcnt; i++)
-		iov[i].iov_len = 0;
+	r->head = r->cur = nm_ring_next(r, i);
+
 	return (len);
 }
@@ -481,32 +502,53 @@
 pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
 		    int len)
 {
-	static char pad[60]; /* all zero bytes */
-
 	if (sc->vsc_nmd == NULL)
 		return;

-	/*
-	 * If the length is < 60, pad out to that and add the
-	 * extra zero'd segment to the iov. It is guaranteed that
-	 * there is always an extra iov available by the caller.
-	 */
-	if (len < 60) {
-		iov[iovcnt].iov_base = pad;
-		iov[iovcnt].iov_len = 60 - len;
-		iovcnt++;
+	(void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt, len);
+}
+
+static __inline int
+netmap_next_pkt_len(struct nm_desc *nmd)
+{
+	int i;
+	int len;
+	struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
+
+	len = 0;
+
+	for (i = r->head; i != r->tail; i = nm_ring_next(r, i)) {
+		len += r->slot[i].len;
+		if (!(r->slot[i].flags & NS_MOREFRAG))
+			break;
 	}
-	(void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt);
+
+	return (len);
 }

+static __inline void
+netmap_drop_pkt(struct nm_desc *nmd)
+{
+	int i;
+	struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
+
+	for (i = r->head; i != r->tail; i = nm_ring_next(r, i)) {
+		if (!(r->slot[i].flags & NS_MOREFRAG)) {
+			r->head = r->cur = nm_ring_next(r, i);
+			return;
+		}
+	}
+}
+
 static void
 pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc)
 {
 	struct iovec iov[VTNET_MAXSEGS], *riov;
+	struct virtio_used used[VTNET_MAXSEGS];
+	struct virtio_net_rxhdr *vrxh;
 	struct vqueue_info *vq;
-	void *vrx;
-	int len, n;
 	uint16_t idx;
+	int bufs, len, n;

 	/*
 	 * Should never be called without a valid netmap descriptor
@@ -521,7 +563,7 @@
 		/*
 		 * Drop the packet and try later.
 		 */
-		(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+		netmap_drop_pkt(sc->vsc_nmd);
 		return;
 	}

@@ -534,58 +576,67 @@
 		 * Drop the packet and try later. Interrupt on
 		 * empty, if that's negotiated.
 		 */
-		(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+		netmap_drop_pkt(sc->vsc_nmd);
 		vq_endchains(vq, 1);
 		return;
 	}

 	do {
-		/*
-		 * Get descriptor chain.
-		 */
-		n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
-		assert(n >= 1 && n <= VTNET_MAXSEGS);
+		len = netmap_next_pkt_len(sc->vsc_nmd);

-		/*
-		 * Get a pointer to the rx header, and use the
-		 * data immediately following it for the packet buffer.
-		 */
-		vrx = iov[0].iov_base;
-		riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
-
-		len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n);
-
-		if (len == 0) {
+		if (unlikely(len == 0)) {
 			/*
 			 * No more packets, but still some avail ring
 			 * entries. Interrupt if needed/appropriate.
 			 */
-			vq_retchain(vq);
 			vq_endchains(vq, 0);
 			return;
 		}

+		if (sc->rx_merge) {
+			/*
+			 * Get mergeable buffers.
+			 */
+			n = vq_getbufs_mrgrx(vq, iov, VTNET_MAXSEGS, len + sc->rx_vhdrlen,
+			    used, &bufs);
+		} else {
+			/*
+			 * Get descriptor chain.
+			 */
+			n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
+		}
+
+		if (n <= 0) {
+			vq_endchains(vq, 0);
+			return;
+		}
+
 		/*
-		 * The only valid field in the rx packet header is the
-		 * number of buffers if merged rx bufs were negotiated.
+		 * Get a pointer to the rx header, and use the
+		 * data immediately following it for the packet buffer.
 		 */
-		memset(vrx, 0, sc->rx_vhdrlen);
+		vrxh = iov[0].iov_base;
+		memset(vrxh, 0, sc->rx_vhdrlen);

-		if (sc->rx_merge) {
-			struct virtio_net_rxhdr *vrxh;
+		riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);

-			vrxh = vrx;
-			vrxh->vrh_bufs = 1;
-		}
+		(void)pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n, len);

 		/*
-		 * Release this chain and handle more chains.
+		 * Release used descriptors.
 		 */
-		vq_relchain(vq, idx, len + sc->rx_vhdrlen);
+		if (sc->rx_merge) {
+			vrxh->vrh_bufs = bufs;
+			vq_relbufs_mrgrx(vq, bufs, used);
+		} else {
+			vq_relchain(vq, idx, len + sc->rx_vhdrlen);
+		}
+
 	} while (vq_has_descs(vq));

 	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
 	vq_endchains(vq, 1);
+
 }

 static void
Index: usr.sbin/bhyve/virtio.h
===================================================================
--- usr.sbin/bhyve/virtio.h
+++ usr.sbin/bhyve/virtio.h
@@ -457,8 +457,12 @@

 int	vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
 		    struct iovec *iov, int n_iov, uint16_t *flags);
+int	vq_getbufs_mrgrx(struct vqueue_info *vq, struct iovec *iov,
+		    int n_iov, int len, struct virtio_used *used, int *u_cnt);
 void	vq_retchain(struct vqueue_info *vq);
 void	vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen);
+void	vq_relbufs_mrgrx(struct vqueue_info *vq, int nbufs,
+		    struct virtio_used *used);
 void	vq_endchains(struct vqueue_info *vq, int used_all_avail);

 uint64_t vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
Index: usr.sbin/bhyve/virtio.c
===================================================================
--- usr.sbin/bhyve/virtio.c
+++ usr.sbin/bhyve/virtio.c
@@ -41,6 +41,7 @@
 #include

 #include "bhyverun.h"
+#include "iov.h"
 #include "pci_emul.h"
 #include "virtio.h"

@@ -381,6 +382,56 @@
 	return (-1);
 }

+int vq_getbufs_mrgrx(struct vqueue_info *vq, struct iovec *iov,
+    int n_iov, int len, struct virtio_used *used, int *u_cnt)
+{
+	uint16_t idx;
+	int i, iov_len;
+	int bufs, last_avail_saved, n;
+	int total_len;
+
+	i = 0;
+	bufs = 0;
+	total_len = 0;
+
+	/*
+	 * vq_getchain() increments the last avail index.
+	 * Save it so it can be restored if there are not enough buffers for the packet.
+	 */
+	last_avail_saved = vq->vq_last_avail;
+	while (1) {
+		n = vq_getchain(vq, &idx, &iov[i], n_iov - i, NULL);
+
+		if (n <= 0) {
+			/* Restore the last avail index. */
+			vq->vq_last_avail = last_avail_saved;
+			*u_cnt = 0;
+			return (n);
+		}
+
+		iov_len = count_iov(&iov[i], n);
+		i += n;
+		total_len += iov_len;
+
+		used[bufs].vu_idx = idx;
+
+		if (total_len < len) {
+			used[bufs].vu_tlen = iov_len;
+			bufs++;
+		} else {
+			used[bufs].vu_tlen = iov_len - (total_len - len);
+			bufs++;
+			break;
+		}
+
+	}
+
+	*u_cnt = bufs;
+
+	return (i);
+
+}
+
 /*
  * Return the currently-first request chain back to the available queue.
  *
@@ -430,6 +481,39 @@
 	 * Ensure the used descriptor is visible before updating the index.
 	 * This is necessary on ISAs with memory ordering less strict than x86.
 	 */
+	atomic_thread_fence_rel();
+	vuh->vu_idx = uidx;
+}
+
+/*
+ * Return the specified merged rx buffers to the guest, setting their I/O lengths.
+ */
+void
+vq_relbufs_mrgrx(struct vqueue_info *vq, int nbufs, struct virtio_used *used)
+{
+	int i;
+	uint16_t uidx, mask;
+	volatile struct vring_used *vuh;
+	volatile struct virtio_used *vue;
+
+	mask = vq->vq_qsize - 1;
+	vuh = vq->vq_used;
+
+	uidx = vuh->vu_idx;
+
+	if (nbufs == 1) {
+		vue = &vuh->vu_ring[uidx++ & mask];
+		vue->vu_idx = used[0].vu_idx;
+		vue->vu_tlen = used[0].vu_tlen;
+	} else {
+		for (i = 0; i < nbufs; i++) {
+			vue = &vuh->vu_ring[(uidx + i) & mask];
+			vue->vu_idx = used[i].vu_idx;
+			vue->vu_tlen = used[i].vu_tlen;
+		}
+		uidx += nbufs;
+	}
+
 	atomic_thread_fence_rel();
 	vuh->vu_idx = uidx;
 }
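
Illustrative sketch (not part of the patch): the new pci_vtnet_netmap_writev() splits an iovec-described packet across fixed-size netmap buffers and marks every slot except the last with NS_MOREFRAG. The standalone program below mirrors that copy loop with a plain slot array; SLOT_SIZE, NSLOTS, MOREFRAG and struct slot are hypothetical stand-ins for the netmap ring, not real netmap definitions.

/*
 * Standalone sketch: split an iovec-described packet across fixed-size
 * slots, flagging every slot but the last as "more fragments", mirroring
 * the NS_MOREFRAG handling in pci_vtnet_netmap_writev().
 */
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

#define SLOT_SIZE	8	/* stand-in for ring->nr_buf_size */
#define NSLOTS		16
#define MOREFRAG	0x1	/* stand-in for NS_MOREFRAG */

struct slot {
	char	buf[SLOT_SIZE];
	int	len;
	int	flags;
};

static int
split_into_slots(struct slot *slots, struct iovec *iov, int iovsize)
{
	int i = 0, iov_off = 0, nm_off = 0;

	while (iovsize > 0) {
		if (iov_off == (int)iov->iov_len) {	/* move to the next input segment */
			iov++;
			iov_off = 0;
		}
		if (nm_off == SLOT_SIZE) {		/* current slot full: flag it, take the next one */
			slots[i].flags = MOREFRAG;
			i++;
			nm_off = 0;
		}
		int frag = SLOT_SIZE - nm_off;		/* copy as much as both sides allow */
		if (frag > (int)iov->iov_len - iov_off)
			frag = (int)iov->iov_len - iov_off;
		memcpy(slots[i].buf + nm_off, (char *)iov->iov_base + iov_off, frag);
		iovsize -= frag;
		iov_off += frag;
		nm_off += frag;
		slots[i].len = nm_off;
	}
	slots[i].flags &= ~MOREFRAG;	/* the last fragment must not carry the flag */
	return (i + 1);			/* number of slots consumed */
}

int
main(void)
{
	char a[] = "0123456789", b[] = "abcdefghijklm";
	struct iovec iov[2] = {
		{ .iov_base = a, .iov_len = 10 },
		{ .iov_base = b, .iov_len = 13 },
	};
	struct slot slots[NSLOTS] = { 0 };
	int n = split_into_slots(slots, iov, 23);

	for (int i = 0; i < n; i++)
		printf("slot %d: len %d flags %d \"%.*s\"\n",
		    i, slots[i].len, slots[i].flags, slots[i].len, slots[i].buf);
	return (0);
}

Running it splits the 23-byte packet into slots of 8, 8 and 7 bytes, with only the first two flagged as having more fragments.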
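
A second illustrative sketch (also not part of the patch): the used-length accounting performed by vq_getbufs_mrgrx() when mergeable rx buffers are negotiated. Every guest buffer chain except the last is reported as fully used; the last is trimmed by the overshoot (total_len - len), and the number of chains consumed is the value pci_vtnet_netmap_rx() later writes into vrh_bufs. The chain capacities and packet size below are made-up numbers.

/*
 * Standalone sketch: mergeable rx used-length accounting, as in
 * vq_getbufs_mrgrx().
 */
#include <stdio.h>

int
main(void)
{
	int cap[] = { 1536, 1536, 1536, 1536 };	/* per-chain capacities (hypothetical) */
	int nchains = 4;
	int len = 4000;				/* bytes to deliver: packet + rx header */
	int total_len = 0, bufs = 0;

	for (int i = 0; i < nchains; i++) {
		total_len += cap[i];
		if (total_len < len) {
			/* Chain completely filled. */
			printf("buf %d: tlen %d\n", bufs, cap[i]);
			bufs++;
		} else {
			/* Last chain: subtract the overshoot. */
			printf("buf %d: tlen %d\n", bufs, cap[i] - (total_len - len));
			bufs++;
			break;
		}
	}
	/* bufs is what the device reports in the rx header's vrh_bufs field. */
	printf("num_buffers = %d\n", bufs);
	return (0);
}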