D20276.id57769.diff

Index: usr.sbin/bhyve/pci_virtio_net.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_net.c
+++ usr.sbin/bhyve/pci_virtio_net.c
@@ -73,6 +73,8 @@
#define VTNET_MAXSEGS 256
+#define VTNET_MIN_AVAIL_DESC 64
+
/*
* Host capabilities. Note that we only offer a few of these.
*/
@@ -392,85 +394,107 @@
}
static __inline int
-pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt, int iovsize)
{
- int r, i;
- int len = 0;
+ char *buf;
+ int i;
+ int frag_size;
+ int iov_off;
+ int len;
+ int nm_off;
+ int nm_buf_size;
- for (r = nmd->cur_tx_ring; ; ) {
- struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r);
- uint32_t cur, idx;
- char *buf;
+ struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, nmd->cur_tx_ring);
- if (nm_ring_empty(ring)) {
- r++;
- if (r > nmd->last_tx_ring)
- r = nmd->first_tx_ring;
- if (r == nmd->cur_tx_ring)
- break;
- continue;
+ if ((nm_ring_space(ring) * ring->nr_buf_size) < iovsize) {
+ /*
+ * No more avail space in TX ring, try to flush it.
+ */
+ ioctl(nmd->fd, NIOCTXSYNC, NULL);
+ return (0);
+ }
+
+ i = ring->cur;
+ buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
+ iov_off = 0;
+ len = iovsize;
+ nm_buf_size = ring->nr_buf_size;
+ nm_off = 0;
+
+ while (iovsize) {
+
+ if (unlikely(iov_off == iov->iov_len)) {
+ iov++;
+ iov_off = 0;
}
- cur = ring->cur;
- idx = ring->slot[cur].buf_idx;
- buf = NETMAP_BUF(ring, idx);
- for (i = 0; i < iovcnt; i++) {
- if (len + iov[i].iov_len > 2048)
- break;
- memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len);
- len += iov[i].iov_len;
+ if (unlikely(nm_off == nm_buf_size)) {
+ ring->slot[i].flags = NS_MOREFRAG;
+ i = nm_ring_next(ring, i);
+ buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
+ nm_off = 0;
}
- ring->slot[cur].len = len;
- ring->head = ring->cur = nm_ring_next(ring, cur);
- nmd->cur_tx_ring = r;
- ioctl(nmd->fd, NIOCTXSYNC, NULL);
- break;
+
+ frag_size = MIN(nm_buf_size - nm_off, iov->iov_len - iov_off);
+ memcpy(buf + nm_off, iov->iov_base + iov_off, frag_size);
+
+ iovsize -= frag_size;
+ iov_off += frag_size;
+ nm_off += frag_size;
+
+ ring->slot[i].len = nm_off;
}
+ /* The last slot must not have NS_MOREFRAG set. */
+ ring->slot[i].flags &= ~NS_MOREFRAG;
+ ring->head = ring->cur = nm_ring_next(ring, i);
+ ioctl(nmd->fd, NIOCTXSYNC, NULL);
+
return (len);
}
static __inline int
-pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt, int iovsize)
{
- int len = 0;
- int i = 0;
- int r;
+ char *buf;
+ int i;
+ int iov_off;
+ int frag_size;
+ int len;
+ int nm_off;
- for (r = nmd->cur_rx_ring; ; ) {
- struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r);
- uint32_t cur, idx;
- char *buf;
- size_t left;
+ struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
- if (nm_ring_empty(ring)) {
- r++;
- if (r > nmd->last_rx_ring)
- r = nmd->first_rx_ring;
- if (r == nmd->cur_rx_ring)
- break;
- continue;
+ i = r->head;
+ buf = NETMAP_BUF(r, r->slot[i].buf_idx);
+ iov_off = 0;
+ nm_off = 0;
+ len = iovsize;
+
+ while (iovsize) {
+
+ if (unlikely(iov_off == iov->iov_len)) {
+ iov++;
+ iov_off = 0;
}
- cur = ring->cur;
- idx = ring->slot[cur].buf_idx;
- buf = NETMAP_BUF(ring, idx);
- left = ring->slot[cur].len;
- for (i = 0; i < iovcnt && left > 0; i++) {
- if (iov[i].iov_len > left)
- iov[i].iov_len = left;
- memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len);
- len += iov[i].iov_len;
- left -= iov[i].iov_len;
+ if (unlikely(nm_off == r->slot[i].len)) {
+ i = nm_ring_next(r, i);
+ buf = NETMAP_BUF(r, r->slot[i].buf_idx);
+ nm_off = 0;
}
- ring->head = ring->cur = nm_ring_next(ring, cur);
- nmd->cur_rx_ring = r;
- ioctl(nmd->fd, NIOCRXSYNC, NULL);
- break;
+
+ frag_size = MIN(r->slot[i].len - nm_off, iov->iov_len - iov_off);
+ memcpy(iov->iov_base + iov_off, buf + nm_off, frag_size);
+
+ iovsize -= frag_size;
+ iov_off += frag_size;
+ nm_off += frag_size;
}
- for (; i < iovcnt; i++)
- iov[i].iov_len = 0;
+ r->head = r->cur = nm_ring_next(r, i);
+ ioctl(nmd->fd, NIOCRXSYNC, NULL);
+
return (len);
}
@@ -481,32 +505,102 @@
pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
int len)
{
- static char pad[60]; /* all zero bytes */
-
if (sc->vsc_nmd == NULL)
return;
- /*
- * If the length is < 60, pad out to that and add the
- * extra zero'd segment to the iov. It is guaranteed that
- * there is always an extra iov available by the caller.
- */
- if (len < 60) {
- iov[iovcnt].iov_base = pad;
- iov[iovcnt].iov_len = 60 - len;
- iovcnt++;
+ (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt, len);
+}
+
+static __inline int
+vq_avail_to_iovec(struct vqueue_info *vq, struct iovec *iov, int len, int start,
+ int minavail)
+{
+ int idx;
+ uint16_t mask = vq->vq_qsize - 1;
+ volatile struct virtio_desc *vdir;
+ struct vmctx *ctx = vq->vq_vs->vs_pi->pi_vmctx;
+
+ uint16_t ndesc = (uint16_t)(vq->vq_avail->va_idx - vq->vq_last_avail - start);
+
+ if (ndesc < minavail)
+ return (0);
+
+ int off = 0;
+ int uidx = vq->vq_used->vu_idx + start;
+
+ for (int i = 0; i < ndesc; i++) {
+ idx = vq->vq_avail->va_ring[(vq->vq_last_avail + i + start) & mask];
+ vdir = &vq->vq_desc[idx];
+
+ iov[i].iov_base = paddr_guest2host(ctx,
+ vdir->vd_addr, vdir->vd_len);
+ iov[i].iov_len = vdir->vd_len;
+
+ off += vdir->vd_len;
+
+ vq->vq_used->vu_ring[uidx & mask].vu_idx = idx;
+ vq->vq_used->vu_ring[uidx & mask].vu_tlen =
+ (off >= len) ? vdir->vd_len - (off - len) : vdir->vd_len;
+
+ uidx++;
+
+ if (off >= len) {
+ return (i + 1);
+ }
}
- (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt);
+
+ return (0);
}
+static __inline void
+vq_inc_used_idx_and_last_avail(struct vqueue_info *vq, int n)
+{
+ if (n > 0) {
+ vq->vq_last_avail += n;
+ vq->vq_used->vu_idx += n;
+ }
+}
+
+static __inline int
+netmap_next_pkt_len(struct nm_desc *nmd)
+{
+ int i;
+ int len;
+ struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
+
+ len = 0;
+
+ for (i = r->head; i != r->tail; i = nm_ring_next(r, i)) {
+ len += r->slot[i].len;
+ if (!(r->slot[i].flags & NS_MOREFRAG))
+ break;
+ }
+
+ return (len);
+}
+
+static __inline void
+netmap_drop_pkt(struct nm_desc *nmd)
+{
+ int i;
+ struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
+
+ for (i = r->head; i != r->tail; i = nm_ring_next(r, i)) {
+ if (!(r->slot[i].flags & NS_MOREFRAG)) {
+ r->head = r->cur = nm_ring_next(r, i);
+ return;
+ }
+ }
+}
+
static void
pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc)
{
- struct iovec iov[VTNET_MAXSEGS], *riov;
+ struct iovec iov[VTNET_RINGSZ], *riov;
struct vqueue_info *vq;
- void *vrx;
- int len, n;
- uint16_t idx;
+ int len;
+ int n;
+ int used;
/*
* Should never be called without a valid netmap descriptor
@@ -517,11 +611,11 @@
* But, will be called when the rx ring hasn't yet
* been set up or the guest is resetting the device.
*/
- if (!sc->vsc_rx_ready || sc->resetting) {
+ if (unlikely((!sc->vsc_rx_ready || sc->resetting))) {
/*
* Drop the packet and try later.
*/
- (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+ netmap_drop_pkt(sc->vsc_nmd);
return;
}
@@ -529,63 +623,54 @@
* Check for available rx buffers
*/
vq = &sc->vsc_queues[VTNET_RXQ];
- if (!vq_has_descs(vq)) {
+ if (unlikely(!vq_has_descs(vq))) {
/*
* Drop the packet and try later. Interrupt on
* empty, if that's negotiated.
*/
- (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+ netmap_drop_pkt(sc->vsc_nmd);
vq_endchains(vq, 1);
return;
}
+ used = 0;
+
do {
- /*
- * Get descriptor chain.
- */
- n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
- assert(n >= 1 && n <= VTNET_MAXSEGS);
+ len = netmap_next_pkt_len(sc->vsc_nmd);
- /*
- * Get a pointer to the rx header, and use the
- * data immediately following it for the packet buffer.
- */
- vrx = iov[0].iov_base;
- riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
+ if (unlikely(len == 0)) {
+ vq_inc_used_idx_and_last_avail(vq, used);
+ vq_endchains(vq, 0);
+ return;
+ }
- len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n);
+ n = vq_avail_to_iovec(vq, iov, len + sc->rx_vhdrlen, used,
+ VTNET_MIN_AVAIL_DESC);
- if (len == 0) {
- /*
- * No more packets, but still some avail ring
- * entries. Interrupt if needed/appropriate.
- */
- vq_retchain(vq);
+ if (unlikely(n == 0)) {
+ vq_inc_used_idx_and_last_avail(vq, used);
vq_endchains(vq, 0);
return;
}
- /*
- * The only valid field in the rx packet header is the
- * number of buffers if merged rx bufs were negotiated.
- */
- memset(vrx, 0, sc->rx_vhdrlen);
-
if (sc->rx_merge) {
- struct virtio_net_rxhdr *vrxh;
-
- vrxh = vrx;
- vrxh->vrh_bufs = 1;
+ struct virtio_net_rxhdr *vrxh = iov[0].iov_base;
+ memset(vrxh, 0, sc->rx_vhdrlen);
+ vrxh->vrh_bufs = n;
}
- /*
- * Release this chain and handle more chains.
- */
- vq_relchain(vq, idx, len + sc->rx_vhdrlen);
+ riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
+
+ (void)pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n, len);
+
+ used += n;
+
} while (vq_has_descs(vq));
- /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
+ vq_inc_used_idx_and_last_avail(vq, used);
vq_endchains(vq, 1);
+
+ return;
}
static void

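The core of the new pci_vtnet_netmap_writev() above is a scatter copy that walks the guest iovec and the netmap TX slots in parallel, spilling into the next slot whenever one fills up and marking every slot except the last with NS_MOREFRAG. Below is a minimal, self-contained sketch of that copy pattern; struct slot, SLOT_BUF_SIZE, SLOT_MOREFRAG and copy_iov_to_slots() are illustrative stand-ins, not netmap or bhyve definitions.

/*
 * Sketch only: copy an iovec chain into fixed-size "slots",
 * marking every slot but the last with a MOREFRAG-style flag.
 * Types and sizes are hypothetical, not netmap's.
 */
#include <stdio.h>
#include <string.h>
#include <sys/param.h>		/* MIN() */
#include <sys/uio.h>

#define SLOT_BUF_SIZE	8	/* stand-in for ring->nr_buf_size */
#define SLOT_MOREFRAG	0x01	/* stand-in for NS_MOREFRAG */

struct slot {
	char	buf[SLOT_BUF_SIZE];
	int	len;
	int	flags;
};

static int
copy_iov_to_slots(struct slot *slots, int nslots, struct iovec *iov,
    int iovsize)
{
	int i = 0;		/* current slot */
	int iov_off = 0;	/* offset into current iovec element */
	int slot_off = 0;	/* offset into current slot buffer */
	int frag;

	while (iovsize > 0) {
		if (iov_off == (int)iov->iov_len) {
			/* Current iovec element consumed, move to the next. */
			iov++;
			iov_off = 0;
		}
		if (slot_off == SLOT_BUF_SIZE) {
			/* Slot full: flag it as a fragment and chain on. */
			slots[i].flags = SLOT_MOREFRAG;
			if (++i == nslots)
				return (-1);	/* out of slots */
			slot_off = 0;
		}
		frag = MIN(SLOT_BUF_SIZE - slot_off,
		    (int)iov->iov_len - iov_off);
		memcpy(slots[i].buf + slot_off,
		    (char *)iov->iov_base + iov_off, frag);
		iovsize -= frag;
		iov_off += frag;
		slot_off += frag;
		slots[i].len = slot_off;
	}
	/* The last slot must not carry the MOREFRAG-style flag. */
	slots[i].flags &= ~SLOT_MOREFRAG;
	return (i + 1);		/* number of slots used */
}

int
main(void)
{
	char a[] = "0123456789", b[] = "abcdefghij";
	struct iovec iov[2] = {
		{ .iov_base = a, .iov_len = 10 },
		{ .iov_base = b, .iov_len = 10 },
	};
	struct slot slots[8] = { 0 };
	int i, n;

	n = copy_iov_to_slots(slots, 8, iov, 20);
	for (i = 0; i < n; i++)
		printf("slot %d: len=%d morefrag=%d\n",
		    i, slots[i].len, slots[i].flags & SLOT_MOREFRAG);
	return (0);
}

Compiled standalone, the sketch splits the 20-byte iovec across three 8-byte slots and prints which of them carry the MOREFRAG-style flag; the patch applies the same walk directly to the netmap ring and then clears NS_MOREFRAG on the final slot before advancing head/cur.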