D20276.id57439.diff
Index: usr.sbin/bhyve/pci_virtio_net.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_net.c
+++ usr.sbin/bhyve/pci_virtio_net.c
@@ -73,6 +73,8 @@
#define VTNET_MAXSEGS 256
+#define VTNET_MIN_AVAIL_DESC 64
+
/*
* Host capabilities. Note that we only offer a few of these.
*/
@@ -392,85 +394,107 @@
}
static __inline int
-pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt, int iovsize)
{
- int r, i;
- int len = 0;
+ char *buf;
+ int i;
+ int frag_size;
+ int iov_off;
+ int len;
+ int nm_off;
+ int nm_buf_size;
- for (r = nmd->cur_tx_ring; ; ) {
- struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r);
- uint32_t cur, idx;
- char *buf;
+ struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, nmd->cur_tx_ring);
- if (nm_ring_empty(ring)) {
- r++;
- if (r > nmd->last_tx_ring)
- r = nmd->first_tx_ring;
- if (r == nmd->cur_tx_ring)
- break;
- continue;
+ if ((nm_ring_space(ring) * ring->nr_buf_size) < iovsize) {
+ /*
+ * No more available space in the TX ring; try to flush it.
+ */
+ ioctl(nmd->fd, NIOCTXSYNC, NULL);
+ return (0);
+ }
+
+ i = ring->cur;
+ buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
+ iov_off = 0;
+ len = iovsize;
+ nm_buf_size = ring->nr_buf_size;
+ nm_off = 0;
+
+ while (iovsize) {
+
+ if (unlikely(iov_off == iov->iov_len)) {
+ iov++;
+ iov_off = 0;
}
- cur = ring->cur;
- idx = ring->slot[cur].buf_idx;
- buf = NETMAP_BUF(ring, idx);
- for (i = 0; i < iovcnt; i++) {
- if (len + iov[i].iov_len > 2048)
- break;
- memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len);
- len += iov[i].iov_len;
+ if (unlikely(nm_off == nm_buf_size)) {
+ ring->slot[i].flags = NS_MOREFRAG;
+ i = nm_ring_next(ring, i);
+ buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
+ nm_off = 0;
}
- ring->slot[cur].len = len;
- ring->head = ring->cur = nm_ring_next(ring, cur);
- nmd->cur_tx_ring = r;
- ioctl(nmd->fd, NIOCTXSYNC, NULL);
- break;
+
+ frag_size = MIN(nm_buf_size - nm_off, iov->iov_len - iov_off);
+ memcpy(buf + nm_off, iov->iov_base + iov_off, frag_size);
+
+ iovsize -= frag_size;
+ iov_off += frag_size;
+ nm_off += frag_size;
+
+ ring->slot[i].len = nm_off;
}
+ /* The last slot must not have NS_MOREFRAG set. */
+ ring->slot[i].flags &= ~NS_MOREFRAG;
+ ring->head = ring->cur = nm_ring_next(ring, i);
+ ioctl(nmd->fd, NIOCTXSYNC, NULL);
+
return (len);
}
static __inline int
-pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt, int iovsize)
{
- int len = 0;
- int i = 0;
- int r;
+ char *buf;
+ int i;
+ int iov_off;
+ int frag_size;
+ int len;
+ int nm_off;
- for (r = nmd->cur_rx_ring; ; ) {
- struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r);
- uint32_t cur, idx;
- char *buf;
- size_t left;
+ struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
- if (nm_ring_empty(ring)) {
- r++;
- if (r > nmd->last_rx_ring)
- r = nmd->first_rx_ring;
- if (r == nmd->cur_rx_ring)
- break;
- continue;
+ i = r->cur;
+ buf = NETMAP_BUF(r, r->slot[i].buf_idx);
+ iov_off = 0;
+ nm_off = 0;
+ len = iovsize;
+
+ while (iovsize) {
+
+ if (unlikely(iov_off == iov->iov_len)) {
+ iov++;
+ iov_off = 0;
}
- cur = ring->cur;
- idx = ring->slot[cur].buf_idx;
- buf = NETMAP_BUF(ring, idx);
- left = ring->slot[cur].len;
- for (i = 0; i < iovcnt && left > 0; i++) {
- if (iov[i].iov_len > left)
- iov[i].iov_len = left;
- memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len);
- len += iov[i].iov_len;
- left -= iov[i].iov_len;
+ if (unlikely(nm_off == r->slot[i].len)) {
+ i = nm_ring_next(r, i);
+ buf = NETMAP_BUF(r, r->slot[i].buf_idx);
+ nm_off = 0;
}
- ring->head = ring->cur = nm_ring_next(ring, cur);
- nmd->cur_rx_ring = r;
- ioctl(nmd->fd, NIOCRXSYNC, NULL);
- break;
+
+ frag_size = MIN(r->slot[i].len - nm_off, iov->iov_len - iov_off);
+ memcpy(iov->iov_base + iov_off, buf + nm_off, frag_size);
+
+ iovsize -= frag_size;
+ iov_off += frag_size;
+ nm_off += frag_size;
}
- for (; i < iovcnt; i++)
- iov[i].iov_len = 0;
+ r->head = r->cur = nm_ring_next(r, i);
+ ioctl(nmd->fd, NIOCRXSYNC, NULL);
+
return (len);
}
@@ -481,32 +505,102 @@
pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
int len)
{
- static char pad[60]; /* all zero bytes */
-
if (sc->vsc_nmd == NULL)
return;
- /*
- * If the length is < 60, pad out to that and add the
- * extra zero'd segment to the iov. It is guaranteed that
- * there is always an extra iov available by the caller.
- */
- if (len < 60) {
- iov[iovcnt].iov_base = pad;
- iov[iovcnt].iov_len = 60 - len;
- iovcnt++;
+ (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt, len);
+}
+
+static __inline int
+vq_avail_to_iovec(struct vqueue_info *vq, struct iovec *iov, int len, int start,
+ int minavail)
+{
+ int idx;
+ uint16_t mask = vq->vq_qsize - 1;
+ volatile struct virtio_desc *vdir;
+ struct vmctx *ctx = vq->vq_vs->vs_pi->pi_vmctx;
+
+ uint16_t ndesc = (uint16_t)(vq->vq_avail->va_idx - vq->vq_last_avail - start);
+
+ if (ndesc < minavail)
+ return (0);
+
+ int off = 0;
+ int uidx = vq->vq_used->vu_idx + start;
+
+ for (int i = 0; i < ndesc; i++) {
+ idx = vq->vq_avail->va_ring[(vq->vq_last_avail + i + start) & mask];
+ vdir = &vq->vq_desc[idx];
+
+ iov[i].iov_base = paddr_guest2host(ctx,
+ vdir->vd_addr, vdir->vd_len);
+ iov[i].iov_len = vdir->vd_len;
+
+ off += vdir->vd_len;
+
+ vq->vq_used->vu_ring[uidx & mask].vu_idx = idx;
+ vq->vq_used->vu_ring[uidx & mask].vu_tlen =
+ (off >= len) ? vdir->vd_len - (off - len) : vdir->vd_len;
+
+ uidx++;
+
+ if (off >= len) {
+ return (i + 1);
+ }
}
- (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt);
+
+ return (0);
}
+static __inline void
+vq_inc_used_idx_and_last_avail(struct vqueue_info *vq, int n)
+{
+ if (n > 0) {
+ vq->vq_last_avail += n;
+ vq->vq_used->vu_idx += n;
+ }
+}
+
+static __inline int
+netmap_next_pkt_len(struct nm_desc *nmd)
+{
+ int i;
+ int len;
+ struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
+
+ len = 0;
+
+ for (i = r->cur; i != r->tail; i = nm_ring_next(r, i)) {
+ len += r->slot[i].len;
+ if (!(r->slot[i].flags & NS_MOREFRAG))
+ break;
+ }
+
+ return (len);
+}
+
+static __inline void
+netmap_drop_pkt(struct nm_desc *nmd)
+{
+ int i;
+ struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
+
+ for (i = r->cur; i != r->tail; i = nm_ring_next(r, i)) {
+ if (!(r->slot[i].flags & NS_MOREFRAG)) {
+ r->head = r->cur = nm_ring_next(r, i);
+ return;
+ }
+ }
+}
+
static void
pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc)
{
- struct iovec iov[VTNET_MAXSEGS], *riov;
+ struct iovec iov[VTNET_RINGSZ], *riov;
struct vqueue_info *vq;
- void *vrx;
- int len, n;
- uint16_t idx;
+ int len;
+ int n;
+ int used;
/*
* Should never be called without a valid netmap descriptor
@@ -517,11 +611,11 @@
* But, will be called when the rx ring hasn't yet
* been set up or the guest is resetting the device.
*/
- if (!sc->vsc_rx_ready || sc->resetting) {
+ if (unlikely(!sc->vsc_rx_ready || sc->resetting)) {
/*
* Drop the packet and try later.
*/
- (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+ netmap_drop_pkt(sc->vsc_nmd);
return;
}
@@ -529,63 +623,54 @@
* Check for available rx buffers
*/
vq = &sc->vsc_queues[VTNET_RXQ];
- if (!vq_has_descs(vq)) {
+ if (unlikely(!vq_has_descs(vq))) {
/*
* Drop the packet and try later. Interrupt on
* empty, if that's negotiated.
*/
- (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+ netmap_drop_pkt(sc->vsc_nmd);
vq_endchains(vq, 1);
return;
}
+ used = 0;
+
do {
- /*
- * Get descriptor chain.
- */
- n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
- assert(n >= 1 && n <= VTNET_MAXSEGS);
+ len = netmap_next_pkt_len(sc->vsc_nmd);
- /*
- * Get a pointer to the rx header, and use the
- * data immediately following it for the packet buffer.
- */
- vrx = iov[0].iov_base;
- riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
+ if (unlikely(len == 0)) {
+ vq_inc_used_idx_and_last_avail(vq, used);
+ vq_endchains(vq, 0);
+ return;
+ }
- len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n);
+ n = vq_avail_to_iovec(vq, iov, len + sc->rx_vhdrlen, used,
+ VTNET_MIN_AVAIL_DESC);
- if (len == 0) {
- /*
- * No more packets, but still some avail ring
- * entries. Interrupt if needed/appropriate.
- */
- vq_retchain(vq);
+ if (unlikely(n == 0)) {
+ vq_inc_used_idx_and_last_avail(vq, used);
vq_endchains(vq, 0);
return;
}
- /*
- * The only valid field in the rx packet header is the
- * number of buffers if merged rx bufs were negotiated.
- */
- memset(vrx, 0, sc->rx_vhdrlen);
-
if (sc->rx_merge) {
- struct virtio_net_rxhdr *vrxh;
-
- vrxh = vrx;
- vrxh->vrh_bufs = 1;
+ struct virtio_net_rxhdr *vrxh = iov[0].iov_base;
+ memset(vrxh, 0, sc->rx_vhdrlen);
+ vrxh->vrh_bufs = n;
}
- /*
- * Release this chain and handle more chains.
- */
- vq_relchain(vq, idx, len + sc->rx_vhdrlen);
+ riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
+
+ (void)pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n, len);
+
+ used += n;
+
} while (vq_has_descs(vq));
- /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
+ vq_inc_used_idx_and_last_avail(vq, used);
vq_endchains(vq, 1);
+
+ return;
}
static void
Attached to D20276: [bhyve][virtio-net] Allow guest VMs to set jumbo MTU when using the VALE switch.
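
For reference, the multi-slot transmit path introduced above relies on netmap's NS_MOREFRAG convention: every slot of a packet except the last carries the flag, and the slot without it terminates the frame. Below is a minimal, self-contained sketch of that convention, not the patch itself; the function name tx_pkt_sketch, the pkt/len parameters, and the omission of the iovec walk and the ring-space check are simplifications assumed here for illustration only.

#include <sys/param.h>          /* MIN() */
#include <sys/ioctl.h>
#include <stdint.h>
#include <string.h>

#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>    /* struct nm_desc, NETMAP_TXRING(), NS_MOREFRAG, ... */

/*
 * Sketch: copy one len-byte packet from pkt into consecutive netmap TX
 * slots, chaining all but the last slot with NS_MOREFRAG.  Assumes len > 0
 * and that the caller has already verified, as the patch does, that
 * nm_ring_space(ring) * ring->nr_buf_size >= len.
 */
static void
tx_pkt_sketch(struct nm_desc *nmd, const char *pkt, size_t len)
{
	struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, nmd->cur_tx_ring);
	uint32_t i = ring->cur;

	for (;;) {
		char *dst = NETMAP_BUF(ring, ring->slot[i].buf_idx);
		size_t chunk = MIN(len, (size_t)ring->nr_buf_size);

		memcpy(dst, pkt, chunk);
		ring->slot[i].len = chunk;
		pkt += chunk;
		len -= chunk;

		if (len == 0) {
			/* The last slot of the frame must not carry NS_MOREFRAG. */
			ring->slot[i].flags &= ~NS_MOREFRAG;
			break;
		}
		/* More data follows: mark this slot as an intermediate fragment. */
		ring->slot[i].flags |= NS_MOREFRAG;
		i = nm_ring_next(ring, i);
	}

	/* Publish the slots and ask the kernel to transmit them. */
	ring->head = ring->cur = nm_ring_next(ring, i);
	ioctl(nmd->fd, NIOCTXSYNC, NULL);
}

On the receive side the same flag is walked in the opposite direction: a packet's length is the sum of slot lengths up to and including the first slot without NS_MOREFRAG, which is what netmap_next_pkt_len() and pci_vtnet_netmap_readv() in the diff do.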