D20276.diff
D20276: [bhyve][virtio-net] Allow guest VMs to set a jumbo MTU when using the VALE switch.
Index: usr.sbin/bhyve/pci_virtio_net.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_net.c
+++ usr.sbin/bhyve/pci_virtio_net.c
@@ -392,85 +392,106 @@
}
static __inline int
-pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt, int iovsize)
{
- int r, i;
- int len = 0;
+ char *buf;
+ int i;
+ int frag_size;
+ int iov_off;
+ int len;
+ int nm_off;
+ int nm_buf_size;
- for (r = nmd->cur_tx_ring; ; ) {
- struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r);
- uint32_t cur, idx;
- char *buf;
+ struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, nmd->cur_tx_ring);
- if (nm_ring_empty(ring)) {
- r++;
- if (r > nmd->last_tx_ring)
- r = nmd->first_tx_ring;
- if (r == nmd->cur_tx_ring)
- break;
- continue;
+ if ((nm_ring_space(ring) * ring->nr_buf_size) < iovsize) {
+ /*
+	 * No more available space in the TX ring; try to flush it.
+ */
+ ioctl(nmd->fd, NIOCTXSYNC, NULL);
+ return (0);
+ }
+
+ i = ring->cur;
+ buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
+ iov_off = 0;
+ len = iovsize;
+ nm_buf_size = ring->nr_buf_size;
+ nm_off = 0;
+
+ while (iovsize) {
+ if (unlikely(iov_off == iov->iov_len)) {
+ iov++;
+ iov_off = 0;
}
- cur = ring->cur;
- idx = ring->slot[cur].buf_idx;
- buf = NETMAP_BUF(ring, idx);
- for (i = 0; i < iovcnt; i++) {
- if (len + iov[i].iov_len > 2048)
- break;
- memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len);
- len += iov[i].iov_len;
+ if (unlikely(nm_off == nm_buf_size)) {
+ ring->slot[i].flags = NS_MOREFRAG;
+ i = nm_ring_next(ring, i);
+ buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
+ nm_off = 0;
}
- ring->slot[cur].len = len;
- ring->head = ring->cur = nm_ring_next(ring, cur);
- nmd->cur_tx_ring = r;
- ioctl(nmd->fd, NIOCTXSYNC, NULL);
- break;
+
+ frag_size = MIN(nm_buf_size - nm_off, iov->iov_len - iov_off);
+ memcpy(buf + nm_off, iov->iov_base + iov_off, frag_size);
+
+ iovsize -= frag_size;
+ iov_off += frag_size;
+ nm_off += frag_size;
+
+ ring->slot[i].len = nm_off;
}
+ /* The last slot must not have NS_MOREFRAG set. */
+ ring->slot[i].flags &= ~NS_MOREFRAG;
+ ring->head = ring->cur = nm_ring_next(ring, i);
+ ioctl(nmd->fd, NIOCTXSYNC, NULL);
+
return (len);
}
static __inline int
-pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt, int iovsize)
{
- int len = 0;
- int i = 0;
- int r;
+ char *buf;
+ int i;
+ int iov_off;
+ int frag_size;
+ int len;
+ int nm_off;
- for (r = nmd->cur_rx_ring; ; ) {
- struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r);
- uint32_t cur, idx;
- char *buf;
- size_t left;
+ struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
- if (nm_ring_empty(ring)) {
- r++;
- if (r > nmd->last_rx_ring)
- r = nmd->first_rx_ring;
- if (r == nmd->cur_rx_ring)
- break;
- continue;
+ i = r->head;
+ buf = NETMAP_BUF(r, r->slot[i].buf_idx);
+ iov_off = 0;
+ nm_off = 0;
+ len = iovsize;
+
+ while (iovsize) {
+ if (unlikely(iov_off == iov->iov_len)) {
+ iov++;
+ iov_off = 0;
}
- cur = ring->cur;
- idx = ring->slot[cur].buf_idx;
- buf = NETMAP_BUF(ring, idx);
- left = ring->slot[cur].len;
- for (i = 0; i < iovcnt && left > 0; i++) {
- if (iov[i].iov_len > left)
- iov[i].iov_len = left;
- memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len);
- len += iov[i].iov_len;
- left -= iov[i].iov_len;
+ if (unlikely(nm_off == r->slot[i].len)) {
+ i = nm_ring_next(r, i);
+ buf = NETMAP_BUF(r, r->slot[i].buf_idx);
+ nm_off = 0;
}
- ring->head = ring->cur = nm_ring_next(ring, cur);
- nmd->cur_rx_ring = r;
- ioctl(nmd->fd, NIOCRXSYNC, NULL);
- break;
+
+ frag_size = MIN(r->slot[i].len - nm_off, iov->iov_len - iov_off);
+ memcpy(iov->iov_base + iov_off, buf + nm_off, frag_size);
+
+ iovsize -= frag_size;
+ iov_off += frag_size;
+ nm_off += frag_size;
}
- for (; i < iovcnt; i++)
- iov[i].iov_len = 0;
+ r->head = r->cur = nm_ring_next(r, i);
+
return (len);
}
@@ -481,32 +502,53 @@
pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
int len)
{
- static char pad[60]; /* all zero bytes */
-
if (sc->vsc_nmd == NULL)
return;
- /*
- * If the length is < 60, pad out to that and add the
- * extra zero'd segment to the iov. It is guaranteed that
- * there is always an extra iov available by the caller.
- */
- if (len < 60) {
- iov[iovcnt].iov_base = pad;
- iov[iovcnt].iov_len = 60 - len;
- iovcnt++;
+ (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt, len);
+}
+
+static __inline int
+netmap_next_pkt_len(struct nm_desc *nmd)
+{
+ int i;
+ int len;
+ struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
+
+ len = 0;
+
+ for (i = r->head; i != r->tail; i = nm_ring_next(r, i)) {
+ len += r->slot[i].len;
+ if (!(r->slot[i].flags & NS_MOREFRAG))
+ break;
}
- (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt);
+
+ return (len);
}
+static __inline void
+netmap_drop_pkt(struct nm_desc *nmd)
+{
+ int i;
+ struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
+
+ for (i = r->head; i != r->tail; i = nm_ring_next(r, i)) {
+ if (!(r->slot[i].flags & NS_MOREFRAG)) {
+ r->head = r->cur = nm_ring_next(r, i);
+ return;
+ }
+ }
+}
+
static void
pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc)
{
struct iovec iov[VTNET_MAXSEGS], *riov;
+ struct virtio_used used[VTNET_MAXSEGS];
+ struct virtio_net_rxhdr *vrxh;
struct vqueue_info *vq;
- void *vrx;
- int len, n;
uint16_t idx;
+ int bufs, len, n;
/*
* Should never be called without a valid netmap descriptor
@@ -521,7 +563,7 @@
/*
* Drop the packet and try later.
*/
- (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+ netmap_drop_pkt(sc->vsc_nmd);
return;
}
@@ -534,58 +576,67 @@
* Drop the packet and try later. Interrupt on
* empty, if that's negotiated.
*/
- (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+ netmap_drop_pkt(sc->vsc_nmd);
vq_endchains(vq, 1);
return;
}
do {
- /*
- * Get descriptor chain.
- */
- n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
- assert(n >= 1 && n <= VTNET_MAXSEGS);
+ len = netmap_next_pkt_len(sc->vsc_nmd);
- /*
- * Get a pointer to the rx header, and use the
- * data immediately following it for the packet buffer.
- */
- vrx = iov[0].iov_base;
- riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
-
- len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n);
-
- if (len == 0) {
+ if (unlikely(len == 0)) {
/*
* No more packets, but still some avail ring
* entries. Interrupt if needed/appropriate.
*/
- vq_retchain(vq);
vq_endchains(vq, 0);
return;
}
+ if (sc->rx_merge) {
+ /*
+ * Get mergable buffers.
+ */
+ n = vq_getbufs_mrgrx(vq, iov, VTNET_MAXSEGS, len + sc->rx_vhdrlen,
+ used, &bufs);
+ } else {
+ /*
+ * Get descriptor chain.
+ */
+ n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
+ }
+
+ if (n <= 0) {
+ vq_endchains(vq, 0);
+ return;
+ }
+
/*
- * The only valid field in the rx packet header is the
- * number of buffers if merged rx bufs were negotiated.
+ * Get a pointer to the rx header, and use the
+ * data immediately following it for the packet buffer.
*/
- memset(vrx, 0, sc->rx_vhdrlen);
+ vrxh = iov[0].iov_base;
+ memset(vrxh, 0, sc->rx_vhdrlen);
- if (sc->rx_merge) {
- struct virtio_net_rxhdr *vrxh;
+ riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
- vrxh = vrx;
- vrxh->vrh_bufs = 1;
- }
+ (void)pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n, len);
/*
- * Release this chain and handle more chains.
+ * Release used descriptors.
*/
- vq_relchain(vq, idx, len + sc->rx_vhdrlen);
+ if (sc->rx_merge) {
+ vrxh->vrh_bufs = bufs;
+ vq_relbufs_mrgrx(vq, bufs, used);
+ } else {
+ vq_relchain(vq, idx, len + sc->rx_vhdrlen);
+ }
+
} while (vq_has_descs(vq));
/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
vq_endchains(vq, 1);
}
static void
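
As a standalone illustration of the transmit path rewritten above: pci_vtnet_netmap_writev() now streams the guest iovec into fixed-size netmap slots, flagging every slot of a packet except the last with NS_MOREFRAG so that a jumbo frame may span multiple slots. The sketch below is not part of the patch; SLOT_SIZE and split_iov_sketch() are hypothetical stand-ins for ring->nr_buf_size and the real slot writes, and it compiles on its own.

#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

#define SLOT_SIZE 8    /* hypothetical stand-in for ring->nr_buf_size */

/*
 * Model of the NS_MOREFRAG copy loop: stream an iovec into fixed-size
 * slots; every emitted slot but the last would carry NS_MOREFRAG.
 */
static void
split_iov_sketch(struct iovec *iov, int iovsize)
{
    char slot[SLOT_SIZE];
    int frag_size, iov_off = 0, nm_off = 0;

    while (iovsize > 0) {
        if (iov_off == (int)iov->iov_len) {     /* next source buffer */
            iov++;
            iov_off = 0;
        }
        if (nm_off == SLOT_SIZE) {              /* slot full: emit it */
            printf("slot \"%.*s\" [NS_MOREFRAG]\n", nm_off, slot);
            nm_off = 0;
        }
        frag_size = SLOT_SIZE - nm_off;
        if (frag_size > (int)iov->iov_len - iov_off)
            frag_size = (int)iov->iov_len - iov_off;
        memcpy(slot + nm_off, (char *)iov->iov_base + iov_off, frag_size);
        iovsize -= frag_size;
        iov_off += frag_size;
        nm_off += frag_size;
    }
    printf("slot \"%.*s\" [last, flag cleared]\n", nm_off, slot);
}

int
main(void)
{
    struct iovec iov[2] = {
        { "hello, ", 7 },
        { "jumbo frame", 11 },
    };

    split_iov_sketch(iov, 18);    /* 18 = total payload length */
    return (0);
}

For this 18-byte payload the sketch emits two NS_MOREFRAG slots ("hello, j", "umbo fra") and a final unflagged slot ("me"), mirroring how the patch clears NS_MOREFRAG on the last slot after the loop.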
Index: usr.sbin/bhyve/virtio.h
===================================================================
--- usr.sbin/bhyve/virtio.h
+++ usr.sbin/bhyve/virtio.h
@@ -457,8 +457,12 @@
int vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
struct iovec *iov, int n_iov, uint16_t *flags);
+int vq_getbufs_mrgrx(struct vqueue_info *vq, struct iovec *iov,
+ int n_iov, int len, struct virtio_used *used, int *u_cnt);
void vq_retchain(struct vqueue_info *vq);
void vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen);
+void vq_relbufs_mrgrx(struct vqueue_info *vq, int nbufs,
+ struct virtio_used *used);
void vq_endchains(struct vqueue_info *vq, int used_all_avail);
uint64_t vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
Index: usr.sbin/bhyve/virtio.c
===================================================================
--- usr.sbin/bhyve/virtio.c
+++ usr.sbin/bhyve/virtio.c
@@ -41,6 +41,7 @@
#include <pthread_np.h>
#include "bhyverun.h"
+#include "iov.h"
#include "pci_emul.h"
#include "virtio.h"
@@ -381,6 +382,56 @@
return (-1);
}
+int
+vq_getbufs_mrgrx(struct vqueue_info *vq, struct iovec *iov,
+ int n_iov, int len, struct virtio_used *used, int *u_cnt)
+{
+ uint16_t idx;
+ int i, iov_len;
+ int bufs, last_avail_saved, n;
+ int total_len;
+
+ i = 0;
+ bufs = 0;
+ total_len = 0;
+
+ /*
+	 * vq_getchain() increments the last avail index.
+	 * Save it so it can be restored if there are not enough buffers to store the packet.
+ */
+ last_avail_saved = vq->vq_last_avail;
+ while (1) {
+ n = vq_getchain(vq, &idx, &iov[i], n_iov - i, NULL);
+
+ if (n <= 0) {
+ /* Restore the last avail index. */
+ vq->vq_last_avail = last_avail_saved;
+ *u_cnt = 0;
+ return (n);
+ }
+
+ iov_len = count_iov(&iov[i], n);
+ i += n;
+ total_len += iov_len;
+
+ used[bufs].vu_idx = idx;
+
+ if (total_len < len) {
+ used[bufs].vu_tlen = iov_len;
+ bufs++;
+ } else {
+ used[bufs].vu_tlen = iov_len - (total_len - len);
+ bufs++;
+ break;
+ }
+	}
+
+ *u_cnt = bufs;
+
+	return (i);
+}
+
/*
* Return the currently-first request chain back to the available queue.
*
@@ -430,6 +481,39 @@
* Ensure the used descriptor is visible before updating the index.
* This is necessary on ISAs with memory ordering less strict than x86.
*/
+ atomic_thread_fence_rel();
+ vuh->vu_idx = uidx;
+}
+
+/*
+ * Return the specified merged rx buffers to the guest, setting their I/O lengths.
+ */
+void
+vq_relbufs_mrgrx(struct vqueue_info *vq, int nbufs, struct virtio_used *used)
+{
+ int i;
+ uint16_t uidx, mask;
+ volatile struct vring_used *vuh;
+ volatile struct virtio_used *vue;
+
+ mask = vq->vq_qsize - 1;
+ vuh = vq->vq_used;
+
+ uidx = vuh->vu_idx;
+
+ if (nbufs == 1) {
+ vue = &vuh->vu_ring[uidx++ & mask];
+ vue->vu_idx = used[0].vu_idx;
+ vue->vu_tlen = used[0].vu_tlen;
+ } else {
+ for (i = 0; i < nbufs; i++) {
+ vue = &vuh->vu_ring[(uidx + i) & mask];
+ vue->vu_idx = used[i].vu_idx;
+ vue->vu_tlen = used[i].vu_tlen;
+ }
+ uidx += nbufs;
+ }
+
atomic_thread_fence_rel();
vuh->vu_idx = uidx;
}
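
With VIRTIO_NET_F_MRG_RXBUF negotiated, vq_getbufs_mrgrx() above collects guest descriptor chains until their combined capacity covers the packet, trims the used length of the last chain, and reports the chain count through u_cnt; pci_vtnet_netmap_rx() then stores that count in the rx header's vrh_bufs field. A minimal standalone model of the accounting follows; mrgrx_account_sketch() and its fixed per-chain capacities are hypothetical (the real code measures each chain with count_iov()).

#include <stdio.h>

/*
 * Model of the accounting in vq_getbufs_mrgrx(): walk descriptor-chain
 * capacities until the packet fits, fill whole chains, trim the last
 * one. Returns the chain count ("bufs") that lands in vrh_bufs.
 */
static int
mrgrx_account_sketch(const int *chain_cap, int nchains, int pkt_len,
    int *used_len)
{
    int bufs, total = 0;

    for (bufs = 0; bufs < nchains; bufs++) {
        total += chain_cap[bufs];
        if (total < pkt_len) {
            used_len[bufs] = chain_cap[bufs];   /* chain fully used */
        } else {
            /* Last chain: count only the bytes actually written. */
            used_len[bufs] = chain_cap[bufs] - (total - pkt_len);
            return (bufs + 1);
        }
    }
    return (0);     /* not enough buffers; the real code rolls back */
}

int
main(void)
{
    int cap[4] = { 1500, 1500, 1500, 1500 };
    int used[4];
    int bufs = mrgrx_account_sketch(cap, 4, 4000, used);

    printf("bufs=%d used=[%d, %d, %d]\n", bufs, used[0], used[1], used[2]);
    return (0);
}

For a 4000-byte packet offered 1500-byte chains this prints bufs=3 with the last chain trimmed to 1000 bytes, matching the total_len/len arithmetic in vq_getbufs_mrgrx().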