D20276.diff

Index: usr.sbin/bhyve/pci_virtio_net.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_net.c
+++ usr.sbin/bhyve/pci_virtio_net.c
@@ -392,85 +392,106 @@
}
static __inline int
-pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt, int iovsize)
{
- int r, i;
- int len = 0;
+ char *buf;
+ int i;
+ int frag_size;
+ int iov_off;
+ int len;
+ int nm_off;
+ int nm_buf_size;
- for (r = nmd->cur_tx_ring; ; ) {
- struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r);
- uint32_t cur, idx;
- char *buf;
+ struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, nmd->cur_tx_ring);
- if (nm_ring_empty(ring)) {
- r++;
- if (r > nmd->last_tx_ring)
- r = nmd->first_tx_ring;
- if (r == nmd->cur_tx_ring)
- break;
- continue;
+ if ((nm_ring_space(ring) * ring->nr_buf_size) < iovsize) {
+ /*
+ * Not enough available space in the TX ring to hold this packet; try to flush it.
+ */
+ ioctl(nmd->fd, NIOCTXSYNC, NULL);
+ return (0);
+ }
+
+ i = ring->cur;
+ buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
+ iov_off = 0;
+ len = iovsize;
+ nm_buf_size = ring->nr_buf_size;
+ nm_off = 0;
+
+ while (iovsize) {
+
+ if (unlikely(iov_off == iov->iov_len)) {
+ iov++;
+ iov_off = 0;
}
- cur = ring->cur;
- idx = ring->slot[cur].buf_idx;
- buf = NETMAP_BUF(ring, idx);
- for (i = 0; i < iovcnt; i++) {
- if (len + iov[i].iov_len > 2048)
- break;
- memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len);
- len += iov[i].iov_len;
+ if (unlikely(nm_off == nm_buf_size)) {
+ ring->slot[i].flags = NS_MOREFRAG;
+ i = nm_ring_next(ring, i);
+ buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
+ nm_off = 0;
}
- ring->slot[cur].len = len;
- ring->head = ring->cur = nm_ring_next(ring, cur);
- nmd->cur_tx_ring = r;
- ioctl(nmd->fd, NIOCTXSYNC, NULL);
- break;
+
+ frag_size = MIN(nm_buf_size - nm_off, iov->iov_len - iov_off);
+ memcpy(buf + nm_off, iov->iov_base + iov_off, frag_size);
+
+ iovsize -= frag_size;
+ iov_off += frag_size;
+ nm_off += frag_size;
+
+ ring->slot[i].len = nm_off;
}
+ /* The last slot must not have NS_MOREFRAG set. */
+ ring->slot[i].flags &= ~NS_MOREFRAG;
+ ring->head = ring->cur = nm_ring_next(ring, i);
+ ioctl(nmd->fd, NIOCTXSYNC, NULL);
+
return (len);
}
static __inline int
-pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt, int iovsize)
{
- int len = 0;
- int i = 0;
- int r;
+ char *buf;
+ int i;
+ int iov_off;
+ int frag_size;
+ int len;
+ int nm_off;
- for (r = nmd->cur_rx_ring; ; ) {
- struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r);
- uint32_t cur, idx;
- char *buf;
- size_t left;
+ struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
- if (nm_ring_empty(ring)) {
- r++;
- if (r > nmd->last_rx_ring)
- r = nmd->first_rx_ring;
- if (r == nmd->cur_rx_ring)
- break;
- continue;
+ i = r->head;
+ buf = NETMAP_BUF(r, r->slot[i].buf_idx);
+ iov_off = 0;
+ nm_off = 0;
+ len = iovsize;
+
+ while (iovsize) {
+
+ if (unlikely(iov_off == iov->iov_len)) {
+ iov++;
+ iov_off = 0;
}
- cur = ring->cur;
- idx = ring->slot[cur].buf_idx;
- buf = NETMAP_BUF(ring, idx);
- left = ring->slot[cur].len;
- for (i = 0; i < iovcnt && left > 0; i++) {
- if (iov[i].iov_len > left)
- iov[i].iov_len = left;
- memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len);
- len += iov[i].iov_len;
- left -= iov[i].iov_len;
+ if (unlikely(nm_off == r->slot[i].len)) {
+ i = nm_ring_next(r, i);
+ buf = NETMAP_BUF(r, r->slot[i].buf_idx);
+ nm_off = 0;
}
- ring->head = ring->cur = nm_ring_next(ring, cur);
- nmd->cur_rx_ring = r;
- ioctl(nmd->fd, NIOCRXSYNC, NULL);
- break;
+
+ frag_size = MIN(r->slot[i].len - nm_off, iov->iov_len - iov_off);
+ memcpy(iov->iov_base + iov_off, buf + nm_off, frag_size);
+
+ iovsize -= frag_size;
+ iov_off += frag_size;
+ nm_off += frag_size;
}
- for (; i < iovcnt; i++)
- iov[i].iov_len = 0;
+ r->head = r->cur = nm_ring_next(r, i);
+
return (len);
}
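
The rewritten pci_vtnet_netmap_writev()/pci_vtnet_netmap_readv() above scatter and gather a guest packet across multiple netmap slots, chaining the fragments with NS_MOREFRAG instead of truncating at a single 2048-byte buffer. Below is a minimal standalone sketch of the same scatter technique for a contiguous buffer; tx_scatter_buf is a hypothetical helper name and is not part of this change, while the netmap calls (NETMAP_TXRING, NETMAP_BUF, nm_ring_space, nm_ring_next, NIOCTXSYNC) are the standard netmap user API.

/*
 * Sketch (not part of the patch): copy one contiguous packet into as
 * many netmap TX slots as needed, chaining them with NS_MOREFRAG.
 */
#define NETMAP_WITH_LIBS
#include <sys/ioctl.h>
#include <string.h>
#include <net/netmap_user.h>

static int
tx_scatter_buf(struct nm_desc *nmd, const char *pkt, size_t len)
{
	struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, nmd->cur_tx_ring);
	size_t off = 0;
	uint32_t i;

	/* Refuse (and kick the kernel) if the whole packet does not fit now. */
	if (nm_ring_space(ring) * (size_t)ring->nr_buf_size < len) {
		ioctl(nmd->fd, NIOCTXSYNC, NULL);
		return (0);
	}

	for (i = ring->cur; off < len; i = nm_ring_next(ring, i)) {
		size_t frag = len - off;

		if (frag > ring->nr_buf_size)
			frag = ring->nr_buf_size;
		memcpy(NETMAP_BUF(ring, ring->slot[i].buf_idx), pkt + off, frag);
		ring->slot[i].len = (uint16_t)frag;
		/* Every slot except the last one carries NS_MOREFRAG. */
		ring->slot[i].flags = (off + frag < len) ? NS_MOREFRAG : 0;
		off += frag;
	}
	ring->head = ring->cur = i;
	ioctl(nmd->fd, NIOCTXSYNC, NULL);
	return ((int)len);
}
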
@@ -481,32 +502,53 @@
pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
int len)
{
- static char pad[60]; /* all zero bytes */
-
if (sc->vsc_nmd == NULL)
return;
- /*
- * If the length is < 60, pad out to that and add the
- * extra zero'd segment to the iov. It is guaranteed that
- * there is always an extra iov available by the caller.
- */
- if (len < 60) {
- iov[iovcnt].iov_base = pad;
- iov[iovcnt].iov_len = 60 - len;
- iovcnt++;
+ (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt, len);
+}
+
+static __inline int
+netmap_next_pkt_len(struct nm_desc *nmd)
+{
+ int i;
+ int len;
+ struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
+
+ len = 0;
+
+ for (i = r->head; i != r->tail; i = nm_ring_next(r, i)) {
+ len += r->slot[i].len;
+ if (!(r->slot[i].flags & NS_MOREFRAG))
+ break;
}
- (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt);
+
+ return (len);
}
+static __inline void
+netmap_drop_pkt(struct nm_desc *nmd)
+{
+ int i;
+ struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
+
+ for (i = r->head; i != r->tail; i = nm_ring_next(r, i)) {
+ if (!(r->slot[i].flags & NS_MOREFRAG)) {
+ r->head = r->cur = nm_ring_next(r, i);
+ return;
+ }
+ }
+}
+
static void
pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc)
{
struct iovec iov[VTNET_MAXSEGS], *riov;
+ struct virtio_used used[VTNET_MAXSEGS];
+ struct virtio_net_rxhdr *vrxh;
struct vqueue_info *vq;
- void *vrx;
- int len, n;
uint16_t idx;
+ int bufs, len, n;
/*
* Should never be called without a valid netmap descriptor
@@ -521,7 +563,7 @@
/*
* Drop the packet and try later.
*/
- (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+ netmap_drop_pkt(sc->vsc_nmd);
return;
}
@@ -534,58 +576,67 @@
* Drop the packet and try later. Interrupt on
* empty, if that's negotiated.
*/
- (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+ netmap_drop_pkt(sc->vsc_nmd);
vq_endchains(vq, 1);
return;
}
do {
- /*
- * Get descriptor chain.
- */
- n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
- assert(n >= 1 && n <= VTNET_MAXSEGS);
+ len = netmap_next_pkt_len(sc->vsc_nmd);
- /*
- * Get a pointer to the rx header, and use the
- * data immediately following it for the packet buffer.
- */
- vrx = iov[0].iov_base;
- riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
-
- len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n);
-
- if (len == 0) {
+ if (unlikely(len == 0)) {
/*
* No more packets, but still some avail ring
* entries. Interrupt if needed/appropriate.
*/
- vq_retchain(vq);
vq_endchains(vq, 0);
return;
}
+ if (sc->rx_merge) {
+ /*
+ * Get mergeable buffers.
+ */
+ n = vq_getbufs_mrgrx(vq, iov, VTNET_MAXSEGS, len + sc->rx_vhdrlen,
+ used, &bufs);
+ } else {
+ /*
+ * Get descriptor chain.
+ */
+ n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
+ }
+
+ if (n <= 0) {
+ vq_endchains(vq, 0);
+ return;
+ }
+
/*
- * The only valid field in the rx packet header is the
- * number of buffers if merged rx bufs were negotiated.
+ * Get a pointer to the rx header, and use the
+ * data immediately following it for the packet buffer.
*/
- memset(vrx, 0, sc->rx_vhdrlen);
+ vrxh = iov[0].iov_base;
+ memset(vrxh, 0, sc->rx_vhdrlen);
- if (sc->rx_merge) {
- struct virtio_net_rxhdr *vrxh;
+ riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
- vrxh = vrx;
- vrxh->vrh_bufs = 1;
- }
+ (void)pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n, len);
/*
- * Release this chain and handle more chains.
+ * Release used descriptors.
*/
- vq_relchain(vq, idx, len + sc->rx_vhdrlen);
+ if (sc->rx_merge) {
+ vrxh->vrh_bufs = bufs;
+ vq_relbufs_mrgrx(vq, bufs, used);
+ } else {
+ vq_relchain(vq, idx, len + sc->rx_vhdrlen);
+ }
+
} while (vq_has_descs(vq));
/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
vq_endchains(vq, 1);
+
}
static void
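
With mergeable receive buffers negotiated, the only field the device model has to fill in the per-packet header is the buffer count stored through vrxh->vrh_bufs above; everything else is zeroed. For reference, a sketch of the header layout this corresponds to is shown below. The field names follow bhyve's struct virtio_net_rxhdr; treat the exact declaration as an assumption, since it is not part of this diff.

#include <sys/cdefs.h>
#include <stdint.h>

/*
 * Sketch of the mergeable-rx-buffers header (virtio_net_hdr plus the
 * trailing num_buffers field).  The RX path above zeroes the whole
 * header and then records how many descriptor chains the packet
 * consumed in vrh_bufs.
 */
struct virtio_net_rxhdr_sketch {
	uint8_t		vrh_flags;
	uint8_t		vrh_gso_type;
	uint16_t	vrh_hdr_len;
	uint16_t	vrh_gso_size;
	uint16_t	vrh_csum_start;
	uint16_t	vrh_csum_offset;
	uint16_t	vrh_bufs;	/* "num_buffers" in the virtio spec */
} __packed;
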
Index: usr.sbin/bhyve/virtio.h
===================================================================
--- usr.sbin/bhyve/virtio.h
+++ usr.sbin/bhyve/virtio.h
@@ -457,8 +457,12 @@
int vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
struct iovec *iov, int n_iov, uint16_t *flags);
+int vq_getbufs_mrgrx(struct vqueue_info *vq, struct iovec *iov,
+ int n_iov, int len, struct virtio_used *used, int *u_cnt);
void vq_retchain(struct vqueue_info *vq);
void vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen);
+void vq_relbufs_mrgrx(struct vqueue_info *vq, int nbufs,
+ struct virtio_used *used);
void vq_endchains(struct vqueue_info *vq, int used_all_avail);
uint64_t vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
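
The two new prototypes are meant to be used as a pair: reserve enough descriptor chains for a packet of known size, fill them, then hand them all back in one shot. A hedged usage sketch follows; rx_one_packet_mrg and RX_MAXSEGS are illustrative stand-ins (the real caller is pci_vtnet_netmap_rx() with VTNET_MAXSEGS), and the actual data copy is elided.

#include <sys/uio.h>
#include "virtio.h"

#define RX_MAXSEGS	64	/* stand-in for VTNET_MAXSEGS */

static int
rx_one_packet_mrg(struct vqueue_info *vq, int pktlen, int hdrlen)
{
	struct iovec iov[RX_MAXSEGS];
	struct virtio_used used[RX_MAXSEGS];
	int bufs, n;

	/* Reserve enough chains for the packet plus its rx header. */
	n = vq_getbufs_mrgrx(vq, iov, RX_MAXSEGS, pktlen + hdrlen,
	    used, &bufs);
	if (n <= 0)
		return (n);	/* nothing usable; the avail index was restored */

	/* ... scatter the rx header and packet data into iov[0..n-1] ... */

	/* Publish all of the buffers and their lengths at once. */
	vq_relbufs_mrgrx(vq, bufs, used);
	return (bufs);
}
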
Index: usr.sbin/bhyve/virtio.c
===================================================================
--- usr.sbin/bhyve/virtio.c
+++ usr.sbin/bhyve/virtio.c
@@ -41,6 +41,7 @@
#include <pthread_np.h>
#include "bhyverun.h"
+#include "iov.h"
#include "pci_emul.h"
#include "virtio.h"
@@ -381,6 +382,56 @@
return (-1);
}
+int vq_getbufs_mrgrx(struct vqueue_info *vq, struct iovec *iov,
+ int n_iov, int len, struct virtio_used *used, int *u_cnt)
+{
+ uint16_t idx;
+ int i, iov_len;
+ int bufs, last_avail_saved, n;
+ int total_len;
+
+ i = 0;
+ bufs = 0;
+ total_len = 0;
+
+ /*
+ * vq_getchain() increments the last avail index.  Save it so it can be
+ * restored if there are not enough buffers to store the packet.
+ */
+ last_avail_saved = vq->vq_last_avail;
+ while (1) {
+ n = vq_getchain(vq, &idx, &iov[i], n_iov - i, NULL);
+
+ if (n <= 0) {
+ /* Restore the last avail index. */
+ vq->vq_last_avail = last_avail_saved;
+ *u_cnt = 0;
+ return (n);
+ }
+
+ iov_len = count_iov(&iov[i], n);
+ i += n;
+ total_len += iov_len;
+
+ used[bufs].vu_idx = idx;
+
+ if (total_len < len) {
+ used[bufs].vu_tlen = iov_len;
+ bufs++;
+ } else {
+ used[bufs].vu_tlen = iov_len - (total_len - len);
+ bufs++;
+ break;
+ }
+
+ }
+
+ *u_cnt = bufs;
+
+ return (i);
+
+}
+
/*
* Return the currently-first request chain back to the available queue.
*
@@ -430,6 +481,39 @@
* Ensure the used descriptor is visible before updating the index.
* This is necessary on ISAs with memory ordering less strict than x86.
*/
+ atomic_thread_fence_rel();
+ vuh->vu_idx = uidx;
+}
+
+/*
+ * Return the specified merged rx buffers to the guest, setting their I/O lengths.
+ */
+void
+vq_relbufs_mrgrx(struct vqueue_info *vq, int nbufs, struct virtio_used *used)
+{
+ int i;
+ uint16_t uidx, mask;
+ volatile struct vring_used *vuh;
+ volatile struct virtio_used *vue;
+
+ mask = vq->vq_qsize - 1;
+ vuh = vq->vq_used;
+
+ uidx = vuh->vu_idx;
+
+ if (nbufs == 1) {
+ vue = &vuh->vu_ring[uidx++ & mask];
+ vue->vu_idx = used[0].vu_idx;
+ vue->vu_tlen = used[0].vu_tlen;
+ } else {
+ for (i = 0; i < nbufs; i++) {
+ vue = &vuh->vu_ring[(uidx + i) & mask];
+ vue->vu_idx = used[i].vu_idx;
+ vue->vu_tlen = used[i].vu_tlen;
+ }
+ uidx += nbufs;
+ }
+
atomic_thread_fence_rel();
vuh->vu_idx = uidx;
}
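
vq_getbufs_mrgrx() also relies on count_iov() from the newly included "iov.h", which is not shown in this diff. It is assumed to simply sum the lengths of an iovec array, roughly as in this sketch (the _sketch suffix marks it as illustrative, not the actual helper):

#include <stddef.h>
#include <sys/uio.h>

/* Assumed behaviour of count_iov(): total bytes described by an iovec array. */
static size_t
count_iov_sketch(const struct iovec *iov, int iovcnt)
{
	size_t total = 0;
	int i;

	for (i = 0; i < iovcnt; i++)
		total += iov[i].iov_len;
	return (total);
}
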