Page MenuHomeFreeBSD

D27906.id81529.diff
No OneTemporary

D27906.id81529.diff

Index: sys/dev/virtio/network/if_vtnet.c
===================================================================
--- sys/dev/virtio/network/if_vtnet.c
+++ sys/dev/virtio/network/if_vtnet.c
@@ -86,6 +86,10 @@
#include "opt_inet.h"
#include "opt_inet6.h"
+#if defined(INET) || defined(INET6)
+#include <machine/in_cksum.h>
+#endif
+
static int vtnet_modevent(module_t, int, void *);
static int vtnet_probe(device_t);
@@ -107,7 +111,7 @@
static void vtnet_free_rx_filters(struct vtnet_softc *);
static int vtnet_alloc_virtqueues(struct vtnet_softc *);
static int vtnet_setup_interface(struct vtnet_softc *);
-static int vtnet_change_mtu(struct vtnet_softc *, int);
+static int vtnet_ioctl_mtu(struct vtnet_softc *, int);
static int vtnet_ioctl(struct ifnet *, u_long, caddr_t);
static uint64_t vtnet_get_counter(struct ifnet *, ift_counter);
@@ -115,11 +119,15 @@
static void vtnet_rxq_free_mbufs(struct vtnet_rxq *);
static struct mbuf *
vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **);
-static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
+static int vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
struct mbuf *, int);
static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
static int vtnet_rxq_new_buf(struct vtnet_rxq *);
+static int vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
+ uint16_t, int, struct virtio_net_hdr *);
+static int vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
+ uint16_t, int, struct virtio_net_hdr *);
static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
struct virtio_net_hdr *);
static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
@@ -243,32 +251,42 @@
/* Tunables. */
static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
- "VNET driver parameters");
+ "VirtIO Net driver parameters");
+
static int vtnet_csum_disable = 0;
TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
&vtnet_csum_disable, 0, "Disables receive and send checksum offload");
+
+static int vtnet_fixup_needs_csum = 0;
+SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN,
+ &vtnet_fixup_needs_csum, 0,
+ "Calculate valid checksum for NEEDS_CSUM packets");
+
static int vtnet_tso_disable = 0;
TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable,
0, "Disables TCP Segmentation Offload");
+
static int vtnet_lro_disable = 0;
TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable,
0, "Disables TCP Large Receive Offload");
+
static int vtnet_mq_disable = 0;
TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable,
- 0, "Disables Multi Queue support");
+ 0, "Disables multiqueue support");
+
static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
- &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs");
+ &vtnet_mq_max_pairs, 0, "Sets the maximum number of multiqueue pairs");
+
static int vtnet_rx_process_limit = 512;
TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
- &vtnet_rx_process_limit, 0,
- "Limits the number RX segments processed in a single pass");
+ &vtnet_rx_process_limit, 0, "Limits RX segments processed in a single pass");
static uma_zone_t vtnet_tx_header_zone;
@@ -617,9 +635,8 @@
*/
if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
device_printf(dev,
- "LRO disabled due to both mergeable buffers and "
- "indirect descriptors not negotiated\n");
-
+ "LRO disabled since both mergeable buffers and "
+ "indirect descriptors were not negotiated\n");
features &= ~VTNET_LRO_FEATURES;
sc->vtnet_features =
virtio_negotiate_features(dev, features);
@@ -655,31 +672,24 @@
sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
} else if (vtnet_modern(sc)) {
- /*
- * The V1 header is the same size and layout as the mergeable
- * buffer header, but num_buffers will always be one. Depending
- * on the context, the driver uses the mergeable header for
- * either case.
- */
+ /* This is identical to the mergeable header. */
sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1);
} else
sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
- if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
- sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS;
- else if (vtnet_modern(sc)) /* TODO: And ANY_LAYOUT when supported */
- sc->vtnet_rx_nsegs = VTNET_MODERN_RX_SEGS;
+ if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
+ sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE;
else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
- sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS;
+ sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG;
else
- sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS;
+ sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE;
if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
- sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS;
+ sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX;
else
- sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS;
+ sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN;
sc->vtnet_max_vq_pairs = 1;
@@ -944,7 +954,7 @@
/*
* TODO: Enable interrupt binding if this is multiqueue. This will
- * only matter when per-vq MSIX is available.
+ * only matter when per-virtqueue MSIX is available.
*/
if (sc->vtnet_flags & VTNET_FLAG_MQ)
flags |= 0;
@@ -1024,6 +1034,10 @@
if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
+ if (vtnet_tunable_int(sc, "fixup_needs_csum",
+ vtnet_fixup_needs_csum) != 0)
+ sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM;
+
if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) ||
virtio_with_feature(dev, VIRTIO_NET_F_GUEST_ECN))
@@ -1069,42 +1083,59 @@
return (0);
}
-/* BMV: This needs rethinking. */
static int
-vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
+vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu)
+{
+ int framesz;
+
+ if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
+ return (MJUMPAGESIZE);
+ else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
+ return (MCLBYTES);
+
+ /*
+ * Try to scale the receive mbuf cluster size from the MTU. Without
+ * the GUEST_TSO[46] features, the VirtIO specification says the
+ * driver must only be able to receive ~1500 byte frames. But if
+ * jumbo frames can be transmitted then try to receive jumbo.
+ */
+ if (vtnet_modern(sc)) {
+ MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1));
+ framesz = sizeof(struct virtio_net_hdr_v1);
+ } else
+ framesz = sizeof(struct vtnet_rx_header);
+ framesz += sizeof(struct ether_vlan_header) + mtu;
+
+ if (framesz <= MCLBYTES)
+ return (MCLBYTES);
+ else if (framesz <= MJUMPAGESIZE)
+ return (MJUMPAGESIZE);
+ else if (framesz <= MJUM9BYTES)
+ return (MJUM9BYTES);
+
+ /* Sane default; avoid 16KB clusters. */
+ return (MCLBYTES);
+}
+
+static int
+vtnet_ioctl_mtu(struct vtnet_softc *sc, int mtu)
{
struct ifnet *ifp;
- int frame_size, clustersz;
+ int clustersz;
ifp = sc->vtnet_ifp;
+ VTNET_CORE_LOCK_ASSERT(sc);
- if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
+ if (ifp->if_mtu == mtu)
+ return (0);
+ else if (mtu < ETHERMIN || mtu > VTNET_MAX_MTU)
return (EINVAL);
- frame_size = sc->vtnet_hdr_size;
- frame_size += sizeof(struct ether_vlan_header) + new_mtu;
+ ifp->if_mtu = mtu;
+ clustersz = vtnet_rx_cluster_size(sc, mtu);
- /*
- * Based on the new MTU, determine which cluster size is appropriate
- * for the receive queues.
- *
- * BMV: This likely needs rethinking wrt LRO enabled/disabled and
- * the size of the virtqueue.
- */
- if (frame_size <= MCLBYTES)
- clustersz = MCLBYTES;
- else if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
- clustersz = MJUMPAGESIZE;
- else {
- if (frame_size > MJUM9BYTES)
- return (EINVAL);
- clustersz = MJUM9BYTES;
- }
-
- ifp->if_mtu = new_mtu;
- sc->vtnet_rx_new_clustersz = clustersz;
-
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ if (clustersz != sc->vtnet_rx_clustersz &&
+ ifp->if_drv_flags & IFF_DRV_RUNNING) {
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
vtnet_init_locked(sc, 0);
}
@@ -1125,11 +1156,9 @@
switch (cmd) {
case SIOCSIFMTU:
- if (ifp->if_mtu != ifr->ifr_mtu) {
- VTNET_CORE_LOCK(sc);
- error = vtnet_change_mtu(sc, ifr->ifr_mtu);
- VTNET_CORE_UNLOCK(sc);
- }
+ VTNET_CORE_LOCK(sc);
+ error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu);
+ VTNET_CORE_UNLOCK(sc);
break;
case SIOCSIFFLAGS:
@@ -1160,7 +1189,7 @@
case SIOCADDMULTI:
case SIOCDELMULTI:
VTNET_CORE_LOCK(sc);
- if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX &&
+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX &&
ifp->if_drv_flags & IFF_DRV_RUNNING)
vtnet_rx_filter_mac(sc);
VTNET_CORE_UNLOCK(sc);
@@ -1289,53 +1318,45 @@
vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
{
struct mbuf *m_head, *m_tail, *m;
- int i, clustersz;
+ int i, size;
- clustersz = sc->vtnet_rx_clustersz;
+ m_head = NULL;
+ size = sc->vtnet_rx_clustersz;
KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
- ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
+ ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs));
- m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clustersz);
- if (m_head == NULL)
- goto fail;
-
- m_head->m_len = clustersz;
- m_tail = m_head;
-
- /* Allocate the rest of the chain. */
- for (i = 1; i < nbufs; i++) {
- m = m_getjcl(M_NOWAIT, MT_DATA, 0, clustersz);
- if (m == NULL)
- goto fail;
+ for (i = 0; i < nbufs; i++) {
+ m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size);
+ if (m == NULL) {
+ sc->vtnet_stats.mbuf_alloc_failed++;
+ m_freem(m_head);
+ return (NULL);
+ }
- m->m_len = clustersz;
- m_tail->m_next = m;
- m_tail = m;
+ m->m_len = size;
+ if (m_head != NULL) {
+ m_tail->m_next = m;
+ m_tail = m;
+ } else
+ m_head = m_tail = m;
}
if (m_tailp != NULL)
*m_tailp = m_tail;
return (m_head);
-
-fail:
- sc->vtnet_stats.mbuf_alloc_failed++;
- m_freem(m_head);
-
- return (NULL);
}
/*
* Slow path for when LRO without mergeable buffers is negotiated.
*/
static int
-vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
+vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
int len0)
{
struct vtnet_softc *sc;
- struct mbuf *m, *m_prev;
- struct mbuf *m_new, *m_tail;
+ struct mbuf *m, *m_prev, *m_new, *m_tail;
int len, clustersz, nreplace, error;
sc = rxq->vtnrx_sc;
@@ -1349,25 +1370,23 @@
len = len0;
/*
- * Since these mbuf chains are so large, we avoid allocating an
- * entire replacement chain if possible. When the received frame
- * did not consume the entire chain, the unused mbufs are moved
- * to the replacement chain.
+ * Since these mbuf chains are so large, avoid allocating a complete
+ * replacement when the received frame did not consume the entire
+ * chain. Unused mbufs are moved to the tail of the replacement mbuf.
*/
while (len > 0) {
- /*
- * Something is seriously wrong if we received a frame
- * larger than the chain. Drop it.
- */
if (m == NULL) {
sc->vtnet_stats.rx_frame_too_large++;
return (EMSGSIZE);
}
- /* We always allocate the same cluster size. */
+ /*
+	 * Every mbuf should have the expected cluster size since that
+ * is also used to allocate the replacements.
+ */
KASSERT(m->m_len == clustersz,
- ("%s: mbuf size %d is not the cluster size %d",
- __func__, m->m_len, clustersz));
+ ("%s: mbuf size %d not expected cluster size %d", __func__,
+ m->m_len, clustersz));
m->m_len = MIN(m->m_len, len);
len -= m->m_len;
@@ -1377,9 +1396,9 @@
nreplace++;
}
- KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
- ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
- sc->vtnet_rx_nmbufs));
+ KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
+ ("%s: invalid replacement mbuf count %d max %d", __func__,
+ nreplace, sc->vtnet_rx_nmbufs));
m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
if (m_new == NULL) {
@@ -1388,8 +1407,8 @@
}
/*
- * Move any unused mbufs from the received chain onto the end
- * of the new chain.
+ * Move any unused mbufs from the received mbuf chain onto the
+ * end of the replacement chain.
*/
if (m_prev->m_next != NULL) {
m_tail->m_next = m_prev->m_next;
@@ -1399,21 +1418,18 @@
error = vtnet_rxq_enqueue_buf(rxq, m_new);
if (error) {
/*
- * BAD! We could not enqueue the replacement mbuf chain. We
- * must restore the m0 chain to the original state if it was
- * modified so we can subsequently discard it.
+		 * The replacement is supposed to be a copy of the one
+ * dequeued so this is a very unexpected error.
*
- * NOTE: The replacement is suppose to be an identical copy
- * to the one just dequeued so this is an unexpected error.
+ * Restore the m0 chain to the original state if it was
+ * modified so we can then discard it.
*/
- sc->vtnet_stats.rx_enq_replacement_failed++;
-
if (m_tail->m_next != NULL) {
m_prev->m_next = m_tail->m_next;
m_tail->m_next = NULL;
}
-
m_prev->m_len = clustersz;
+ sc->vtnet_stats.rx_enq_replacement_failed++;
m_freem(m_new);
}
@@ -1429,31 +1445,23 @@
sc = rxq->vtnrx_sc;
- KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
- ("%s: chained mbuf without LRO_NOMRG", __func__));
+ if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
+ return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len));
- if (m->m_next == NULL) {
- /* Fast-path for the common case of just one mbuf. */
- if (m->m_len < len)
- return (EINVAL);
+ MPASS(m->m_next == NULL);
+ if (m->m_len < len)
+ return (EMSGSIZE);
- m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
- if (m_new == NULL)
- return (ENOBUFS);
+ m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
+ if (m_new == NULL)
+ return (ENOBUFS);
- error = vtnet_rxq_enqueue_buf(rxq, m_new);
- if (error) {
- /*
- * The new mbuf is suppose to be an identical
- * copy of the one just dequeued so this is an
- * unexpected error.
- */
- m_freem(m_new);
- sc->vtnet_stats.rx_enq_replacement_failed++;
- } else
- m->m_len = len;
+ error = vtnet_rxq_enqueue_buf(rxq, m_new);
+ if (error) {
+ sc->vtnet_stats.rx_enq_replacement_failed++;
+ m_freem(m_new);
} else
- error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
+ m->m_len = len;
return (error);
}
@@ -1463,33 +1471,36 @@
{
struct vtnet_softc *sc;
struct sglist *sg;
- int error;
+ int header_inlined, error;
sc = rxq->vtnrx_sc;
sg = rxq->vtnrx_sg;
KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
("%s: mbuf chain without LRO_NOMRG", __func__));
- KASSERT(m->m_len == sc->vtnet_rx_clustersz, ("%s: unexpected mbuf "
- "length %d %d", __func__, m->m_len, sc->vtnet_rx_clustersz));
VTNET_RXQ_LOCK_ASSERT(rxq);
sglist_reset(sg);
- if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
- error = sglist_append_mbuf(sg, m);
- } else {
- struct vtnet_rx_header *rxhdr;
+ header_inlined = vtnet_modern(sc) ||
+ (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */
- rxhdr = mtod(m, struct vtnet_rx_header *);
+ if (header_inlined)
+ error = sglist_append_mbuf(sg, m);
+ else {
+ struct vtnet_rx_header *rxhdr =
+ mtod(m, struct vtnet_rx_header *);
MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
- /* Append inlined header and then rest of the mbuf chain. */
+ /* Append the header and remaining mbuf data. */
error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
- if (error == 0) {
- error = sglist_append(sg, &rxhdr[1],
- m->m_len - sizeof(struct vtnet_rx_header));
- }
- if (error == 0 && m->m_next != NULL)
+ if (error)
+ return (error);
+ error = sglist_append(sg, &rxhdr[1],
+ m->m_len - sizeof(struct vtnet_rx_header));
+ if (error)
+ return (error);
+
+ if (m->m_next != NULL)
error = sglist_append_mbuf(sg, m->m_next);
}
@@ -1519,54 +1530,73 @@
return (error);
}
-/*
- * Use the checksum offset in the VirtIO header to set the
- * correct CSUM_* flags.
- */
static int
-vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
- uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
+vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype,
+ int hoff, struct virtio_net_hdr *hdr)
{
struct vtnet_softc *sc;
-#if defined(INET) || defined(INET6)
- int offset = hdr->csum_start + hdr->csum_offset;
-#endif
+ int error;
sc = rxq->vtnrx_sc;
- /* Only do a basic sanity check on the offset. */
- switch (eth_type) {
-#if defined(INET)
- case ETHERTYPE_IP:
- if (__predict_false(offset < ip_start + sizeof(struct ip)))
- return (1);
- break;
-#endif
-#if defined(INET6)
- case ETHERTYPE_IPV6:
- if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
- return (1);
- break;
-#endif
- default:
- sc->vtnet_stats.rx_csum_bad_ethtype++;
- return (1);
+ /*
+ * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does
+ * not have an analogous CSUM flag. The checksum has been validated,
+ * but is incomplete (TCP/UDP pseudo header).
+ *
+ * The packet is likely from another VM on the same host that itself
+ * performed checksum offloading so Tx/Rx is basically a memcpy and
+ * the checksum has little value.
+ *
+ * Default to receiving the packet as-is for performance reasons, but
+ * this can cause issues if the packet is to be forwarded because it
+ * does not contain a valid checksum. This patch may be helpful:
+ * https://reviews.freebsd.org/D6611. In the meantime, have the driver
+ * compute the checksum if requested.
+ *
+	 * BMV: Need to add a CSUM_PARTIAL flag?
+ */
+ if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
+ error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
+ return (error);
}
/*
- * Use the offset to determine the appropriate CSUM_* flags. This is
- * a bit dirty, but we can get by with it since the checksum offsets
- * happen to be different. We assume the host host does not do IPv4
- * header checksum offloading.
+ * Compute the checksum in the driver so the packet will contain a
+ * valid checksum. The checksum is at csum_offset from csum_start.
*/
- switch (hdr->csum_offset) {
- case offsetof(struct udphdr, uh_sum):
- case offsetof(struct tcphdr, th_sum):
+ switch (etype) {
+#if defined(INET) || defined(INET6)
+ case ETHERTYPE_IP:
+ case ETHERTYPE_IPV6: {
+ int csum_off, csum_end;
+ uint16_t csum;
+
+ csum_off = hdr->csum_start + hdr->csum_offset;
+ csum_end = csum_off + sizeof(uint16_t);
+
+ /* Assume checksum will be in the first mbuf. */
+ if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
+ return (1);
+
+ /*
+ * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
+ * checksum and write it at the specified offset. We could
+ * try to verify the packet: csum_start should probably
+ * correspond to the start of the TCP/UDP header.
+ *
+ * BMV: Need to properly handle UDP with zero checksum. Is
+ * the IPv4 header checksum implicitly validated?
+ */
+ csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
+ *(uint16_t *)(mtodo(m, csum_off)) = csum;
m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
m->m_pkthdr.csum_data = 0xFFFF;
break;
+ }
+#endif
default:
- sc->vtnet_stats.rx_csum_bad_offset++;
+ sc->vtnet_stats.rx_csum_bad_ethtype++;
return (1);
}
@@ -1574,64 +1604,55 @@
}
static int
-vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
- uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
+vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m,
+ uint16_t etype, int hoff, struct virtio_net_hdr *hdr)
{
struct vtnet_softc *sc;
- int offset, proto;
+ int protocol;
sc = rxq->vtnrx_sc;
- switch (eth_type) {
+ switch (etype) {
#if defined(INET)
- case ETHERTYPE_IP: {
- struct ip *ip;
- if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
- return (1);
- ip = (struct ip *)(m->m_data + ip_start);
- proto = ip->ip_p;
- offset = ip_start + (ip->ip_hl << 2);
+ case ETHERTYPE_IP:
+ if (__predict_false(m->m_len < hoff + sizeof(struct ip)))
+ protocol = IPPROTO_DONE;
+ else {
+ struct ip *ip = (struct ip *)(m->m_data + hoff);
+ protocol = ip->ip_p;
+ }
break;
- }
#endif
#if defined(INET6)
case ETHERTYPE_IPV6:
- if (__predict_false(m->m_len < ip_start +
- sizeof(struct ip6_hdr)))
- return (1);
- offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
- if (__predict_false(offset < 0))
- return (1);
+ if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
+ || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0)
+ protocol = IPPROTO_DONE;
break;
#endif
default:
- sc->vtnet_stats.rx_csum_bad_ethtype++;
- return (1);
+ protocol = IPPROTO_DONE;
+ break;
}
- switch (proto) {
+ switch (protocol) {
case IPPROTO_TCP:
- if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
- return (1);
- m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
- m->m_pkthdr.csum_data = 0xFFFF;
- break;
case IPPROTO_UDP:
- if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
- return (1);
m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
m->m_pkthdr.csum_data = 0xFFFF;
break;
default:
/*
- * For the remaining protocols, FreeBSD does not support
- * checksum offloading, so the checksum will be recomputed.
+ * FreeBSD does not support checksum offloading of this
+ * protocol. Let the stack re-verify the checksum later
+ * if the protocol is supported.
*/
#if 0
- if_printf(sc->vtnet_ifp, "cksum offload of unsupported "
- "protocol eth_type=%#x proto=%d csum_start=%d "
- "csum_offset=%d\n", __func__, eth_type, proto,
- hdr->csum_start, hdr->csum_offset);
+ if_printf(sc->vtnet_ifp,
+ "%s: checksum offload of unsupported protocol "
+ "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n",
+ __func__, etype, protocol, hdr->csum_start,
+ hdr->csum_offset);
#endif
break;
}
@@ -1639,41 +1660,29 @@
return (0);
}
-/*
- * Set the appropriate CSUM_* flags. Unfortunately, the information
- * provided is not directly useful to us. The VirtIO header gives the
- * offset of the checksum, which is all Linux needs, but this is not
- * how FreeBSD does things. We are forced to peek inside the packet
- * a bit.
- *
- * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
- * could accept the offsets and let the stack figure it out.
- */
static int
vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
struct virtio_net_hdr *hdr)
{
- struct ether_header *eh;
- struct ether_vlan_header *evh;
- uint16_t eth_type;
- int offset, error;
-
- eh = mtod(m, struct ether_header *);
- eth_type = ntohs(eh->ether_type);
- if (eth_type == ETHERTYPE_VLAN) {
- /* BMV: We should handle nested VLAN tags too. */
- evh = mtod(m, struct ether_vlan_header *);
- eth_type = ntohs(evh->evl_proto);
- offset = sizeof(struct ether_vlan_header);
+ const struct ether_header *eh;
+ int hoff;
+ uint16_t etype;
+
+ eh = mtod(m, const struct ether_header *);
+ etype = ntohs(eh->ether_type);
+ if (etype == ETHERTYPE_VLAN) {
+ /* TODO BMV: Handle QinQ. */
+ const struct ether_vlan_header *evh =
+ mtod(m, const struct ether_vlan_header *);
+ etype = ntohs(evh->evl_proto);
+ hoff = sizeof(struct ether_vlan_header);
} else
- offset = sizeof(struct ether_header);
+ hoff = sizeof(struct ether_header);
if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
- error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
- else
- error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);
-
- return (error);
+ return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr));
+ else /* VIRTIO_NET_HDR_F_DATA_VALID */
+ return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr));
}
static void
@@ -1708,14 +1717,16 @@
{
struct vtnet_softc *sc;
struct virtqueue *vq;
- struct mbuf *m, *m_tail;
- int len;
+ struct mbuf *m_tail;
sc = rxq->vtnrx_sc;
vq = rxq->vtnrx_vq;
m_tail = m_head;
while (--nbufs > 0) {
+ struct mbuf *m;
+ int len;
+
m = virtqueue_dequeue(vq, &len);
if (m == NULL) {
rxq->vtnrx_stats.vrxs_ierrors++;
@@ -1756,13 +1767,12 @@
{
struct vtnet_softc *sc;
struct ifnet *ifp;
- struct ether_header *eh;
sc = rxq->vtnrx_sc;
ifp = sc->vtnet_ifp;
if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
- eh = mtod(m, struct ether_header *);
+ struct ether_header *eh = mtod(m, struct ether_header *);
if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
vtnet_vlan_tag_remove(m);
/*
@@ -1777,13 +1787,8 @@
m->m_pkthdr.flowid = rxq->vtnrx_id;
M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
- /*
- * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
- * distinction that Linux does. Need to reevaluate if performing
- * offloading for the NEEDS_CSUM case is really appropriate.
- */
- if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
- VIRTIO_NET_HDR_F_DATA_VALID)) {
+ if (hdr->flags &
+ (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) {
if (vtnet_rxq_csum(rxq, m, hdr) == 0)
rxq->vtnrx_stats.vrxs_csum++;
else
@@ -1805,8 +1810,7 @@
struct vtnet_softc *sc;
struct ifnet *ifp;
struct virtqueue *vq;
- struct mbuf *m;
- int len, deq, nbufs, adjsz, count;
+ int deq, count;
sc = rxq->vtnrx_sc;
vq = rxq->vtnrx_vq;
@@ -1817,6 +1821,9 @@
VTNET_RXQ_LOCK_ASSERT(rxq);
while (count-- > 0) {
+ struct mbuf *m;
+ int len, nbufs, adjsz;
+
m = virtqueue_dequeue(vq, &len);
if (m == NULL)
break;
@@ -1828,20 +1835,21 @@
continue;
}
- if (vtnet_modern(sc) ||
- sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
- /*
- * For our purposes here, the V1 header is the same as
- * the mergeable buffers header.
- */
+ if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
struct virtio_net_hdr_mrg_rxbuf *mhdr =
mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
nbufs = vtnet_htog16(sc, mhdr->num_buffers);
adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+ } else if (vtnet_modern(sc)) {
+ nbufs = 1; /* num_buffers is always 1 */
+ adjsz = sizeof(struct virtio_net_hdr_v1);
} else {
nbufs = 1;
adjsz = sizeof(struct vtnet_rx_header);
- /* Our pad between the header and start of the frame. */
+ /*
+ * Account for our gap between the header and start of
+ * data to keep the segments separated.
+ */
len += VTNET_RX_HEADER_PAD;
}
@@ -1865,9 +1873,9 @@
/*
* Save an endian swapped version of the header prior to it
- * being stripped. For both mergeable and non-mergeable, the
- * header is at the start of the mbuf data. num_buffers was
- * already saved (and longer need it) so use a regular header.
+ * being stripped. The header is always at the start of the
+ * mbuf data. num_buffers was already saved (and not needed)
+ * so use the standard header.
*/
hdr = mtod(m, struct virtio_net_hdr *);
lhdr.flags = hdr->flags;
@@ -2981,31 +2989,24 @@
vtnet_init_rx_queues(struct vtnet_softc *sc)
{
device_t dev;
+ struct ifnet *ifp;
struct vtnet_rxq *rxq;
int i, clustersz, error;
dev = sc->vtnet_dev;
+ ifp = sc->vtnet_ifp;
- /*
- * Use the new cluster size if one has been set (via a MTU
- * change). Otherwise, use the standard 2K clusters.
- *
- * BMV: It might make sense to use page sized clusters as
- * the default (depending on the features negotiated).
- */
- if (sc->vtnet_rx_new_clustersz != 0) {
- clustersz = sc->vtnet_rx_new_clustersz;
- sc->vtnet_rx_new_clustersz = 0;
- } else
- clustersz = MCLBYTES;
-
+ clustersz = vtnet_rx_cluster_size(sc, ifp->if_mtu);
sc->vtnet_rx_clustersz = clustersz;
- sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clustersz);
- KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS ||
- sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
- ("%s: too many rx mbufs %d for %d segments", __func__,
- sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
+ if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) {
+ sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) +
+ VTNET_MAX_RX_SIZE, clustersz);
+ KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
+ ("%s: too many rx mbufs %d for %d segments", __func__,
+ sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
+ } else
+ sc->vtnet_rx_nmbufs = 1;
for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
rxq = &sc->vtnet_rxqs[i];
@@ -3016,8 +3017,7 @@
VTNET_RXQ_UNLOCK(rxq);
if (error) {
- device_printf(dev,
- "cannot allocate mbufs for Rx queue %d\n", i);
+ device_printf(dev, "cannot populate Rx queue %d\n", i);
return (error);
}
}
Index: sys/dev/virtio/network/if_vtnetvar.h
===================================================================
--- sys/dev/virtio/network/if_vtnetvar.h
+++ sys/dev/virtio/network/if_vtnetvar.h
@@ -153,6 +153,7 @@
#define VTNET_FLAG_INDIRECT 0x0400
#define VTNET_FLAG_EVENT_IDX 0x0800
#define VTNET_FLAG_SUSPENDED 0x1000
+#define VTNET_FLAG_FIXUP_NEEDS_CSUM 0x2000
int vtnet_link_active;
int vtnet_hdr_size;
@@ -160,7 +161,6 @@
int vtnet_rx_nsegs;
int vtnet_rx_nmbufs;
int vtnet_rx_clustersz;
- int vtnet_rx_new_clustersz;
int vtnet_tx_intr_thresh;
int vtnet_tx_nsegs;
int vtnet_if_flags;
@@ -336,23 +336,18 @@
#define VTNET_MAX_RX_SIZE 65550
/*
- * Used to preallocate the Vq indirect descriptors. The first segment is
- * reserved for the header, except for mergeable buffers or modern since
- * the header is placed inline with the data.
+ * Used to preallocate the VQ indirect descriptors. Modern and mergeable
+ * buffers do not require one segment for the VirtIO header since it is
+ * placed inline at the beginning of the receive buffer.
*/
-#define VTNET_MRG_RX_SEGS 1
-#define VTNET_MODERN_RX_SEGS 1
-#define VTNET_MIN_RX_SEGS 2
-#define VTNET_MAX_RX_SEGS 34
-#define VTNET_MIN_TX_SEGS 32
-#define VTNET_MAX_TX_SEGS 64
+#define VTNET_RX_SEGS_HDR_INLINE 1
+#define VTNET_RX_SEGS_HDR_SEPARATE 2
+#define VTNET_RX_SEGS_LRO_NOMRG 34
+#define VTNET_TX_SEGS_MIN 32
+#define VTNET_TX_SEGS_MAX 64
-/*
- * Assert we can receive and transmit the maximum with regular
- * size clusters.
- */
-CTASSERT(((VTNET_MAX_RX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_RX_SIZE);
-CTASSERT(((VTNET_MAX_TX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_MTU);
+CTASSERT(((VTNET_RX_SEGS_LRO_NOMRG - 1) * MCLBYTES) >= VTNET_MAX_RX_SIZE);
+CTASSERT(((VTNET_TX_SEGS_MAX - 1) * MCLBYTES) >= VTNET_MAX_MTU);
/*
* Number of slots in the Tx bufrings. This value matches most other
@@ -360,16 +355,6 @@
*/
#define VTNET_DEFAULT_BUFRING_SIZE 4096
-/*
- * Determine how many mbufs are in each receive buffer. For LRO without
- * mergeable buffers, we must allocate an mbuf chain large enough to
- * hold both the vtnet_rx_header and the maximum receivable data.
- */
-#define VTNET_NEEDED_RX_MBUFS(_sc, _clustersz) \
- ((_sc)->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0 ? 1 : \
- howmany(sizeof(struct vtnet_rx_header) + VTNET_MAX_RX_SIZE, \
- (_clustersz))
-
#define VTNET_CORE_MTX(_sc) &(_sc)->vtnet_mtx
#define VTNET_CORE_LOCK(_sc) mtx_lock(VTNET_CORE_MTX((_sc)))
#define VTNET_CORE_UNLOCK(_sc) mtx_unlock(VTNET_CORE_MTX((_sc)))

File Metadata

Mime Type
text/plain
Expires
Sat, Apr 25, 9:38 PM (17 h, 30 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
32108178
Default Alt Text
D27906.id81529.diff (31 KB)

Event Timeline