diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -86,6 +86,10 @@
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
+#if defined(INET) || defined(INET6)
+#include <machine/in_cksum.h>
+#endif
+
 static int	vtnet_modevent(module_t, int, void *);
 
 static int	vtnet_probe(device_t);
@@ -107,7 +111,7 @@
 static void	vtnet_free_rx_filters(struct vtnet_softc *);
 static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
 static int	vtnet_setup_interface(struct vtnet_softc *);
-static int	vtnet_change_mtu(struct vtnet_softc *, int);
+static int	vtnet_ioctl_mtu(struct vtnet_softc *, int);
 static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t);
 static uint64_t	vtnet_get_counter(struct ifnet *, ift_counter);
 
@@ -115,11 +119,15 @@
 static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
 static struct mbuf *
 		vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **);
-static int	vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
+static int	vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
		    struct mbuf *, int);
 static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
 static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
 static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
+static int	vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
+		     uint16_t, int, struct virtio_net_hdr *);
+static int	vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
+		     uint16_t, int, struct virtio_net_hdr *);
 static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
		    struct virtio_net_hdr *);
 static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
@@ -243,32 +251,42 @@
 
 /* Tunables. */
 static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
-    "VNET driver parameters");
+    "VirtIO Net driver parameters");
+
 static int vtnet_csum_disable = 0;
 TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
 SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
     &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
+
+static int vtnet_fixup_needs_csum = 0;
+SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN,
+    &vtnet_fixup_needs_csum, 0,
+    "Calculate valid checksum for NEEDS_CSUM packets");
+
 static int vtnet_tso_disable = 0;
 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN,
     &vtnet_tso_disable, 0, "Disables TCP Segmentation Offload");
+
 static int vtnet_lro_disable = 0;
 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN,
     &vtnet_lro_disable, 0, "Disables TCP Large Receive Offload");
+
 static int vtnet_mq_disable = 0;
 TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable,
-    0, "Disables Multi Queue support");
+    0, "Disables multiqueue support");
+
 static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
 TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
-    &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs");
-static int vtnet_rx_process_limit = 512;
+    &vtnet_mq_max_pairs, 0, "Sets the maximum number of multiqueue pairs");
+
+static int vtnet_rx_process_limit = 1024;
 TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
 SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
-    &vtnet_rx_process_limit, 0,
-    "Limits the number RX segments processed in a single pass");
+    &vtnet_rx_process_limit, 0, "Limits RX segments processed in a single pass");
 
 static uma_zone_t vtnet_tx_header_zone;
 
@@ -617,9 +635,8 @@
	 */
	if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
		device_printf(dev,
-		    "LRO disabled due to both mergeable buffers and "
-		    "indirect descriptors not negotiated\n");
-
+		    "LRO disabled since both mergeable buffers and "
+		    "indirect descriptors were not negotiated\n");
		features &= ~VTNET_LRO_FEATURES;
		sc->vtnet_features =
		    virtio_negotiate_features(dev, features);
@@ -655,31 +672,24 @@
		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	} else if (vtnet_modern(sc)) {
-		/*
-		 * The V1 header is the same size and layout as the mergeable
-		 * buffer header, but num_buffers will always be one. Depending
-		 * on the context, the driver uses the mergeable header for
-		 * either case.
-		 */
+		/* This is identical to the mergeable header. */
		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1);
	} else
		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
 
-	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
-		sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS;
-	else if (vtnet_modern(sc)) /* TODO: And ANY_LAYOUT when supported */
-		sc->vtnet_rx_nsegs = VTNET_MODERN_RX_SEGS;
+	if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
+		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE;
	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
-		sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS;
+		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG;
	else
-		sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS;
+		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE;
 
	if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
-		sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS;
+		sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX;
	else
-		sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS;
+		sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN;
 
	sc->vtnet_max_vq_pairs = 1;
 
@@ -944,7 +954,7 @@
 
	/*
	 * TODO: Enable interrupt binding if this is multiqueue. This will
-	 * only matter when per-vq MSIX is available.
+	 * only matter when per-virtqueue MSIX is available.
	 */
	if (sc->vtnet_flags & VTNET_FLAG_MQ)
		flags |= 0;
@@ -1024,6 +1034,10 @@
	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
		ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
 
+		if (vtnet_tunable_int(sc, "fixup_needs_csum",
+		    vtnet_fixup_needs_csum) != 0)
+			sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM;
+
		if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
		    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) ||
		    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_ECN))
@@ -1069,42 +1083,59 @@
	return (0);
 }
 
-/* BMV: This needs rethinking. */
 static int
-vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
+vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu)
+{
+	int framesz;
+
+	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
+		return (MJUMPAGESIZE);
+	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
+		return (MCLBYTES);
+
+	/*
+	 * Try to scale the receive mbuf cluster size from the MTU. Without
+	 * the GUEST_TSO[46] features, the VirtIO specification says the
+	 * driver need only receive ~1500 byte frames. But if jumbo frames
+	 * can be transmitted, try to receive jumbo frames too.
+	 */
+	if (vtnet_modern(sc)) {
+		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1));
+		framesz = sizeof(struct virtio_net_hdr_v1);
+	} else
+		framesz = sizeof(struct vtnet_rx_header);
+	framesz += sizeof(struct ether_vlan_header) + mtu;
+
+	if (framesz <= MCLBYTES)
+		return (MCLBYTES);
+	else if (framesz <= MJUMPAGESIZE)
+		return (MJUMPAGESIZE);
+	else if (framesz <= MJUM9BYTES)
+		return (MJUM9BYTES);
+
+	/* Sane default; avoid 16KB clusters. */
+	return (MCLBYTES);
+}
+
+static int
+vtnet_ioctl_mtu(struct vtnet_softc *sc, int mtu)
 {
	struct ifnet *ifp;
-	int frame_size, clustersz;
+	int clustersz;
 
	ifp = sc->vtnet_ifp;
+	VTNET_CORE_LOCK_ASSERT(sc);
 
-	if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
+	if (ifp->if_mtu == mtu)
+		return (0);
+	else if (mtu < ETHERMIN || mtu > VTNET_MAX_MTU)
		return (EINVAL);
 
-	frame_size = sc->vtnet_hdr_size;
-	frame_size += sizeof(struct ether_vlan_header) + new_mtu;
+	ifp->if_mtu = mtu;
+	clustersz = vtnet_rx_cluster_size(sc, mtu);
 
-	/*
-	 * Based on the new MTU, determine which cluster size is appropriate
-	 * for the receive queues.
-	 *
-	 * BMV: This likely needs rethinking wrt LRO enabled/disabled and
-	 * the size of the virtqueue.
-	 */
-	if (frame_size <= MCLBYTES)
-		clustersz = MCLBYTES;
-	else if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
-		clustersz = MJUMPAGESIZE;
-	else {
-		if (frame_size > MJUM9BYTES)
-			return (EINVAL);
-		clustersz = MJUM9BYTES;
-	}
-
-	ifp->if_mtu = new_mtu;
-	sc->vtnet_rx_new_clustersz = clustersz;
-
-	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+	if (clustersz != sc->vtnet_rx_clustersz &&
+	    ifp->if_drv_flags & IFF_DRV_RUNNING) {
		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		vtnet_init_locked(sc, 0);
	}
@@ -1125,11 +1156,9 @@
 
	switch (cmd) {
	case SIOCSIFMTU:
-		if (ifp->if_mtu != ifr->ifr_mtu) {
-			VTNET_CORE_LOCK(sc);
-			error = vtnet_change_mtu(sc, ifr->ifr_mtu);
-			VTNET_CORE_UNLOCK(sc);
-		}
+		VTNET_CORE_LOCK(sc);
+		error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu);
+		VTNET_CORE_UNLOCK(sc);
		break;
 
	case SIOCSIFFLAGS:
@@ -1160,7 +1189,7 @@
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		VTNET_CORE_LOCK(sc);
-		if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX && 
+		if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX &&
		    ifp->if_drv_flags & IFF_DRV_RUNNING)
			vtnet_rx_filter_mac(sc);
		VTNET_CORE_UNLOCK(sc);
@@ -1289,53 +1318,45 @@
 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
 {
	struct mbuf *m_head, *m_tail, *m;
-	int i, clustersz;
+	int i, size;
 
-	clustersz = sc->vtnet_rx_clustersz;
+	m_head = NULL;
+	size = sc->vtnet_rx_clustersz;
 
	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
-	    ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
+	    ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs));
 
-	m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clustersz);
-	if (m_head == NULL)
-		goto fail;
-
-	m_head->m_len = clustersz;
-	m_tail = m_head;
-
-	/* Allocate the rest of the chain. */
-	for (i = 1; i < nbufs; i++) {
-		m = m_getjcl(M_NOWAIT, MT_DATA, 0, clustersz);
-		if (m == NULL)
-			goto fail;
+	for (i = 0; i < nbufs; i++) {
+		m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size);
+		if (m == NULL) {
+			sc->vtnet_stats.mbuf_alloc_failed++;
+			m_freem(m_head);
+			return (NULL);
+		}
 
-		m->m_len = clustersz;
-		m_tail->m_next = m;
-		m_tail = m;
+		m->m_len = size;
+		if (m_head != NULL) {
+			m_tail->m_next = m;
+			m_tail = m;
+		} else
+			m_head = m_tail = m;
	}
 
	if (m_tailp != NULL)
		*m_tailp = m_tail;
 
	return (m_head);
-
-fail:
-	sc->vtnet_stats.mbuf_alloc_failed++;
-	m_freem(m_head);
-
-	return (NULL);
 }
 
 /*
  * Slow path for when LRO without mergeable buffers is negotiated.
  */
 static int
-vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
+vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
     int len0)
 {
	struct vtnet_softc *sc;
-	struct mbuf *m, *m_prev;
-	struct mbuf *m_new, *m_tail;
+	struct mbuf *m, *m_prev, *m_new, *m_tail;
	int len, clustersz, nreplace, error;
 
	sc = rxq->vtnrx_sc;
@@ -1349,25 +1370,23 @@
	len = len0;
 
	/*
-	 * Since these mbuf chains are so large, we avoid allocating an
-	 * entire replacement chain if possible. When the received frame
-	 * did not consume the entire chain, the unused mbufs are moved
-	 * to the replacement chain.
+	 * Since these mbuf chains are so large, avoid allocating a complete
+	 * replacement when the received frame did not consume the entire
+	 * chain. Unused mbufs are moved to the tail of the replacement chain.
	 */
	while (len > 0) {
-		/*
-		 * Something is seriously wrong if we received a frame
-		 * larger than the chain. Drop it.
-		 */
		if (m == NULL) {
			sc->vtnet_stats.rx_frame_too_large++;
			return (EMSGSIZE);
		}
 
-		/* We always allocate the same cluster size. */
+		/*
+		 * Every mbuf should have the expected cluster size since that
+		 * is also used to allocate the replacements.
+		 */
		KASSERT(m->m_len == clustersz,
-		    ("%s: mbuf size %d is not the cluster size %d",
-		    __func__, m->m_len, clustersz));
+		    ("%s: mbuf size %d not expected cluster size %d", __func__,
+		    m->m_len, clustersz));
 
		m->m_len = MIN(m->m_len, len);
		len -= m->m_len;
@@ -1377,9 +1396,9 @@
		nreplace++;
	}
 
-	KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
-	    ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
-	    sc->vtnet_rx_nmbufs));
+	KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
+	    ("%s: invalid replacement mbuf count %d max %d", __func__,
+	    nreplace, sc->vtnet_rx_nmbufs));
 
	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
	if (m_new == NULL) {
@@ -1388,8 +1407,8 @@
	}
 
	/*
-	 * Move any unused mbufs from the received chain onto the end
-	 * of the new chain.
+	 * Move any unused mbufs from the received mbuf chain onto the
+	 * end of the replacement chain.
	 */
	if (m_prev->m_next != NULL) {
		m_tail->m_next = m_prev->m_next;
		m_prev->m_next = NULL;
	}
@@ -1399,21 +1418,18 @@
	error = vtnet_rxq_enqueue_buf(rxq, m_new);
	if (error) {
		/*
-		 * BAD! We could not enqueue the replacement mbuf chain. We
-		 * must restore the m0 chain to the original state if it was
-		 * modified so we can subsequently discard it.
+		 * The replacement is supposed to be a copy of the one
+		 * dequeued so this is a very unexpected error.
		 *
-		 * NOTE: The replacement is suppose to be an identical copy
-		 * to the one just dequeued so this is an unexpected error.
+		 * Restore the m0 chain to the original state if it was
+		 * modified so we can then discard it.
		 */
-		sc->vtnet_stats.rx_enq_replacement_failed++;
-
		if (m_tail->m_next != NULL) {
			m_prev->m_next = m_tail->m_next;
			m_tail->m_next = NULL;
		}
-
		m_prev->m_len = clustersz;
+		sc->vtnet_stats.rx_enq_replacement_failed++;
		m_freem(m_new);
	}
@@ -1429,31 +1445,23 @@
 {
	struct vtnet_softc *sc;
 
	sc = rxq->vtnrx_sc;
 
-	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
-	    ("%s: chained mbuf without LRO_NOMRG", __func__));
+	if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
+		return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len));
 
-	if (m->m_next == NULL) {
-		/* Fast-path for the common case of just one mbuf. */
-		if (m->m_len < len)
-			return (EINVAL);
+	MPASS(m->m_next == NULL);
+	if (m->m_len < len)
+		return (EMSGSIZE);
 
-		m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
-		if (m_new == NULL)
-			return (ENOBUFS);
+	m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
+	if (m_new == NULL)
+		return (ENOBUFS);
 
-		error = vtnet_rxq_enqueue_buf(rxq, m_new);
-		if (error) {
-			/*
-			 * The new mbuf is suppose to be an identical
-			 * copy of the one just dequeued so this is an
-			 * unexpected error.
-			 */
-			m_freem(m_new);
-			sc->vtnet_stats.rx_enq_replacement_failed++;
-		} else
-			m->m_len = len;
+	error = vtnet_rxq_enqueue_buf(rxq, m_new);
+	if (error) {
+		sc->vtnet_stats.rx_enq_replacement_failed++;
+		m_freem(m_new);
	} else
-		error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
+		m->m_len = len;
 
	return (error);
 }
@@ -1463,33 +1471,36 @@
 {
	struct vtnet_softc *sc;
	struct sglist *sg;
-	int error;
+	int header_inlined, error;
 
	sc = rxq->vtnrx_sc;
	sg = rxq->vtnrx_sg;
 
	KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
	    ("%s: mbuf chain without LRO_NOMRG", __func__));
-	KASSERT(m->m_len == sc->vtnet_rx_clustersz, ("%s: unexpected mbuf "
-	    "length %d %d", __func__, m->m_len, sc->vtnet_rx_clustersz));
	VTNET_RXQ_LOCK_ASSERT(rxq);
 
	sglist_reset(sg);
-	if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
-		error = sglist_append_mbuf(sg, m);
-	} else {
-		struct vtnet_rx_header *rxhdr;
+	header_inlined = vtnet_modern(sc) ||
+	    (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */
 
-		rxhdr = mtod(m, struct vtnet_rx_header *);
+	if (header_inlined)
+		error = sglist_append_mbuf(sg, m);
+	else {
+		struct vtnet_rx_header *rxhdr =
+		    mtod(m, struct vtnet_rx_header *);
		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
 
-		/* Append inlined header and then rest of the mbuf chain. */
+		/* Append the header and remaining mbuf data. */
		error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
-		if (error == 0) {
-			error = sglist_append(sg, &rxhdr[1],
-			    m->m_len - sizeof(struct vtnet_rx_header));
-		}
-		if (error == 0 && m->m_next != NULL)
+		if (error)
+			return (error);
+		error = sglist_append(sg, &rxhdr[1],
+		    m->m_len - sizeof(struct vtnet_rx_header));
+		if (error)
+			return (error);
+
+		if (m->m_next != NULL)
			error = sglist_append_mbuf(sg, m->m_next);
	}
 
@@ -1519,54 +1530,73 @@
	return (error);
 }
 
-/*
- * Use the checksum offset in the VirtIO header to set the
- * correct CSUM_* flags.
- */
 static int
-vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
-    uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
+vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype,
+    int hoff, struct virtio_net_hdr *hdr)
 {
	struct vtnet_softc *sc;
-#if defined(INET) || defined(INET6)
-	int offset = hdr->csum_start + hdr->csum_offset;
-#endif
+	int error;
 
	sc = rxq->vtnrx_sc;
 
-	/* Only do a basic sanity check on the offset. */
-	switch (eth_type) {
-#if defined(INET)
-	case ETHERTYPE_IP:
-		if (__predict_false(offset < ip_start + sizeof(struct ip)))
-			return (1);
-		break;
-#endif
-#if defined(INET6)
-	case ETHERTYPE_IPV6:
-		if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
-			return (1);
-		break;
-#endif
-	default:
-		sc->vtnet_stats.rx_csum_bad_ethtype++;
-		return (1);
+	/*
+	 * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does
+	 * not have an analogous CSUM flag. The checksum has been validated,
+	 * but is incomplete (TCP/UDP pseudo header).
+	 *
+	 * The packet is likely from another VM on the same host that itself
+	 * performed checksum offloading so Tx/Rx is basically a memcpy and
+	 * the checksum has little value.
+	 *
+	 * Default to receiving the packet as-is for performance reasons, but
+	 * this can cause issues if the packet is to be forwarded because it
+	 * does not contain a valid checksum. This patch may be helpful:
+	 * https://reviews.freebsd.org/D6611. In the meantime, have the driver
+	 * compute the checksum if requested.
+	 *
+	 * BMV: Need to add a CSUM_PARTIAL flag?
+	 */
+	if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
+		error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
+		return (error);
	}
 
	/*
-	 * Use the offset to determine the appropriate CSUM_* flags. This is
-	 * a bit dirty, but we can get by with it since the checksum offsets
-	 * happen to be different. We assume the host host does not do IPv4
-	 * header checksum offloading.
+	 * Compute the checksum in the driver so the packet will contain a
+	 * valid checksum. The checksum is at csum_offset from csum_start.
	 */
-	switch (hdr->csum_offset) {
-	case offsetof(struct udphdr, uh_sum):
-	case offsetof(struct tcphdr, th_sum):
+	switch (etype) {
+#if defined(INET) || defined(INET6)
+	case ETHERTYPE_IP:
+	case ETHERTYPE_IPV6: {
+		int csum_off, csum_end;
+		uint16_t csum;
+
+		csum_off = hdr->csum_start + hdr->csum_offset;
+		csum_end = csum_off + sizeof(uint16_t);
+
+		/* Assume checksum will be in the first mbuf. */
+		if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
+			return (1);
+
+		/*
+		 * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
+		 * checksum and write it at the specified offset. We could
+		 * try to verify the packet: csum_start should probably
+		 * correspond to the start of the TCP/UDP header.
+		 *
+		 * BMV: Need to properly handle UDP with zero checksum. Is
+		 * the IPv4 header checksum implicitly validated?
+		 */
+		csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
+		*(uint16_t *)(mtodo(m, csum_off)) = csum;
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xFFFF;
		break;
+	}
+#endif
	default:
-		sc->vtnet_stats.rx_csum_bad_offset++;
+		sc->vtnet_stats.rx_csum_bad_ethtype++;
		return (1);
	}
 
@@ -1574,64 +1604,55 @@
 }
 
 static int
-vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
-    uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
+vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m,
+    uint16_t etype, int hoff, struct virtio_net_hdr *hdr)
 {
	struct vtnet_softc *sc;
-	int offset, proto;
+	int protocol;
 
	sc = rxq->vtnrx_sc;
 
-	switch (eth_type) {
+	switch (etype) {
 #if defined(INET)
-	case ETHERTYPE_IP: {
-		struct ip *ip;
-		if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
-			return (1);
-		ip = (struct ip *)(m->m_data + ip_start);
-		proto = ip->ip_p;
-		offset = ip_start + (ip->ip_hl << 2);
+	case ETHERTYPE_IP:
+		if (__predict_false(m->m_len < hoff + sizeof(struct ip)))
+			protocol = IPPROTO_DONE;
+		else {
+			struct ip *ip = (struct ip *)(m->m_data + hoff);
+			protocol = ip->ip_p;
+		}
		break;
-	}
 #endif
 #if defined(INET6)
	case ETHERTYPE_IPV6:
-		if (__predict_false(m->m_len < ip_start +
-		    sizeof(struct ip6_hdr)))
-			return (1);
-		offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
-		if (__predict_false(offset < 0))
-			return (1);
+		if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
+		    || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0)
+			protocol = IPPROTO_DONE;
		break;
 #endif
	default:
-		sc->vtnet_stats.rx_csum_bad_ethtype++;
-		return (1);
+		protocol = IPPROTO_DONE;
+		break;
	}
 
-	switch (proto) {
+	switch (protocol) {
	case IPPROTO_TCP:
-		if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
-			return (1);
-		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
-		m->m_pkthdr.csum_data = 0xFFFF;
-		break;
	case IPPROTO_UDP:
-		if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
-			return (1);
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xFFFF;
		break;
	default:
		/*
-		 * For the remaining protocols, FreeBSD does not support
-		 * checksum offloading, so the checksum will be recomputed.
+		 * FreeBSD does not support checksum offloading of this
+		 * protocol. Let the stack re-verify the checksum later
+		 * if the protocol is supported.
		 */
 #if 0
-		if_printf(sc->vtnet_ifp, "cksum offload of unsupported "
-		    "protocol eth_type=%#x proto=%d csum_start=%d "
-		    "csum_offset=%d\n", __func__, eth_type, proto,
-		    hdr->csum_start, hdr->csum_offset);
+		if_printf(sc->vtnet_ifp,
+		    "%s: checksum offload of unsupported protocol "
+		    "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n",
+		    __func__, etype, protocol, hdr->csum_start,
+		    hdr->csum_offset);
 #endif
		break;
	}
@@ -1639,41 +1660,29 @@
	return (0);
 }
 
-/*
- * Set the appropriate CSUM_* flags. Unfortunately, the information
- * provided is not directly useful to us. The VirtIO header gives the
- * offset of the checksum, which is all Linux needs, but this is not
- * how FreeBSD does things. We are forced to peek inside the packet
- * a bit.
- *
- * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
- * could accept the offsets and let the stack figure it out.
- */
 static int
 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
     struct virtio_net_hdr *hdr)
 {
-	struct ether_header *eh;
-	struct ether_vlan_header *evh;
-	uint16_t eth_type;
-	int offset, error;
-
-	eh = mtod(m, struct ether_header *);
-	eth_type = ntohs(eh->ether_type);
-	if (eth_type == ETHERTYPE_VLAN) {
-		/* BMV: We should handle nested VLAN tags too. */
-		evh = mtod(m, struct ether_vlan_header *);
-		eth_type = ntohs(evh->evl_proto);
-		offset = sizeof(struct ether_vlan_header);
+	const struct ether_header *eh;
+	int hoff;
+	uint16_t etype;
+
+	eh = mtod(m, const struct ether_header *);
+	etype = ntohs(eh->ether_type);
+	if (etype == ETHERTYPE_VLAN) {
+		/* TODO BMV: Handle QinQ. */
+		const struct ether_vlan_header *evh =
+		    mtod(m, const struct ether_vlan_header *);
+		etype = ntohs(evh->evl_proto);
+		hoff = sizeof(struct ether_vlan_header);
	} else
-		offset = sizeof(struct ether_header);
+		hoff = sizeof(struct ether_header);
 
	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
-		error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
-	else
-		error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);
-
-	return (error);
+		return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr));
+	else /* VIRTIO_NET_HDR_F_DATA_VALID */
+		return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr));
 }
 
 static void
@@ -1708,14 +1717,16 @@
 {
	struct vtnet_softc *sc;
	struct virtqueue *vq;
-	struct mbuf *m, *m_tail;
-	int len;
+	struct mbuf *m_tail;
 
	sc = rxq->vtnrx_sc;
	vq = rxq->vtnrx_vq;
	m_tail = m_head;
 
	while (--nbufs > 0) {
+		struct mbuf *m;
+		int len;
+
		m = virtqueue_dequeue(vq, &len);
		if (m == NULL) {
			rxq->vtnrx_stats.vrxs_ierrors++;
@@ -1756,13 +1767,12 @@
 {
	struct vtnet_softc *sc;
	struct ifnet *ifp;
-	struct ether_header *eh;
 
	sc = rxq->vtnrx_sc;
	ifp = sc->vtnet_ifp;
 
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
-		eh = mtod(m, struct ether_header *);
+		struct ether_header *eh = mtod(m, struct ether_header *);
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
			vtnet_vlan_tag_remove(m);
			/*
@@ -1777,13 +1787,8 @@
	m->m_pkthdr.flowid = rxq->vtnrx_id;
	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
 
-	/*
-	 * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
-	 * distinction that Linux does. Need to reevaluate if performing
-	 * offloading for the NEEDS_CSUM case is really appropriate.
-	 */
-	if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
-	    VIRTIO_NET_HDR_F_DATA_VALID)) {
+	if (hdr->flags &
+	    (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) {
		if (vtnet_rxq_csum(rxq, m, hdr) == 0)
			rxq->vtnrx_stats.vrxs_csum++;
		else
@@ -1805,8 +1810,7 @@
	struct vtnet_softc *sc;
	struct ifnet *ifp;
	struct virtqueue *vq;
-	struct mbuf *m;
-	int len, deq, nbufs, adjsz, count;
+	int deq, count;
 
	sc = rxq->vtnrx_sc;
	vq = rxq->vtnrx_vq;
@@ -1817,6 +1821,9 @@
	VTNET_RXQ_LOCK_ASSERT(rxq);
 
	while (count-- > 0) {
+		struct mbuf *m;
+		int len, nbufs, adjsz;
+
		m = virtqueue_dequeue(vq, &len);
		if (m == NULL)
			break;
@@ -1828,20 +1835,21 @@
			continue;
		}
 
-		if (vtnet_modern(sc) ||
-		    sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
-			/*
-			 * For our purposes here, the V1 header is the same as
-			 * the mergeable buffers header.
-			 */
+		if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
			struct virtio_net_hdr_mrg_rxbuf *mhdr =
			    mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
			nbufs = vtnet_htog16(sc, mhdr->num_buffers);
			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+		} else if (vtnet_modern(sc)) {
+			nbufs = 1; /* num_buffers is always 1 */
+			adjsz = sizeof(struct virtio_net_hdr_v1);
		} else {
			nbufs = 1;
			adjsz = sizeof(struct vtnet_rx_header);
-			/* Our pad between the header and start of the frame. */
+			/*
+			 * Account for our gap between the header and start of
+			 * data to keep the segments separated.
+			 */
			len += VTNET_RX_HEADER_PAD;
		}
 
@@ -1865,9 +1873,9 @@
 
		/*
		 * Save an endian swapped version of the header prior to it
-		 * being stripped. For both mergeable and non-mergeable, the
-		 * header is at the start of the mbuf data. num_buffers was
-		 * already saved (and longer need it) so use a regular header.
+		 * being stripped. The header is always at the start of the
+		 * mbuf data. num_buffers was already saved (and not needed)
+		 * so use the standard header.
		 */
		hdr = mtod(m, struct virtio_net_hdr *);
		lhdr.flags = hdr->flags;
@@ -2986,31 +2994,24 @@
 vtnet_init_rx_queues(struct vtnet_softc *sc)
 {
	device_t dev;
+	struct ifnet *ifp;
	struct vtnet_rxq *rxq;
	int i, clustersz, error;
 
	dev = sc->vtnet_dev;
+	ifp = sc->vtnet_ifp;
 
-	/*
-	 * Use the new cluster size if one has been set (via a MTU
-	 * change). Otherwise, use the standard 2K clusters.
-	 *
-	 * BMV: It might make sense to use page sized clusters as
-	 * the default (depending on the features negotiated).
-	 */
-	if (sc->vtnet_rx_new_clustersz != 0) {
-		clustersz = sc->vtnet_rx_new_clustersz;
-		sc->vtnet_rx_new_clustersz = 0;
-	} else
-		clustersz = MCLBYTES;
-
+	clustersz = vtnet_rx_cluster_size(sc, ifp->if_mtu);
	sc->vtnet_rx_clustersz = clustersz;
-	sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clustersz);
 
-	KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS ||
-	    sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
-	    ("%s: too many rx mbufs %d for %d segments", __func__,
-	    sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
+	if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) {
+		sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) +
+		    VTNET_MAX_RX_SIZE, clustersz);
+		KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
+		    ("%s: too many rx mbufs %d for %d segments", __func__,
+		    sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
+	} else
+		sc->vtnet_rx_nmbufs = 1;
 
	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
		rxq = &sc->vtnet_rxqs[i];
@@ -3021,8 +3022,7 @@
		VTNET_RXQ_UNLOCK(rxq);
 
		if (error) {
-			device_printf(dev,
-			    "cannot allocate mbufs for Rx queue %d\n", i);
+			device_printf(dev, "cannot populate Rx queue %d\n", i);
			return (error);
		}
	}
diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h
--- a/sys/dev/virtio/network/if_vtnetvar.h
+++ b/sys/dev/virtio/network/if_vtnetvar.h
@@ -153,6 +153,7 @@
 #define VTNET_FLAG_INDIRECT	 0x0400
 #define VTNET_FLAG_EVENT_IDX	 0x0800
 #define VTNET_FLAG_SUSPENDED	 0x1000
+#define VTNET_FLAG_FIXUP_NEEDS_CSUM 0x2000
 
	int vtnet_link_active;
	int vtnet_hdr_size;
@@ -160,7 +161,6 @@
	int vtnet_rx_nsegs;
	int vtnet_rx_nmbufs;
	int vtnet_rx_clustersz;
-	int vtnet_rx_new_clustersz;
	int vtnet_tx_intr_thresh;
	int vtnet_tx_nsegs;
	int vtnet_if_flags;
@@ -193,7 +193,7 @@
 /*
  * Maximum number of queue pairs we will autoconfigure to.
  */
-#define VTNET_MAX_QUEUE_PAIRS	8
+#define VTNET_MAX_QUEUE_PAIRS	32
 
 /*
  * Additional completed entries can appear in a virtqueue before we can
@@ -336,23 +336,18 @@
 #define VTNET_MAX_RX_SIZE	65550
 
 /*
- * Used to preallocate the Vq indirect descriptors. The first segment is
- * reserved for the header, except for mergeable buffers or modern since
- * the header is placed inline with the data.
+ * Used to preallocate the VQ indirect descriptors. Modern and mergeable
+ * buffers do not require a segment for the VirtIO header since it is
+ * placed inline at the beginning of the receive buffer.
  */
-#define VTNET_MRG_RX_SEGS	1
-#define VTNET_MODERN_RX_SEGS	1
-#define VTNET_MIN_RX_SEGS	2
-#define VTNET_MAX_RX_SEGS	34
-#define VTNET_MIN_TX_SEGS	32
-#define VTNET_MAX_TX_SEGS	64
+#define VTNET_RX_SEGS_HDR_INLINE	1
+#define VTNET_RX_SEGS_HDR_SEPARATE	2
+#define VTNET_RX_SEGS_LRO_NOMRG		34
+#define VTNET_TX_SEGS_MIN		32
+#define VTNET_TX_SEGS_MAX		64
 
-/*
- * Assert we can receive and transmit the maximum with regular
- * size clusters.
- */
-CTASSERT(((VTNET_MAX_RX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_RX_SIZE);
-CTASSERT(((VTNET_MAX_TX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_MTU);
+CTASSERT(((VTNET_RX_SEGS_LRO_NOMRG - 1) * MCLBYTES) >= VTNET_MAX_RX_SIZE);
+CTASSERT(((VTNET_TX_SEGS_MAX - 1) * MCLBYTES) >= VTNET_MAX_MTU);
 
 /*
  * Number of slots in the Tx bufrings. This value matches most other
@@ -360,16 +355,6 @@
  */
 #define VTNET_DEFAULT_BUFRING_SIZE	4096
 
-/*
- * Determine how many mbufs are in each receive buffer. For LRO without
- * mergeable buffers, we must allocate an mbuf chain large enough to
- * hold both the vtnet_rx_header and the maximum receivable data.
- */
-#define VTNET_NEEDED_RX_MBUFS(_sc, _clustersz)				\
-	((_sc)->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0 ? 1 :		\
-	    howmany(sizeof(struct vtnet_rx_header) + VTNET_MAX_RX_SIZE,	\
-	        (_clustersz))
-
 #define VTNET_CORE_MTX(_sc)		&(_sc)->vtnet_mtx
 #define VTNET_CORE_LOCK(_sc)		mtx_lock(VTNET_CORE_MTX((_sc)))
 #define VTNET_CORE_UNLOCK(_sc)		mtx_unlock(VTNET_CORE_MTX((_sc)))
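
A usage note on the new knob: fixup_needs_csum is created with CTLFLAG_RDTUN, so it is a boot-time tunable rather than a runtime sysctl, and vtnet_setup_interface() reads it through the driver's existing vtnet_tunable_int() helper, which also consults a per-device override. Under the usual loader tunable conventions it would be set from loader.conf; the values below are illustrative, and the per-device form is an assumption based on how vtnet_tunable_int() builds its name:

	hw.vtnet.fixup_needs_csum="1"		# all vtnet devices
	hw.vtnet.0.fixup_needs_csum="1"		# assumed per-device form, vtnet0 only

With the flag left at its default of 0, NEEDS_CSUM frames take the vtnet_rxq_csum_data_valid() path and are simply marked CSUM_DATA_VALID | CSUM_PSEUDO_HDR, which is fine for local delivery but leaves an incomplete checksum in the packet if it is later forwarded.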
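The vtnet_rx_cluster_size() function added above scales the receive cluster size from the MTU when the header is inlined and neither mergeable buffers nor LRO_NOMRG decide the size first. A standalone sketch of that mapping follows; the constants are assumed typical values (MCLBYTES 2048, MJUMPAGESIZE equal to a 4KB PAGE_SIZE, MJUM9BYTES 9216, a 12-byte V1 header, an 18-byte ether_vlan_header), so treat it as an illustration rather than driver code:

#include <stdio.h>

#define MCLBYTES	2048		/* assumed standard cluster size */
#define MJUMPAGESIZE	4096		/* assumed 4KB PAGE_SIZE */
#define MJUM9BYTES	(9 * 1024)
#define HDR_V1_SIZE	12		/* sizeof(struct virtio_net_hdr_v1) */
#define EVL_HDR_SIZE	18		/* sizeof(struct ether_vlan_header) */

static int
rx_cluster_size(int mtu)
{
	int framesz = HDR_V1_SIZE + EVL_HDR_SIZE + mtu;

	if (framesz <= MCLBYTES)
		return (MCLBYTES);
	else if (framesz <= MJUMPAGESIZE)
		return (MJUMPAGESIZE);
	else if (framesz <= MJUM9BYTES)
		return (MJUM9BYTES);
	/* Sane default; avoid 16KB clusters. */
	return (MCLBYTES);
}

int
main(void)
{
	const int mtus[] = { 1500, 4000, 9000, 16000 };

	for (int i = 0; i < 4; i++)
		printf("MTU %5d -> %5d byte clusters\n", mtus[i],
		    rx_cluster_size(mtus[i]));
	return (0);
}

For the default 1500 MTU this selects the standard 2KB cluster; the frame size only steps up to page-sized and then 9KB clusters past those thresholds, and anything larger falls back to the 2KB default.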
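For the NEEDS_CSUM fixup itself, vtnet_rxq_csum_needs_csum() checksums the mbuf chain from csum_start with in_cksum_skip() and stores the 16-bit result at csum_start + csum_offset, the same idea as in_delayed_cksum() on the transmit side. Below is a flat-buffer sketch of that Internet (RFC 1071) checksum; cksum_skip() is a hypothetical stand-in for the kernel routine, which walks an mbuf chain instead of a contiguous buffer:

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

/* Ones' complement sum of buf[skip..len), folded to 16 bits. */
static uint16_t
cksum_skip(const uint8_t *buf, size_t len, size_t skip)
{
	uint32_t sum = 0;
	size_t i;

	for (i = skip; i + 1 < len; i += 2)
		sum += (uint32_t)buf[i] << 8 | buf[i + 1];
	if (i < len)			/* pad an odd trailing byte */
		sum += (uint32_t)buf[i] << 8;
	while (sum > 0xffff)		/* fold the carries */
		sum = (sum & 0xffff) + (sum >> 16);
	return (~sum & 0xffff);
}

int
main(void)
{
	/*
	 * A NEEDS_CSUM frame arrives with only the pseudo-header sum in
	 * its TCP/UDP checksum field; summing from csum_start and storing
	 * the result at csum_start + csum_offset completes it.
	 */
	uint8_t frame[64] = { 0x45 };		/* toy payload */
	size_t csum_start = 14;			/* after the Ethernet header */
	size_t csum_offset = 16;		/* offsetof(struct tcphdr, th_sum) */
	uint16_t csum = cksum_skip(frame, sizeof(frame), csum_start);

	frame[csum_start + csum_offset] = csum >> 8;
	frame[csum_start + csum_offset + 1] = csum & 0xff;
	printf("stored checksum 0x%04x\n", csum);
	return (0);
}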
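Finally, the LRO_NOMRG sizing in vtnet_init_rx_queues() and the new VTNET_RX_SEGS_LRO_NOMRG constant hang together by simple arithmetic: the header gets its own sglist segment (carved from the front of the first cluster) and at most 33 cluster-sized segments carry a maximum 65550-byte frame. A worked check, assuming MCLBYTES is 2048 and sizeof(struct vtnet_rx_header) is 14 (a 10-byte legacy virtio_net_hdr plus VTNET_RX_HEADER_PAD of 4):

#include <stdio.h>

#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

int
main(void)
{
	const int clustersz = 2048;	/* MCLBYTES, assumed */
	const int hdrsz = 10 + 4;	/* sizeof(struct vtnet_rx_header), assumed */
	const int maxrx = 65550;	/* VTNET_MAX_RX_SIZE */
	const int nsegs = 34;		/* VTNET_RX_SEGS_LRO_NOMRG */
	int nmbufs = howmany(hdrsz + maxrx, clustersz);

	printf("vtnet_rx_nmbufs: %d (KASSERT requires < %d)\n", nmbufs, nsegs);
	printf("CTASSERT capacity: %d >= %d\n", (nsegs - 1) * clustersz, maxrx);
	return (0);
}

This prints 33 mbufs against 34 segments and a 67584-byte capacity against the 65550-byte maximum, which is exactly what the KASSERT and the rewritten CTASSERT in if_vtnetvar.h encode.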