Page MenuHomeFreeBSD

D4824.diff
No OneTemporary

D4824.diff

Index: head/sys/dev/hyperv/netvsc/hv_net_vsc.h
===================================================================
--- head/sys/dev/hyperv/netvsc/hv_net_vsc.h
+++ head/sys/dev/hyperv/netvsc/hv_net_vsc.h
@@ -43,6 +43,8 @@
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/sx.h>
+#include <netinet/in.h>
+#include <netinet/tcp_lro.h>
#include <dev/hyperv/include/hyperv.h>
@@ -993,6 +995,17 @@
int temp_unusable;
struct hv_device *hn_dev_obj;
netvsc_dev *net_dev;
+
+ struct lro_ctrl hn_lro;
+ int hn_lro_hiwat;
+
+ /* Trust tcp segments verification on host side */
+ int hn_trust_hosttcp;
+
+ u_long hn_csum_ip;
+ u_long hn_csum_tcp;
+ u_long hn_csum_trusted;
+ u_long hn_lro_tried;
} hn_softc_t;
Index: head/sys/dev/hyperv/netvsc/hv_net_vsc.c
===================================================================
--- head/sys/dev/hyperv/netvsc/hv_net_vsc.c
+++ head/sys/dev/hyperv/netvsc/hv_net_vsc.c
@@ -919,6 +919,7 @@
*/
hv_nv_on_receive_completion(device, vm_xfer_page_pkt->d.transaction_id,
status);
+ hv_rf_receive_rollup(net_dev);
}
/*
Index: head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
===================================================================
--- head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
+++ head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
@@ -69,6 +69,7 @@
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/sx.h>
+#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_arp.h>
@@ -138,6 +139,15 @@
CSUM_IP_ISCSI|CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP| \
CSUM_IP6_TSO|CSUM_IP6_ISCSI)
+/* XXX move to netinet/tcp_lro.h */
+#define HN_LRO_HIWAT_MAX 65535
+#define HN_LRO_HIWAT_DEF HN_LRO_HIWAT_MAX
+/* YYY 2*MTU is a bit rough, but should be good enough. */
+#define HN_LRO_HIWAT_MTULIM(ifp) (2 * (ifp)->if_mtu)
+#define HN_LRO_HIWAT_ISVALID(sc, hiwat) \
+ ((hiwat) >= HN_LRO_HIWAT_MTULIM((sc)->hn_ifp) || \
+ (hiwat) <= HN_LRO_HIWAT_MAX)
+
/*
* Data types
*/
@@ -171,6 +181,9 @@
/* The one and only one */
static struct hv_netvsc_driver_context g_netvsc_drv;
+/* Trust tcp segements verification on host side. */
+static int hn_trust_hosttcp = 0;
+TUNABLE_INT("dev.hn.trust_hosttcp", &hn_trust_hosttcp);
/*
* Forward declarations
@@ -181,6 +194,19 @@
static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
static int hn_start_locked(struct ifnet *ifp);
static void hn_start(struct ifnet *ifp);
+#ifdef HN_LRO_HIWAT
+static int hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS);
+#endif
+static int hn_check_iplen(const struct mbuf *, int);
+
+static __inline void
+hn_set_lro_hiwat(struct hn_softc *sc, int hiwat)
+{
+ sc->hn_lro_hiwat = hiwat;
+#ifdef HN_LRO_HIWAT
+ sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat;
+#endif
+}
/*
* NetVsc get message transport protocol type
@@ -310,6 +336,8 @@
hn_softc_t *sc;
int unit = device_get_unit(dev);
struct ifnet *ifp;
+ struct sysctl_oid_list *child;
+ struct sysctl_ctx_list *ctx;
int ret;
netvsc_init();
@@ -322,6 +350,8 @@
bzero(sc, sizeof(hn_softc_t));
sc->hn_unit = unit;
sc->hn_dev = dev;
+ sc->hn_lro_hiwat = HN_LRO_HIWAT_DEF;
+ sc->hn_trust_hosttcp = hn_trust_hosttcp;
NV_LOCK_INIT(sc, "NetVSCLock");
@@ -349,9 +379,11 @@
*/
ifp->if_hdrlen = sizeof(struct ether_vlan_header);
ifp->if_capabilities |=
- IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO;
+ IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO |
+ IFCAP_LRO;
ifp->if_capenable |=
- IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO;
+ IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO |
+ IFCAP_LRO;
/*
* Only enable UDP checksum offloading when it is on 2012R2 or
* later. UDP checksum offloading doesn't work on earlier
@@ -372,8 +404,59 @@
sc->hn_carrier = 1;
}
+ tcp_lro_init(&sc->hn_lro);
+ /* Driver private LRO settings */
+ sc->hn_lro.ifp = ifp;
+#ifdef HN_LRO_HIWAT
+ sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat;
+#endif
+
ether_ifattach(ifp, device_info.mac_addr);
+ ctx = device_get_sysctl_ctx(dev);
+ child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_queued",
+ CTLFLAG_RW, &sc->hn_lro.lro_queued, 0, "LRO queued");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_flushed",
+ CTLFLAG_RW, &sc->hn_lro.lro_flushed, 0, "LRO flushed");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "lro_tried",
+ CTLFLAG_RW, &sc->hn_lro_tried, "# of LRO tries");
+#ifdef HN_LRO_HIWAT
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_hiwat",
+ CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_hiwat_sysctl,
+ "I", "LRO high watermark");
+#endif
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "trust_hosttcp",
+ CTLFLAG_RW, &sc->hn_trust_hosttcp, 0,
+ "Trust tcp segement verification on host side, "
+ "when csum info is missing");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_ip",
+ CTLFLAG_RW, &sc->hn_csum_ip, "RXCSUM IP");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_tcp",
+ CTLFLAG_RW, &sc->hn_csum_tcp, "RXCSUM TCP");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_trusted",
+ CTLFLAG_RW, &sc->hn_csum_trusted,
+ "# of TCP segements that we trust host's csum verification");
+
+ if (unit == 0) {
+ struct sysctl_ctx_list *dc_ctx;
+ struct sysctl_oid_list *dc_child;
+ devclass_t dc;
+
+ /*
+ * Add sysctl nodes for devclass
+ */
+ dc = device_get_devclass(dev);
+ dc_ctx = devclass_get_sysctl_ctx(dc);
+ dc_child = SYSCTL_CHILDREN(devclass_get_sysctl_tree(dc));
+
+ SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "trust_hosttcp",
+ CTLFLAG_RD, &hn_trust_hosttcp, 0,
+ "Trust tcp segement verification on host side, "
+ "when csum info is missing (global setting)");
+ }
+
return (0);
}
@@ -383,6 +466,7 @@
static int
netvsc_detach(device_t dev)
{
+ struct hn_softc *sc = device_get_softc(dev);
struct hv_device *hv_device = vmbus_get_devctx(dev);
if (bootverbose)
@@ -401,6 +485,8 @@
hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL);
+ tcp_lro_free(&sc->hn_lro);
+
return (0);
}
@@ -887,7 +973,7 @@
struct mbuf *m_new;
struct ifnet *ifp;
device_t dev = device_ctx->device;
- int size;
+ int size, do_lro = 0;
if (sc == NULL) {
return (0); /* TODO: KYS how can this be! */
@@ -938,6 +1024,7 @@
if (csum_info->receive.ip_csum_succeeded) {
m_new->m_pkthdr.csum_flags |=
(CSUM_IP_CHECKED | CSUM_IP_VALID);
+ sc->hn_csum_ip++;
}
/* TCP csum offload */
@@ -945,9 +1032,50 @@
m_new->m_pkthdr.csum_flags |=
(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
m_new->m_pkthdr.csum_data = 0xffff;
+ sc->hn_csum_tcp++;
}
- }
+ if (csum_info->receive.ip_csum_succeeded &&
+ csum_info->receive.tcp_csum_succeeded)
+ do_lro = 1;
+ } else {
+ const struct ether_header *eh;
+ uint16_t etype;
+ int hoff;
+
+ hoff = sizeof(*eh);
+ if (m_new->m_len < hoff)
+ goto skip;
+ eh = mtod(m_new, struct ether_header *);
+ etype = ntohs(eh->ether_type);
+ if (etype == ETHERTYPE_VLAN) {
+ const struct ether_vlan_header *evl;
+
+ hoff = sizeof(*evl);
+ if (m_new->m_len < hoff)
+ goto skip;
+ evl = mtod(m_new, struct ether_vlan_header *);
+ etype = ntohs(evl->evl_proto);
+ }
+
+ if (etype == ETHERTYPE_IP) {
+ int pr;
+
+ pr = hn_check_iplen(m_new, hoff);
+ if (pr == IPPROTO_TCP) {
+ if (sc->hn_trust_hosttcp) {
+ sc->hn_csum_trusted++;
+ m_new->m_pkthdr.csum_flags |=
+ (CSUM_IP_CHECKED | CSUM_IP_VALID |
+ CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+ m_new->m_pkthdr.csum_data = 0xffff;
+ }
+ /* Rely on SW csum verification though... */
+ do_lro = 1;
+ }
+ }
+ }
+skip:
if ((packet->vlan_tci != 0) &&
(ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) {
m_new->m_pkthdr.ether_vtag = packet->vlan_tci;
@@ -961,12 +1089,37 @@
if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
+ struct lro_ctrl *lro = &sc->hn_lro;
+
+ if (lro->lro_cnt) {
+ sc->hn_lro_tried++;
+ if (tcp_lro_rx(lro, m_new, 0) == 0) {
+ /* DONE! */
+ return 0;
+ }
+ }
+ }
+
/* We're not holding the lock here, so don't release it */
(*ifp->if_input)(ifp, m_new);
return (0);
}
+void
+netvsc_recv_rollup(struct hv_device *device_ctx)
+{
+ hn_softc_t *sc = device_get_softc(device_ctx->device);
+ struct lro_ctrl *lro = &sc->hn_lro;
+ struct lro_entry *queued;
+
+ while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
+ SLIST_REMOVE_HEAD(&lro->lro_active, next);
+ tcp_lro_flush(lro, queued);
+ }
+}
+
/*
* Rules for using sc->temp_unusable:
* 1. sc->temp_unusable can only be read or written while holding NV_LOCK()
@@ -1022,7 +1175,13 @@
/* Obtain and record requested MTU */
ifp->if_mtu = ifr->ifr_mtu;
-
+ /*
+ * Make sure that LRO high watermark is still valid,
+ * after MTU change (the 2*MTU limit).
+ */
+ if (!HN_LRO_HIWAT_ISVALID(sc, sc->hn_lro_hiwat))
+ hn_set_lro_hiwat(sc, HN_LRO_HIWAT_MTULIM(ifp));
+
do {
NV_LOCK(sc);
if (!sc->temp_unusable) {
@@ -1147,6 +1306,8 @@
ifp->if_capenable |= IFCAP_RXCSUM;
}
}
+ if (mask & IFCAP_LRO)
+ ifp->if_capenable ^= IFCAP_LRO;
if (mask & IFCAP_TSO4) {
ifp->if_capenable ^= IFCAP_TSO4;
@@ -1292,6 +1453,102 @@
}
#endif
+#ifdef HN_LRO_HIWAT
+static int
+hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int hiwat, error;
+
+ hiwat = sc->hn_lro_hiwat;
+ error = sysctl_handle_int(oidp, &hiwat, 0, req);
+ if (error || req->newptr == NULL)
+ return error;
+
+ if (!HN_LRO_HIWAT_ISVALID(sc, hiwat))
+ return EINVAL;
+
+ if (sc->hn_lro_hiwat != hiwat)
+ hn_set_lro_hiwat(sc, hiwat);
+ return 0;
+}
+#endif /* HN_LRO_HIWAT */
+
+static int
+hn_check_iplen(const struct mbuf *m, int hoff)
+{
+ const struct ip *ip;
+ int len, iphlen, iplen;
+ const struct tcphdr *th;
+ int thoff; /* TCP data offset */
+
+ len = hoff + sizeof(struct ip);
+
+ /* The packet must be at least the size of an IP header. */
+ if (m->m_pkthdr.len < len)
+ return IPPROTO_DONE;
+
+ /* The fixed IP header must reside completely in the first mbuf. */
+ if (m->m_len < len)
+ return IPPROTO_DONE;
+
+ ip = mtodo(m, hoff);
+
+ /* Bound check the packet's stated IP header length. */
+ iphlen = ip->ip_hl << 2;
+ if (iphlen < sizeof(struct ip)) /* minimum header length */
+ return IPPROTO_DONE;
+
+ /* The full IP header must reside completely in the one mbuf. */
+ if (m->m_len < hoff + iphlen)
+ return IPPROTO_DONE;
+
+ iplen = ntohs(ip->ip_len);
+
+ /*
+ * Check that the amount of data in the buffers is as
+ * at least much as the IP header would have us expect.
+ */
+ if (m->m_pkthdr.len < hoff + iplen)
+ return IPPROTO_DONE;
+
+ /*
+ * Ignore IP fragments.
+ */
+ if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
+ return IPPROTO_DONE;
+
+ /*
+ * The TCP/IP or UDP/IP header must be entirely contained within
+ * the first fragment of a packet.
+ */
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ if (iplen < iphlen + sizeof(struct tcphdr))
+ return IPPROTO_DONE;
+ if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
+ return IPPROTO_DONE;
+ th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
+ thoff = th->th_off << 2;
+ if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
+ return IPPROTO_DONE;
+ if (m->m_len < hoff + iphlen + thoff)
+ return IPPROTO_DONE;
+ break;
+ case IPPROTO_UDP:
+ if (iplen < iphlen + sizeof(struct udphdr))
+ return IPPROTO_DONE;
+ if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
+ return IPPROTO_DONE;
+ break;
+ default:
+ if (iplen < iphlen)
+ return IPPROTO_DONE;
+ break;
+ }
+ return ip->ip_p;
+}
+
static device_method_t netvsc_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, netvsc_probe),
Index: head/sys/dev/hyperv/netvsc/hv_rndis.h
===================================================================
--- head/sys/dev/hyperv/netvsc/hv_rndis.h
+++ head/sys/dev/hyperv/netvsc/hv_rndis.h
@@ -1049,6 +1049,7 @@
int netvsc_recv(struct hv_device *device_ctx,
netvsc_packet *packet,
rndis_tcp_ip_csum_info *csum_info);
+void netvsc_recv_rollup(struct hv_device *device_ctx);
void* hv_set_rppi_data(rndis_msg *rndis_mesg,
uint32_t rppi_size,
Index: head/sys/dev/hyperv/netvsc/hv_rndis_filter.h
===================================================================
--- head/sys/dev/hyperv/netvsc/hv_rndis_filter.h
+++ head/sys/dev/hyperv/netvsc/hv_rndis_filter.h
@@ -98,6 +98,7 @@
int hv_rf_on_receive(netvsc_dev *net_dev,
struct hv_device *device, netvsc_packet *pkt);
+void hv_rf_receive_rollup(netvsc_dev *net_dev);
int hv_rf_on_device_add(struct hv_device *device, void *additl_info);
int hv_rf_on_device_remove(struct hv_device *device, boolean_t destroy_channel);
int hv_rf_on_open(struct hv_device *device);
Index: head/sys/dev/hyperv/netvsc/hv_rndis_filter.c
===================================================================
--- head/sys/dev/hyperv/netvsc/hv_rndis_filter.c
+++ head/sys/dev/hyperv/netvsc/hv_rndis_filter.c
@@ -963,3 +963,14 @@
request->halt_complete_flag = 1;
}
+/*
+ * RNDIS filter when "all" reception is done
+ */
+void
+hv_rf_receive_rollup(netvsc_dev *net_dev)
+{
+ rndis_device *rndis_dev;
+
+ rndis_dev = (rndis_device *)net_dev->extension;
+ netvsc_recv_rollup(rndis_dev->net_dev->dev);
+}

File Metadata

Mime Type
text/plain
Expires
Thu, Nov 20, 2:07 PM (6 h, 44 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
25729398
Default Alt Text
D4824.diff (13 KB)

Event Timeline