Index: sys/conf/options =================================================================== --- sys/conf/options +++ sys/conf/options @@ -400,6 +400,8 @@ DUMMYNET opt_ipdn.h INET opt_inet.h INET6 opt_inet6.h +LRO_ENTRIES opt_lro.h +LRO_PAYLOAD_MAX opt_lro.h IPDIVERT IPFILTER opt_ipfilter.h IPFILTER_DEFAULT_BLOCK opt_ipfilter.h Index: sys/netinet/ip_input.c =================================================================== --- sys/netinet/ip_input.c +++ sys/netinet/ip_input.c @@ -442,7 +442,8 @@ struct ifaddr *ifa; struct ifnet *ifp; int checkif, hlen = 0; - uint16_t sum, ip_len; + uint32_t ip_len; + uint16_t sum; int dchg = 0; /* dest changed after fw */ struct in_addr odst; /* original dst address */ @@ -519,7 +520,10 @@ return; #endif - ip_len = ntohs(ip->ip_len); + if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP) + ip_len = m->m_pkthdr.len; + else + ip_len = ntohs(ip->ip_len); if (ip_len < hlen) { IPSTAT_INC(ips_badlen); goto bad; @@ -1443,6 +1447,7 @@ struct in_addr dest; struct route ro; int error, type = 0, code = 0, mtu = 0; + int ip_len; if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { IPSTAT_INC(ips_cantforward); @@ -1508,8 +1513,14 @@ m_free(mcopy); mcopy = NULL; } + + if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP) + ip_len = m->m_pkthdr.len; + else + ip_len = ntohs(ip->ip_len); + if (mcopy != NULL) { - mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); + mcopy->m_len = min(ip_len, M_TRAILINGSPACE(mcopy)); mcopy->m_pkthdr.len = mcopy->m_len; m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); } @@ -1637,7 +1648,7 @@ if (ia != NULL) mtu = ia->ia_ifp->if_mtu; else - mtu = ip_next_mtu(ntohs(ip->ip_len), 0); + mtu = ip_next_mtu(ip_len, 0); } IPSTAT_INC(ips_cantfrag); break; Index: sys/netinet/ip_output.c =================================================================== --- sys/netinet/ip_output.c +++ sys/netinet/ip_output.c @@ -126,13 +126,14 @@ struct ifnet *ifp = NULL; /* keep compiler happy */ struct mbuf *m0; int hlen = 
sizeof (struct ip); + int ip_len; int mtu; int error = 0; struct sockaddr_in *dst; const struct sockaddr_in *gw; struct in_ifaddr *ia; int isbroadcast; - uint16_t ip_len, ip_off; + uint16_t ip_off; struct route iproute; struct rtentry *rte; /* cache for ro->ro_rt */ struct in_addr odst; @@ -171,7 +172,11 @@ hlen = len; /* ip->ip_hl is updated above */ } ip = mtod(m, struct ip *); - ip_len = ntohs(ip->ip_len); + + if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP) + ip_len = m->m_pkthdr.len; + else + ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); /* @@ -701,9 +706,13 @@ int firstlen; struct mbuf **mnext; int nfrags; - uint16_t ip_len, ip_off; + int ip_len; + uint16_t ip_off; - ip_len = ntohs(ip->ip_len); + if (M_HASHTYPE_GET(m0) == M_HASHTYPE_LRO_TCP) + ip_len = m0->m_pkthdr.len; + else + ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); if (ip_off & IP_DF) { /* Fragmentation not allowed */ Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c +++ sys/netinet/tcp_input.c @@ -644,7 +644,10 @@ ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)((caddr_t)ip6 + off0); - tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; + if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP) + tlen = m->m_pkthdr.len - off0; + else + tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) th->th_sum = m->m_pkthdr.csum_data; @@ -695,8 +698,10 @@ } ip = mtod(m, struct ip *); th = (struct tcphdr *)((caddr_t)ip + off0); - tlen = ntohs(ip->ip_len) - off0; - + if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP) + tlen = m->m_pkthdr.len - off0; + else + tlen = ntohs(ip->ip_len) - off0; if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) th->th_sum = m->m_pkthdr.csum_data; Index: sys/netinet/tcp_lro.c =================================================================== --- sys/netinet/tcp_lro.c 
+++ sys/netinet/tcp_lro.c @@ -32,6 +32,7 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_lro.h" #include "opt_inet.h" #include "opt_inet6.h" @@ -62,6 +63,14 @@ #define LRO_ENTRIES 8 /* # of LRO entries per RX queue. */ #endif +#ifndef LRO_PAYLOAD_MAX +#define LRO_PAYLOAD_MAX IP_MAXPACKET +#endif + +#if (LRO_PAYLOAD_MAX < 65535) +#error "LRO_PAYLOAD_MAX must be at least 65535 bytes" +#endif + #define TCP_LRO_UPDATE_CSUM 1 #ifndef TCP_LRO_UPDATE_CSUM #define TCP_LRO_INVALID_CSUM 0x0000 @@ -219,8 +228,20 @@ if (le->append_cnt > 0) { struct tcphdr *th; uint16_t p_len; - - p_len = htons(le->p_len); + /* + * The TCP/IP stack should use the "m_pkthdr.len" + * field instead of the IP-payload length field to + * compute the total TCP payload length when it + * recognizes the M_HASHTYPE_LRO_TCP hash type. This + * allows accumulation of more than 64Kbytes worth of + * payload data. + */ + if (le->p_len > IP_MAXPACKET) { + M_HASHTYPE_SET(le->m_head, M_HASHTYPE_LRO_TCP); + p_len = htons(IP_MAXPACKET); + } else { + p_len = htons(le->p_len); + } switch (le->eh_type) { #ifdef INET6 case ETHERTYPE_IPV6: @@ -501,7 +522,7 @@ } /* Flush now if appending will result in overflow. */ - if (le->p_len > (65535 - tcp_data_len)) { + if (le->p_len > (LRO_PAYLOAD_MAX - tcp_data_len)) { SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); tcp_lro_flush(lc, le); break; @@ -559,7 +580,7 @@ * If a possible next full length packet would cause an * overflow, pro-actively flush now.
*/ - if (le->p_len > (65535 - lc->ifp->if_mtu)) { + if (le->p_len > (LRO_PAYLOAD_MAX - lc->ifp->if_mtu)) { SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); tcp_lro_flush(lc, le); } else Index: sys/sys/mbuf.h =================================================================== --- sys/sys/mbuf.h +++ sys/sys/mbuf.h @@ -302,6 +302,7 @@ #define M_HASHTYPE_RSS_UDP_IPV6 9 /* IPv6 UDP 4-tuple */ #define M_HASHTYPE_RSS_UDP_IPV6_EX 10 /* IPv6 UDP 4-tuple + ext hdrs */ +#define M_HASHTYPE_LRO_TCP 254 /* TCP large receive offload */ #define M_HASHTYPE_OPAQUE 255 /* ordering, not affinity */ #define M_HASHTYPE_CLEAR(m) ((m)->m_pkthdr.rsstype = 0)