diff --git a/share/man/man9/mbuf.9 b/share/man/man9/mbuf.9 --- a/share/man/man9/mbuf.9 +++ b/share/man/man9/mbuf.9 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd July 29, 2025 +.Dd August 1, 2025 .Dt MBUF 9 .Os .\" @@ -1102,8 +1102,7 @@ .Vt int Va csum_flags and .Vt int Va csum_data . -The meaning of those fields depends on the direction a packet flows in, -and on whether the packet is fragmented. +The meaning of those fields depends on whether the packet is fragmented. Henceforth, .Va csum_flags or @@ -1117,14 +1116,14 @@ .Vt mbuf chain containing the packet. .Pp -On output, the computation of the checksum is delayed until the outgoing -interface has been determined for a packet. +When a packet is sent by SCTP, TCP, or UDP, the computation of the checksum +is delayed until the outgoing interface has been determined for a packet. The interface-specific field .Va ifnet.if_data.ifi_hwassist (see .Xr ifnet 9 ) -is consulted for the capabilities of the interface to assist in -computing checksums. +is consulted by IP for the capabilities of the network interface selected for +output to assist in computing checksums. The .Va csum_flags field of the packet header is set to indicate which actions the interface @@ -1163,8 +1162,8 @@ In the case of SCTP, the checksum field will be initially set by the SCTP implementation to 0. .Pp -On input, an interface indicates the actions it has performed -on a packet by setting one or more of the following flags in +When a packet is received by an interface, it indicates the actions it has +performed on a packet by setting one or more of the following flags in .Va csum_flags associated with the packet: .Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent @@ -1215,6 +1214,21 @@ in .Va csum_flags is not set, since SCTP does not use a pseudo header checksum. 
+.Pp +If IP delivers a packet with the flags +.Dv CSUM_SCTP , +.Dv CSUM_TCP , +or +.Dv CSUM_UDP +set in +.Va csum_flags +to a local SCTP, TCP, or UDP stack, the packet will be processed without +computing or validating the checksum, since the packet has not been on the +wire. +This can happen if the packet was handled by a virtual interface such as +.Xr tap 4 +or +.Xr epair 4 . .Sh STRESS TESTING When running a kernel compiled with the option .Dv MBUF_STRESS_TEST , diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -69,6 +69,7 @@ #include #include "opt_ipstealth.h" +#include "opt_sctp.h" #include #include @@ -102,6 +103,10 @@ #include +#if defined(SCTP) || defined(SCTP_SUPPORT) +#include +#endif + #define V_ipsendredirects VNET(ipsendredirects) static struct mbuf * @@ -460,6 +465,23 @@ } else gw = (const struct sockaddr *)dst; + /* + * If TCP/UDP header still needs a valid checksum and interface will not + * calculate it for us, do it here. + */ + if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & + ~nh->nh_ifp->if_hwassist)) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } +#if defined(SCTP) || defined(SCTP_SUPPORT) + if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP_SCTP & + ~nh->nh_ifp->if_hwassist)) { + sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); + m->m_pkthdr.csum_flags &= ~CSUM_IP_SCTP; + } +#endif + /* Handle redirect case. */ redest.s_addr = 0; if (V_ipsendredirects && osrc.s_addr == ip->ip_src.s_addr && diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c --- a/sys/netinet/sctp_input.c +++ b/sys/netinet/sctp_input.c @@ -5780,7 +5780,11 @@ goto out; } ecn_bits = ip->ip_tos; - if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) { + if (m->m_pkthdr.csum_flags & (CSUM_SCTP_VALID | CSUM_IP_SCTP)) { + /* + * Packets with CSUM_IP_SCTP were sent from local host using + * checksum offloading. Checksum not required.
+ */ SCTP_STAT_INCR(sctps_recvhwcrc); compute_crc = 0; } else { diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -650,6 +650,12 @@ th->th_sum = in6_cksum_pseudo(ip6, tlen, IPPROTO_TCP, m->m_pkthdr.csum_data); th->th_sum ^= 0xffff; + } else if (m->m_pkthdr.csum_flags & CSUM_IP6_TCP) { + /* + * Packet from local host (maybe from a VM). + * Checksum not required. + */ + th->th_sum = 0; } else th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen); if (th->th_sum) { @@ -710,6 +716,12 @@ htonl(m->m_pkthdr.csum_data + tlen + IPPROTO_TCP)); th->th_sum ^= 0xffff; + } else if (m->m_pkthdr.csum_flags & CSUM_IP_TCP) { + /* + * Packet from local host (maybe from a VM). + * Checksum not required. + */ + th->th_sum = 0; } else { struct ipovly *ipov = (struct ipovly *)ip; diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -560,6 +560,12 @@ ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + proto)); uh_sum ^= 0xffff; + } else if (m->m_pkthdr.csum_flags & CSUM_IP_UDP) { + /* + * Packet from local host (maybe from a VM). + * Checksum not required. + */ + uh_sum = 0; } else { char b[offsetof(struct ipovly, ih_src)]; struct ipovly *ipov = (struct ipovly *)ip; diff --git a/sys/netinet6/ip6_fastfwd.c b/sys/netinet6/ip6_fastfwd.c --- a/sys/netinet6/ip6_fastfwd.c +++ b/sys/netinet6/ip6_fastfwd.c @@ -27,6 +27,7 @@ #include #include "opt_inet6.h" #include "opt_ipstealth.h" +#include "opt_sctp.h" #include #include @@ -54,6 +55,10 @@ #include #include +#if defined(SCTP) || defined(SCTP_SUPPORT) +#include +#endif + static int ip6_findroute(struct nhop_object **pnh, const struct sockaddr_in6 *dst, struct mbuf *m) @@ -277,6 +282,29 @@ ip6->ip6_hlim -= IPV6_HLIMDEC; } + /* + * If TCP/UDP header still needs a valid checksum and interface will not + * calculate it for us, do it here. 
+ */ + if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 & + ~nh->nh_ifp->if_hwassist)) { + int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL); + + if (offset < sizeof(struct ip6_hdr) || offset > m->m_pkthdr.len) + goto drop; + in6_delayed_cksum(m, m->m_pkthdr.len - offset, offset); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; + } +#if defined(SCTP) || defined(SCTP_SUPPORT) + if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP6_SCTP & + ~nh->nh_ifp->if_hwassist)) { + int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL); + + sctp_delayed_cksum(m, offset); + m->m_pkthdr.csum_flags &= ~CSUM_IP6_SCTP; + } +#endif + m_clrprotoflags(m); /* Avoid confusing lower layers. */ IP_PROBE(send, NULL, NULL, ip6, nh->nh_ifp, NULL, ip6); diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c --- a/sys/netinet6/ip6_forward.c +++ b/sys/netinet6/ip6_forward.c @@ -75,6 +75,10 @@ #include +#if defined(SCTP) || defined(SCTP_SUPPORT) +#include +#endif + /* * Forward a packet. If some error occurs return the sender * an icmp packet. Note we can't always generate a meaningful @@ -389,6 +393,29 @@ goto bad; } + /* + * If TCP/UDP header still needs a valid checksum and interface will not + * calculate it for us, do it here. 
+ */ + if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 & + ~nh->nh_ifp->if_hwassist)) { + int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL); + + if (offset < sizeof(struct ip6_hdr) || offset > m->m_pkthdr.len) + goto bad; + in6_delayed_cksum(m, m->m_pkthdr.len - offset, offset); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; + } +#if defined(SCTP) || defined(SCTP_SUPPORT) + if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP6_SCTP & + ~nh->nh_ifp->if_hwassist)) { + int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL); + + sctp_delayed_cksum(m, offset); + m->m_pkthdr.csum_flags &= ~CSUM_IP6_SCTP; + } +#endif + /* Currently LLE layer stores embedded IPv6 addresses */ if (IN6_IS_SCOPE_LINKLOCAL(&dst.sin6_addr)) { in6_set_unicast_scopeid(&dst.sin6_addr, dst.sin6_scope_id); diff --git a/sys/netinet6/sctp6_usrreq.c b/sys/netinet6/sctp6_usrreq.c --- a/sys/netinet6/sctp6_usrreq.c +++ b/sys/netinet6/sctp6_usrreq.c @@ -139,7 +139,11 @@ goto out; } ecn_bits = IPV6_TRAFFIC_CLASS(ip6); - if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) { + if (m->m_pkthdr.csum_flags & (CSUM_SCTP_VALID | CSUM_IP6_SCTP)) { + /* + * Packets with CSUM_IP6_SCTP were sent from local host using + * checksum offloading. Checksum not required. + */ SCTP_STAT_INCR(sctps_recvhwcrc); compute_crc = 0; } else { diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -434,6 +434,12 @@ uh_sum = in6_cksum_pseudo(ip6, ulen, nxt, m->m_pkthdr.csum_data); uh_sum ^= 0xffff; + } else if (m->m_pkthdr.csum_flags & CSUM_IP6_UDP) { + /* + * Packet from local host (maybe from a VM). + * Checksum not required. + */ + uh_sum = 0; } else uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen); diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -641,16 +641,15 @@ /* * Flags indicating checksum, segmentation and other offload work to be - * done, or already done, by hardware or lower layers.
It is split into - * separate inbound and outbound flags. + * done, or already done, by hardware or lower layers. * - * Outbound flags that are set by upper protocol layers requesting lower + * Flags that are set by upper protocol layers requesting lower * layers, or ideally the hardware, to perform these offloading tasks. - * For outbound packets this field and its flags can be directly tested - * against ifnet if_hwassist. Note that the outbound and the inbound flags do - * not collide right now but they could be allowed to (as long as the flags are - * scrubbed appropriately when the direction of an mbuf changes). CSUM_BITS - * would also have to split into CSUM_BITS_TX and CSUM_BITS_RX. + * Before passing packets to a network interface this field and its flags can + * be directly tested against ifnet if_hwassist. Note that the flags + * CSUM_IP_SCTP, CSUM_IP_TCP, and CSUM_IP_UDP can appear on input processing + * of SCTP, TCP, and UDP. In such a case the checksum will not be computed or + * validated by SCTP, TCP, or UDP, since the packet has not been on the wire. * * CSUM_INNER_ is the same as CSUM_ but it applies to the inner frame. * The CSUM_ENCAP_ bits identify the outer encapsulation. @@ -679,7 +678,7 @@ #define CSUM_ENCAP_VXLAN 0x00040000 /* VXLAN outer encapsulation */ #define CSUM_ENCAP_RSVD1 0x00080000 -/* Inbound checksum support where the checksum was verified by hardware. */ +/* Flags used to indicate that the checksum was verified by hardware. */ #define CSUM_INNER_L3_CALC 0x00100000 #define CSUM_INNER_L3_VALID 0x00200000 #define CSUM_INNER_L4_CALC 0x00400000