Page MenuHomeFreeBSD

D51475.id159201.diff
No OneTemporary

D51475.id159201.diff

diff --git a/share/man/man9/mbuf.9 b/share/man/man9/mbuf.9
--- a/share/man/man9/mbuf.9
+++ b/share/man/man9/mbuf.9
@@ -22,7 +22,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd December 28, 2023
+.Dd July 25, 2025
.Dt MBUF 9
.Os
.\"
@@ -1091,7 +1091,7 @@
altered prior to transmission.
.El
.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
-This section currently applies to TCP/IP only.
+This section currently applies to TCP/IP and SCTP only.
In order to save the host CPU resources, computing checksums is
offloaded to the network interface hardware if possible.
The
@@ -1135,34 +1135,29 @@
.Va csum_flags .
.Pp
The flags demanding a particular action from an interface are as follows:
-.Bl -tag -width ".Dv CSUM_TCP" -offset indent
+.Bl -ohang -offset indent
.It Dv CSUM_IP
The IP header checksum is to be computed and stored in the
corresponding field of the packet.
The hardware is expected to know the format of an IP header
to determine the offset of the IP checksum field.
-.It Dv CSUM_TCP
-The TCP checksum is to be computed.
-(See below.)
-.It Dv CSUM_UDP
-The UDP checksum is to be computed.
-(See below.)
-.El
-.Pp
-Should a TCP or UDP checksum be offloaded to the hardware,
-the field
+.It Dv CSUM_IP_TCP CSUM_IP_UDP CSUM_IP_SCTP
+The TCP, UDP, or SCTP checksum is to be computed and stored in the
+corresponding field of the packet.
+To assist the hardware, the field
.Va csum_data
will contain the byte offset of the checksum field relative to the
end of the IP header.
In this case, the checksum field will be initially
set by the TCP/IP module to the checksum of the pseudo header
-defined by the TCP and UDP specifications.
+defined by the TCP and UDP specifications, or to zero by the SCTP module.
+.El
.Pp
On input, an interface indicates the actions it has performed
on a packet by setting one or more of the following flags in
.Va csum_flags
associated with the packet:
-.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
+.Bl -ohang -offset indent
.It Dv CSUM_IP_CHECKED
The IP header checksum has been computed.
.It Dv CSUM_IP_VALID
@@ -1203,6 +1198,17 @@
calculated over any valid packet will be
.Li 0xFFFF
as long as the original checksum field is included.
+.Pp
+Note that the flag
+.Dv CSUM_IP_TCP ,
+.Dv CSUM_IP_UDP ,
+or
+.Dv CSUM_IP_SCTP
+can appear on input if a packet sent by the local host with checksum
+offloading is switched to the input path (e.g., due to a virtual interface
+such as tap or epair).
+The TCP, UDP, or SCTP checksum is still incorrect but will be ignored because
+the packet has not been on the wire.
.Sh STRESS TESTING
When running a kernel compiled with the option
.Dv MBUF_STRESS_TEST ,
diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c
--- a/sys/net/if_epair.c
+++ b/sys/net/if_epair.c
@@ -66,9 +66,9 @@
#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_media.h>
-#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_types.h>
+#include <net/if_vlan_var.h>
#include <net/netisr.h>
#ifdef RSS
#include <net/rss_config.h>
@@ -96,6 +96,7 @@
#define EPAIR_LOCK_DESTROY() mtx_destroy(&epair_n_index_mtx)
#define EPAIR_LOCK() mtx_lock(&epair_n_index_mtx)
#define EPAIR_UNLOCK() mtx_unlock(&epair_n_index_mtx)
+#define EPAIR_LOCK_ASSERT() mtx_assert(&epair_n_index_mtx, MA_OWNED)
struct epair_softc;
struct epair_queue {
@@ -425,6 +426,23 @@
imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX;
}
+/*
+ * Recompute ifp->if_hwassist from the capabilities that are currently
+ * enabled in ifp->if_capenable.  Must be called with EPAIR_LOCK held.
+ */
+static void
+epair_caps_changed(struct ifnet *ifp)
+{
+	uint64_t csum_v4, csum_v6;
+
+	EPAIR_LOCK_ASSERT();
+	csum_v4 = (ifp->if_capenable & IFCAP_TXCSUM) != 0 ?
+	    (CSUM_IP_TCP | CSUM_IP_UDP) : 0;
+	csum_v6 = (ifp->if_capenable & IFCAP_TXCSUM_IPV6) != 0 ?
+	    (CSUM_IP6_TCP | CSUM_IP6_UDP) : 0;
+	ifp->if_hwassist = csum_v4 | csum_v6;
+}
+
static int
epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
@@ -452,6 +470,33 @@
error = 0;
break;
+ case SIOCGIFCAP:
+ ifr->ifr_reqcap = ifp->if_capabilities;
+ ifr->ifr_curcap = ifp->if_capenable;
+ error = 0;
+ break;
+ case SIOCSIFCAP:
+ sc = ifp->if_softc;
+ EPAIR_LOCK();
+ ifp->if_capenable = ifr->ifr_reqcap & ifp->if_capabilities;
+ epair_caps_changed(ifp);
+ /*
+ * If IFCAP_TXCSUM(_IPV6) has been changed, change it on the
+ * other epair interface as well.
+ */
+ if ((ifp->if_capenable ^ sc->oifp->if_capenable) &
+ (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6)) {
+ sc->oifp->if_capenable &=
+ ~(IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6);
+ sc->oifp->if_capenable |= ifp->if_capenable &
+ (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6);
+ epair_caps_changed(sc->oifp);
+ }
+ EPAIR_UNLOCK();
+ VLAN_CAPABILITIES(ifp);
+ error = 0;
+ break;
+
default:
/* Let the common ethernet handler process this. */
error = ether_ioctl(ifp, cmd, data);
@@ -549,8 +594,12 @@
ifp->if_dname = epairname;
ifp->if_dunit = unit;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_capabilities = IFCAP_VLAN_MTU;
- ifp->if_capenable = IFCAP_VLAN_MTU;
+ EPAIR_LOCK();
+ ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_TXCSUM |
+ IFCAP_TXCSUM_IPV6;
+ ifp->if_capenable = IFCAP_VLAN_MTU | IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;
+ epair_caps_changed(ifp);
+ EPAIR_UNLOCK();
ifp->if_transmit = epair_transmit;
ifp->if_qflush = epair_qflush;
ifp->if_start = epair_start;
diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c
--- a/sys/netinet/ip_fastfwd.c
+++ b/sys/netinet/ip_fastfwd.c
@@ -69,6 +69,7 @@
#include <sys/cdefs.h>
#include "opt_ipstealth.h"
+#include "opt_sctp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -102,6 +103,10 @@
#include <machine/in_cksum.h>
+#if defined(SCTP) || defined(SCTP_SUPPORT)
+#include <netinet/sctp_crc32.h>
+#endif
+
#define V_ipsendredirects VNET(ipsendredirects)
static struct mbuf *
@@ -460,6 +465,23 @@
} else
gw = (const struct sockaddr *)dst;
+ /*
+ * If TCP/UDP header still needs a valid checksum and interface will not
+ * calculate it for us, do it here.
+ */
+ if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &
+ ~nh->nh_ifp->if_hwassist)) {
+ in_delayed_cksum(m);
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ }
+#if defined(SCTP) || defined(SCTP_SUPPORT)
+ if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP_SCTP &
+ ~nh->nh_ifp->if_hwassist)) {
+ sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
+ m->m_pkthdr.csum_flags &= ~CSUM_IP_SCTP;
+ }
+#endif
+
/* Handle redirect case. */
redest.s_addr = 0;
if (V_ipsendredirects && osrc.s_addr == ip->ip_src.s_addr &&
diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c
--- a/sys/netinet/sctp_input.c
+++ b/sys/netinet/sctp_input.c
@@ -5783,6 +5783,13 @@
if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) {
SCTP_STAT_INCR(sctps_recvhwcrc);
compute_crc = 0;
+ } else if (m->m_pkthdr.csum_flags & CSUM_IP_SCTP) {
+ /*
+ * Packet from local host (maybe from a VM).
+ * Checksum not required; skip the software CRC.
+ */
+ SCTP_STAT_INCR(sctps_recvzerocrc);
+ compute_crc = 0;
} else {
SCTP_STAT_INCR(sctps_recvswcrc);
compute_crc = 1;
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -649,6 +649,12 @@
th->th_sum = in6_cksum_pseudo(ip6, tlen,
IPPROTO_TCP, m->m_pkthdr.csum_data);
th->th_sum ^= 0xffff;
+ } else if (m->m_pkthdr.csum_flags & CSUM_IP6_TCP) {
+ /*
+ * Packet from local host (maybe from a VM).
+ * Checksum not required.
+ */
+ th->th_sum = 0;
} else
th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen);
if (th->th_sum) {
@@ -709,6 +715,12 @@
htonl(m->m_pkthdr.csum_data + tlen +
IPPROTO_TCP));
th->th_sum ^= 0xffff;
+ } else if (m->m_pkthdr.csum_flags & CSUM_IP_TCP) {
+ /*
+ * Packet from local host (maybe from a VM).
+ * Checksum not required.
+ */
+ th->th_sum = 0;
} else {
struct ipovly *ipov = (struct ipovly *)ip;
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -555,6 +555,12 @@
ip->ip_dst.s_addr, htonl((u_short)len +
m->m_pkthdr.csum_data + proto));
uh_sum ^= 0xffff;
+ } else if (m->m_pkthdr.csum_flags & CSUM_IP_UDP) {
+ /*
+ * Packet from local host (maybe from a VM).
+ * Checksum not required.
+ */
+ uh_sum = 0;
} else {
char b[offsetof(struct ipovly, ih_src)];
struct ipovly *ipov = (struct ipovly *)ip;
diff --git a/sys/netinet6/ip6_fastfwd.c b/sys/netinet6/ip6_fastfwd.c
--- a/sys/netinet6/ip6_fastfwd.c
+++ b/sys/netinet6/ip6_fastfwd.c
@@ -27,6 +27,7 @@
#include <sys/cdefs.h>
#include "opt_inet6.h"
#include "opt_ipstealth.h"
+#include "opt_sctp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -54,6 +55,10 @@
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
+#if defined(SCTP) || defined(SCTP_SUPPORT)
+#include <netinet/sctp_crc32.h>
+#endif
+
static int
ip6_findroute(struct nhop_object **pnh, const struct sockaddr_in6 *dst,
struct mbuf *m)
@@ -277,6 +282,27 @@
ip6->ip6_hlim -= IPV6_HLIMDEC;
}
+ /*
+ * If TCP/UDP header still needs a valid checksum and interface will not
+ * calculate it for us, do it here.
+ */
+ if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
+ ~nh->nh_ifp->if_hwassist)) {
+ u_short offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL);
+ if (offset < sizeof(struct ip6_hdr) || offset > m->m_pkthdr.len)
+ goto drop;
+ in6_delayed_cksum(m, m->m_pkthdr.len - offset, offset);
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
+ }
+#if defined(SCTP) || defined(SCTP_SUPPORT)
+ if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP6_SCTP &
+ ~nh->nh_ifp->if_hwassist)) {
+ uint32_t offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL);
+ sctp_delayed_cksum(m, offset);
+ m->m_pkthdr.csum_flags &= ~CSUM_IP6_SCTP;
+ }
+#endif
+
m_clrprotoflags(m); /* Avoid confusing lower layers. */
IP_PROBE(send, NULL, NULL, ip6, nh->nh_ifp, NULL, ip6);
diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c
--- a/sys/netinet6/ip6_forward.c
+++ b/sys/netinet6/ip6_forward.c
@@ -75,6 +75,10 @@
#include <netipsec/ipsec_support.h>
+#if defined(SCTP) || defined(SCTP_SUPPORT)
+#include <netinet/sctp_crc32.h>
+#endif
+
/*
* Forward a packet. If some error occurs return the sender
* an icmp packet. Note we can't always generate a meaningful
@@ -389,6 +393,27 @@
goto bad;
}
+ /*
+ * If TCP/UDP header still needs a valid checksum and interface will not
+ * calculate it for us, do it here.
+ */
+ if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
+ ~nh->nh_ifp->if_hwassist)) {
+ u_short offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL);
+ if (offset < sizeof(struct ip6_hdr) || offset > m->m_pkthdr.len)
+ goto bad;
+ in6_delayed_cksum(m, m->m_pkthdr.len - offset, offset);
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
+ }
+#if defined(SCTP) || defined(SCTP_SUPPORT)
+ if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP6_SCTP &
+ ~nh->nh_ifp->if_hwassist)) {
+ uint32_t offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL);
+ sctp_delayed_cksum(m, offset);
+ m->m_pkthdr.csum_flags &= ~CSUM_IP6_SCTP;
+ }
+#endif
+
/* Currently LLE layer stores embedded IPv6 addresses */
if (IN6_IS_SCOPE_LINKLOCAL(&dst.sin6_addr)) {
in6_set_unicast_scopeid(&dst.sin6_addr, dst.sin6_scope_id);
diff --git a/sys/netinet6/sctp6_usrreq.c b/sys/netinet6/sctp6_usrreq.c
--- a/sys/netinet6/sctp6_usrreq.c
+++ b/sys/netinet6/sctp6_usrreq.c
@@ -142,6 +142,13 @@
if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) {
SCTP_STAT_INCR(sctps_recvhwcrc);
compute_crc = 0;
+ } else if (m->m_pkthdr.csum_flags & CSUM_IP6_SCTP) {
+ /*
+ * Packet from local host (maybe from a VM).
+ * Checksum not required; skip the software CRC.
+ */
+ SCTP_STAT_INCR(sctps_recvzerocrc);
+ compute_crc = 0;
} else {
SCTP_STAT_INCR(sctps_recvswcrc);
compute_crc = 1;
diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c
--- a/sys/netinet6/udp6_usrreq.c
+++ b/sys/netinet6/udp6_usrreq.c
@@ -429,6 +429,12 @@
uh_sum = in6_cksum_pseudo(ip6, ulen, nxt,
m->m_pkthdr.csum_data);
uh_sum ^= 0xffff;
+ } else if (m->m_pkthdr.csum_flags & CSUM_IP6_UDP) {
+ /*
+ * Packet from local host (maybe from a VM).
+ * Checksum not required.
+ */
+ uh_sum = 0;
} else
uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen);
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -647,10 +647,10 @@
* Outbound flags that are set by upper protocol layers requesting lower
* layers, or ideally the hardware, to perform these offloading tasks.
* For outbound packets this field and its flags can be directly tested
- * against ifnet if_hwassist. Note that the outbound and the inbound flags do
- * not collide right now but they could be allowed to (as long as the flags are
- * scrubbed appropriately when the direction of an mbuf changes). CSUM_BITS
- * would also have to split into CSUM_BITS_TX and CSUM_BITS_RX.
+ * against ifnet if_hwassist. Note that the outbound flags CSUM_IP{,6}_UDP,
+ * CSUM_IP{,6}_TCP, and CSUM_IP{,6}_SCTP can appear on an inbound packet if the
+ * mbuf changed direction. The checksum is then still incorrect, but TCP, UDP,
+ * or SCTP ignores that since the packet has not been on the wire.
*
* CSUM_INNER_<x> is the same as CSUM_<x> but it applies to the inner frame.
* The CSUM_ENCAP_<x> bits identify the outer encapsulation.

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 15, 10:49 AM (9 h, 21 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27646935
Default Alt Text
D51475.id159201.diff (13 KB)

Event Timeline