Index: sys/netinet/tcp_offload.h
===================================================================
--- sys/netinet/tcp_offload.h
+++ sys/netinet/tcp_offload.h
@@ -36,6 +36,8 @@
 #error "no user-serviceable parts inside"
 #endif
 
+#include <netinet/tcp.h>
+
 extern int registered_toedevs;
 
 int tcp_offload_connect(struct socket *, struct sockaddr *);
@@ -48,5 +50,6 @@
 void tcp_offload_tcp_info(struct tcpcb *, struct tcp_info *);
 int tcp_offload_alloc_tls_session(struct tcpcb *, struct ktls_session *, int);
 void tcp_offload_detach(struct tcpcb *);
+void tcp_offload_pmtu_update(struct tcpcb *, tcp_seq, int);
 
 #endif
Index: sys/netinet/tcp_offload.c
===================================================================
--- sys/netinet/tcp_offload.c
+++ sys/netinet/tcp_offload.c
@@ -219,3 +219,20 @@
 
 	tod->tod_pcb_detach(tod, tp);
 }
+
+/*
+ * Hand a path MTU update for an offloaded connection to its TOE driver.
+ * seq and mtu are forwarded to the driver's tod_pmtu_update method as is;
+ * the callers in tcp_subr.c pass the sequence number without ntohl().
+ * The inpcb write lock must be held.
+ */
+void
+tcp_offload_pmtu_update(struct tcpcb *tp, tcp_seq seq, int mtu)
+{
+	struct toedev *tod = tp->tod;
+
+	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	tod->tod_pmtu_update(tod, tp, seq, mtu);
+}
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -2460,6 +2460,27 @@
 #endif /* INET6 */
 
 #ifdef INET
+/*
+ * Path MTU to try next when a fragmentation-needed message is received.
+ * The value is the next-hop MTU from the ICMP header, or the result of
+ * ip_next_mtu() on the supplied IP header's length if the ICMP header did
+ * not propose one, and is never less than V_tcp_minmss plus
+ * sizeof(struct tcpiphdr).
+ */
+static inline int
+tcp_next_pmtu(const struct icmp *icp, const struct ip *ip)
+{
+	int mtu = ntohs(icp->icmp_nextmtu);
+
+	/* If no alternative MTU was proposed, try the next smaller one. */
+	if (!mtu)
+		mtu = ip_next_mtu(ntohs(ip->ip_len), 1);
+	if (mtu < V_tcp_minmss + sizeof(struct tcpiphdr))
+		mtu = V_tcp_minmss + sizeof(struct tcpiphdr);
+
+	return (mtu);
+}
+
 void
 tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
@@ -2515,28 +2536,26 @@
 		    !(inp->inp_flags & INP_DROPPED) &&
 		    !(inp->inp_socket == NULL)) {
 			tp = intotcpcb(inp);
+#ifdef TCP_OFFLOAD
+			if (tp->t_flags & TF_TOE && cmd == PRC_MSGSIZE) {
+				/*
+				 * MTU discovery for offloaded connections.  Let
+				 * the TOE driver verify seq# and process it.
+				 */
+				mtu = tcp_next_pmtu(icp, ip);
+				tcp_offload_pmtu_update(tp, icmp_tcp_seq, mtu);
+				goto out;
+			}
+#endif
 			if (SEQ_GEQ(ntohl(icmp_tcp_seq), tp->snd_una) &&
 			    SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) {
 				if (cmd == PRC_MSGSIZE) {
 					/*
-					 * MTU discovery:
-					 * If we got a needfrag set the MTU
-					 * in the route to the suggested new
-					 * value (if given) and then notify.
+					 * MTU discovery: we got a needfrag and
+					 * will potentially try a lower MTU.
 					 */
-					mtu = ntohs(icp->icmp_nextmtu);
-					/*
-					 * If no alternative MTU was
-					 * proposed, try the next smaller
-					 * one.
-					 */
-					if (!mtu)
-						mtu = ip_next_mtu(
-						    ntohs(ip->ip_len), 1);
-					if (mtu < V_tcp_minmss +
-					    sizeof(struct tcpiphdr))
-						mtu = V_tcp_minmss +
-						    sizeof(struct tcpiphdr);
+					mtu = tcp_next_pmtu(icp, ip);
+
 					/*
 					 * Only process the offered MTU if it
 					 * is smaller than the current one.
@@ -2570,6 +2589,25 @@
 #endif /* INET */
 
 #ifdef INET6
+/*
+ * IPv6 path MTU to try next: the MTU from the ICMPv6 header, or
+ * IPV6_MMTU - 8 if that value is smaller than IPV6_MMTU (which includes the
+ * case where no MTU was proposed).
+ */
+static inline int
+tcp6_next_pmtu(const struct icmp6_hdr *icmp6)
+{
+	int mtu = ntohl(icmp6->icmp6_mtu);
+
+	/*
+	 * If no alternative MTU was proposed, or the proposed MTU was too
+	 * small, set to the min.
+	 */
+	if (mtu < IPV6_MMTU)
+		mtu = IPV6_MMTU - 8;	/* XXXNP: what is the adjustment for? */
+	return (mtu);
+}
+
 void
 tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
 {
@@ -2661,6 +2699,14 @@
 		    !(inp->inp_flags & INP_DROPPED) &&
 		    !(inp->inp_socket == NULL)) {
 			tp = intotcpcb(inp);
+#ifdef TCP_OFFLOAD
+			if (tp->t_flags & TF_TOE && cmd == PRC_MSGSIZE) {
+				/* MTU discovery for offloaded connections. */
+				mtu = tcp6_next_pmtu(icmp6);
+				tcp_offload_pmtu_update(tp, icmp_tcp_seq, mtu);
+				goto out;
+			}
+#endif
 			if (SEQ_GEQ(ntohl(icmp_tcp_seq), tp->snd_una) &&
 			    SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) {
 				if (cmd == PRC_MSGSIZE) {
@@ -2670,15 +2716,8 @@
 					 * in the route to the suggested new
 					 * value (if given) and then notify.
 					 */
-					mtu = ntohl(icmp6->icmp6_mtu);
-					/*
-					 * If no alternative MTU was
-					 * proposed, or the proposed
-					 * MTU was too small, set to
-					 * the min.
-					 */
-					if (mtu < IPV6_MMTU)
-						mtu = IPV6_MMTU - 8;
+					mtu = tcp6_next_pmtu(icmp6);
+
 					bzero(&inc, sizeof(inc));
 					inc.inc_fibnum = M_GETFIB(m);
 					inc.inc_flags |= INC_ISIPV6;
Index: sys/netinet/toecore.h
===================================================================
--- sys/netinet/toecore.h
+++ sys/netinet/toecore.h
@@ -35,6 +35,7 @@
 #error "no user-serviceable parts inside"
 #endif
 
+#include <netinet/tcp.h>
 #include <sys/_eventhandler.h>
 
 struct tcpopt;
@@ -114,6 +115,9 @@
 	/* Create a TLS session */
 	int (*tod_alloc_tls_session)(struct toedev *, struct tcpcb *,
 	    struct ktls_session *, int);
+
+	/* ICMP fragmentation-needed received, adjust PMTU. */
+	void (*tod_pmtu_update)(struct toedev *, struct tcpcb *, tcp_seq, int);
 };
 
 typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
Index: sys/netinet/toecore.c
===================================================================
--- sys/netinet/toecore.c
+++ sys/netinet/toecore.c
@@ -199,6 +199,15 @@
 	return (EINVAL);
 }
 
+/* No-op tod_pmtu_update implementation. */
+static void
+toedev_pmtu_update(struct toedev *tod __unused, struct tcpcb *tp __unused,
+    tcp_seq seq __unused, int mtu __unused)
+{
+
+	return;
+}
+
 /*
  * Inform one or more TOE devices about a listening socket.
  */
@@ -290,6 +299,7 @@
 	tod->tod_ctloutput = toedev_ctloutput;
 	tod->tod_tcp_info = toedev_tcp_info;
 	tod->tod_alloc_tls_session = toedev_alloc_tls_session;
+	tod->tod_pmtu_update = toedev_pmtu_update;
 }
 
 /*