TCP: hand ICMP fragmentation-needed / packet-too-big handling to the TOE
driver for offloaded connections.

Summary of what this patch does:

- toecore.h, toecore.c: add a tod_pmtu_update method to struct toedev.
  toedev_pmtu_update() does nothing and is installed as the initial value
  of tod->tod_pmtu_update next to the other toedev_* defaults.
- tcp_offload.h, tcp_offload.c: add tcp_offload_pmtu_update(), which
  KASSERTs that tp->tod is not NULL, runs INP_WLOCK_ASSERT on tp->t_inpcb,
  and then calls tod->tod_pmtu_update(tod, tp, seq, mtu).
- tcp_subr.c: move the next-MTU selection out of tcp_ctlinput_with_port()
  and tcp6_ctlinput_with_port() into tcp_next_pmtu() and tcp6_next_pmtu().
  When TF_TOE is set in tp->t_flags and cmd == PRC_MSGSIZE, the computed
  MTU is passed to tcp_offload_pmtu_update() and the code jumps to "out"
  before the tp->t_port check and before the software PMTU path.

NOTE(review): the sequence number is handed to the driver as icmp_tcp_seq,
while the IPv4 software path compares ntohl(icmp_tcp_seq).  Presumably the
TOE driver is expected to do the byte-order conversion -- confirm.

NOTE(review): the header names on the #include lines were missing from the
text this patch was recovered from.  <netinet/tcp.h> is assumed for the two
added lines because the new prototypes use tcp_seq; <sys/_eventhandler.h>
is assumed for the neighbouring context line in toecore.h.  Indentation and
blank lines were rebuilt from the hunk line counts.  Confirm all of this
against the tree before applying.

diff --git a/sys/netinet/tcp_offload.h b/sys/netinet/tcp_offload.h
--- a/sys/netinet/tcp_offload.h
+++ b/sys/netinet/tcp_offload.h
@@ -36,6 +36,8 @@
 #error "no user-serviceable parts inside"
 #endif
 
+#include <netinet/tcp.h>
+
 extern int registered_toedevs;
 
 int tcp_offload_connect(struct socket *, struct sockaddr *);
@@ -48,5 +50,6 @@
 void tcp_offload_tcp_info(struct tcpcb *, struct tcp_info *);
 int tcp_offload_alloc_tls_session(struct tcpcb *, struct ktls_session *, int);
 void tcp_offload_detach(struct tcpcb *);
+void tcp_offload_pmtu_update(struct tcpcb *, tcp_seq, int);
 
 #endif
diff --git a/sys/netinet/tcp_offload.c b/sys/netinet/tcp_offload.c
--- a/sys/netinet/tcp_offload.c
+++ b/sys/netinet/tcp_offload.c
@@ -219,3 +219,14 @@
 
 	tod->tod_pcb_detach(tod, tp);
 }
+
+void
+tcp_offload_pmtu_update(struct tcpcb *tp, tcp_seq seq, int mtu)
+{
+	struct toedev *tod = tp->tod;
+
+	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	tod->tod_pmtu_update(tod, tp, seq, mtu);
+}
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -2791,6 +2791,21 @@
 #endif /* INET6 */
 
 #ifdef INET
+/* Path MTU to try next when a fragmentation-needed message is received. */
+static inline int
+tcp_next_pmtu(const struct icmp *icp, const struct ip *ip)
+{
+	int mtu = ntohs(icp->icmp_nextmtu);
+
+	/* If no alternative MTU was proposed, try the next smaller one. */
+	if (!mtu)
+		mtu = ip_next_mtu(ntohs(ip->ip_len), 1);
+	if (mtu < V_tcp_minmss + sizeof(struct tcpiphdr))
+		mtu = V_tcp_minmss + sizeof(struct tcpiphdr);
+
+	return (mtu);
+}
+
 static void
 tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port)
 {
@@ -2846,6 +2861,17 @@
 		    !(inp->inp_flags & INP_DROPPED) &&
 		    !(inp->inp_socket == NULL)) {
 			tp = intotcpcb(inp);
+#ifdef TCP_OFFLOAD
+			if (tp->t_flags & TF_TOE && cmd == PRC_MSGSIZE) {
+				/*
+				 * MTU discovery for offloaded connections.  Let
+				 * the TOE driver verify seq# and process it.
+				 */
+				mtu = tcp_next_pmtu(icp, ip);
+				tcp_offload_pmtu_update(tp, icmp_tcp_seq, mtu);
+				goto out;
+			}
+#endif
 			if (tp->t_port != port) {
 				goto out;
 			}
@@ -2853,24 +2879,11 @@
 			    SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) {
 				if (cmd == PRC_MSGSIZE) {
 					/*
-					 * MTU discovery:
-					 * If we got a needfrag set the MTU
-					 * in the route to the suggested new
-					 * value (if given) and then notify.
+					 * MTU discovery: we got a needfrag and
+					 * will potentially try a lower MTU.
 					 */
-					mtu = ntohs(icp->icmp_nextmtu);
-					/*
-					 * If no alternative MTU was
-					 * proposed, try the next smaller
-					 * one.
-					 */
-					if (!mtu)
-						mtu = ip_next_mtu(
-						    ntohs(ip->ip_len), 1);
-					if (mtu < V_tcp_minmss +
-					    sizeof(struct tcpiphdr))
-						mtu = V_tcp_minmss +
-						    sizeof(struct tcpiphdr);
+					mtu = tcp_next_pmtu(icp, ip);
+
 					/*
 					 * Only process the offered MTU if it
 					 * is smaller than the current one.
@@ -2948,6 +2961,20 @@
 #endif /* INET */
 
 #ifdef INET6
+static inline int
+tcp6_next_pmtu(const struct icmp6_hdr *icmp6)
+{
+	int mtu = ntohl(icmp6->icmp6_mtu);
+
+	/*
+	 * If no alternative MTU was proposed, or the proposed MTU was too
+	 * small, set to the min.
+	 */
+	if (mtu < IPV6_MMTU)
+		mtu = IPV6_MMTU - 8;	/* XXXNP: what is the adjustment for? */
+	return (mtu);
+}
+
 static void
 tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port)
 {
@@ -3039,6 +3066,14 @@
 		    !(inp->inp_flags & INP_DROPPED) &&
 		    !(inp->inp_socket == NULL)) {
 			tp = intotcpcb(inp);
+#ifdef TCP_OFFLOAD
+			if (tp->t_flags & TF_TOE && cmd == PRC_MSGSIZE) {
+				/* MTU discovery for offloaded connections. */
+				mtu = tcp6_next_pmtu(icmp6);
+				tcp_offload_pmtu_update(tp, icmp_tcp_seq, mtu);
+				goto out;
+			}
+#endif
 			if (tp->t_port != port) {
 				goto out;
 			}
@@ -3051,15 +3086,8 @@
 					 * in the route to the suggested new
 					 * value (if given) and then notify.
 					 */
-					mtu = ntohl(icmp6->icmp6_mtu);
-					/*
-					 * If no alternative MTU was
-					 * proposed, or the proposed
-					 * MTU was too small, set to
-					 * the min.
-					 */
-					if (mtu < IPV6_MMTU)
-						mtu = IPV6_MMTU - 8;
+					mtu = tcp6_next_pmtu(icmp6);
+
 					bzero(&inc, sizeof(inc));
 					inc.inc_fibnum = M_GETFIB(m);
 					inc.inc_flags |= INC_ISIPV6;
diff --git a/sys/netinet/toecore.h b/sys/netinet/toecore.h
--- a/sys/netinet/toecore.h
+++ b/sys/netinet/toecore.h
@@ -35,6 +35,7 @@
 #error "no user-serviceable parts inside"
 #endif
 
+#include <netinet/tcp.h>
 #include <sys/_eventhandler.h>
 
 struct tcpopt;
@@ -114,6 +115,9 @@
 	/* Create a TLS session */
 	int (*tod_alloc_tls_session)(struct toedev *, struct tcpcb *,
 	    struct ktls_session *, int);
+
+	/* ICMP fragmentation-needed received, adjust PMTU. */
+	void (*tod_pmtu_update)(struct toedev *, struct tcpcb *, tcp_seq, int);
 };
 
 typedef	void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c
--- a/sys/netinet/toecore.c
+++ b/sys/netinet/toecore.c
@@ -199,6 +199,14 @@
 	return (EINVAL);
 }
 
+static void
+toedev_pmtu_update(struct toedev *tod __unused, struct tcpcb *tp __unused,
+    tcp_seq seq __unused, int mtu __unused)
+{
+
+	return;
+}
+
 /*
  * Inform one or more TOE devices about a listening socket.
  */
@@ -290,6 +298,7 @@
 	tod->tod_ctloutput = toedev_ctloutput;
 	tod->tod_tcp_info = toedev_tcp_info;
 	tod->tod_alloc_tls_session = toedev_alloc_tls_session;
+	tod->tod_pmtu_update = toedev_pmtu_update;
 }
 
 /*