Index: sys/netinet/tcp_output.c =================================================================== --- sys/netinet/tcp_output.c +++ sys/netinet/tcp_output.c @@ -675,6 +675,12 @@ send: SOCKBUF_LOCK_ASSERT(&so->so_snd); + if (len > 0) { + if (len >= tp->t_maxseg) + tp->t_pmtud_flags |= PLPMTU_MAXSEGSNT; + else + tp->t_pmtud_flags &= ~PLPMTU_MAXSEGSNT; + } /* * Before ESTABLISHED, force sending of initial options * unless TCP set not to do any options. @@ -1219,8 +1225,12 @@ * * NB: Don't set DF on small MTU/MSS to have a safe fallback. */ - if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss) + if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss) { ip->ip_off |= htons(IP_DF); + tp->t_pmtud_flags |= PLPMTU_PMTUD; + } else { + tp->t_pmtud_flags &= ~PLPMTU_PMTUD; + } if (tp->t_state == TCPS_SYN_SENT) TCP_PROBE5(connect__request, NULL, tp, ip, tp, th); Index: sys/netinet/tcp_timer.c =================================================================== --- sys/netinet/tcp_timer.c +++ sys/netinet/tcp_timer.c @@ -66,6 +66,9 @@ #include #include #include +#ifdef INET6 +#include +#endif #include #ifdef TCPDEBUG #include @@ -127,6 +130,61 @@ &tcp_rexmit_drop_options, 0, "Drop TCP options from 3rd and later retransmitted SYN"); + +VNET_DECLARE(int, tcp_pmtud_blackhole_detect); +#define V_tcp_pmtud_blackhole_detect VNET(tcp_pmtud_blackhole_detect) + +VNET_DEFINE(int, tcp_pmtud_blackhole_detect) = 0; +SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection, + CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_pmtud_blackhole_detect), 0, + "Path MTU Discovery Black Hole Detection Enabled"); + +VNET_DECLARE(int, tcp_pmtud_blackhole_activated); +#define V_tcp_pmtud_blackhole_activated \ + VNET(tcp_pmtud_blackhole_activated) + +VNET_DEFINE(int, tcp_pmtud_blackhole_activated) = 0; +SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated, + CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_pmtud_blackhole_activated), 0, + "Path MTU Discovery Black Hole Detection, Activation 
Count"); + +VNET_DECLARE(int, tcp_pmtud_blackhole_min_activated); +#define V_tcp_pmtud_blackhole_min_activated \ + VNET(tcp_pmtud_blackhole_min_activated) + +VNET_DEFINE(int, tcp_pmtud_blackhole_min_activated) = 0; +SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_min_activated, + CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_pmtud_blackhole_min_activated), 0, + "Path MTU Discovery Black Hole Detection, Min MSS Activation Count"); + +VNET_DECLARE(int, tcp_pmtud_blackhole_failed); +#define V_tcp_pmtud_blackhole_failed VNET(tcp_pmtud_blackhole_failed) + +VNET_DEFINE(int, tcp_pmtud_blackhole_failed) = 0; +SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed, + CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_pmtud_blackhole_failed), 0, + "Path MTU Discovery Black Hole Detection, Failure Count"); + +#ifdef INET +VNET_DECLARE(int, tcp_pmtud_blackhole_mss); +#define V_tcp_pmtud_blackhole_mss VNET(tcp_pmtud_blackhole_mss) + +VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss, + CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_pmtud_blackhole_mss), 0, + "Path MTU Discovery Black Hole Detection lowered MSS"); +#endif + +#ifdef INET6 +VNET_DECLARE(int, tcp_v6pmtud_blackhole_mss); +#define V_tcp_v6pmtud_blackhole_mss VNET(tcp_v6pmtud_blackhole_mss) + +VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss, + CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0, + "Path MTU Discovery IPv6 Black Hole Detection lowered MSS"); +#endif + #ifdef RSS static int per_cpu_timers = 1; #else @@ -539,6 +597,10 @@ ostate = tp->t_state; #endif +#if defined(INET6) && defined(INET) + int isipv6; +#endif + INP_INFO_RLOCK(&V_tcbinfo); inp = tp->t_inpcb; /* @@ -640,6 +702,111 @@ rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; TCPT_RANGESET(tp->t_rxtcur, rexmt, tp->t_rttmin, TCPTV_REXMTMAX); + + if (V_tcp_pmtud_blackhole_detect && (tp->t_state == TCPS_ESTABLISHED)) { + int 
optlen = 0; + if (((tp->t_pmtud_flags & (PLPMTU_PMTUD|PLPMTU_MAXSEGSNT)) == + (PLPMTU_PMTUD|PLPMTU_MAXSEGSNT)) && + (tp->t_rxtshift <= 2)) { + /* + * Enter Path MTU Black-hole Detection mechanism: + * - Disable Path MTU Discovery (IP "DF" bit). + * - Reduce MTU to lower value than what we + * negotiated with peer. + */ + /* Record that we may have found a black hole */ + tp->t_pmtud_flags |= PLPMTU_BLACKHOLE; + + /* Keep track of previous MSS */ + optlen = tp->t_maxopd - tp->t_maxseg; + tp->t_pmtud_saved_maxopd = tp->t_maxopd; + +#if defined(INET6) && defined(INET) + isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0; + + /* + * Reduce the MSS to blackhole value or to the default + * in an attempt to retransmit. + */ + if (isipv6) { + if (tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) { + /* use the sysctl tuneable blackhole MSS */ + tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss; + V_tcp_pmtud_blackhole_activated++; + } else { + /* use the default MSS */ + tp->t_maxopd = V_tcp_v6mssdflt; + /* + * Disable Path MTU Discovery when we + * switch to the default MSS + */ + tp->t_pmtud_flags &= ~PLPMTU_PMTUD; + V_tcp_pmtud_blackhole_min_activated++; + } + } +#elif defined(INET6) + if (tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) { + /* use the sysctl tuneable blackhole MSS */ + tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss; + V_tcp_pmtud_blackhole_activated++; + } else { + /* use the default MSS */ + tp->t_maxopd = V_tcp_v6mssdflt; + /* + * Disable Path MTU Discovery when we switch to + * the default MSS + */ + tp->t_pmtud_flags &= ~PLPMTU_PMTUD; + V_tcp_pmtud_blackhole_min_activated++; + } +#elif defined(INET) + if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) { + /* use the sysctl tuneable blackhole MSS */ + tp->t_maxopd = V_tcp_pmtud_blackhole_mss; + V_tcp_pmtud_blackhole_activated++; + } else { + /* use the default MSS */ + tp->t_maxopd = V_tcp_mssdflt; + /* + * Disable Path MTU Discovery when we switch to + * the default MSS + */ + tp->t_pmtud_flags &= ~PLPMTU_PMTUD; + 
V_tcp_pmtud_blackhole_min_activated++; + } +#endif /* INET6 vs INET */ + tp->t_maxseg = tp->t_maxopd - optlen; + /* + * Reset the slow-start flight size + * as it may depend on the new MSS + */ + if (CC_ALGO(tp)->conn_init != NULL) + CC_ALGO(tp)->conn_init(tp->ccv); + } else { + /* + * If further retransmissions are still unsuccessful + * with a lowered MTU, maybe this isn't a blackhole and + * we restore the previous MSS and blackhole detection + * flags. + */ + if ((tp->t_pmtud_flags & PLPMTU_BLACKHOLE) && + (tp->t_rxtshift > 4)) { + tp->t_pmtud_flags |= PLPMTU_PMTUD; + tp->t_pmtud_flags &= ~PLPMTU_BLACKHOLE; + optlen = tp->t_maxopd - tp->t_maxseg; + tp->t_maxopd = tp->t_pmtud_saved_maxopd; + tp->t_maxseg = tp->t_maxopd - optlen; + V_tcp_pmtud_blackhole_failed++; + /* + * Reset the slow-start flight size as it + * may depend on the new MSS. + */ + if (CC_ALGO(tp)->conn_init != NULL) + CC_ALGO(tp)->conn_init(tp->ccv); + } + } + } + /* * Disable RFC1323 and SACK if we haven't got any response to * our third SYN to work-around some broken terminal servers Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -200,6 +200,8 @@ u_int t_keepcnt; /* number of keepalives before close */ u_int t_tsomax; /* tso burst length limit */ + u_int t_pmtud_saved_maxopd; /* pre-blackhole t_maxopd */ + u_int t_pmtud_flags; /* PLPMTU flags */ uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */ void *t_pspare2[4]; /* 1 TCP_SIGNATURE, 3 TBD */ @@ -275,6 +277,13 @@ #endif /* TCP_SIGNATURE */ /* + * Flags for PLPMTU handling, t_pmtud_flags + */ +#define PLPMTU_BLACKHOLE 0x00000001 /* possible PLPMTUD Black Hole */ +#define PLPMTU_PMTUD 0x00000002 /* allowed to attempt PLPMTUD */ +#define PLPMTU_MAXSEGSNT 0x00000004 /* last segment sent was full seg */ + +/* * Structure to hold TCP options that are only used during segment * processing (in tcp_input), but not held in the tcpcb. 
* It's basically used to reduce the number of parameters