Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -675,6 +675,16 @@
 
 send:
 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
+	/*
+	 * Note whether a full-sized segment is being sent; the retransmit
+	 * timer only suspects a PMTUD black hole when this flag is set.
+	 */
+	if (len > 0) {
+		if (len >= tp->t_maxseg)
+			tp->t_pmtud_flags |= PLPMTU_MAXSEGSNT;
+		else
+			tp->t_pmtud_flags &= ~PLPMTU_MAXSEGSNT;
+	}
 	/*
 	 * Before ESTABLISHED, force sending of initial options
 	 * unless TCP set not to do any options.
@@ -1219,8 +1229,12 @@
 	 *
 	 * NB: Don't set DF on small MTU/MSS to have a safe fallback.
 	 */
-	if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
+	if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss) {
 		ip->ip_off |= htons(IP_DF);
+		tp->t_pmtud_flags |= PLPMTU_PMTUD;
+	} else {
+		tp->t_pmtud_flags &= ~PLPMTU_PMTUD;
+	}
 
 	if (tp->t_state == TCPS_SYN_SENT)
 		TCP_PROBE5(connect__request, NULL, tp, ip, tp, th);
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -66,6 +66,9 @@
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
 #include <netinet/tcpip.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
@@ -127,6 +130,21 @@
     &tcp_rexmit_drop_options, 0,
     "Drop TCP options from 3rd and later retransmitted SYN");
 
+int	tcp_pmtud_black_hole_detect = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
+    CTLFLAG_RW, &tcp_pmtud_black_hole_detect, 0,
+    "Path MTU Discovery Black Hole Detection");
+
+int	tcp_pmtud_black_hole_mss = 1200;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
+    CTLFLAG_RW, &tcp_pmtud_black_hole_mss, 0,
+    "Path MTU Discovery Black Hole Detection lowered MSS");
+
+int	tcp_v6pmtud_black_hole_mss = 1200;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
+    CTLFLAG_RW, &tcp_v6pmtud_black_hole_mss, 0,
+    "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
+
 #ifdef RSS
 static int per_cpu_timers = 1;
 #else
@@ -539,6 +557,9 @@
 
 	ostate = tp->t_state;
 #endif
+#ifdef INET6
+	int isipv6 = 0;
+#endif /* INET6 */
 	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	/*
@@ -633,6 +654,7 @@
 		tp->t_flags |= TF_PREVVALID;
 	} else
 		tp->t_flags &= ~TF_PREVVALID;
+
 	TCPSTAT_INC(tcps_rexmttimeo);
 	if (tp->t_state == TCPS_SYN_SENT)
 		rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
@@ -640,6 +662,92 @@
 		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
 	TCPT_RANGESET(tp->t_rxtcur, rexmt,
 		      tp->t_rttmin, TCPTV_REXMTMAX);
+
+	if (tcp_pmtud_black_hole_detect && (tp->t_state == TCPS_ESTABLISHED)) {
+		int optlen = 0;
+		if (((tp->t_pmtud_flags & (PLPMTU_PMTUD|PLPMTU_MAXSEGSNT)) ==
+		    (PLPMTU_PMTUD|PLPMTU_MAXSEGSNT)) &&
+		    (tp->t_rxtshift <= 2)) {
+			/* Lowered and default MSS; IPv4 unless overridden */
+			int blackhole_mss = tcp_pmtud_black_hole_mss;
+			int default_mss = V_tcp_mssdflt;
+
+#ifdef INET6
+			/*
+			 * Look at the inpcb here rather than in the
+			 * function-entry initializer, which ran before
+			 * INP_INFO_RLOCK() and before inp was fetched.
+			 */
+			isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV4) == 0;
+			if (isipv6) {
+				blackhole_mss = tcp_v6pmtud_black_hole_mss;
+				default_mss = V_tcp_v6mssdflt;
+			}
+#endif /* INET6 */
+			/*
+			 * Enter Path MTU Black-hole Detection mechanism:
+			 * - Disable Path MTU Discovery (IP "DF" bit).
+			 * - Reduce MTU to lower value than what we
+			 *   negotiated with peer.
+			 */
+			/* Disable Path MTU Discovery for now */
+			tp->t_pmtud_flags &= ~PLPMTU_PMTUD;
+			optlen = tp->t_maxopd - tp->t_maxseg;
+			/*
+			 * tcp_output() sets PLPMTU_PMTUD again whenever it
+			 * sets DF, so this branch can run on successive
+			 * timeouts.  Save the MSS only on the first entry,
+			 * or the restore path would bring back an already
+			 * lowered value.
+			 */
+			if ((tp->t_pmtud_flags & PLPMTU_BLACKHOLE) == 0) {
+				/* Record that we may have found a black hole */
+				tp->t_pmtud_flags |= PLPMTU_BLACKHOLE;
+				/* Keep track of previous MSS */
+				tp->t_pmtud_saved_maxopd = tp->t_maxopd;
+			}
+			/*
+			 * Reduce the MSS to intermediary value, judged
+			 * against this address family's own threshold
+			 */
+			if (tp->t_maxopd > blackhole_mss) {
+				/* use the sysctl tuneable blackhole MSS */
+				tp->t_maxopd = blackhole_mss;
+			} else {
+				/* use the default MSS */
+				tp->t_maxopd = default_mss;
+			}
+			tp->t_maxseg = tp->t_maxopd - optlen;
+			/*
+			 * Reset the slow-start flight size
+			 * as it may depend on the new MSS
+			 */
+			if (CC_ALGO(tp)->conn_init != NULL)
+				CC_ALGO(tp)->conn_init(tp->ccv);
+		}
+		/*
+		 * If further retransmissions are still unsuccessful with a
+		 * lowered MTU, maybe this isn't a Black Hole and we restore
+		 * the previous MSS and blackhole detection flags.
+		 */
+		else {
+			if ((tp->t_pmtud_flags & PLPMTU_BLACKHOLE) &&
+			    (tp->t_rxtshift > 4)) {
+				tp->t_pmtud_flags |= PLPMTU_PMTUD;
+				tp->t_pmtud_flags &= ~PLPMTU_BLACKHOLE;
+				optlen = tp->t_maxopd - tp->t_maxseg;
+				tp->t_maxopd = tp->t_pmtud_saved_maxopd;
+				tp->t_maxseg = tp->t_maxopd - optlen;
+				/*
+				 * Reset the slow-start flight size as it
+				 * may depend on the new MSS
+				 */
+				if (CC_ALGO(tp)->conn_init != NULL)
+					CC_ALGO(tp)->conn_init(tp->ccv);
+			}
+		}
+	}
+
 	/*
 	 * Disable RFC1323 and SACK if we haven't got any response to
 	 * our third SYN to work-around some broken terminal servers
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -200,8 +200,10 @@
 	u_int	t_keepcnt;		/* number of keepalives before close */
 
 	u_int	t_tsomax;		/* tso burst length limit */
+	u_int	t_pmtud_saved_maxopd;	/* pre-blackhole MSS */
+	u_int	t_pmtud_flags;		/* PLPMTU flags */
 
-	uint32_t t_ispare[8];		/* 5 UTO, 3 TBD */
+	uint32_t t_ispare[6];		/* 5 UTO, 1 TBD */
 	void	*t_pspare2[4];		/* 1 TCP_SIGNATURE, 3 TBD */
 	uint64_t _pad[6];		/* 6 TBD (1-2 CC/RTT?) */
 };
@@ -275,6 +277,15 @@
 #endif /* TCP_SIGNATURE */
 
 /*
+ * Flags for PLPMTU handling, t_pmtud_flags.  PLPMTU_PMTUD and
+ * PLPMTU_MAXSEGSNT are set in tcp_output(); PLPMTU_BLACKHOLE is set
+ * by the retransmit timer code in tcp_timer.c.
+ */
+#define	PLPMTU_BLACKHOLE	0x00000001 /* possible PLPMTUD Black Hole */
+#define	PLPMTU_PMTUD		0x00000002 /* allowed to attempt PLPMTUD */
+#define	PLPMTU_MAXSEGSNT	0x00000004 /* last segment sent was full seg */
+
+/*
  * Structure to hold TCP options that are only used during segment
  * processing (in tcp_input), but not held in the tcpcb.
  * It's basically used to reduce the number of parameters