Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c +++ sys/netinet/tcp_input.c @@ -1615,7 +1615,14 @@ tcp_dooptions(&to, (u_char *)(th + 1), (th->th_off << 2) - sizeof(struct tcphdr), (thflags & TH_SYN) ? TO_SYN : 0); - + if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) { + /* + * We don't look at sack's from the + * peer because the MSS is too small which + * can subject us to an attack. + */ + to.to_flags &= ~TOF_SACK; + } #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if ((tp->t_flags & TF_SIGNATURE) != 0 && (to.to_flags & TOF_SIGNATURE) == 0) { @@ -3883,6 +3890,17 @@ mss = max(mss, 64); tp->t_maxseg = mss; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } + } void @@ -3934,6 +3952,16 @@ * XXXGL: shouldn't we reserve space for IP/IPv6 options? */ tp->t_maxseg = max(mss, 64); + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } SOCKBUF_LOCK(&so->so_rcv); if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe) Index: sys/netinet/tcp_stacks/bbr.c =================================================================== --- sys/netinet/tcp_stacks/bbr.c +++ sys/netinet/tcp_stacks/bbr.c @@ -5134,6 +5134,16 @@ tp->t_flags2 |= TF2_PLPMTU_PMTUD; tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; tp->t_maxseg = tp->t_pmtud_saved_maxseg; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not + * process incoming SACK's since we are + * subject to attack in such a case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } KMOD_TCPSTAT_INC(tcps_pmtud_blackhole_failed); } } @@ -7556,7 +7566,7 @@ * Sort the SACK blocks so we can update the rack scoreboard with * just one pass. */ - new_sb = sack_filter_blks(&bbr->r_ctl.bbr_sf, sack_blocks, + new_sb = sack_filter_blks(tp, &bbr->r_ctl.bbr_sf, sack_blocks, num_sack_blks, th->th_ack); ctf_log_sack_filter(bbr->rc_tp, new_sb, sack_blocks); BBR_STAT_ADD(bbr_sack_blocks, num_sack_blks); @@ -11323,7 +11333,14 @@ tcp_dooptions(&to, (u_char *)(th + 1), (th->th_off << 2) - sizeof(struct tcphdr), (thflags & TH_SYN) ? TO_SYN : 0); - + if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) { + /* + * We don't look at sack's from the + * peer because the MSS is too small which + * can subject us to an attack. + */ + to.to_flags &= ~TOF_SACK; + } /* * If timestamps were negotiated during SYN/ACK and a * segment without a timestamp is received, silently drop @@ -13773,6 +13790,16 @@ if (old_maxseg <= tp->t_maxseg) { /* Huh it did not shrink? */ tp->t_maxseg = old_maxseg - 40; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not + * process incoming SACK's since we are + * subject to attack in such a case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } bbr_log_msgsize_fail(bbr, tp, len, maxseg, mtu, 0, tso, cts); } /* Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -8583,6 +8583,16 @@ tp->t_flags2 |= TF2_PLPMTU_PMTUD; tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; tp->t_maxseg = tp->t_pmtud_saved_maxseg; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not + * process incoming SACK's since we are + * subject to attack in such a case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } KMOD_TCPSTAT_INC(tcps_pmtud_blackhole_failed); } } @@ -11197,7 +11207,7 @@ * If we have some sack blocks in the filter * lets prune them out by calling sfb with no blocks. */ - sack_filter_blks(&rack->r_ctl.rack_sf, NULL, 0, th_ack); + sack_filter_blks(tp, &rack->r_ctl.rack_sf, NULL, 0, th_ack); } if (SEQ_GT(th_ack, tp->snd_una)) { /* Clear any app ack remembered settings */ @@ -12052,7 +12062,7 @@ * just one pass. */ o_cnt = num_sack_blks; - num_sack_blks = sack_filter_blks(&rack->r_ctl.rack_sf, sack_blocks, + num_sack_blks = sack_filter_blks(tp, &rack->r_ctl.rack_sf, sack_blocks, num_sack_blks, th->th_ack); ctf_log_sack_filter(rack->rc_tp, num_sack_blks, sack_blocks); if (sacks_seen != NULL) @@ -17933,7 +17943,14 @@ __func__)); KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", __func__)); - + if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) { + /* + * We don't look at sack's from the + * peer because the MSS is too small which + * can subject us to an attack. + */ + to.to_flags &= ~TOF_SACK; + } if ((tp->t_state >= TCPS_FIN_WAIT_1) && (tp->t_flags & TF_GPUTINPROG)) { /* Index: sys/netinet/tcp_stacks/sack_filter.h =================================================================== --- sys/netinet/tcp_stacks/sack_filter.h +++ sys/netinet/tcp_stacks/sack_filter.h @@ -37,7 +37,7 @@ * will get much benefit beyond 7, in testing * there was a small amount but very very small. */ -#define SACK_FILTER_BLOCKS 7 +#define SACK_FILTER_BLOCKS 15 struct sack_filter { tcp_seq sf_ack; @@ -48,7 +48,7 @@ }; #ifdef _KERNEL void sack_filter_clear(struct sack_filter *sf, tcp_seq seq); -int sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks, +int sack_filter_blks(struct tcpcb *tp, struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack); void sack_filter_reject(struct sack_filter *sf, struct sackblk *in); static inline uint8_t sack_filter_blks_used(struct sack_filter *sf) Index: sys/netinet/tcp_stacks/sack_filter.c =================================================================== --- sys/netinet/tcp_stacks/sack_filter.c +++ sys/netinet/tcp_stacks/sack_filter.c @@ -143,7 +143,7 @@ * if part of it is on the board. */ static int32_t -is_sack_on_board(struct sack_filter *sf, struct sackblk *b) +is_sack_on_board(struct sack_filter *sf, struct sackblk *b, int32_t segmax, uint32_t snd_max) { int32_t i, cnt; @@ -184,6 +184,14 @@ * board |---| * sack |---| */ + if ((b->end != snd_max) && + ((b->end - b->start) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. + */ + return (1); + } goto nxt_blk; } /* Jonathans Rule 3 */ @@ -194,6 +202,14 @@ * board |---| * sack |---| */ + if ((b->end != snd_max) && + ((b->end - b->start) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. + */ + return (1); + } goto nxt_blk; } if (SEQ_LEQ(sf->sf_blks[i].start, b->start)) { @@ -207,10 +223,21 @@ * sack |--------------| * * up with this one (we have part of it). + * * 1) Update the board block to the new end * and * 2) Update the start of this block to my end. + * + * We only do this if the new piece is large enough. */ + if ((b->end != snd_max) && + ((b->end - sf->sf_blks[i].end) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. + */ + return (1); + } b->start = sf->sf_blks[i].end; sf->sf_blks[i].end = b->end; goto nxt_blk; @@ -224,10 +251,21 @@ * * board |----| * sack |----------| + * * 1) Update the board block to the new start * and * 2) Update the start of this block to my end. + * + * We only do this if the new piece is large enough. */ + if ((b->end != snd_max) && + ((sf->sf_blks[i].start - b->start) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. + */ + return (1); + } b->end = sf->sf_blks[i].start; sf->sf_blks[i].start = b->start; goto nxt_blk; @@ -238,14 +276,26 @@ i %= SACK_FILTER_BLOCKS; } /* Did we totally consume it in pieces? */ - if (b->start != b->end) + if (b->start != b->end) { + if ((b->end != snd_max) && + ((b->end - b->start) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. + */ + return (1); + } return(0); - else + } else { + /* + * It was all consumed by the board. + */ return(1); + } } static int32_t -sack_filter_old(struct sack_filter *sf, struct sackblk *in, int numblks) +sack_filter_old(struct sack_filter *sf, struct sackblk *in, int numblks, int32_t segmax, uint32_t snd_max) { int32_t num, i; struct sackblk blkboard[TCP_MAX_SACK]; @@ -258,12 +308,17 @@ * won't add to the board. */ for( i = 0, num = 0; isf_used + num) > SACK_FILTER_BLOCKS) { + /* + * We will over-run the board. We don't allow + * that to occur and drop new data beyond the + * board. This is to protect against sack attacks + * that try to out-run our board. + * + * We do keep up to the limit of the board though. + */ + num = SACK_FILTER_BLOCKS - sf->sf_used; + } + if (num == 0) + return(num); /* Now what we are left with is either * completely merged on to the board * from the above steps, or is new @@ -333,7 +400,7 @@ numblks = num; /* Now go through and add to our board as needed */ for(i=(num-1); i>=0; i--) { - if (is_sack_on_board(sf, &blkboard[i])) { + if (is_sack_on_board(sf, &blkboard[i], segmax, snd_max)) { continue; } /* Add this guy its not listed */ @@ -345,6 +412,7 @@ } #ifndef _KERNEL if (sack_blk_used(sf, sf->sf_cur)) { + /* TSNH with the new changes above */ over_written++; if (sf->sf_used < SACK_FILTER_BLOCKS) empty_avail++; @@ -497,10 +565,11 @@ static #endif int -sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks, +sack_filter_blks(struct tcpcb *tp, struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack) { int32_t i, ret; + int32_t segmax; if (numblks > TCP_MAX_SACK) { #ifdef _KERNEL @@ -518,6 +587,7 @@ if (sf->sf_used > 1) sack_board_collapse(sf); #endif + segmax = (tp->t_maxseg - 40); if ((sf->sf_used == 0) && numblks) { /* * We are brand new add the blocks in @@ -528,6 +598,14 @@ sf->sf_ack = th_ack; for(i=(numblks-1), sf->sf_cur=0; i >= 0; i--) { + if ((in[i].end != tp->snd_max) && + ((in[i].end - in[i].start) < segmax)) { + /* + * We do not accept blocks less than a MSS minus all + * possible options space that is not at max_seg. + */ + continue; + } memcpy(&sf->sf_blks[sf->sf_cur], &in[i], sizeof(struct sackblk)); sf->sf_bits = sack_blk_set(sf, sf->sf_cur); sf->sf_cur++; @@ -549,9 +627,9 @@ } if (numblks) { if (SEQ_GEQ(th_ack, sf->sf_ack)) { - ret = sack_filter_new(sf, in, numblks, th_ack); + ret = sack_filter_new(sf, in, numblks, th_ack, segmax, tp->snd_max); } else { - ret = sack_filter_old(sf, in, numblks); + ret = sack_filter_old(sf, in, numblks, segmax, tp->snd_max); } } else ret = 0; Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -3320,8 +3320,19 @@ so = inp->inp_socket; SOCKBUF_LOCK(&so->so_snd); /* If the mss is larger than the socket buffer, decrease the mss. */ - if (so->so_snd.sb_hiwat < tp->t_maxseg) + if (so->so_snd.sb_hiwat < tp->t_maxseg) { tp->t_maxseg = so->so_snd.sb_hiwat; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } + } SOCKBUF_UNLOCK(&so->so_snd); TCPSTAT_INC(tcps_mturesent); @@ -3454,8 +3465,19 @@ opt = inp->in6p_outputopts; if (opt != NULL && opt->ip6po_minmtu == IP6PO_MINMTU_ALL && - tp->t_maxseg > TCP6_MSS) + tp->t_maxseg > TCP6_MSS) { tp->t_maxseg = TCP6_MSS; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } + } } } #endif /* INET6 */ Index: sys/netinet/tcp_timer.c =================================================================== --- sys/netinet/tcp_timer.c +++ sys/netinet/tcp_timer.c @@ -756,6 +756,16 @@ tp->t_flags2 |= TF2_PLPMTU_PMTUD; tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; tp->t_maxseg = tp->t_pmtud_saved_maxseg; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not + * process incoming SACK's since we are + * subject to attack in such a case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } TCPSTAT_INC(tcps_pmtud_blackhole_failed); /* * Reset the slow-start flight size as it Index: sys/netinet/tcp_usrreq.c =================================================================== --- sys/netinet/tcp_usrreq.c +++ sys/netinet/tcp_usrreq.c @@ -2203,9 +2203,19 @@ INP_WLOCK_RECHECK(inp); if (optval > 0 && optval <= tp->t_maxseg && - optval + 40 >= V_tcp_minmss) + optval + 40 >= V_tcp_minmss) { tp->t_maxseg = optval; - else + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } + } else error = EINVAL; goto unlock_and_done; Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -846,6 +846,7 @@ #define TF2_MBUF_QUEUE_READY 0x00020000 /* Inputs can be queued */ #define TF2_DONT_SACK_QUEUE 0x00040000 /* Don't wake on sack */ #define TF2_CANNOT_DO_ECN 0x00080000 /* The stack does not do ECN */ +#define TF2_PROC_SACK_PROHIBIT 0x00100000 /* Due to small MSS size do not process sack's */ /* * Structure to hold TCP options that are only used during segment