Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c +++ sys/netinet/tcp_input.c @@ -1615,7 +1615,14 @@ tcp_dooptions(&to, (u_char *)(th + 1), (th->th_off << 2) - sizeof(struct tcphdr), (thflags & TH_SYN) ? TO_SYN : 0); - + if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) { + /* + * We don't look at sack's from the + * peer because the MSS is too small which + * can subject us to an attack. + */ + to.to_flags &= ~TOF_SACK; + } #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if ((tp->t_flags & TF_SIGNATURE) != 0 && (to.to_flags & TOF_SIGNATURE) == 0) { @@ -3883,6 +3890,17 @@ mss = max(mss, 64); tp->t_maxseg = mss; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } + } void @@ -3934,6 +3952,16 @@ * XXXGL: shouldn't we reserve space for IP/IPv6 options? */ tp->t_maxseg = max(mss, 64); + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } SOCKBUF_LOCK(&so->so_rcv); if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe) Index: sys/netinet/tcp_stacks/bbr.c =================================================================== --- sys/netinet/tcp_stacks/bbr.c +++ sys/netinet/tcp_stacks/bbr.c @@ -5134,6 +5134,16 @@ tp->t_flags2 |= TF2_PLPMTU_PMTUD; tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; tp->t_maxseg = tp->t_pmtud_saved_maxseg; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not + * process incoming SACK's since we are + * subject to attack in such a case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } KMOD_TCPSTAT_INC(tcps_pmtud_blackhole_failed); } } @@ -7556,7 +7566,7 @@ * Sort the SACK blocks so we can update the rack scoreboard with * just one pass. */ - new_sb = sack_filter_blks(&bbr->r_ctl.bbr_sf, sack_blocks, + new_sb = sack_filter_blks(tp, &bbr->r_ctl.bbr_sf, sack_blocks, num_sack_blks, th->th_ack); ctf_log_sack_filter(bbr->rc_tp, new_sb, sack_blocks); BBR_STAT_ADD(bbr_sack_blocks, num_sack_blks); @@ -11323,7 +11333,14 @@ tcp_dooptions(&to, (u_char *)(th + 1), (th->th_off << 2) - sizeof(struct tcphdr), (thflags & TH_SYN) ? TO_SYN : 0); - + if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) { + /* + * We don't look at sack's from the + * peer because the MSS is too small which + * can subject us to an attack. + */ + to.to_flags &= ~TOF_SACK; + } /* * If timestamps were negotiated during SYN/ACK and a * segment without a timestamp is received, silently drop @@ -13773,6 +13790,16 @@ if (old_maxseg <= tp->t_maxseg) { /* Huh it did not shrink? */ tp->t_maxseg = old_maxseg - 40; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not + * process incoming SACK's since we are + * subject to attack in such a case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } bbr_log_msgsize_fail(bbr, tp, len, maxseg, mtu, 0, tso, cts); } /* Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -8583,6 +8583,16 @@ tp->t_flags2 |= TF2_PLPMTU_PMTUD; tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; tp->t_maxseg = tp->t_pmtud_saved_maxseg; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not + * process incoming SACK's since we are + * subject to attack in such a case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } KMOD_TCPSTAT_INC(tcps_pmtud_blackhole_failed); } } @@ -11197,7 +11207,7 @@ * If we have some sack blocks in the filter * lets prune them out by calling sfb with no blocks. */ - sack_filter_blks(&rack->r_ctl.rack_sf, NULL, 0, th_ack); + sack_filter_blks(tp, &rack->r_ctl.rack_sf, NULL, 0, th_ack); } if (SEQ_GT(th_ack, tp->snd_una)) { /* Clear any app ack remembered settings */ @@ -12052,7 +12062,7 @@ * just one pass. */ o_cnt = num_sack_blks; - num_sack_blks = sack_filter_blks(&rack->r_ctl.rack_sf, sack_blocks, + num_sack_blks = sack_filter_blks(tp, &rack->r_ctl.rack_sf, sack_blocks, num_sack_blks, th->th_ack); ctf_log_sack_filter(rack->rc_tp, num_sack_blks, sack_blocks); if (sacks_seen != NULL) @@ -17933,7 +17943,14 @@ __func__)); KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", __func__)); - + if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) { + /* + * We don't look at sack's from the + * peer because the MSS is too small which + * can subject us to an attack. + */ + to.to_flags &= ~TOF_SACK; + } if ((tp->t_state >= TCPS_FIN_WAIT_1) && (tp->t_flags & TF_GPUTINPROG)) { /* Index: sys/netinet/tcp_stacks/sack_filter.h =================================================================== --- sys/netinet/tcp_stacks/sack_filter.h +++ sys/netinet/tcp_stacks/sack_filter.h @@ -25,19 +25,84 @@ * SUCH DAMAGE. */ -/* - * Seven entry's is carefully choosen to - * fit in one cache line. We can easily - * change this to 15 (but it gets very - * little extra filtering). To change it - * to be larger than 15 would require either - * sf_bits becoming a uint32_t and then you - * could go to 31.. or change it to a full - * bitstring.. It is really doubtful you - * will get much benefit beyond 7, in testing - * there was a small amount but very very small. +/** + * + * The Sack filter is designed to do two functions, first it trys to reduce + * the processing of sacks. Consider that often times you get something like + * + * ack 1 (sack 100:200) + * ack 1 (sack 100:300) + * ack 1 (sack(100:400) + * + * You really want to process the 100:200 and then on the next sack process + * only 200:300 (the new data) and then finally on the third 300:400. The filter + * removes from your processing routines the already processed sack information so + * that after the filter completes you only have "new" sacks that you have not + * processed. This saves computation time so you do not need to worry about + * previously processed sack information. + * + * The second thing that the sack filter does is help protect against malicious + * attackers that are trying to attack any linked lists (or other data structures) + * that are used in sack processing. Consider an attacker sending in sacks for + * every other byte of data outstanding. This could in theory drastically split + * up any scoreboard you are maintaining and make you search through a very large + * linked list (or other structure) eating up CPU. If you split far enough and + * fracture your data structure enough you could potentially be crippled by a malicious + * peer. How the filter works here is it filters out sacks that are less than an MSS. + * We do this because generally a packet (aka MSS) should be kept whole. The only place + * we allow a smaller SACK is when the SACK touches the end of our socket buffer. This allows + * TLP to still work properly and yet protects us from splitting. The filter also only allows + * a set number of splits (defined in SACK_FILTER_BLOCKS). If more than that many sacks locations + * are being sent we discard additional ones until the earlier holes are filled up. The maximum + * the current filter can be is 15, which we have moved to since we want to be as generous as + * possible with allowing for loss. However, in previous testing of the filter it was found + * that there was very little benefit from moving from 7 to 15 sack points. Though at + * that previous set of tests, we would just discard earlier information in the filter. Now + * that we do not do that i.e. discard information and instead drop sack data we have raised + * the value to the max i.e. 15. If you want to expand beyond 15 one would have to either increase + * the size of the sf_bits to a uint32_t which could then get you a maximum of 31 splits or + * move to a true bitstring. If this is done however it further increases your risk to + * sack attacks, the bigger the number of splits (filter blocks) that are allowed + * the larger your processing arrays will grow as well as the filter. + * + * Note that this protection does not prevent an attacker from asking for a 20 byte + * MSS, that protection must be done elsewhere during the negotiation of the connection + * and is done now by just ignoring sack's from connections with too small of MSS which + * prevents sack from working and thus makes the connection less efficient but protects + * the system from harm. + * + * We may actually want to consider dropping the size of the array back to 7 to further + * protect the system which would be a more cautious approach. + * + * TCP Developer information: + * + * To use the sack filter its actually pretty simple. All you do is the normal sorting + * and sanity checks of your sacks but then after that you call out to sack_filter_blks() + * passing in the tcpcb, the sack-filter you are using (memory you have allocated) the + * pointer to the sackblk array and how many sorted valid blocks there are as well + * as what the new th_ack point is. The filter will return to you the number of + * blocks left after filtering. It will reshape the blocks based on the previous + * sacks you have received and processed. If sack_filter_blks() returns 0 then no + * new sack data is present to be processed. + * + * Whenever you reach the point of snd_una == snd_max, you should call sack_filter_clear with + * the snd_una point. You also need to call this if you invalidate your sack array for any + * reason (such as RTO's or MTU changes or some other thing that makes you think all + * data is now un-acknowledged). You can also pass in sack_filter_blks(tp, sf, NULL, 0, th_ack) to + * advance the cum-ack point. You can use sack_filter_blks_used(sf) to determine if you have filter blocks as + * well. So putting these two together, anytime the cum-ack moves forward you probably want to + * do: + * if (sack_filter_blks_used(sf)) + * sack_filter_blks(tp, sf, NULL, 0, th_ack); + * + * If for some reason you have ran the sack-filter and something goes wrong (you can't allocate space + * for example to split your sack-array. You can "undo" the data within the sack filter by calling + * sack_filter_rject(sf, in) passing in the list of blocks to be "removed" from the sack-filter. + * You can see an example of this use in bbr.c though rack.c has never found it needed. + * */ -#define SACK_FILTER_BLOCKS 7 + +#define SACK_FILTER_BLOCKS 15 struct sack_filter { tcp_seq sf_ack; @@ -48,7 +113,7 @@ }; #ifdef _KERNEL void sack_filter_clear(struct sack_filter *sf, tcp_seq seq); -int sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks, +int sack_filter_blks(struct tcpcb *tp, struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack); void sack_filter_reject(struct sack_filter *sf, struct sackblk *in); static inline uint8_t sack_filter_blks_used(struct sack_filter *sf) Index: sys/netinet/tcp_stacks/sack_filter.c =================================================================== --- sys/netinet/tcp_stacks/sack_filter.c +++ sys/netinet/tcp_stacks/sack_filter.c @@ -35,7 +35,13 @@ #include #endif #include +#ifdef _KERNEL #include +#else +struct inpcb { + uint32_t stuff; +}; +#endif #include #include #include @@ -86,9 +92,9 @@ int highest_used=0; int over_written=0; int empty_avail=0; -int no_collapse = 0; FILE *out = NULL; FILE *in = NULL; + #endif #define sack_blk_used(sf, i) ((1 << i) & sf->sf_bits) @@ -96,6 +102,13 @@ #define sack_blk_clr(sf, i) (~(1 << i) & sf->sf_bits) #ifndef _KERNEL + +static u_int tcp_fixed_maxseg(const struct tcpcb *tp) +{ + /* Lets pretend their are timestamps on for user space */ + return (tp->t_maxseg - 12); +} + static #endif void @@ -118,7 +131,7 @@ /* start with the oldest */ for (i = 0; i < SACK_FILTER_BLOCKS; i++) { if (sack_blk_used(sf, i)) { - if (SEQ_GT(th_ack, sf->sf_blks[i].end)) { + if (SEQ_GEQ(th_ack, sf->sf_blks[i].end)) { /* This block is consumed */ sf->sf_bits = sack_blk_clr(sf, i); sf->sf_used--; @@ -143,23 +156,27 @@ * if part of it is on the board. */ static int32_t -is_sack_on_board(struct sack_filter *sf, struct sackblk *b) +is_sack_on_board(struct sack_filter *sf, struct sackblk *b, int32_t segmax, uint32_t snd_max) { int32_t i, cnt; + int span_cnt = 0; + uint32_t span_start, span_end; + if (SEQ_LT(b->start, sf->sf_ack)) { + /* Behind cum-ack update */ + b->start = sf->sf_ack; + } + if (SEQ_LT(b->end, sf->sf_ack)) { + /* End back behind too */ + b->end = sf->sf_ack; + } + if (b->start == b->end) { + return(1); + } + span_start = b->start; + span_end = b->end; for (i = sf->sf_cur, cnt=0; cnt < SACK_FILTER_BLOCKS; cnt++) { if (sack_blk_used(sf, i)) { - if (SEQ_LT(b->start, sf->sf_ack)) { - /* Behind cum-ack update */ - b->start = sf->sf_ack; - } - if (SEQ_LT(b->end, sf->sf_ack)) { - /* End back behind too */ - b->end = sf->sf_ack; - } - if (b->start == b->end) { - return(1); - } /* Jonathans Rule 1 */ if (SEQ_LEQ(sf->sf_blks[i].start, b->start) && SEQ_GEQ(sf->sf_blks[i].end, b->end)) { @@ -184,6 +201,15 @@ * board |---| * sack |---| */ + if ((b->end != snd_max) && + (span_cnt < 2) && + ((b->end - b->start) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. + */ + return (1); + } goto nxt_blk; } /* Jonathans Rule 3 */ @@ -194,6 +220,16 @@ * board |---| * sack |---| */ + if ((b->end != snd_max) && + (sf->sf_blks[i].end != snd_max) && + (span_cnt < 2) && + ((b->end - b->start) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. + */ + return (1); + } goto nxt_blk; } if (SEQ_LEQ(sf->sf_blks[i].start, b->start)) { @@ -207,12 +243,36 @@ * sack |--------------| * * up with this one (we have part of it). + * * 1) Update the board block to the new end * and * 2) Update the start of this block to my end. + * + * We only do this if the new piece is large enough. */ + if (((b->end != snd_max) || (sf->sf_blks[i].end == snd_max)) && + (span_cnt == 0) && + ((b->end - sf->sf_blks[i].end) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. + */ + return (1); + } b->start = sf->sf_blks[i].end; sf->sf_blks[i].end = b->end; + if (span_cnt == 0) { + span_start = sf->sf_blks[i].start; + span_end = sf->sf_blks[i].end; + } else { + if (SEQ_LT(span_start, sf->sf_blks[i].start)) { + span_start = sf->sf_blks[i].start; + } + if (SEQ_GT(span_end, sf->sf_blks[i].end)) { + span_end = sf->sf_blks[i].end; + } + } + span_cnt++; goto nxt_blk; } if (SEQ_GEQ(sf->sf_blks[i].end, b->end)) { @@ -224,12 +284,36 @@ * * board |----| * sack |----------| + * * 1) Update the board block to the new start * and * 2) Update the start of this block to my end. + * + * We only do this if the new piece is large enough. */ + if (((b->end != snd_max) || (sf->sf_blks[i].end == snd_max)) && + (span_cnt == 0) && + ((sf->sf_blks[i].start - b->start) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. + */ + return (1); + } b->end = sf->sf_blks[i].start; sf->sf_blks[i].start = b->start; + if (span_cnt == 0) { + span_start = sf->sf_blks[i].start; + span_end = sf->sf_blks[i].end; + } else { + if (SEQ_LT(span_start, sf->sf_blks[i].start)) { + span_start = sf->sf_blks[i].start; + } + if (SEQ_GT(span_end, sf->sf_blks[i].end)) { + span_end = sf->sf_blks[i].end; + } + } + span_cnt++; goto nxt_blk; } } @@ -238,46 +322,23 @@ i %= SACK_FILTER_BLOCKS; } /* Did we totally consume it in pieces? */ - if (b->start != b->end) - return(0); - else - return(1); -} - -static int32_t -sack_filter_old(struct sack_filter *sf, struct sackblk *in, int numblks) -{ - int32_t num, i; - struct sackblk blkboard[TCP_MAX_SACK]; - /* - * An old sack has arrived. It may contain data - * we do not have. We might not have it since - * we could have had a lost ack we might have the - * entire thing on our current board. We want to prune - * off anything we have. With this function though we - * won't add to the board. - */ - for( i = 0, num = 0; istart != b->end) { + if ((b->end != snd_max) && + ((b->end - b->start) < segmax) && + ((span_end - span_start) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. + */ + return (1); } - /* Did not find it (or found only - * a piece of it). Copy it to - * our outgoing board. + return(0); + } else { + /* + * It was all consumed by the board. */ - memcpy(&blkboard[num], &in[i], sizeof(struct sackblk)); -#ifndef _KERNEL - cnt_used_oldsack++; -#endif - num++; - } - if (num) { - memcpy(in, blkboard, (num * sizeof(struct sackblk))); + return(1); } - return (num); } /* @@ -303,54 +364,53 @@ } static int32_t -sack_filter_new(struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack) +sack_filter_run(struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack, int32_t segmax, uint32_t snd_max) { struct sackblk blkboard[TCP_MAX_SACK]; - int32_t num, i; + int32_t num, i, room, at; /* * First lets trim the old and possibly * throw any away we have. */ for(i=0, num=0; i=0; i--) { - if (is_sack_on_board(sf, &blkboard[i])) { - continue; + room = SACK_FILTER_BLOCKS - sf->sf_used; + if (room < 1) + return (0); + /* + * Now lets walk through our filtered blkboard (the previous loop + * trimmed off anything on the board we already have so anything + * in blkboard is unique and not seen before) if there is room we copy + * it back out and place a new entry on our board. + */ + for(i=0, at=0; isf_cur++; sf->sf_cur %= SACK_FILTER_BLOCKS; if ((sack_blk_used(sf, sf->sf_cur)) && (sf->sf_used < SACK_FILTER_BLOCKS)) { sack_move_to_empty(sf, sf->sf_cur); } -#ifndef _KERNEL - if (sack_blk_used(sf, sf->sf_cur)) { - over_written++; - if (sf->sf_used < SACK_FILTER_BLOCKS) - empty_avail++; - } -#endif - memcpy(&sf->sf_blks[sf->sf_cur], &in[i], sizeof(struct sackblk)); + memcpy(&sf->sf_blks[sf->sf_cur], &blkboard[i], sizeof(struct sackblk)); if (sack_blk_used(sf, sf->sf_cur) == 0) { sf->sf_used++; #ifndef _KERNEL @@ -360,7 +420,26 @@ sf->sf_bits = sack_blk_set(sf, sf->sf_cur); } } - return(numblks); + return(at); +} + +/* + * Collapse entry src into entry into + * and free up the src entry afterwards. + */ +static void +sack_collapse(struct sack_filter *sf, int32_t src, int32_t into) +{ + if (SEQ_LT(sf->sf_blks[src].start, sf->sf_blks[into].start)) { + /* src has a lower starting point */ + sf->sf_blks[into].start = sf->sf_blks[src].start; + } + if (SEQ_GT(sf->sf_blks[src].end, sf->sf_blks[into].end)) { + /* src has a higher ending point */ + sf->sf_blks[into].end = sf->sf_blks[src].end; + } + sf->sf_bits = sack_blk_clr(sf, src); + sf->sf_used--; } /* @@ -415,25 +494,6 @@ return (-1); } -/* - * Collapse entry src into entry into - * and free up the src entry afterwards. - */ -static void -sack_collapse(struct sack_filter *sf, int32_t src, int32_t into) -{ - if (SEQ_LT(sf->sf_blks[src].start, sf->sf_blks[into].start)) { - /* src has a lower starting point */ - sf->sf_blks[into].start = sf->sf_blks[src].start; - } - if (SEQ_GT(sf->sf_blks[src].end, sf->sf_blks[into].end)) { - /* src has a higher ending point */ - sf->sf_blks[into].end = sf->sf_blks[src].end; - } - sf->sf_bits = sack_blk_clr(sf, src); - sf->sf_used--; -} - static void sack_board_collapse(struct sack_filter *sf) { @@ -485,9 +545,12 @@ for(i=0; isf_blks[i].start, - sf->sf_blks[i].end); + fprintf(out, "Entry:%d start:%u end:%u the block is %s\n", + i, + sf->sf_blks[i].start, + sf->sf_blks[i].end, + (sack_blk_used(sf, i) ? "USED" : "NOT-USED") + ); } } } @@ -497,10 +560,11 @@ static #endif int -sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks, +sack_filter_blks(struct tcpcb *tp, struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack) { int32_t i, ret; + int32_t segmax; if (numblks > TCP_MAX_SACK) { #ifdef _KERNEL @@ -510,14 +574,10 @@ #endif return(numblks); } -#ifndef _KERNEL - if ((sf->sf_used > 1) && (no_collapse == 0)) - sack_board_collapse(sf); - -#else if (sf->sf_used > 1) sack_board_collapse(sf); -#endif + + segmax = tcp_fixed_maxseg(tp); if ((sf->sf_used == 0) && numblks) { /* * We are brand new add the blocks in @@ -527,7 +587,15 @@ int cnt_added = 0; sf->sf_ack = th_ack; - for(i=(numblks-1), sf->sf_cur=0; i >= 0; i--) { + for(i=0, sf->sf_cur=0; isnd_max) && + ((in[i].end - in[i].start) < segmax)) { + /* + * We do not accept blocks less than a MSS minus all + * possible options space that is not at max_seg. + */ + continue; + } memcpy(&sf->sf_blks[sf->sf_cur], &in[i], sizeof(struct sackblk)); sf->sf_bits = sack_blk_set(sf, sf->sf_cur); sf->sf_cur++; @@ -548,11 +616,9 @@ sack_filter_prune(sf, th_ack); } if (numblks) { - if (SEQ_GEQ(th_ack, sf->sf_ack)) { - ret = sack_filter_new(sf, in, numblks, th_ack); - } else { - ret = sack_filter_old(sf, in, numblks); - } + ret = sack_filter_run(sf, in, numblks, th_ack, segmax, tp->snd_max); + if (sf->sf_used > 1) + sack_board_collapse(sf); } else ret = 0; return (ret); @@ -625,7 +691,8 @@ char buffer[512]; struct sackblk blks[TCP_MAX_SACK]; FILE *err; - tcp_seq th_ack, snd_una, snd_max = 0; + tcp_seq th_ack; + struct tcpcb tp; struct sack_filter sf; int32_t numblks,i; int snd_una_set=0; @@ -638,10 +705,13 @@ in = stdin; out = stdout; - while ((i = getopt(argc, argv, "ndIi:o:?h")) != -1) { + memset(&tp, 0, sizeof(tp)); + tp.t_maxseg = 1460; + + while ((i = getopt(argc, argv, "dIi:o:?hS:")) != -1) { switch (i) { - case 'n': - no_collapse = 1; + case 'S': + tp.t_maxseg = strtol(optarg, NULL, 0); break; case 'd': detailed_dump = 1; @@ -666,7 +736,7 @@ default: case '?': case 'h': - fprintf(stderr, "Use %s [ -i infile -o outfile -I]\n", argv[0]); + fprintf(stderr, "Use %s [ -i infile -o outfile -I -S maxseg -n -d ]\n", argv[0]); return(0); break; }; @@ -679,28 +749,28 @@ while (fgets(buffer, sizeof(buffer), in) != NULL) { sprintf(line_buf[line_buf_at], "%s", buffer); line_buf_at++; - if (strncmp(buffer, "QUIT", 4) == 0) { + if (strncmp(buffer, "quit", 4) == 0) { break; - } else if (strncmp(buffer, "DUMP", 4) == 0) { + } else if (strncmp(buffer, "dump", 4) == 0) { sack_filter_dump(out, &sf); - } else if (strncmp(buffer, "MAX:", 4) == 0) { - snd_max = strtoul(&buffer[4], NULL, 0); - } else if (strncmp(buffer, "COMMIT", 6) == 0) { + } else if (strncmp(buffer, "max:", 4) == 0) { + tp.snd_max = strtoul(&buffer[4], NULL, 0); + } else if (strncmp(buffer, "commit", 6) == 0) { int nn, ii; if (numblks) { uint32_t szof, tot_chg; + printf("Dumping line buffer (lines:%d)\n", line_buf_at); for(ii=0; ii chg_remembered)){ @@ -724,20 +794,21 @@ ); } sack_chg = chg_remembered = 0; - } else if (strncmp(buffer, "RXT", 3) == 0) { - sack_filter_clear(&sf, snd_una); - } else if (strncmp(buffer, "ACK:", 4) == 0) { + } else if (strncmp(buffer, "rxt", 3) == 0) { + sack_filter_clear(&sf, tp.snd_una); + } else if (strncmp(buffer, "ack:", 4) == 0) { th_ack = strtoul(&buffer[4], NULL, 0); if (snd_una_set == 0) { - snd_una = th_ack; + tp.snd_una = th_ack; snd_una_set = 1; - } else if (SEQ_GT(th_ack, snd_una)) { - snd_una = th_ack; + } else if (SEQ_GT(th_ack, tp.snd_una)) { + tp.snd_una = th_ack; } - } else if (strncmp(buffer, "EXIT", 4) == 0) { - sack_filter_clear(&sf, snd_una); + sack_filter_blks(&tp, &sf, NULL, 0, th_ack); + } else if (strncmp(buffer, "exit", 4) == 0) { + sack_filter_clear(&sf, tp.snd_una); sack_chg = chg_remembered = 0; - } else if (strncmp(buffer, "SACK:", 5) == 0) { + } else if (strncmp(buffer, "sack:", 5) == 0) { char *end=NULL; uint32_t start; uint32_t endv; @@ -749,8 +820,8 @@ fprintf(out, "--Sack invalid skip 0 start:%u : ??\n", start); continue; } - if (SEQ_GT(endv, snd_max)) - snd_max = endv; + if (SEQ_GT(endv, tp.snd_max)) + tp.snd_max = endv; if (SEQ_LT(endv, start)) { fprintf(out, "--Sack invalid skip 1 endv:%u < start:%u\n", endv, start); continue; @@ -762,7 +833,7 @@ blks[numblks].start = start; blks[numblks].end = endv; numblks++; - } else if (strncmp(buffer, "REJ:n:n", 4) == 0) { + } else if (strncmp(buffer, "rej:n:n", 4) == 0) { struct sackblk in; char *end=NULL; @@ -772,18 +843,63 @@ sack_filter_reject(&sf, &in); } else fprintf(out, "Invalid input END:A:B\n"); - } else if (strncmp(buffer, "HELP", 4) == 0) { + } else if (strncmp(buffer, "save", 4) == 0) { + FILE *io; + + io = fopen("sack_setup.bin", "w+"); + if (io != NULL) { + if (fwrite(&sf, sizeof(sf), 1, io) != 1) { + printf("Failed to write out sf data\n"); + unlink("sack_setup.bin"); + goto outwrite; + } + if (fwrite(&tp, sizeof(tp), 1, io) != 1) { + printf("Failed to write out tp data\n"); + unlink("sack_setup.bin"); + } else + printf("Save completed\n"); + outwrite: + fclose(io); + } else { + printf("failed to open sack_setup.bin for writting .. sorry\n"); + } + } else if (strncmp(buffer, "restore", 7) == 0) { + FILE *io; + + io = fopen("sack_setup.bin", "r"); + if (io != NULL) { + if (fread(&sf, sizeof(sf), 1, io) != 1) { + printf("Failed to read out sf data\n"); + goto outread; + } + if (fread(&tp, sizeof(tp), 1, io) != 1) { + printf("Failed to read out tp data\n"); + } else { + printf("Restore completed\n"); + sack_filter_dump(out, &sf); + } + outread: + fclose(io); + } else { + printf("can't open sack_setup.bin -- sorry no load\n"); + } + + } else if (strncmp(buffer, "help", 4) == 0) { +help: fprintf(out, "You can input:\n"); - fprintf(out, "SACK:S:E -- to define a sack block\n"); - fprintf(out, "RXT -- to clear the filter without changing the remembered\n"); - fprintf(out, "EXIT -- To clear the sack filter and start all fresh\n"); - fprintf(out, "ACK:N -- To advance the cum-ack to N\n"); - fprintf(out, "MAX:N -- To set send-max to N\n"); - fprintf(out, "COMMIT -- To apply the sack you built to the filter and dump the filter\n"); - fprintf(out, "DUMP -- To display the current contents of the sack filter\n"); - fprintf(out, "QUIT -- To exit this program\n"); + fprintf(out, "sack:S:E -- to define a sack block\n"); + fprintf(out, "rxt -- to clear the filter without changing the remembered\n"); + fprintf(out, "save -- save current state to sack_setup.bin\n"); + fprintf(out, "restore -- restore state from sack_setup.bin\n"); + fprintf(out, "exit -- To clear the sack filter and start all fresh\n"); + fprintf(out, "ack:N -- To advance the cum-ack to N\n"); + fprintf(out, "max:N -- To set send-max to N\n"); + fprintf(out, "commit -- To apply the sack you built to the filter and dump the filter\n"); + fprintf(out, "dump -- To display the current contents of the sack filter\n"); + fprintf(out, "quit -- To exit this program\n"); } else { fprintf(out, "Command %s unknown\n", buffer); + goto help; } memset(buffer, 0, sizeof(buffer)); } Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -1793,6 +1793,7 @@ * * NOTE: If m != NULL, then th must point to *inside* the mbuf. */ + void tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, tcp_seq ack, tcp_seq seq, uint16_t flags) @@ -3320,8 +3321,19 @@ so = inp->inp_socket; SOCKBUF_LOCK(&so->so_snd); /* If the mss is larger than the socket buffer, decrease the mss. */ - if (so->so_snd.sb_hiwat < tp->t_maxseg) + if (so->so_snd.sb_hiwat < tp->t_maxseg) { tp->t_maxseg = so->so_snd.sb_hiwat; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } + } SOCKBUF_UNLOCK(&so->so_snd); TCPSTAT_INC(tcps_mturesent); @@ -3454,8 +3466,19 @@ opt = inp->in6p_outputopts; if (opt != NULL && opt->ip6po_minmtu == IP6PO_MINMTU_ALL && - tp->t_maxseg > TCP6_MSS) + tp->t_maxseg > TCP6_MSS) { tp->t_maxseg = TCP6_MSS; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } + } } } #endif /* INET6 */ Index: sys/netinet/tcp_timer.c =================================================================== --- sys/netinet/tcp_timer.c +++ sys/netinet/tcp_timer.c @@ -756,6 +756,16 @@ tp->t_flags2 |= TF2_PLPMTU_PMTUD; tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; tp->t_maxseg = tp->t_pmtud_saved_maxseg; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not + * process incoming SACK's since we are + * subject to attack in such a case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } TCPSTAT_INC(tcps_pmtud_blackhole_failed); /* * Reset the slow-start flight size as it Index: sys/netinet/tcp_usrreq.c =================================================================== --- sys/netinet/tcp_usrreq.c +++ sys/netinet/tcp_usrreq.c @@ -2203,9 +2203,19 @@ INP_WLOCK_RECHECK(inp); if (optval > 0 && optval <= tp->t_maxseg && - optval + 40 >= V_tcp_minmss) + optval + 40 >= V_tcp_minmss) { tp->t_maxseg = optval; - else + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } + } else error = EINVAL; goto unlock_and_done; Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -846,6 +846,7 @@ #define TF2_MBUF_QUEUE_READY 0x00020000 /* Inputs can be queued */ #define TF2_DONT_SACK_QUEUE 0x00040000 /* Don't wake on sack */ #define TF2_CANNOT_DO_ECN 0x00080000 /* The stack does not do ECN */ +#define TF2_PROC_SACK_PROHIBIT 0x00100000 /* Due to small MSS size do not process sack's */ /* * Structure to hold TCP options that are only used during segment