Page MenuHomeFreeBSD

D44903.id137896.diff
No OneTemporary

D44903.id137896.diff

Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -1615,7 +1615,14 @@
tcp_dooptions(&to, (u_char *)(th + 1),
(th->th_off << 2) - sizeof(struct tcphdr),
(thflags & TH_SYN) ? TO_SYN : 0);
-
+ if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) {
+ /*
+ * We don't look at sack's from the
+ * peer because the MSS is too small which
+ * can subject us to an attack.
+ */
+ to.to_flags &= ~TOF_SACK;
+ }
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
if ((tp->t_flags & TF_SIGNATURE) != 0 &&
(to.to_flags & TOF_SIGNATURE) == 0) {
@@ -3883,6 +3890,17 @@
mss = max(mss, 64);
tp->t_maxseg = mss;
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not process incoming
+ * SACK's since we are subject to attack in such a
+ * case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
+
}
void
@@ -3934,6 +3952,16 @@
* XXXGL: shouldn't we reserve space for IP/IPv6 options?
*/
tp->t_maxseg = max(mss, 64);
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not process incoming
+ * SACK's since we are subject to attack in such a
+ * case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
SOCKBUF_LOCK(&so->so_rcv);
if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe)
Index: sys/netinet/tcp_stacks/bbr.c
===================================================================
--- sys/netinet/tcp_stacks/bbr.c
+++ sys/netinet/tcp_stacks/bbr.c
@@ -5134,6 +5134,16 @@
tp->t_flags2 |= TF2_PLPMTU_PMTUD;
tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
tp->t_maxseg = tp->t_pmtud_saved_maxseg;
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not
+ * process incoming SACK's since we are
+ * subject to attack in such a case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
KMOD_TCPSTAT_INC(tcps_pmtud_blackhole_failed);
}
}
@@ -7556,7 +7566,7 @@
* Sort the SACK blocks so we can update the rack scoreboard with
* just one pass.
*/
- new_sb = sack_filter_blks(&bbr->r_ctl.bbr_sf, sack_blocks,
+ new_sb = sack_filter_blks(tp, &bbr->r_ctl.bbr_sf, sack_blocks,
num_sack_blks, th->th_ack);
ctf_log_sack_filter(bbr->rc_tp, new_sb, sack_blocks);
BBR_STAT_ADD(bbr_sack_blocks, num_sack_blks);
@@ -11323,7 +11333,14 @@
tcp_dooptions(&to, (u_char *)(th + 1),
(th->th_off << 2) - sizeof(struct tcphdr),
(thflags & TH_SYN) ? TO_SYN : 0);
-
+ if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) {
+ /*
+ * We don't look at sack's from the
+ * peer because the MSS is too small which
+ * can subject us to an attack.
+ */
+ to.to_flags &= ~TOF_SACK;
+ }
/*
* If timestamps were negotiated during SYN/ACK and a
* segment without a timestamp is received, silently drop
@@ -13773,6 +13790,16 @@
if (old_maxseg <= tp->t_maxseg) {
/* Huh it did not shrink? */
tp->t_maxseg = old_maxseg - 40;
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not
+ * process incoming SACK's since we are
+ * subject to attack in such a case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
bbr_log_msgsize_fail(bbr, tp, len, maxseg, mtu, 0, tso, cts);
}
/*
Index: sys/netinet/tcp_stacks/rack.c
===================================================================
--- sys/netinet/tcp_stacks/rack.c
+++ sys/netinet/tcp_stacks/rack.c
@@ -8583,6 +8583,16 @@
tp->t_flags2 |= TF2_PLPMTU_PMTUD;
tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
tp->t_maxseg = tp->t_pmtud_saved_maxseg;
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not
+ * process incoming SACK's since we are
+ * subject to attack in such a case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
KMOD_TCPSTAT_INC(tcps_pmtud_blackhole_failed);
}
}
@@ -11197,7 +11207,7 @@
* If we have some sack blocks in the filter
* lets prune them out by calling sfb with no blocks.
*/
- sack_filter_blks(&rack->r_ctl.rack_sf, NULL, 0, th_ack);
+ sack_filter_blks(tp, &rack->r_ctl.rack_sf, NULL, 0, th_ack);
}
if (SEQ_GT(th_ack, tp->snd_una)) {
/* Clear any app ack remembered settings */
@@ -12052,7 +12062,7 @@
* just one pass.
*/
o_cnt = num_sack_blks;
- num_sack_blks = sack_filter_blks(&rack->r_ctl.rack_sf, sack_blocks,
+ num_sack_blks = sack_filter_blks(tp, &rack->r_ctl.rack_sf, sack_blocks,
num_sack_blks, th->th_ack);
ctf_log_sack_filter(rack->rc_tp, num_sack_blks, sack_blocks);
if (sacks_seen != NULL)
@@ -17933,7 +17943,14 @@
__func__));
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
__func__));
-
+ if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) {
+ /*
+ * We don't look at sack's from the
+ * peer because the MSS is too small which
+ * can subject us to an attack.
+ */
+ to.to_flags &= ~TOF_SACK;
+ }
if ((tp->t_state >= TCPS_FIN_WAIT_1) &&
(tp->t_flags & TF_GPUTINPROG)) {
/*
Index: sys/netinet/tcp_stacks/sack_filter.h
===================================================================
--- sys/netinet/tcp_stacks/sack_filter.h
+++ sys/netinet/tcp_stacks/sack_filter.h
@@ -1,4 +1,4 @@
-#ifndef __sack_filter_h__
+#ifndef __sack_filter_h__
#define __sack_filter_h__
/*-
* Copyright (c) 2017-9 Netflix, Inc.
@@ -25,19 +25,84 @@
* SUCH DAMAGE.
*/
-/*
- * Seven entry's is carefully choosen to
- * fit in one cache line. We can easily
- * change this to 15 (but it gets very
- * little extra filtering). To change it
- * to be larger than 15 would require either
- * sf_bits becoming a uint32_t and then you
- * could go to 31.. or change it to a full
- * bitstring.. It is really doubtful you
- * will get much benefit beyond 7, in testing
- * there was a small amount but very very small.
+/**
+ *
+ * The Sack filter is designed to perform two functions. First, it tries to reduce
+ * the processing of sacks. Consider that oftentimes you get something like
+ *
+ * ack 1 (sack 100:200)
+ * ack 1 (sack 100:300)
+ * ack 1 (sack 100:400)
+ *
+ * You really want to process the 100:200 and then on the next sack process
+ * only 200:300 (the new data) and then finally on the third 300:400. The filter
+ * removes from your processing routines the already processed sack information so
+ * that after the filter completes you only have "new" sacks that you have not
+ * processed. This saves computation time so you do not need to worry about
+ * previously processed sack information.
+ *
+ * The second thing that the sack filter does is help protect against malicious
+ * attackers that are trying to attack any linked lists (or other data structures)
+ * that are used in sack processing. Consider an attacker sending in sacks for
+ * every other byte of data outstanding. This could in theory drastically split
+ * up any scoreboard you are maintaining and make you search through a very large
+ * linked list (or other structure) eating up CPU. If you split far enough and
+ * fracture your data structure enough you could potentially be crippled by a malicious
+ * peer. How the filter works here is it filters out sacks that are less than an MSS.
+ * We do this because generally a packet (aka MSS) should be kept whole. The only place
+ * we allow a smaller SACK is when the SACK touches the end of our socket buffer. This allows
+ * TLP to still work properly and yet protects us from splitting. The filter also only allows
+ * a set number of splits (defined in SACK_FILTER_BLOCKS). If more than that many sack locations
+ * are being sent we discard additional ones until the earlier holes are filled up. The maximum
+ * the current filter can be is 15, which we have moved to since we want to be as generous as
+ * possible with allowing for loss. However, in previous testing of the filter it was found
+ * that there was very little benefit from moving from 7 to 15 sack points. Though at
+ * that previous set of tests, we would just discard earlier information in the filter. Now
+ * that we do not do that i.e. discard information and instead drop sack data we have raised
+ * the value to the max i.e. 15. If you want to expand beyond 15 one would have to either increase
+ * the size of the sf_bits to a uint32_t which could then get you a maximum of 31 splits or
+ * move to a true bitstring. If this is done however it further increases your risk to
+ * sack attacks, the bigger the number of splits (filter blocks) that are allowed
+ * the larger your processing arrays will grow as well as the filter.
+ *
+ * Note that this protection does not prevent an attacker from asking for a 20 byte
+ * MSS, that protection must be done elsewhere during the negotiation of the connection
+ * and is done now by just ignoring sack's from connections with too small of MSS which
+ * prevents sack from working and thus makes the connection less efficient but protects
+ * the system from harm.
+ *
+ * We may actually want to consider dropping the size of the array back to 7 to further
+ * protect the system which would be a more cautious approach.
+ *
+ * TCP Developer information:
+ *
+ * Using the sack filter is actually pretty simple. All you do is the normal sorting
+ * and sanity checks of your sacks but then after that you call out to sack_filter_blks()
+ * passing in the tcpcb, the sack-filter you are using (memory you have allocated) the
+ * pointer to the sackblk array and how many sorted valid blocks there are as well
+ * as what the new th_ack point is. The filter will return to you the number of
+ * blocks left after filtering. It will reshape the blocks based on the previous
+ * sacks you have received and processed. If sack_filter_blks() returns 0 then no
+ * new sack data is present to be processed.
+ *
+ * Whenever you reach the point of snd_una == snd_max, you should call sack_filter_clear with
+ * the snd_una point. You also need to call this if you invalidate your sack array for any
+ * reason (such as RTO's or MTU changes or some other thing that makes you think all
+ * data is now un-acknowledged). You can also pass in sack_filter_blks(tp, sf, NULL, 0, th_ack) to
+ * advance the cum-ack point. You can use sack_filter_blks_used(sf) to determine if you have filter blocks as
+ * well. So putting these two together, anytime the cum-ack moves forward you probably want to
+ * do:
+ * if (sack_filter_blks_used(sf))
+ * sack_filter_blks(tp, sf, NULL, 0, th_ack);
+ *
+ * If for some reason you have run the sack-filter and something goes wrong (you can't allocate space,
+ * for example, to split your sack-array), you can "undo" the data within the sack filter by calling
+ * sack_filter_reject(sf, in) passing in the list of blocks to be "removed" from the sack-filter.
+ * You can see an example of this use in bbr.c though rack.c has never found it needed.
+ *
*/
-#define SACK_FILTER_BLOCKS 7
+
+#define SACK_FILTER_BLOCKS 15
struct sack_filter {
tcp_seq sf_ack;
@@ -48,7 +113,7 @@
};
#ifdef _KERNEL
void sack_filter_clear(struct sack_filter *sf, tcp_seq seq);
-int sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks,
+int sack_filter_blks(struct tcpcb *tp, struct sack_filter *sf, struct sackblk *in, int numblks,
tcp_seq th_ack);
void sack_filter_reject(struct sack_filter *sf, struct sackblk *in);
static inline uint8_t sack_filter_blks_used(struct sack_filter *sf)
Index: sys/netinet/tcp_stacks/sack_filter.c
===================================================================
--- sys/netinet/tcp_stacks/sack_filter.c
+++ sys/netinet/tcp_stacks/sack_filter.c
@@ -35,7 +35,13 @@
#include <sys/sockopt.h>
#endif
#include <netinet/in.h>
+#ifdef _KERNEL
#include <netinet/in_pcb.h>
+#else
+struct inpcb {
+ uint32_t stuff;
+};
+#endif
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_seq.h>
@@ -86,9 +92,9 @@
int highest_used=0;
int over_written=0;
int empty_avail=0;
-int no_collapse = 0;
FILE *out = NULL;
FILE *in = NULL;
+
#endif
#define sack_blk_used(sf, i) ((1 << i) & sf->sf_bits)
@@ -118,7 +124,7 @@
/* start with the oldest */
for (i = 0; i < SACK_FILTER_BLOCKS; i++) {
if (sack_blk_used(sf, i)) {
- if (SEQ_GT(th_ack, sf->sf_blks[i].end)) {
+ if (SEQ_GEQ(th_ack, sf->sf_blks[i].end)) {
/* This block is consumed */
sf->sf_bits = sack_blk_clr(sf, i);
sf->sf_used--;
@@ -143,10 +149,14 @@
* if part of it is on the board.
*/
static int32_t
-is_sack_on_board(struct sack_filter *sf, struct sackblk *b)
+is_sack_on_board(struct sack_filter *sf, struct sackblk *b, int32_t segmax, uint32_t snd_max)
{
int32_t i, cnt;
+ int span_cnt = 0;
+ uint32_t span_start, span_end;
+ span_start = b->start;
+ span_end = b->end;
for (i = sf->sf_cur, cnt=0; cnt < SACK_FILTER_BLOCKS; cnt++) {
if (sack_blk_used(sf, i)) {
if (SEQ_LT(b->start, sf->sf_ack)) {
@@ -184,6 +194,15 @@
* board |---|
* sack |---|
*/
+ if ((b->end != snd_max) &&
+ (span_cnt < 2) &&
+ ((b->end - b->start) < segmax)) {
+ /*
+ * Too small for us to mess with so we
+ * pretend its on the board.
+ */
+ return (1);
+ }
goto nxt_blk;
}
/* Jonathans Rule 3 */
@@ -194,6 +213,16 @@
* board |---|
* sack |---|
*/
+ if ((b->end != snd_max) &&
+ (sf->sf_blks[i].end != snd_max) &&
+ (span_cnt < 2) &&
+ ((b->end - b->start) < segmax)) {
+ /*
+ * Too small for us to mess with so we
+ * pretend its on the board.
+ */
+ return (1);
+ }
goto nxt_blk;
}
if (SEQ_LEQ(sf->sf_blks[i].start, b->start)) {
@@ -207,12 +236,36 @@
* sack |--------------|
*
* up with this one (we have part of it).
+ *
* 1) Update the board block to the new end
* and
* 2) Update the start of this block to my end.
+ *
+ * We only do this if the new piece is large enough.
*/
+ if (((b->end != snd_max) || (sf->sf_blks[i].end == snd_max)) &&
+ (span_cnt == 0) &&
+ ((b->end - sf->sf_blks[i].end) < segmax)) {
+ /*
+ * Too small for us to mess with so we
+ * pretend its on the board.
+ */
+ return (1);
+ }
b->start = sf->sf_blks[i].end;
sf->sf_blks[i].end = b->end;
+ if (span_cnt == 0) {
+ span_start = sf->sf_blks[i].start;
+ span_end = sf->sf_blks[i].end;
+ } else {
+ if (SEQ_LT(span_start, sf->sf_blks[i].start)) {
+ span_start = sf->sf_blks[i].start;
+ }
+ if (SEQ_GT(span_end, sf->sf_blks[i].end)) {
+ span_end = sf->sf_blks[i].end;
+ }
+ }
+ span_cnt++;
goto nxt_blk;
}
if (SEQ_GEQ(sf->sf_blks[i].end, b->end)) {
@@ -224,12 +277,36 @@
* <or>
* board |----|
* sack |----------|
+ *
* 1) Update the board block to the new start
* and
* 2) Update the start of this block to my end.
+ *
+ * We only do this if the new piece is large enough.
*/
+ if (((b->end != snd_max) || (sf->sf_blks[i].end == snd_max)) &&
+ (span_cnt == 0) &&
+ ((sf->sf_blks[i].start - b->start) < segmax)) {
+ /*
+ * Too small for us to mess with so we
+ * pretend its on the board.
+ */
+ return (1);
+ }
b->end = sf->sf_blks[i].start;
sf->sf_blks[i].start = b->start;
+ if (span_cnt == 0) {
+ span_start = sf->sf_blks[i].start;
+ span_end = sf->sf_blks[i].end;
+ } else {
+ if (SEQ_LT(span_start, sf->sf_blks[i].start)) {
+ span_start = sf->sf_blks[i].start;
+ }
+ if (SEQ_GT(span_end, sf->sf_blks[i].end)) {
+ span_end = sf->sf_blks[i].end;
+ }
+ }
+ span_cnt++;
goto nxt_blk;
}
}
@@ -238,46 +315,23 @@
i %= SACK_FILTER_BLOCKS;
}
/* Did we totally consume it in pieces? */
- if (b->start != b->end)
- return(0);
- else
- return(1);
-}
-
-static int32_t
-sack_filter_old(struct sack_filter *sf, struct sackblk *in, int numblks)
-{
- int32_t num, i;
- struct sackblk blkboard[TCP_MAX_SACK];
- /*
- * An old sack has arrived. It may contain data
- * we do not have. We might not have it since
- * we could have had a lost ack <or> we might have the
- * entire thing on our current board. We want to prune
- * off anything we have. With this function though we
- * won't add to the board.
- */
- for( i = 0, num = 0; i<numblks; i++ ) {
- if (is_sack_on_board(sf, &in[i])) {
-#ifndef _KERNEL
- cnt_skipped_oldsack++;
-#endif
- continue;
+ if (b->start != b->end) {
+ if ((b->end != snd_max) &&
+ ((b->end - b->start) < segmax) &&
+ ((span_end - span_start) < segmax)) {
+ /*
+ * Too small for us to mess with so we
+ * pretend its on the board.
+ */
+ return (1);
}
- /* Did not find it (or found only
- * a piece of it). Copy it to
- * our outgoing board.
+ return(0);
+ } else {
+ /*
+ * It was all consumed by the board.
*/
- memcpy(&blkboard[num], &in[i], sizeof(struct sackblk));
-#ifndef _KERNEL
- cnt_used_oldsack++;
-#endif
- num++;
- }
- if (num) {
- memcpy(in, blkboard, (num * sizeof(struct sackblk)));
+ return(1);
}
- return (num);
}
/*
@@ -303,54 +357,61 @@
}
static int32_t
-sack_filter_new(struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack)
+sack_filter_run(struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack, int32_t segmax, uint32_t snd_max)
{
struct sackblk blkboard[TCP_MAX_SACK];
- int32_t num, i;
+ int32_t num, i, room, at;
/*
* First lets trim the old and possibly
* throw any away we have.
*/
for(i=0, num=0; i<numblks; i++) {
- if (is_sack_on_board(sf, &in[i]))
+ if (is_sack_on_board(sf, &in[i], segmax, snd_max))
continue;
memcpy(&blkboard[num], &in[i], sizeof(struct sackblk));
num++;
}
- if (num == 0)
+ if (num == 0) {
return(num);
+ }
+
+ /*
+ * Calculate the space we have in the filter table.
+ */
+ room = SACK_FILTER_BLOCKS - sf->sf_used;
- /* Now what we are left with is either
- * completely merged on to the board
- * from the above steps, or is new
- * and need to be added to the board
- * with the last one updated to current.
+ /*
+ * Now lets walk through our filtered blkboard (the previous loop
+ * trimmed off anything on the board we already have so anything
+ * in blkboard is unique and not seen before) and see if we
+ * can just add it on to an existing entry, if so copy it out
+ * and adjust our board.
*
- * First copy it out, we want to return that
- * to our caller for processing.
+ * Otherwise if there is room we copy it back out and
+ * place a new entry on our board.
*/
- memcpy(in, blkboard, (num * sizeof(struct sackblk)));
- numblks = num;
- /* Now go through and add to our board as needed */
- for(i=(num-1); i>=0; i--) {
- if (is_sack_on_board(sf, &blkboard[i])) {
+ for(i=0, at=0; i<num; i++) {
+ if (is_sack_on_board(sf, &blkboard[i], segmax, snd_max)) {
+ memcpy(&in[at], &blkboard[i], sizeof(struct sackblk));
+ at++;
+ continue;
+ }
+ /* First lets see if this guy can be "fit" to an existing block */
+ if (room == 0) {
+ /* Can't copy it out there is no room */
continue;
}
- /* Add this guy its not listed */
+ /* Copy it out to the outbound */
+ memcpy(&in[at], &blkboard[i], sizeof(struct sackblk));
+ at++;
+ /* now lets add it to our sack-board */
sf->sf_cur++;
sf->sf_cur %= SACK_FILTER_BLOCKS;
if ((sack_blk_used(sf, sf->sf_cur)) &&
(sf->sf_used < SACK_FILTER_BLOCKS)) {
sack_move_to_empty(sf, sf->sf_cur);
}
-#ifndef _KERNEL
- if (sack_blk_used(sf, sf->sf_cur)) {
- over_written++;
- if (sf->sf_used < SACK_FILTER_BLOCKS)
- empty_avail++;
- }
-#endif
- memcpy(&sf->sf_blks[sf->sf_cur], &in[i], sizeof(struct sackblk));
+ memcpy(&sf->sf_blks[sf->sf_cur], &blkboard[i], sizeof(struct sackblk));
if (sack_blk_used(sf, sf->sf_cur) == 0) {
sf->sf_used++;
#ifndef _KERNEL
@@ -360,7 +421,26 @@
sf->sf_bits = sack_blk_set(sf, sf->sf_cur);
}
}
- return(numblks);
+ return(at);
+}
+
+/*
+ * Collapse entry src into entry into
+ * and free up the src entry afterwards.
+ */
+static void
+sack_collapse(struct sack_filter *sf, int32_t src, int32_t into)
+{
+ if (SEQ_LT(sf->sf_blks[src].start, sf->sf_blks[into].start)) {
+ /* src has a lower starting point */
+ sf->sf_blks[into].start = sf->sf_blks[src].start;
+ }
+ if (SEQ_GT(sf->sf_blks[src].end, sf->sf_blks[into].end)) {
+ /* src has a higher ending point */
+ sf->sf_blks[into].end = sf->sf_blks[src].end;
+ }
+ sf->sf_bits = sack_blk_clr(sf, src);
+ sf->sf_used--;
}
/*
@@ -415,25 +495,6 @@
return (-1);
}
-/*
- * Collapse entry src into entry into
- * and free up the src entry afterwards.
- */
-static void
-sack_collapse(struct sack_filter *sf, int32_t src, int32_t into)
-{
- if (SEQ_LT(sf->sf_blks[src].start, sf->sf_blks[into].start)) {
- /* src has a lower starting point */
- sf->sf_blks[into].start = sf->sf_blks[src].start;
- }
- if (SEQ_GT(sf->sf_blks[src].end, sf->sf_blks[into].end)) {
- /* src has a higher ending point */
- sf->sf_blks[into].end = sf->sf_blks[src].end;
- }
- sf->sf_bits = sack_blk_clr(sf, src);
- sf->sf_used--;
-}
-
static void
sack_board_collapse(struct sack_filter *sf)
{
@@ -485,9 +546,12 @@
for(i=0; i<SACK_FILTER_BLOCKS; i++) {
if (sack_blk_used(sf, i)) {
- fprintf(out, "Entry:%d start:%u end:%u\n", i,
- sf->sf_blks[i].start,
- sf->sf_blks[i].end);
+ fprintf(out, "Entry:%d start:%u end:%u the block is %s\n",
+ i,
+ sf->sf_blks[i].start,
+ sf->sf_blks[i].end,
+ (sack_blk_used(sf, i) ? "USED" : "NOT-USED")
+ );
}
}
}
@@ -497,10 +561,11 @@
static
#endif
int
-sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks,
+sack_filter_blks(struct tcpcb *tp, struct sack_filter *sf, struct sackblk *in, int numblks,
tcp_seq th_ack)
{
int32_t i, ret;
+ int32_t segmax;
if (numblks > TCP_MAX_SACK) {
#ifdef _KERNEL
@@ -510,14 +575,9 @@
#endif
return(numblks);
}
-#ifndef _KERNEL
- if ((sf->sf_used > 1) && (no_collapse == 0))
- sack_board_collapse(sf);
-
-#else
if (sf->sf_used > 1)
sack_board_collapse(sf);
-#endif
+ segmax = (tp->t_maxseg - 12); /* Allow timestamps .. need to move to kernel get */
if ((sf->sf_used == 0) && numblks) {
/*
* We are brand new add the blocks in
@@ -527,7 +587,15 @@
int cnt_added = 0;
sf->sf_ack = th_ack;
- for(i=(numblks-1), sf->sf_cur=0; i >= 0; i--) {
+ for(i=0, sf->sf_cur=0; i<numblks; i++) {
+ if ((in[i].end != tp->snd_max) &&
+ ((in[i].end - in[i].start) < segmax)) {
+ /*
+ * We do not accept blocks less than an MSS minus all
+ * possible options space unless they end at snd_max.
+ */
+ continue;
+ }
memcpy(&sf->sf_blks[sf->sf_cur], &in[i], sizeof(struct sackblk));
sf->sf_bits = sack_blk_set(sf, sf->sf_cur);
sf->sf_cur++;
@@ -548,11 +616,9 @@
sack_filter_prune(sf, th_ack);
}
if (numblks) {
- if (SEQ_GEQ(th_ack, sf->sf_ack)) {
- ret = sack_filter_new(sf, in, numblks, th_ack);
- } else {
- ret = sack_filter_old(sf, in, numblks);
- }
+ ret = sack_filter_run(sf, in, numblks, th_ack, segmax, tp->snd_max);
+ if (sf->sf_used > 1)
+ sack_board_collapse(sf);
} else
ret = 0;
return (ret);
@@ -625,7 +691,8 @@
char buffer[512];
struct sackblk blks[TCP_MAX_SACK];
FILE *err;
- tcp_seq th_ack, snd_una, snd_max = 0;
+ tcp_seq th_ack;
+ struct tcpcb tp;
struct sack_filter sf;
int32_t numblks,i;
int snd_una_set=0;
@@ -638,10 +705,13 @@
in = stdin;
out = stdout;
- while ((i = getopt(argc, argv, "ndIi:o:?h")) != -1) {
+ memset(&tp, 0, sizeof(tp));
+ tp.t_maxseg = 1460;
+
+ while ((i = getopt(argc, argv, "dIi:o:?hS:")) != -1) {
switch (i) {
- case 'n':
- no_collapse = 1;
+ case 'S':
+ tp.t_maxseg = strtol(optarg, NULL, 0);
break;
case 'd':
detailed_dump = 1;
@@ -666,7 +736,7 @@
default:
case '?':
case 'h':
- fprintf(stderr, "Use %s [ -i infile -o outfile -I]\n", argv[0]);
+ fprintf(stderr, "Use %s [ -i infile -o outfile -I -S maxseg -n -d ]\n", argv[0]);
return(0);
break;
};
@@ -679,28 +749,28 @@
while (fgets(buffer, sizeof(buffer), in) != NULL) {
sprintf(line_buf[line_buf_at], "%s", buffer);
line_buf_at++;
- if (strncmp(buffer, "QUIT", 4) == 0) {
+ if (strncmp(buffer, "quit", 4) == 0) {
break;
- } else if (strncmp(buffer, "DUMP", 4) == 0) {
+ } else if (strncmp(buffer, "dump", 4) == 0) {
sack_filter_dump(out, &sf);
- } else if (strncmp(buffer, "MAX:", 4) == 0) {
- snd_max = strtoul(&buffer[4], NULL, 0);
- } else if (strncmp(buffer, "COMMIT", 6) == 0) {
+ } else if (strncmp(buffer, "max:", 4) == 0) {
+ tp.snd_max = strtoul(&buffer[4], NULL, 0);
+ } else if (strncmp(buffer, "commit", 6) == 0) {
int nn, ii;
if (numblks) {
uint32_t szof, tot_chg;
+ printf("Dumping line buffer (lines:%d)\n", line_buf_at);
for(ii=0; ii<line_buf_at; ii++) {
fprintf(out, "%s", line_buf[ii]);
}
- fprintf(out, "------------------------------------\n");
- nn = sack_filter_blks(&sf, blks, numblks, th_ack);
+ fprintf(out, "------------------------------------ call sfb() nb:%d\n", numblks);
+ nn = sack_filter_blks(&tp, &sf, blks, numblks, th_ack);
saved += numblks - nn;
tot_sack_blks += numblks;
- fprintf(out, "ACK:%u\n", sf.sf_ack);
for(ii=0, tot_chg=0; ii<nn; ii++) {
szof = blks[ii].end - blks[ii].start;
tot_chg += szof;
- fprintf(out, "SACK:%u:%u [%u]\n",
+ fprintf(out, "sack:%u:%u [%u]\n",
blks[ii].start,
blks[ii].end, szof);
}
@@ -715,7 +785,7 @@
memset(line_buf, 0, sizeof(line_buf));
line_buf_at=0;
numblks = 0;
- } else if (strncmp(buffer, "CHG:", 4) == 0) {
+ } else if (strncmp(buffer, "chg:", 4) == 0) {
sack_chg = strtoul(&buffer[4], NULL, 0);
if ((sack_chg != chg_remembered) &&
(sack_chg > chg_remembered)){
@@ -724,20 +794,21 @@
);
}
sack_chg = chg_remembered = 0;
- } else if (strncmp(buffer, "RXT", 3) == 0) {
- sack_filter_clear(&sf, snd_una);
- } else if (strncmp(buffer, "ACK:", 4) == 0) {
+ } else if (strncmp(buffer, "rxt", 3) == 0) {
+ sack_filter_clear(&sf, tp.snd_una);
+ } else if (strncmp(buffer, "ack:", 4) == 0) {
th_ack = strtoul(&buffer[4], NULL, 0);
if (snd_una_set == 0) {
- snd_una = th_ack;
+ tp.snd_una = th_ack;
snd_una_set = 1;
- } else if (SEQ_GT(th_ack, snd_una)) {
- snd_una = th_ack;
+ } else if (SEQ_GT(th_ack, tp.snd_una)) {
+ tp.snd_una = th_ack;
}
- } else if (strncmp(buffer, "EXIT", 4) == 0) {
- sack_filter_clear(&sf, snd_una);
+ sack_filter_blks(&tp, &sf, NULL, 0, th_ack);
+ } else if (strncmp(buffer, "exit", 4) == 0) {
+ sack_filter_clear(&sf, tp.snd_una);
sack_chg = chg_remembered = 0;
- } else if (strncmp(buffer, "SACK:", 5) == 0) {
+ } else if (strncmp(buffer, "sack:", 5) == 0) {
char *end=NULL;
uint32_t start;
uint32_t endv;
@@ -749,8 +820,8 @@
fprintf(out, "--Sack invalid skip 0 start:%u : ??\n", start);
continue;
}
- if (SEQ_GT(endv, snd_max))
- snd_max = endv;
+ if (SEQ_GT(endv, tp.snd_max))
+ tp.snd_max = endv;
if (SEQ_LT(endv, start)) {
fprintf(out, "--Sack invalid skip 1 endv:%u < start:%u\n", endv, start);
continue;
@@ -762,7 +833,7 @@
blks[numblks].start = start;
blks[numblks].end = endv;
numblks++;
- } else if (strncmp(buffer, "REJ:n:n", 4) == 0) {
+ } else if (strncmp(buffer, "rej:n:n", 4) == 0) {
struct sackblk in;
char *end=NULL;
@@ -772,18 +843,63 @@
sack_filter_reject(&sf, &in);
} else
fprintf(out, "Invalid input END:A:B\n");
- } else if (strncmp(buffer, "HELP", 4) == 0) {
+ } else if (strncmp(buffer, "save", 4) == 0) {
+ FILE *io;
+
+ io = fopen("sack_setup.bin", "w+");
+ if (io != NULL) {
+ if (fwrite(&sf, sizeof(sf), 1, io) != 1) {
+ printf("Failed to write out sf data\n");
+ unlink("sack_setup.bin");
+ goto outwrite;
+ }
+ if (fwrite(&tp, sizeof(tp), 1, io) != 1) {
+ printf("Failed to write out tp data\n");
+ unlink("sack_setup.bin");
+ } else
+ printf("Save completed\n");
+ outwrite:
+ fclose(io);
+ } else {
+ printf("failed to open sack_setup.bin for writting .. sorry\n");
+ }
+ } else if (strncmp(buffer, "restore", 7) == 0) {
+ FILE *io;
+
+ io = fopen("sack_setup.bin", "r");
+ if (io != NULL) {
+ if (fread(&sf, sizeof(sf), 1, io) != 1) {
+ printf("Failed to read out sf data\n");
+ goto outread;
+ }
+ if (fread(&tp, sizeof(tp), 1, io) != 1) {
+ printf("Failed to read out tp data\n");
+ } else {
+ printf("Restore completed\n");
+ sack_filter_dump(out, &sf);
+ }
+ outread:
+ fclose(io);
+ } else {
+ printf("can't open sack_setup.bin -- sorry no load\n");
+ }
+
+ } else if (strncmp(buffer, "help", 4) == 0) {
+help:
fprintf(out, "You can input:\n");
- fprintf(out, "SACK:S:E -- to define a sack block\n");
- fprintf(out, "RXT -- to clear the filter without changing the remembered\n");
- fprintf(out, "EXIT -- To clear the sack filter and start all fresh\n");
- fprintf(out, "ACK:N -- To advance the cum-ack to N\n");
- fprintf(out, "MAX:N -- To set send-max to N\n");
- fprintf(out, "COMMIT -- To apply the sack you built to the filter and dump the filter\n");
- fprintf(out, "DUMP -- To display the current contents of the sack filter\n");
- fprintf(out, "QUIT -- To exit this program\n");
+ fprintf(out, "sack:S:E -- to define a sack block\n");
+ fprintf(out, "rxt -- to clear the filter without changing the remembered\n");
+ fprintf(out, "save -- save current state to sack_setup.bin\n");
+ fprintf(out, "restore -- restore state from sack_setup.bin\n");
+ fprintf(out, "exit -- To clear the sack filter and start all fresh\n");
+ fprintf(out, "ack:N -- To advance the cum-ack to N\n");
+ fprintf(out, "max:N -- To set send-max to N\n");
+ fprintf(out, "commit -- To apply the sack you built to the filter and dump the filter\n");
+ fprintf(out, "dump -- To display the current contents of the sack filter\n");
+ fprintf(out, "quit -- To exit this program\n");
} else {
fprintf(out, "Command %s unknown\n", buffer);
+ goto help;
}
memset(buffer, 0, sizeof(buffer));
}
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -1793,6 +1793,7 @@
*
* NOTE: If m != NULL, then th must point to *inside* the mbuf.
*/
+
void
tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
tcp_seq ack, tcp_seq seq, uint16_t flags)
@@ -3320,8 +3321,19 @@
so = inp->inp_socket;
SOCKBUF_LOCK(&so->so_snd);
/* If the mss is larger than the socket buffer, decrease the mss. */
- if (so->so_snd.sb_hiwat < tp->t_maxseg)
+ if (so->so_snd.sb_hiwat < tp->t_maxseg) {
tp->t_maxseg = so->so_snd.sb_hiwat;
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not process incoming
+ * SACK's since we are subject to attack in such a
+ * case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
+ }
SOCKBUF_UNLOCK(&so->so_snd);
TCPSTAT_INC(tcps_mturesent);
@@ -3454,8 +3466,19 @@
opt = inp->in6p_outputopts;
if (opt != NULL && opt->ip6po_minmtu == IP6PO_MINMTU_ALL &&
- tp->t_maxseg > TCP6_MSS)
+ tp->t_maxseg > TCP6_MSS) {
tp->t_maxseg = TCP6_MSS;
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not process incoming
+ * SACK's since we are subject to attack in such a
+ * case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
+ }
}
}
#endif /* INET6 */
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -756,6 +756,16 @@
tp->t_flags2 |= TF2_PLPMTU_PMTUD;
tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
tp->t_maxseg = tp->t_pmtud_saved_maxseg;
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not
+ * process incoming SACK's since we are
+ * subject to attack in such a case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
TCPSTAT_INC(tcps_pmtud_blackhole_failed);
/*
* Reset the slow-start flight size as it
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -2203,9 +2203,19 @@
INP_WLOCK_RECHECK(inp);
if (optval > 0 && optval <= tp->t_maxseg &&
- optval + 40 >= V_tcp_minmss)
+ optval + 40 >= V_tcp_minmss) {
tp->t_maxseg = optval;
- else
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not process incoming
+ * SACK's since we are subject to attack in such a
+ * case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
+ } else
error = EINVAL;
goto unlock_and_done;
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -846,6 +846,7 @@
#define TF2_MBUF_QUEUE_READY 0x00020000 /* Inputs can be queued */
#define TF2_DONT_SACK_QUEUE 0x00040000 /* Don't wake on sack */
#define TF2_CANNOT_DO_ECN 0x00080000 /* The stack does not do ECN */
+#define TF2_PROC_SACK_PROHIBIT 0x00100000 /* Due to small MSS size do not process sack's */
/*
* Structure to hold TCP options that are only used during segment

File Metadata

Mime Type
text/plain
Expires
Wed, Dec 18, 1:46 AM (1 h, 9 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15475269
Default Alt Text
D44903.id137896.diff (33 KB)

Event Timeline