Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F105508032
D44903.id137896.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
33 KB
Referenced Files
None
Subscribers
None
D44903.id137896.diff
View Options
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -1615,7 +1615,14 @@
tcp_dooptions(&to, (u_char *)(th + 1),
(th->th_off << 2) - sizeof(struct tcphdr),
(thflags & TH_SYN) ? TO_SYN : 0);
-
+ if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) {
+ /*
+ * We don't look at sack's from the
+ * peer because the MSS is too small which
+ * can subject us to an attack.
+ */
+ to.to_flags &= ~TOF_SACK;
+ }
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
if ((tp->t_flags & TF_SIGNATURE) != 0 &&
(to.to_flags & TOF_SIGNATURE) == 0) {
@@ -3883,6 +3890,17 @@
mss = max(mss, 64);
tp->t_maxseg = mss;
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not process incoming
+ * SACK's since we are subject to attack in such a
+ * case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
+
}
void
@@ -3934,6 +3952,16 @@
* XXXGL: shouldn't we reserve space for IP/IPv6 options?
*/
tp->t_maxseg = max(mss, 64);
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not process incoming
+ * SACK's since we are subject to attack in such a
+ * case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
SOCKBUF_LOCK(&so->so_rcv);
if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe)
Index: sys/netinet/tcp_stacks/bbr.c
===================================================================
--- sys/netinet/tcp_stacks/bbr.c
+++ sys/netinet/tcp_stacks/bbr.c
@@ -5134,6 +5134,16 @@
tp->t_flags2 |= TF2_PLPMTU_PMTUD;
tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
tp->t_maxseg = tp->t_pmtud_saved_maxseg;
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not
+ * process incoming SACK's since we are
+ * subject to attack in such a case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
KMOD_TCPSTAT_INC(tcps_pmtud_blackhole_failed);
}
}
@@ -7556,7 +7566,7 @@
* Sort the SACK blocks so we can update the rack scoreboard with
* just one pass.
*/
- new_sb = sack_filter_blks(&bbr->r_ctl.bbr_sf, sack_blocks,
+ new_sb = sack_filter_blks(tp, &bbr->r_ctl.bbr_sf, sack_blocks,
num_sack_blks, th->th_ack);
ctf_log_sack_filter(bbr->rc_tp, new_sb, sack_blocks);
BBR_STAT_ADD(bbr_sack_blocks, num_sack_blks);
@@ -11323,7 +11333,14 @@
tcp_dooptions(&to, (u_char *)(th + 1),
(th->th_off << 2) - sizeof(struct tcphdr),
(thflags & TH_SYN) ? TO_SYN : 0);
-
+ if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) {
+ /*
+ * We don't look at sack's from the
+ * peer because the MSS is too small which
+ * can subject us to an attack.
+ */
+ to.to_flags &= ~TOF_SACK;
+ }
/*
* If timestamps were negotiated during SYN/ACK and a
* segment without a timestamp is received, silently drop
@@ -13773,6 +13790,16 @@
if (old_maxseg <= tp->t_maxseg) {
/* Huh it did not shrink? */
tp->t_maxseg = old_maxseg - 40;
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not
+ * process incoming SACK's since we are
+ * subject to attack in such a case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
bbr_log_msgsize_fail(bbr, tp, len, maxseg, mtu, 0, tso, cts);
}
/*
Index: sys/netinet/tcp_stacks/rack.c
===================================================================
--- sys/netinet/tcp_stacks/rack.c
+++ sys/netinet/tcp_stacks/rack.c
@@ -8583,6 +8583,16 @@
tp->t_flags2 |= TF2_PLPMTU_PMTUD;
tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
tp->t_maxseg = tp->t_pmtud_saved_maxseg;
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not
+ * process incoming SACK's since we are
+ * subject to attack in such a case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
KMOD_TCPSTAT_INC(tcps_pmtud_blackhole_failed);
}
}
@@ -11197,7 +11207,7 @@
* If we have some sack blocks in the filter
* lets prune them out by calling sfb with no blocks.
*/
- sack_filter_blks(&rack->r_ctl.rack_sf, NULL, 0, th_ack);
+ sack_filter_blks(tp, &rack->r_ctl.rack_sf, NULL, 0, th_ack);
}
if (SEQ_GT(th_ack, tp->snd_una)) {
/* Clear any app ack remembered settings */
@@ -12052,7 +12062,7 @@
* just one pass.
*/
o_cnt = num_sack_blks;
- num_sack_blks = sack_filter_blks(&rack->r_ctl.rack_sf, sack_blocks,
+ num_sack_blks = sack_filter_blks(tp, &rack->r_ctl.rack_sf, sack_blocks,
num_sack_blks, th->th_ack);
ctf_log_sack_filter(rack->rc_tp, num_sack_blks, sack_blocks);
if (sacks_seen != NULL)
@@ -17933,7 +17943,14 @@
__func__));
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
__func__));
-
+ if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) {
+ /*
+ * We don't look at sack's from the
+ * peer because the MSS is too small which
+ * can subject us to an attack.
+ */
+ to.to_flags &= ~TOF_SACK;
+ }
if ((tp->t_state >= TCPS_FIN_WAIT_1) &&
(tp->t_flags & TF_GPUTINPROG)) {
/*
Index: sys/netinet/tcp_stacks/sack_filter.h
===================================================================
--- sys/netinet/tcp_stacks/sack_filter.h
+++ sys/netinet/tcp_stacks/sack_filter.h
@@ -1,4 +1,4 @@
-#ifndef __sack_filter_h__
+#ifndef __sack_filter_h__
#define __sack_filter_h__
/*-
* Copyright (c) 2017-9 Netflix, Inc.
@@ -25,19 +25,84 @@
* SUCH DAMAGE.
*/
-/*
- * Seven entry's is carefully choosen to
- * fit in one cache line. We can easily
- * change this to 15 (but it gets very
- * little extra filtering). To change it
- * to be larger than 15 would require either
- * sf_bits becoming a uint32_t and then you
- * could go to 31.. or change it to a full
- * bitstring.. It is really doubtful you
- * will get much benefit beyond 7, in testing
- * there was a small amount but very very small.
+/**
+ *
+ * The Sack filter is designed to do two functions. First, it tries to reduce
+ * the processing of sacks. Consider that often times you get something like
+ *
+ * ack 1 (sack 100:200)
+ * ack 1 (sack 100:300)
+ * ack 1 (sack 100:400)
+ *
+ * You really want to process the 100:200 and then on the next sack process
+ * only 200:300 (the new data) and then finally on the third 300:400. The filter
+ * removes from your processing routines the already processed sack information so
+ * that after the filter completes you only have "new" sacks that you have not
+ * processed. This saves computation time so you do not need to worry about
+ * previously processed sack information.
+ *
+ * The second thing that the sack filter does is help protect against malicious
+ * attackers that are trying to attack any linked lists (or other data structures)
+ * that are used in sack processing. Consider an attacker sending in sacks for
+ * every other byte of data outstanding. This could in theory drastically split
+ * up any scoreboard you are maintaining and make you search through a very large
+ * linked list (or other structure) eating up CPU. If you split far enough and
+ * fracture your data structure enough you could potentially be crippled by a malicious
+ * peer. How the filter works here is it filters out sacks that are less than an MSS.
+ * We do this because generally a packet (aka MSS) should be kept whole. The only place
+ * we allow a smaller SACK is when the SACK touches the end of our socket buffer. This allows
+ * TLP to still work properly and yet protects us from splitting. The filter also only allows
+ * a set number of splits (defined in SACK_FILTER_BLOCKS). If more than that many sack locations
+ * are being sent we discard additional ones until the earlier holes are filled up. The maximum
+ * the current filter can be is 15, which we have moved to since we want to be as generous as
+ * possible with allowing for loss. However, in previous testing of the filter it was found
+ * that there was very little benefit from moving from 7 to 15 sack points. Though at
+ * that previous set of tests, we would just discard earlier information in the filter. Now
+ * that we do not do that i.e. discard information and instead drop sack data we have raised
+ * the value to the max i.e. 15. If you want to expand beyond 15 one would have to either increase
+ * the size of the sf_bits to a uint32_t which could then get you a maximum of 31 splits or
+ * move to a true bitstring. If this is done however it further increases your risk to
+ * sack attacks, the bigger the number of splits (filter blocks) that are allowed
+ * the larger your processing arrays will grow as well as the filter.
+ *
+ * Note that this protection does not prevent an attacker from asking for a 20 byte
+ * MSS, that protection must be done elsewhere during the negotiation of the connection
+ * and is done now by just ignoring sack's from connections with too small of MSS which
+ * prevents sack from working and thus makes the connection less efficient but protects
+ * the system from harm.
+ *
+ * We may actually want to consider dropping the size of the array back to 7 to further
+ * protect the system which would be a more cautious approach.
+ *
+ * TCP Developer information:
+ *
+ * Using the sack filter is actually pretty simple. All you do is the normal sorting
+ * and sanity checks of your sacks but then after that you call out to sack_filter_blks()
+ * passing in the tcpcb, the sack-filter you are using (memory you have allocated), the
+ * pointer to the sackblk array and how many sorted valid blocks there are, as well
+ * as what the new th_ack point is. The filter will return to you the number of
+ * blocks left after filtering. It will reshape the blocks based on the previous
+ * sacks you have received and processed. If sack_filter_blks() returns 0 then no
+ * new sack data is present to be processed.
+ *
+ * Whenever you reach the point of snd_una == snd_max, you should call sack_filter_clear with
+ * the snd_una point. You also need to call this if you invalidate your sack array for any
+ * reason (such as RTO's or MTU changes or some other thing that makes you think all
+ * data is now un-acknowledged). You can also pass in sack_filter_blks(tp, sf, NULL, 0, th_ack) to
+ * advance the cum-ack point. You can use sack_filter_blks_used(sf) to determine if you have filter blocks as
+ * well. So putting these two together, anytime the cum-ack moves forward you probably want to
+ * do:
+ * if (sack_filter_blks_used(sf))
+ * sack_filter_blks(tp, sf, NULL, 0, th_ack);
+ *
+ * If for some reason you have run the sack-filter and something goes wrong (you can't allocate space
+ * for example to split your sack-array), you can "undo" the data within the sack filter by calling
+ * sack_filter_reject(sf, in) passing in the list of blocks to be "removed" from the sack-filter.
+ * You can see an example of this use in bbr.c though rack.c has never found it needed.
+ *
*/
-#define SACK_FILTER_BLOCKS 7
+
+#define SACK_FILTER_BLOCKS 15
struct sack_filter {
tcp_seq sf_ack;
@@ -48,7 +113,7 @@
};
#ifdef _KERNEL
void sack_filter_clear(struct sack_filter *sf, tcp_seq seq);
-int sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks,
+int sack_filter_blks(struct tcpcb *tp, struct sack_filter *sf, struct sackblk *in, int numblks,
tcp_seq th_ack);
void sack_filter_reject(struct sack_filter *sf, struct sackblk *in);
static inline uint8_t sack_filter_blks_used(struct sack_filter *sf)
Index: sys/netinet/tcp_stacks/sack_filter.c
===================================================================
--- sys/netinet/tcp_stacks/sack_filter.c
+++ sys/netinet/tcp_stacks/sack_filter.c
@@ -35,7 +35,13 @@
#include <sys/sockopt.h>
#endif
#include <netinet/in.h>
+#ifdef _KERNEL
#include <netinet/in_pcb.h>
+#else
+struct inpcb {
+ uint32_t stuff;
+};
+#endif
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_seq.h>
@@ -86,9 +92,9 @@
int highest_used=0;
int over_written=0;
int empty_avail=0;
-int no_collapse = 0;
FILE *out = NULL;
FILE *in = NULL;
+
#endif
#define sack_blk_used(sf, i) ((1 << i) & sf->sf_bits)
@@ -118,7 +124,7 @@
/* start with the oldest */
for (i = 0; i < SACK_FILTER_BLOCKS; i++) {
if (sack_blk_used(sf, i)) {
- if (SEQ_GT(th_ack, sf->sf_blks[i].end)) {
+ if (SEQ_GEQ(th_ack, sf->sf_blks[i].end)) {
/* This block is consumed */
sf->sf_bits = sack_blk_clr(sf, i);
sf->sf_used--;
@@ -143,10 +149,14 @@
* if part of it is on the board.
*/
static int32_t
-is_sack_on_board(struct sack_filter *sf, struct sackblk *b)
+is_sack_on_board(struct sack_filter *sf, struct sackblk *b, int32_t segmax, uint32_t snd_max)
{
int32_t i, cnt;
+ int span_cnt = 0;
+ uint32_t span_start, span_end;
+ span_start = b->start;
+ span_end = b->end;
for (i = sf->sf_cur, cnt=0; cnt < SACK_FILTER_BLOCKS; cnt++) {
if (sack_blk_used(sf, i)) {
if (SEQ_LT(b->start, sf->sf_ack)) {
@@ -184,6 +194,15 @@
* board |---|
* sack |---|
*/
+ if ((b->end != snd_max) &&
+ (span_cnt < 2) &&
+ ((b->end - b->start) < segmax)) {
+ /*
+ * Too small for us to mess with so we
+ * pretend its on the board.
+ */
+ return (1);
+ }
goto nxt_blk;
}
/* Jonathans Rule 3 */
@@ -194,6 +213,16 @@
* board |---|
* sack |---|
*/
+ if ((b->end != snd_max) &&
+ (sf->sf_blks[i].end != snd_max) &&
+ (span_cnt < 2) &&
+ ((b->end - b->start) < segmax)) {
+ /*
+ * Too small for us to mess with so we
+ * pretend its on the board.
+ */
+ return (1);
+ }
goto nxt_blk;
}
if (SEQ_LEQ(sf->sf_blks[i].start, b->start)) {
@@ -207,12 +236,36 @@
* sack |--------------|
*
* up with this one (we have part of it).
+ *
* 1) Update the board block to the new end
* and
* 2) Update the start of this block to my end.
+ *
+ * We only do this if the new piece is large enough.
*/
+ if (((b->end != snd_max) || (sf->sf_blks[i].end == snd_max)) &&
+ (span_cnt == 0) &&
+ ((b->end - sf->sf_blks[i].end) < segmax)) {
+ /*
+ * Too small for us to mess with so we
+ * pretend its on the board.
+ */
+ return (1);
+ }
b->start = sf->sf_blks[i].end;
sf->sf_blks[i].end = b->end;
+ if (span_cnt == 0) {
+ span_start = sf->sf_blks[i].start;
+ span_end = sf->sf_blks[i].end;
+ } else {
+ if (SEQ_LT(span_start, sf->sf_blks[i].start)) {
+ span_start = sf->sf_blks[i].start;
+ }
+ if (SEQ_GT(span_end, sf->sf_blks[i].end)) {
+ span_end = sf->sf_blks[i].end;
+ }
+ }
+ span_cnt++;
goto nxt_blk;
}
if (SEQ_GEQ(sf->sf_blks[i].end, b->end)) {
@@ -224,12 +277,36 @@
* <or>
* board |----|
* sack |----------|
+ *
* 1) Update the board block to the new start
* and
* 2) Update the start of this block to my end.
+ *
+ * We only do this if the new piece is large enough.
*/
+ if (((b->end != snd_max) || (sf->sf_blks[i].end == snd_max)) &&
+ (span_cnt == 0) &&
+ ((sf->sf_blks[i].start - b->start) < segmax)) {
+ /*
+ * Too small for us to mess with so we
+ * pretend its on the board.
+ */
+ return (1);
+ }
b->end = sf->sf_blks[i].start;
sf->sf_blks[i].start = b->start;
+ if (span_cnt == 0) {
+ span_start = sf->sf_blks[i].start;
+ span_end = sf->sf_blks[i].end;
+ } else {
+ if (SEQ_LT(span_start, sf->sf_blks[i].start)) {
+ span_start = sf->sf_blks[i].start;
+ }
+ if (SEQ_GT(span_end, sf->sf_blks[i].end)) {
+ span_end = sf->sf_blks[i].end;
+ }
+ }
+ span_cnt++;
goto nxt_blk;
}
}
@@ -238,46 +315,23 @@
i %= SACK_FILTER_BLOCKS;
}
/* Did we totally consume it in pieces? */
- if (b->start != b->end)
- return(0);
- else
- return(1);
-}
-
-static int32_t
-sack_filter_old(struct sack_filter *sf, struct sackblk *in, int numblks)
-{
- int32_t num, i;
- struct sackblk blkboard[TCP_MAX_SACK];
- /*
- * An old sack has arrived. It may contain data
- * we do not have. We might not have it since
- * we could have had a lost ack <or> we might have the
- * entire thing on our current board. We want to prune
- * off anything we have. With this function though we
- * won't add to the board.
- */
- for( i = 0, num = 0; i<numblks; i++ ) {
- if (is_sack_on_board(sf, &in[i])) {
-#ifndef _KERNEL
- cnt_skipped_oldsack++;
-#endif
- continue;
+ if (b->start != b->end) {
+ if ((b->end != snd_max) &&
+ ((b->end - b->start) < segmax) &&
+ ((span_end - span_start) < segmax)) {
+ /*
+ * Too small for us to mess with so we
+ * pretend its on the board.
+ */
+ return (1);
}
- /* Did not find it (or found only
- * a piece of it). Copy it to
- * our outgoing board.
+ return(0);
+ } else {
+ /*
+ * It was all consumed by the board.
*/
- memcpy(&blkboard[num], &in[i], sizeof(struct sackblk));
-#ifndef _KERNEL
- cnt_used_oldsack++;
-#endif
- num++;
- }
- if (num) {
- memcpy(in, blkboard, (num * sizeof(struct sackblk)));
+ return(1);
}
- return (num);
}
/*
@@ -303,54 +357,61 @@
}
static int32_t
-sack_filter_new(struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack)
+sack_filter_run(struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack, int32_t segmax, uint32_t snd_max)
{
struct sackblk blkboard[TCP_MAX_SACK];
- int32_t num, i;
+ int32_t num, i, room, at;
/*
* First lets trim the old and possibly
* throw any away we have.
*/
for(i=0, num=0; i<numblks; i++) {
- if (is_sack_on_board(sf, &in[i]))
+ if (is_sack_on_board(sf, &in[i], segmax, snd_max))
continue;
memcpy(&blkboard[num], &in[i], sizeof(struct sackblk));
num++;
}
- if (num == 0)
+ if (num == 0) {
return(num);
+ }
+
+ /*
+ * Calculate the space we have in the filter table.
+ */
+ room = SACK_FILTER_BLOCKS - sf->sf_used;
- /* Now what we are left with is either
- * completely merged on to the board
- * from the above steps, or is new
- * and need to be added to the board
- * with the last one updated to current.
+ /*
+ * Now lets walk through our filtered blkboard (the previous loop
+ * trimmed off anything on the board we already have so anything
+ * in blkboard is unique and not seen before) and see if we
+ * can just add it on to an existing entry, if so copy it out
+ * and adjust our board.
*
- * First copy it out, we want to return that
- * to our caller for processing.
+ * Otherwise if there is room we copy it back out and
+ * place a new entry on our board.
*/
- memcpy(in, blkboard, (num * sizeof(struct sackblk)));
- numblks = num;
- /* Now go through and add to our board as needed */
- for(i=(num-1); i>=0; i--) {
- if (is_sack_on_board(sf, &blkboard[i])) {
+ for(i=0, at=0; i<num; i++) {
+ if (is_sack_on_board(sf, &blkboard[i], segmax, snd_max)) {
+ memcpy(&in[at], &blkboard[i], sizeof(struct sackblk));
+ at++;
+ continue;
+ }
+ /* First lets see if this guy can be "fit" to an existing block */
+ if (room == 0) {
+ /* Can't copy it out there is no room */
continue;
}
- /* Add this guy its not listed */
+ /* Copy it out to the outbound */
+ memcpy(&in[at], &blkboard[i], sizeof(struct sackblk));
+ at++;
+ /* now lets add it to our sack-board */
sf->sf_cur++;
sf->sf_cur %= SACK_FILTER_BLOCKS;
if ((sack_blk_used(sf, sf->sf_cur)) &&
(sf->sf_used < SACK_FILTER_BLOCKS)) {
sack_move_to_empty(sf, sf->sf_cur);
}
-#ifndef _KERNEL
- if (sack_blk_used(sf, sf->sf_cur)) {
- over_written++;
- if (sf->sf_used < SACK_FILTER_BLOCKS)
- empty_avail++;
- }
-#endif
- memcpy(&sf->sf_blks[sf->sf_cur], &in[i], sizeof(struct sackblk));
+ memcpy(&sf->sf_blks[sf->sf_cur], &blkboard[i], sizeof(struct sackblk));
if (sack_blk_used(sf, sf->sf_cur) == 0) {
sf->sf_used++;
#ifndef _KERNEL
@@ -360,7 +421,26 @@
sf->sf_bits = sack_blk_set(sf, sf->sf_cur);
}
}
- return(numblks);
+ return(at);
+}
+
+/*
+ * Collapse entry src into entry into
+ * and free up the src entry afterwards.
+ */
+static void
+sack_collapse(struct sack_filter *sf, int32_t src, int32_t into)
+{
+ if (SEQ_LT(sf->sf_blks[src].start, sf->sf_blks[into].start)) {
+ /* src has a lower starting point */
+ sf->sf_blks[into].start = sf->sf_blks[src].start;
+ }
+ if (SEQ_GT(sf->sf_blks[src].end, sf->sf_blks[into].end)) {
+ /* src has a higher ending point */
+ sf->sf_blks[into].end = sf->sf_blks[src].end;
+ }
+ sf->sf_bits = sack_blk_clr(sf, src);
+ sf->sf_used--;
}
/*
@@ -415,25 +495,6 @@
return (-1);
}
-/*
- * Collapse entry src into entry into
- * and free up the src entry afterwards.
- */
-static void
-sack_collapse(struct sack_filter *sf, int32_t src, int32_t into)
-{
- if (SEQ_LT(sf->sf_blks[src].start, sf->sf_blks[into].start)) {
- /* src has a lower starting point */
- sf->sf_blks[into].start = sf->sf_blks[src].start;
- }
- if (SEQ_GT(sf->sf_blks[src].end, sf->sf_blks[into].end)) {
- /* src has a higher ending point */
- sf->sf_blks[into].end = sf->sf_blks[src].end;
- }
- sf->sf_bits = sack_blk_clr(sf, src);
- sf->sf_used--;
-}
-
static void
sack_board_collapse(struct sack_filter *sf)
{
@@ -485,9 +546,12 @@
for(i=0; i<SACK_FILTER_BLOCKS; i++) {
if (sack_blk_used(sf, i)) {
- fprintf(out, "Entry:%d start:%u end:%u\n", i,
- sf->sf_blks[i].start,
- sf->sf_blks[i].end);
+ fprintf(out, "Entry:%d start:%u end:%u the block is %s\n",
+ i,
+ sf->sf_blks[i].start,
+ sf->sf_blks[i].end,
+ (sack_blk_used(sf, i) ? "USED" : "NOT-USED")
+ );
}
}
}
@@ -497,10 +561,11 @@
static
#endif
int
-sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks,
+sack_filter_blks(struct tcpcb *tp, struct sack_filter *sf, struct sackblk *in, int numblks,
tcp_seq th_ack)
{
int32_t i, ret;
+ int32_t segmax;
if (numblks > TCP_MAX_SACK) {
#ifdef _KERNEL
@@ -510,14 +575,9 @@
#endif
return(numblks);
}
-#ifndef _KERNEL
- if ((sf->sf_used > 1) && (no_collapse == 0))
- sack_board_collapse(sf);
-
-#else
if (sf->sf_used > 1)
sack_board_collapse(sf);
-#endif
+ segmax = (tp->t_maxseg - 12); /* Allow timestamps .. need to move to kernel get */
if ((sf->sf_used == 0) && numblks) {
/*
* We are brand new add the blocks in
@@ -527,7 +587,15 @@
int cnt_added = 0;
sf->sf_ack = th_ack;
- for(i=(numblks-1), sf->sf_cur=0; i >= 0; i--) {
+ for(i=0, sf->sf_cur=0; i<numblks; i++) {
+ if ((in[i].end != tp->snd_max) &&
+ ((in[i].end - in[i].start) < segmax)) {
+ /*
+ * We do not accept blocks smaller than an MSS minus all
+ * possible options space unless they end at snd_max.
+ */
+ continue;
+ }
memcpy(&sf->sf_blks[sf->sf_cur], &in[i], sizeof(struct sackblk));
sf->sf_bits = sack_blk_set(sf, sf->sf_cur);
sf->sf_cur++;
@@ -548,11 +616,9 @@
sack_filter_prune(sf, th_ack);
}
if (numblks) {
- if (SEQ_GEQ(th_ack, sf->sf_ack)) {
- ret = sack_filter_new(sf, in, numblks, th_ack);
- } else {
- ret = sack_filter_old(sf, in, numblks);
- }
+ ret = sack_filter_run(sf, in, numblks, th_ack, segmax, tp->snd_max);
+ if (sf->sf_used > 1)
+ sack_board_collapse(sf);
} else
ret = 0;
return (ret);
@@ -625,7 +691,8 @@
char buffer[512];
struct sackblk blks[TCP_MAX_SACK];
FILE *err;
- tcp_seq th_ack, snd_una, snd_max = 0;
+ tcp_seq th_ack;
+ struct tcpcb tp;
struct sack_filter sf;
int32_t numblks,i;
int snd_una_set=0;
@@ -638,10 +705,13 @@
in = stdin;
out = stdout;
- while ((i = getopt(argc, argv, "ndIi:o:?h")) != -1) {
+ memset(&tp, 0, sizeof(tp));
+ tp.t_maxseg = 1460;
+
+ while ((i = getopt(argc, argv, "dIi:o:?hS:")) != -1) {
switch (i) {
- case 'n':
- no_collapse = 1;
+ case 'S':
+ tp.t_maxseg = strtol(optarg, NULL, 0);
break;
case 'd':
detailed_dump = 1;
@@ -666,7 +736,7 @@
default:
case '?':
case 'h':
- fprintf(stderr, "Use %s [ -i infile -o outfile -I]\n", argv[0]);
+ fprintf(stderr, "Use %s [ -i infile -o outfile -I -S maxseg -n -d ]\n", argv[0]);
return(0);
break;
};
@@ -679,28 +749,28 @@
while (fgets(buffer, sizeof(buffer), in) != NULL) {
sprintf(line_buf[line_buf_at], "%s", buffer);
line_buf_at++;
- if (strncmp(buffer, "QUIT", 4) == 0) {
+ if (strncmp(buffer, "quit", 4) == 0) {
break;
- } else if (strncmp(buffer, "DUMP", 4) == 0) {
+ } else if (strncmp(buffer, "dump", 4) == 0) {
sack_filter_dump(out, &sf);
- } else if (strncmp(buffer, "MAX:", 4) == 0) {
- snd_max = strtoul(&buffer[4], NULL, 0);
- } else if (strncmp(buffer, "COMMIT", 6) == 0) {
+ } else if (strncmp(buffer, "max:", 4) == 0) {
+ tp.snd_max = strtoul(&buffer[4], NULL, 0);
+ } else if (strncmp(buffer, "commit", 6) == 0) {
int nn, ii;
if (numblks) {
uint32_t szof, tot_chg;
+ printf("Dumping line buffer (lines:%d)\n", line_buf_at);
for(ii=0; ii<line_buf_at; ii++) {
fprintf(out, "%s", line_buf[ii]);
}
- fprintf(out, "------------------------------------\n");
- nn = sack_filter_blks(&sf, blks, numblks, th_ack);
+ fprintf(out, "------------------------------------ call sfb() nb:%d\n", numblks);
+ nn = sack_filter_blks(&tp, &sf, blks, numblks, th_ack);
saved += numblks - nn;
tot_sack_blks += numblks;
- fprintf(out, "ACK:%u\n", sf.sf_ack);
for(ii=0, tot_chg=0; ii<nn; ii++) {
szof = blks[ii].end - blks[ii].start;
tot_chg += szof;
- fprintf(out, "SACK:%u:%u [%u]\n",
+ fprintf(out, "sack:%u:%u [%u]\n",
blks[ii].start,
blks[ii].end, szof);
}
@@ -715,7 +785,7 @@
memset(line_buf, 0, sizeof(line_buf));
line_buf_at=0;
numblks = 0;
- } else if (strncmp(buffer, "CHG:", 4) == 0) {
+ } else if (strncmp(buffer, "chg:", 4) == 0) {
sack_chg = strtoul(&buffer[4], NULL, 0);
if ((sack_chg != chg_remembered) &&
(sack_chg > chg_remembered)){
@@ -724,20 +794,21 @@
);
}
sack_chg = chg_remembered = 0;
- } else if (strncmp(buffer, "RXT", 3) == 0) {
- sack_filter_clear(&sf, snd_una);
- } else if (strncmp(buffer, "ACK:", 4) == 0) {
+ } else if (strncmp(buffer, "rxt", 3) == 0) {
+ sack_filter_clear(&sf, tp.snd_una);
+ } else if (strncmp(buffer, "ack:", 4) == 0) {
th_ack = strtoul(&buffer[4], NULL, 0);
if (snd_una_set == 0) {
- snd_una = th_ack;
+ tp.snd_una = th_ack;
snd_una_set = 1;
- } else if (SEQ_GT(th_ack, snd_una)) {
- snd_una = th_ack;
+ } else if (SEQ_GT(th_ack, tp.snd_una)) {
+ tp.snd_una = th_ack;
}
- } else if (strncmp(buffer, "EXIT", 4) == 0) {
- sack_filter_clear(&sf, snd_una);
+ sack_filter_blks(&tp, &sf, NULL, 0, th_ack);
+ } else if (strncmp(buffer, "exit", 4) == 0) {
+ sack_filter_clear(&sf, tp.snd_una);
sack_chg = chg_remembered = 0;
- } else if (strncmp(buffer, "SACK:", 5) == 0) {
+ } else if (strncmp(buffer, "sack:", 5) == 0) {
char *end=NULL;
uint32_t start;
uint32_t endv;
@@ -749,8 +820,8 @@
fprintf(out, "--Sack invalid skip 0 start:%u : ??\n", start);
continue;
}
- if (SEQ_GT(endv, snd_max))
- snd_max = endv;
+ if (SEQ_GT(endv, tp.snd_max))
+ tp.snd_max = endv;
if (SEQ_LT(endv, start)) {
fprintf(out, "--Sack invalid skip 1 endv:%u < start:%u\n", endv, start);
continue;
@@ -762,7 +833,7 @@
blks[numblks].start = start;
blks[numblks].end = endv;
numblks++;
- } else if (strncmp(buffer, "REJ:n:n", 4) == 0) {
+ } else if (strncmp(buffer, "rej:n:n", 4) == 0) {
struct sackblk in;
char *end=NULL;
@@ -772,18 +843,63 @@
sack_filter_reject(&sf, &in);
} else
fprintf(out, "Invalid input END:A:B\n");
- } else if (strncmp(buffer, "HELP", 4) == 0) {
+ } else if (strncmp(buffer, "save", 4) == 0) {
+ FILE *io;
+
+ io = fopen("sack_setup.bin", "w+");
+ if (io != NULL) {
+ if (fwrite(&sf, sizeof(sf), 1, io) != 1) {
+ printf("Failed to write out sf data\n");
+ unlink("sack_setup.bin");
+ goto outwrite;
+ }
+ if (fwrite(&tp, sizeof(tp), 1, io) != 1) {
+ printf("Failed to write out tp data\n");
+ unlink("sack_setup.bin");
+ } else
+ printf("Save completed\n");
+ outwrite:
+ fclose(io);
+ } else {
+ printf("failed to open sack_setup.bin for writting .. sorry\n");
+ }
+ } else if (strncmp(buffer, "restore", 7) == 0) {
+ FILE *io;
+
+ io = fopen("sack_setup.bin", "r");
+ if (io != NULL) {
+ if (fread(&sf, sizeof(sf), 1, io) != 1) {
+ printf("Failed to read out sf data\n");
+ goto outread;
+ }
+ if (fread(&tp, sizeof(tp), 1, io) != 1) {
+ printf("Failed to read out tp data\n");
+ } else {
+ printf("Restore completed\n");
+ sack_filter_dump(out, &sf);
+ }
+ outread:
+ fclose(io);
+ } else {
+ printf("can't open sack_setup.bin -- sorry no load\n");
+ }
+
+ } else if (strncmp(buffer, "help", 4) == 0) {
+help:
fprintf(out, "You can input:\n");
- fprintf(out, "SACK:S:E -- to define a sack block\n");
- fprintf(out, "RXT -- to clear the filter without changing the remembered\n");
- fprintf(out, "EXIT -- To clear the sack filter and start all fresh\n");
- fprintf(out, "ACK:N -- To advance the cum-ack to N\n");
- fprintf(out, "MAX:N -- To set send-max to N\n");
- fprintf(out, "COMMIT -- To apply the sack you built to the filter and dump the filter\n");
- fprintf(out, "DUMP -- To display the current contents of the sack filter\n");
- fprintf(out, "QUIT -- To exit this program\n");
+ fprintf(out, "sack:S:E -- to define a sack block\n");
+ fprintf(out, "rxt -- to clear the filter without changing the remembered\n");
+ fprintf(out, "save -- save current state to sack_setup.bin\n");
+ fprintf(out, "restore -- restore state from sack_setup.bin\n");
+ fprintf(out, "exit -- To clear the sack filter and start all fresh\n");
+ fprintf(out, "ack:N -- To advance the cum-ack to N\n");
+ fprintf(out, "max:N -- To set send-max to N\n");
+ fprintf(out, "commit -- To apply the sack you built to the filter and dump the filter\n");
+ fprintf(out, "dump -- To display the current contents of the sack filter\n");
+ fprintf(out, "quit -- To exit this program\n");
} else {
fprintf(out, "Command %s unknown\n", buffer);
+ goto help;
}
memset(buffer, 0, sizeof(buffer));
}
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -1793,6 +1793,7 @@
*
* NOTE: If m != NULL, then th must point to *inside* the mbuf.
*/
+
void
tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
tcp_seq ack, tcp_seq seq, uint16_t flags)
@@ -3320,8 +3321,19 @@
so = inp->inp_socket;
SOCKBUF_LOCK(&so->so_snd);
/* If the mss is larger than the socket buffer, decrease the mss. */
- if (so->so_snd.sb_hiwat < tp->t_maxseg)
+ if (so->so_snd.sb_hiwat < tp->t_maxseg) {
tp->t_maxseg = so->so_snd.sb_hiwat;
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not process incoming
+ * SACK's since we are subject to attack in such a
+ * case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
+ }
SOCKBUF_UNLOCK(&so->so_snd);
TCPSTAT_INC(tcps_mturesent);
@@ -3454,8 +3466,19 @@
opt = inp->in6p_outputopts;
if (opt != NULL && opt->ip6po_minmtu == IP6PO_MINMTU_ALL &&
- tp->t_maxseg > TCP6_MSS)
+ tp->t_maxseg > TCP6_MSS) {
tp->t_maxseg = TCP6_MSS;
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not process incoming
+ * SACK's since we are subject to attack in such a
+ * case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
+ }
}
}
#endif /* INET6 */
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -756,6 +756,16 @@
tp->t_flags2 |= TF2_PLPMTU_PMTUD;
tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
tp->t_maxseg = tp->t_pmtud_saved_maxseg;
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not
+ * process incoming SACK's since we are
+ * subject to attack in such a case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
TCPSTAT_INC(tcps_pmtud_blackhole_failed);
/*
* Reset the slow-start flight size as it
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -2203,9 +2203,19 @@
INP_WLOCK_RECHECK(inp);
if (optval > 0 && optval <= tp->t_maxseg &&
- optval + 40 >= V_tcp_minmss)
+ optval + 40 >= V_tcp_minmss) {
tp->t_maxseg = optval;
- else
+ if (tp->t_maxseg < V_tcp_mssdflt) {
+ /*
+ * The MSS is so small we should not process incoming
+ * SACK's since we are subject to attack in such a
+ * case.
+ */
+ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
+ } else {
+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
+ }
+ } else
error = EINVAL;
goto unlock_and_done;
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -846,6 +846,7 @@
#define TF2_MBUF_QUEUE_READY 0x00020000 /* Inputs can be queued */
#define TF2_DONT_SACK_QUEUE 0x00040000 /* Don't wake on sack */
#define TF2_CANNOT_DO_ECN 0x00080000 /* The stack does not do ECN */
+#define TF2_PROC_SACK_PROHIBIT 0x00100000 /* Due to small MSS size do not process sack's */
/*
* Structure to hold TCP options that are only used during segment
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 18, 1:46 AM (1 h, 9 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15475269
Default Alt Text
D44903.id137896.diff (33 KB)
Attached To
Mode
D44903: TCP can be subject to Sack Attacks lets fix this issue.
Attached
Detach File
Event Timeline
Log In to Comment