D44903.id137896.diff
No OneTemporary
Actions

Size

33 KB

Referenced Files

None

Subscribers

None

D44903.id137896.diff
View Options

	Index: sys/netinet/tcp_input.c
	===================================================================
	--- sys/netinet/tcp_input.c
	+++ sys/netinet/tcp_input.c
	@@ -1615,7 +1615,14 @@
	tcp_dooptions(&to, (u_char *)(th + 1),
	(th->th_off << 2) - sizeof(struct tcphdr),
	(thflags & TH_SYN) ? TO_SYN : 0);
	-
	+ if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) {
	+ /*
	+ * We don't look at sack's from the
	+ * peer because the MSS is too small which
	+ * can subject us to an attack.
	+ */
	+ to.to_flags &= ~TOF_SACK;
	+ }
	#if defined(IPSEC_SUPPORT) \|\| defined(TCP_SIGNATURE)
	if ((tp->t_flags & TF_SIGNATURE) != 0 &&
	(to.to_flags & TOF_SIGNATURE) == 0) {
	@@ -3883,6 +3890,17 @@
	mss = max(mss, 64);

	tp->t_maxseg = mss;
	+ if (tp->t_maxseg < V_tcp_mssdflt) {
	+ /*
	+ * The MSS is so small we should not process incoming
	+ * SACK's since we are subject to attack in such a
	+ * case.
	+ */
	+ tp->t_flags2 \|= TF2_PROC_SACK_PROHIBIT;
	+ } else {
	+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
	+ }
	+
	}

	void
	@@ -3934,6 +3952,16 @@
	* XXXGL: shouldn't we reserve space for IP/IPv6 options?
	*/
	tp->t_maxseg = max(mss, 64);
	+ if (tp->t_maxseg < V_tcp_mssdflt) {
	+ /*
	+ * The MSS is so small we should not process incoming
	+ * SACK's since we are subject to attack in such a
	+ * case.
	+ */
	+ tp->t_flags2 \|= TF2_PROC_SACK_PROHIBIT;
	+ } else {
	+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
	+ }

	SOCKBUF_LOCK(&so->so_rcv);
	if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe)
	Index: sys/netinet/tcp_stacks/bbr.c
	===================================================================
	--- sys/netinet/tcp_stacks/bbr.c
	+++ sys/netinet/tcp_stacks/bbr.c
	@@ -5134,6 +5134,16 @@
	tp->t_flags2 \|= TF2_PLPMTU_PMTUD;
	tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
	tp->t_maxseg = tp->t_pmtud_saved_maxseg;
	+ if (tp->t_maxseg < V_tcp_mssdflt) {
	+ /*
	+ * The MSS is so small we should not
	+ * process incoming SACK's since we are
	+ * subject to attack in such a case.
	+ */
	+ tp->t_flags2 \|= TF2_PROC_SACK_PROHIBIT;
	+ } else {
	+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
	+ }
	KMOD_TCPSTAT_INC(tcps_pmtud_blackhole_failed);
	}
	}
	@@ -7556,7 +7566,7 @@
	* Sort the SACK blocks so we can update the rack scoreboard with
	* just one pass.
	*/
	- new_sb = sack_filter_blks(&bbr->r_ctl.bbr_sf, sack_blocks,
	+ new_sb = sack_filter_blks(tp, &bbr->r_ctl.bbr_sf, sack_blocks,
	num_sack_blks, th->th_ack);
	ctf_log_sack_filter(bbr->rc_tp, new_sb, sack_blocks);
	BBR_STAT_ADD(bbr_sack_blocks, num_sack_blks);
	@@ -11323,7 +11333,14 @@
	tcp_dooptions(&to, (u_char *)(th + 1),
	(th->th_off << 2) - sizeof(struct tcphdr),
	(thflags & TH_SYN) ? TO_SYN : 0);
	-
	+ if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) {
	+ /*
	+ * We don't look at sack's from the
	+ * peer because the MSS is too small which
	+ * can subject us to an attack.
	+ */
	+ to.to_flags &= ~TOF_SACK;
	+ }
	/*
	* If timestamps were negotiated during SYN/ACK and a
	* segment without a timestamp is received, silently drop
	@@ -13773,6 +13790,16 @@
	if (old_maxseg <= tp->t_maxseg) {
	/* Huh it did not shrink? */
	tp->t_maxseg = old_maxseg - 40;
	+ if (tp->t_maxseg < V_tcp_mssdflt) {
	+ /*
	+ * The MSS is so small we should not
	+ * process incoming SACK's since we are
	+ * subject to attack in such a case.
	+ */
	+ tp->t_flags2 \|= TF2_PROC_SACK_PROHIBIT;
	+ } else {
	+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
	+ }
	bbr_log_msgsize_fail(bbr, tp, len, maxseg, mtu, 0, tso, cts);
	}
	/*
	Index: sys/netinet/tcp_stacks/rack.c
	===================================================================
	--- sys/netinet/tcp_stacks/rack.c
	+++ sys/netinet/tcp_stacks/rack.c
	@@ -8583,6 +8583,16 @@
	tp->t_flags2 \|= TF2_PLPMTU_PMTUD;
	tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
	tp->t_maxseg = tp->t_pmtud_saved_maxseg;
	+ if (tp->t_maxseg < V_tcp_mssdflt) {
	+ /*
	+ * The MSS is so small we should not
	+ * process incoming SACK's since we are
	+ * subject to attack in such a case.
	+ */
	+ tp->t_flags2 \|= TF2_PROC_SACK_PROHIBIT;
	+ } else {
	+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
	+ }
	KMOD_TCPSTAT_INC(tcps_pmtud_blackhole_failed);
	}
	}
	@@ -11197,7 +11207,7 @@
	* If we have some sack blocks in the filter
	* lets prune them out by calling sfb with no blocks.
	*/
	- sack_filter_blks(&rack->r_ctl.rack_sf, NULL, 0, th_ack);
	+ sack_filter_blks(tp, &rack->r_ctl.rack_sf, NULL, 0, th_ack);
	}
	if (SEQ_GT(th_ack, tp->snd_una)) {
	/* Clear any app ack remembered settings */
	@@ -12052,7 +12062,7 @@
	* just one pass.
	*/
	o_cnt = num_sack_blks;
	- num_sack_blks = sack_filter_blks(&rack->r_ctl.rack_sf, sack_blocks,
	+ num_sack_blks = sack_filter_blks(tp, &rack->r_ctl.rack_sf, sack_blocks,
	num_sack_blks, th->th_ack);
	ctf_log_sack_filter(rack->rc_tp, num_sack_blks, sack_blocks);
	if (sacks_seen != NULL)
	@@ -17933,7 +17943,14 @@
	__func__));
	KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
	__func__));
	-
	+ if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) {
	+ /*
	+ * We don't look at sack's from the
	+ * peer because the MSS is too small which
	+ * can subject us to an attack.
	+ */
	+ to.to_flags &= ~TOF_SACK;
	+ }
	if ((tp->t_state >= TCPS_FIN_WAIT_1) &&
	(tp->t_flags & TF_GPUTINPROG)) {
	/*
	Index: sys/netinet/tcp_stacks/sack_filter.h
	===================================================================
	--- sys/netinet/tcp_stacks/sack_filter.h
	+++ sys/netinet/tcp_stacks/sack_filter.h
	@@ -1,4 +1,4 @@
	-#ifndef __sack_filter_h__
	+`#ifndef __sack_filter_h__
	#define __sack_filter_h__
	/*-
	* Copyright (c) 2017-9 Netflix, Inc.
	@@ -25,19 +25,84 @@
	* SUCH DAMAGE.
	*/

	-/*
	- * Seven entry's is carefully choosen to
	- * fit in one cache line. We can easily
	- * change this to 15 (but it gets very
	- * little extra filtering). To change it
	- * to be larger than 15 would require either
	- * sf_bits becoming a uint32_t and then you
	- * could go to 31.. or change it to a full
	- * bitstring.. It is really doubtful you
	- * will get much benefit beyond 7, in testing
	- * there was a small amount but very very small.
	+/**
	+ *
	+ * The Sack filter is designed to do two functions, first it tries to reduce
	+ * the processing of sacks. Consider that often times you get something like
	+ *
	+ * ack 1 (sack 100:200)
	+ * ack 1 (sack 100:300)
	+ * ack 1 (sack 100:400)
	+ *
	+ * You really want to process the 100:200 and then on the next sack process
	+ * only 200:300 (the new data) and then finally on the third 300:400. The filter
	+ * removes from your processing routines the already processed sack information so
	+ * that after the filter completes you only have "new" sacks that you have not
	+ * processed. This saves computation time so you do not need to worry about
	+ * previously processed sack information.
	+ *
	+ * The second thing that the sack filter does is help protect against malicious
	+ * attackers that are trying to attack any linked lists (or other data structures)
	+ * that are used in sack processing. Consider an attacker sending in sacks for
	+ * every other byte of data outstanding. This could in theory drastically split
	+ * up any scoreboard you are maintaining and make you search through a very large
	+ * linked list (or other structure) eating up CPU. If you split far enough and
	+ * fracture your data structure enough you could potentially be crippled by a malicious
	+ * peer. How the filter works here is it filters out sacks that are less than an MSS.
	+ * We do this because generally a packet (aka MSS) should be kept whole. The only place
	+ * we allow a smaller SACK is when the SACK touches the end of our socket buffer. This allows
	+ * TLP to still work properly and yet protects us from splitting. The filter also only allows
	+ * a set number of splits (defined in SACK_FILTER_BLOCKS). If more than that many sacks locations
	+ * are being sent we discard additional ones until the earlier holes are filled up. The maximum
	+ * the current filter can be is 15, which we have moved to since we want to be as generous as
	+ * possible with allowing for loss. However, in previous testing of the filter it was found
	+ * that there was very little benefit from moving from 7 to 15 sack points. Though at
	+ * that previous set of tests, we would just discard earlier information in the filter. Now
	+ * that we do not do that i.e. discard information and instead drop sack data we have raised
	+ * the value to the max i.e. 15. If you want to expand beyond 15 one would have to either increase
	+ * the size of the sf_bits to a uint32_t which could then get you a maximum of 31 splits or
	+ * move to a true bitstring. If this is done however it further increases your risk to
	+ * sack attacks, the bigger the number of splits (filter blocks) that are allowed
	+ * the larger your processing arrays will grow as well as the filter.
	+ *
	+ * Note that this protection does not prevent an attacker from asking for a 20 byte
	+ * MSS, that protection must be done elsewhere during the negotiation of the connection
	+ * and is done now by just ignoring sack's from connections with too small of MSS which
	+ * prevents sack from working and thus makes the connection less efficient but protects
	+ * the system from harm.
	+ *
	+ * We may actually want to consider dropping the size of the array back to 7 to further
	+ * protect the system which would be a more cautious approach.
	+ *
	+ * TCP Developer information:
	+ *
	+ * To use the sack filter it's actually pretty simple. All you do is the normal sorting
	+ * and sanity checks of your sacks but then after that you call out to sack_filter_blks()
	+ * passing in the tcpcb, the sack-filter you are using (memory you have allocated) the
	+ * pointer to the sackblk array and how many sorted valid blocks there are as well
	+ * as what the new th_ack point is. The filter will return to you the number of
	+ * blocks left after filtering. It will reshape the blocks based on the previous
	+ * sacks you have received and processed. If sack_filter_blks() returns 0 then no
	+ * new sack data is present to be processed.
	+ *
	+ * Whenever you reach the point of snd_una == snd_max, you should call sack_filter_clear with
	+ * the snd_una point. You also need to call this if you invalidate your sack array for any
	+ * reason (such as RTO's or MTU changes or some other thing that makes you think all
	+ * data is now un-acknowledged). You can also pass in sack_filter_blks(tp, sf, NULL, 0, th_ack) to
	+ * advance the cum-ack point. You can use sack_filter_blks_used(sf) to determine if you have filter blocks as
	+ * well. So putting these two together, anytime the cum-ack moves forward you probably want to
	+ * do:
	+ * if (sack_filter_blks_used(sf))
	+ * sack_filter_blks(tp, sf, NULL, 0, th_ack);
	+ *
	+ * If for some reason you have run the sack-filter and something goes wrong (you can't allocate space
	+ * for example to split your sack-array), you can "undo" the data within the sack filter by calling
	+ * sack_filter_reject(sf, in) passing in the list of blocks to be "removed" from the sack-filter.
	+ * You can see an example of this use in bbr.c though rack.c has never found it needed.
	+ *
	*/
	-#define SACK_FILTER_BLOCKS 7
	+
	+#define SACK_FILTER_BLOCKS 15

	struct sack_filter {
	tcp_seq sf_ack;
	@@ -48,7 +113,7 @@
	};
	#ifdef _KERNEL
	void sack_filter_clear(struct sack_filter *sf, tcp_seq seq);
	-int sack_filter_blks(struct sack_filter sf, struct sackblk in, int numblks,
	+int sack_filter_blks(struct tcpcb tp, struct sack_filter sf, struct sackblk *in, int numblks,
	tcp_seq th_ack);
	void sack_filter_reject(struct sack_filter sf, struct sackblk in);
	static inline uint8_t sack_filter_blks_used(struct sack_filter *sf)
	Index: sys/netinet/tcp_stacks/sack_filter.c
	===================================================================
	--- sys/netinet/tcp_stacks/sack_filter.c
	+++ sys/netinet/tcp_stacks/sack_filter.c
	@@ -35,7 +35,13 @@
	#include <sys/sockopt.h>
	#endif
	#include <netinet/in.h>
	+#ifdef _KERNEL
	#include <netinet/in_pcb.h>
	+#else
	+struct inpcb {
	+ uint32_t stuff;
	+};
	+#endif
	#include <netinet/tcp.h>
	#include <netinet/tcp_var.h>
	#include <netinet/tcp_seq.h>
	@@ -86,9 +92,9 @@
	int highest_used=0;
	int over_written=0;
	int empty_avail=0;
	-int no_collapse = 0;
	FILE *out = NULL;
	FILE *in = NULL;
	+
	#endif

	#define sack_blk_used(sf, i) ((1 << i) & sf->sf_bits)
	@@ -118,7 +124,7 @@
	/* start with the oldest */
	for (i = 0; i < SACK_FILTER_BLOCKS; i++) {
	if (sack_blk_used(sf, i)) {
	- if (SEQ_GT(th_ack, sf->sf_blks[i].end)) {
	+ if (SEQ_GEQ(th_ack, sf->sf_blks[i].end)) {
	/* This block is consumed */
	sf->sf_bits = sack_blk_clr(sf, i);
	sf->sf_used--;
	@@ -143,10 +149,14 @@
	* if part of it is on the board.
	*/
	static int32_t
	-is_sack_on_board(struct sack_filter sf, struct sackblk b)
	+is_sack_on_board(struct sack_filter sf, struct sackblk b, int32_t segmax, uint32_t snd_max)
	{
	int32_t i, cnt;
	+ int span_cnt = 0;
	+ uint32_t span_start, span_end;

	+ span_start = b->start;
	+ span_end = b->end;
	for (i = sf->sf_cur, cnt=0; cnt < SACK_FILTER_BLOCKS; cnt++) {
	if (sack_blk_used(sf, i)) {
	if (SEQ_LT(b->start, sf->sf_ack)) {
	@@ -184,6 +194,15 @@
	* board \|---\|
	* sack \|---\|
	*/
	+ if ((b->end != snd_max) &&
	+ (span_cnt < 2) &&
	+ ((b->end - b->start) < segmax)) {
	+ /*
	+ * Too small for us to mess with so we
	+ * pretend its on the board.
	+ */
	+ return (1);
	+ }
	goto nxt_blk;
	}
	/* Jonathans Rule 3 */
	@@ -194,6 +213,16 @@
	* board \|---\|
	* sack \|---\|
	*/
	+ if ((b->end != snd_max) &&
	+ (sf->sf_blks[i].end != snd_max) &&
	+ (span_cnt < 2) &&
	+ ((b->end - b->start) < segmax)) {
	+ /*
	+ * Too small for us to mess with so we
	+ * pretend its on the board.
	+ */
	+ return (1);
	+ }
	goto nxt_blk;
	}
	if (SEQ_LEQ(sf->sf_blks[i].start, b->start)) {
	@@ -207,12 +236,36 @@
	* sack \|--------------\|
	*
	* up with this one (we have part of it).
	+ *
	* 1) Update the board block to the new end
	* and
	* 2) Update the start of this block to my end.
	+ *
	+ * We only do this if the new piece is large enough.
	*/
	+ if (((b->end != snd_max) \|\| (sf->sf_blks[i].end == snd_max)) &&
	+ (span_cnt == 0) &&
	+ ((b->end - sf->sf_blks[i].end) < segmax)) {
	+ /*
	+ * Too small for us to mess with so we
	+ * pretend its on the board.
	+ */
	+ return (1);
	+ }
	b->start = sf->sf_blks[i].end;
	sf->sf_blks[i].end = b->end;
	+ if (span_cnt == 0) {
	+ span_start = sf->sf_blks[i].start;
	+ span_end = sf->sf_blks[i].end;
	+ } else {
	+ if (SEQ_LT(span_start, sf->sf_blks[i].start)) {
	+ span_start = sf->sf_blks[i].start;
	+ }
	+ if (SEQ_GT(span_end, sf->sf_blks[i].end)) {
	+ span_end = sf->sf_blks[i].end;
	+ }
	+ }
	+ span_cnt++;
	goto nxt_blk;
	}
	if (SEQ_GEQ(sf->sf_blks[i].end, b->end)) {
	@@ -224,12 +277,36 @@
	* <or>
	* board \|----\|
	* sack \|----------\|
	+ *
	* 1) Update the board block to the new start
	* and
	* 2) Update the start of this block to my end.
	+ *
	+ * We only do this if the new piece is large enough.
	*/
	+ if (((b->end != snd_max) \|\| (sf->sf_blks[i].end == snd_max)) &&
	+ (span_cnt == 0) &&
	+ ((sf->sf_blks[i].start - b->start) < segmax)) {
	+ /*
	+ * Too small for us to mess with so we
	+ * pretend its on the board.
	+ */
	+ return (1);
	+ }
	b->end = sf->sf_blks[i].start;
	sf->sf_blks[i].start = b->start;
	+ if (span_cnt == 0) {
	+ span_start = sf->sf_blks[i].start;
	+ span_end = sf->sf_blks[i].end;
	+ } else {
	+ if (SEQ_LT(span_start, sf->sf_blks[i].start)) {
	+ span_start = sf->sf_blks[i].start;
	+ }
	+ if (SEQ_GT(span_end, sf->sf_blks[i].end)) {
	+ span_end = sf->sf_blks[i].end;
	+ }
	+ }
	+ span_cnt++;
	goto nxt_blk;
	}
	}
	@@ -238,46 +315,23 @@
	i %= SACK_FILTER_BLOCKS;
	}
	/* Did we totally consume it in pieces? */
	- if (b->start != b->end)
	- return(0);
	- else
	- return(1);
	-}
	-
	-static int32_t
	-sack_filter_old(struct sack_filter sf, struct sackblk in, int numblks)
	-{
	- int32_t num, i;
	- struct sackblk blkboard[TCP_MAX_SACK];
	- /*
	- * An old sack has arrived. It may contain data
	- * we do not have. We might not have it since
	- * we could have had a lost ack <or> we might have the
	- * entire thing on our current board. We want to prune
	- * off anything we have. With this function though we
	- * won't add to the board.
	- */
	- for( i = 0, num = 0; i<numblks; i++ ) {
	- if (is_sack_on_board(sf, &in[i])) {
	-#ifndef _KERNEL
	- cnt_skipped_oldsack++;
	-#endif
	- continue;
	+ if (b->start != b->end) {
	+ if ((b->end != snd_max) &&
	+ ((b->end - b->start) < segmax) &&
	+ ((span_end - span_start) < segmax)) {
	+ /*
	+ * Too small for us to mess with so we
	+ * pretend its on the board.
	+ */
	+ return (1);
	}
	- /* Did not find it (or found only
	- * a piece of it). Copy it to
	- * our outgoing board.
	+ return(0);
	+ } else {
	+ /*
	+ * It was all consumed by the board.
	*/
	- memcpy(&blkboard[num], &in[i], sizeof(struct sackblk));
	-#ifndef _KERNEL
	- cnt_used_oldsack++;
	-#endif
	- num++;
	- }
	- if (num) {
	- memcpy(in, blkboard, (num * sizeof(struct sackblk)));
	+ return(1);
	}
	- return (num);
	}

	/*
	@@ -303,54 +357,61 @@
	}

	static int32_t
	-sack_filter_new(struct sack_filter sf, struct sackblk in, int numblks, tcp_seq th_ack)
	+sack_filter_run(struct sack_filter sf, struct sackblk in, int numblks, tcp_seq th_ack, int32_t segmax, uint32_t snd_max)
	{
	struct sackblk blkboard[TCP_MAX_SACK];
	- int32_t num, i;
	+ int32_t num, i, room, at;
	/*
	* First lets trim the old and possibly
	* throw any away we have.
	*/
	for(i=0, num=0; i<numblks; i++) {
	- if (is_sack_on_board(sf, &in[i]))
	+ if (is_sack_on_board(sf, &in[i], segmax, snd_max))
	continue;
	memcpy(&blkboard[num], &in[i], sizeof(struct sackblk));
	num++;
	}
	- if (num == 0)
	+ if (num == 0) {
	return(num);
	+ }
	+
	+ /*
	+ * Calculate the space we have in the filter table.
	+ */
	+ room = SACK_FILTER_BLOCKS - sf->sf_used;

	- /* Now what we are left with is either
	- * completely merged on to the board
	- * from the above steps, or is new
	- * and need to be added to the board
	- * with the last one updated to current.
	+ /*
	+ * Now lets walk through our filtered blkboard (the previous loop
	+ * trimmed off anything on the board we already have so anything
	+ * in blkboard is unique and not seen before) and see if we
	+ * can just add it on to an existing entry, if so copy it out
	+ * and adjust our board.
	*
	- * First copy it out, we want to return that
	- * to our caller for processing.
	+ * Otherwise if there is room we copy it back out and
	+ * place a new entry on our board.
	*/
	- memcpy(in, blkboard, (num * sizeof(struct sackblk)));
	- numblks = num;
	- /* Now go through and add to our board as needed */
	- for(i=(num-1); i>=0; i--) {
	- if (is_sack_on_board(sf, &blkboard[i])) {
	+ for(i=0, at=0; i<num; i++) {
	+ if (is_sack_on_board(sf, &blkboard[i], segmax, snd_max)) {
	+ memcpy(&in[at], &blkboard[i], sizeof(struct sackblk));
	+ at++;
	+ continue;
	+ }
	+ /* First lets see if this guy can be "fit" to an existing block */
	+ if (room == 0) {
	+ /* Can't copy it out there is no room */
	continue;
	}
	- /* Add this guy its not listed */
	+ /* Copy it out to the outbound */
	+ memcpy(&in[at], &blkboard[i], sizeof(struct sackblk));
	+ at++;
	+ /* now lets add it to our sack-board */
	sf->sf_cur++;
	sf->sf_cur %= SACK_FILTER_BLOCKS;
	if ((sack_blk_used(sf, sf->sf_cur)) &&
	(sf->sf_used < SACK_FILTER_BLOCKS)) {
	sack_move_to_empty(sf, sf->sf_cur);
	}
	-#ifndef _KERNEL
	- if (sack_blk_used(sf, sf->sf_cur)) {
	- over_written++;
	- if (sf->sf_used < SACK_FILTER_BLOCKS)
	- empty_avail++;
	- }
	-#endif
	- memcpy(&sf->sf_blks[sf->sf_cur], &in[i], sizeof(struct sackblk));
	+ memcpy(&sf->sf_blks[sf->sf_cur], &blkboard[i], sizeof(struct sackblk));
	if (sack_blk_used(sf, sf->sf_cur) == 0) {
	sf->sf_used++;
	#ifndef _KERNEL
	@@ -360,7 +421,26 @@
	sf->sf_bits = sack_blk_set(sf, sf->sf_cur);
	}
	}
	- return(numblks);
	+ return(at);
	+}
	+
	+/*
	+ * Collapse entry src into entry into
	+ * and free up the src entry afterwards.
	+ */
	+static void
	+sack_collapse(struct sack_filter *sf, int32_t src, int32_t into)
	+{
	+ if (SEQ_LT(sf->sf_blks[src].start, sf->sf_blks[into].start)) {
	+ /* src has a lower starting point */
	+ sf->sf_blks[into].start = sf->sf_blks[src].start;
	+ }
	+ if (SEQ_GT(sf->sf_blks[src].end, sf->sf_blks[into].end)) {
	+ /* src has a higher ending point */
	+ sf->sf_blks[into].end = sf->sf_blks[src].end;
	+ }
	+ sf->sf_bits = sack_blk_clr(sf, src);
	+ sf->sf_used--;
	}

	/*
	@@ -415,25 +495,6 @@
	return (-1);
	}

	-/*
	- * Collapse entry src into entry into
	- * and free up the src entry afterwards.
	- */
	-static void
	-sack_collapse(struct sack_filter *sf, int32_t src, int32_t into)
	-{
	- if (SEQ_LT(sf->sf_blks[src].start, sf->sf_blks[into].start)) {
	- /* src has a lower starting point */
	- sf->sf_blks[into].start = sf->sf_blks[src].start;
	- }
	- if (SEQ_GT(sf->sf_blks[src].end, sf->sf_blks[into].end)) {
	- /* src has a higher ending point */
	- sf->sf_blks[into].end = sf->sf_blks[src].end;
	- }
	- sf->sf_bits = sack_blk_clr(sf, src);
	- sf->sf_used--;
	-}
	-
	static void
	sack_board_collapse(struct sack_filter *sf)
	{
	@@ -485,9 +546,12 @@

	for(i=0; i<SACK_FILTER_BLOCKS; i++) {
	if (sack_blk_used(sf, i)) {
	- fprintf(out, "Entry:%d start:%u end:%u\n", i,
	- sf->sf_blks[i].start,
	- sf->sf_blks[i].end);
	+ fprintf(out, "Entry:%d start:%u end:%u the block is %s\n",
	+ i,
	+ sf->sf_blks[i].start,
	+ sf->sf_blks[i].end,
	+ (sack_blk_used(sf, i) ? "USED" : "NOT-USED")
	+ );
	}
	}
	}
	@@ -497,10 +561,11 @@
	static
	#endif
	int
	-sack_filter_blks(struct sack_filter sf, struct sackblk in, int numblks,
	+sack_filter_blks(struct tcpcb tp, struct sack_filter sf, struct sackblk *in, int numblks,
	tcp_seq th_ack)
	{
	int32_t i, ret;
	+ int32_t segmax;

	if (numblks > TCP_MAX_SACK) {
	#ifdef _KERNEL
	@@ -510,14 +575,9 @@
	#endif
	return(numblks);
	}
	-#ifndef _KERNEL
	- if ((sf->sf_used > 1) && (no_collapse == 0))
	- sack_board_collapse(sf);
	-
	-#else
	if (sf->sf_used > 1)
	sack_board_collapse(sf);
	-#endif
	+ segmax = (tp->t_maxseg - 12); /* Allow timestamps .. need to move to kernel get */
	if ((sf->sf_used == 0) && numblks) {
	/*
	* We are brand new add the blocks in
	@@ -527,7 +587,15 @@
	int cnt_added = 0;

	sf->sf_ack = th_ack;
	- for(i=(numblks-1), sf->sf_cur=0; i >= 0; i--) {
	+ for(i=0, sf->sf_cur=0; i<numblks; i++) {
	+ if ((in[i].end != tp->snd_max) &&
	+ ((in[i].end - in[i].start) < segmax)) {
	+ /*
	+ * We do not accept blocks less than a MSS minus all
	+ * possible options space that is not at max_seg.
	+ */
	+ continue;
	+ }
	memcpy(&sf->sf_blks[sf->sf_cur], &in[i], sizeof(struct sackblk));
	sf->sf_bits = sack_blk_set(sf, sf->sf_cur);
	sf->sf_cur++;
	@@ -548,11 +616,9 @@
	sack_filter_prune(sf, th_ack);
	}
	if (numblks) {
	- if (SEQ_GEQ(th_ack, sf->sf_ack)) {
	- ret = sack_filter_new(sf, in, numblks, th_ack);
	- } else {
	- ret = sack_filter_old(sf, in, numblks);
	- }
	+ ret = sack_filter_run(sf, in, numblks, th_ack, segmax, tp->snd_max);
	+ if (sf->sf_used > 1)
	+ sack_board_collapse(sf);
	} else
	ret = 0;
	return (ret);
	@@ -625,7 +691,8 @@
	char buffer[512];
	struct sackblk blks[TCP_MAX_SACK];
	FILE *err;
	- tcp_seq th_ack, snd_una, snd_max = 0;
	+ tcp_seq th_ack;
	+ struct tcpcb tp;
	struct sack_filter sf;
	int32_t numblks,i;
	int snd_una_set=0;
	@@ -638,10 +705,13 @@

	in = stdin;
	out = stdout;
	- while ((i = getopt(argc, argv, "ndIi:o:?h")) != -1) {
	+ memset(&tp, 0, sizeof(tp));
	+ tp.t_maxseg = 1460;
	+
	+ while ((i = getopt(argc, argv, "dIi:o:?hS:")) != -1) {
	switch (i) {
	- case 'n':
	- no_collapse = 1;
	+ case 'S':
	+ tp.t_maxseg = strtol(optarg, NULL, 0);
	break;
	case 'd':
	detailed_dump = 1;
	@@ -666,7 +736,7 @@
	default:
	case '?':
	case 'h':
	- fprintf(stderr, "Use %s [ -i infile -o outfile -I]\n", argv[0]);
	+ fprintf(stderr, "Use %s [ -i infile -o outfile -I -S maxseg -n -d ]\n", argv[0]);
	return(0);
	break;
	};
	@@ -679,28 +749,28 @@
	while (fgets(buffer, sizeof(buffer), in) != NULL) {
	sprintf(line_buf[line_buf_at], "%s", buffer);
	line_buf_at++;
	- if (strncmp(buffer, "QUIT", 4) == 0) {
	+ if (strncmp(buffer, "quit", 4) == 0) {
	break;
	- } else if (strncmp(buffer, "DUMP", 4) == 0) {
	+ } else if (strncmp(buffer, "dump", 4) == 0) {
	sack_filter_dump(out, &sf);
	- } else if (strncmp(buffer, "MAX:", 4) == 0) {
	- snd_max = strtoul(&buffer[4], NULL, 0);
	- } else if (strncmp(buffer, "COMMIT", 6) == 0) {
	+ } else if (strncmp(buffer, "max:", 4) == 0) {
	+ tp.snd_max = strtoul(&buffer[4], NULL, 0);
	+ } else if (strncmp(buffer, "commit", 6) == 0) {
	int nn, ii;
	if (numblks) {
	uint32_t szof, tot_chg;
	+ printf("Dumping line buffer (lines:%d)\n", line_buf_at);
	for(ii=0; ii<line_buf_at; ii++) {
	fprintf(out, "%s", line_buf[ii]);
	}
	- fprintf(out, "------------------------------------\n");
	- nn = sack_filter_blks(&sf, blks, numblks, th_ack);
	+ fprintf(out, "------------------------------------ call sfb() nb:%d\n", numblks);
	+ nn = sack_filter_blks(&tp, &sf, blks, numblks, th_ack);
	saved += numblks - nn;
	tot_sack_blks += numblks;
	- fprintf(out, "ACK:%u\n", sf.sf_ack);
	for(ii=0, tot_chg=0; ii<nn; ii++) {
	szof = blks[ii].end - blks[ii].start;
	tot_chg += szof;
	- fprintf(out, "SACK:%u:%u [%u]\n",
	+ fprintf(out, "sack:%u:%u [%u]\n",
	blks[ii].start,
	blks[ii].end, szof);
	}
	@@ -715,7 +785,7 @@
	memset(line_buf, 0, sizeof(line_buf));
	line_buf_at=0;
	numblks = 0;
	- } else if (strncmp(buffer, "CHG:", 4) == 0) {
	+ } else if (strncmp(buffer, "chg:", 4) == 0) {
	sack_chg = strtoul(&buffer[4], NULL, 0);
	if ((sack_chg != chg_remembered) &&
	(sack_chg > chg_remembered)){
	@@ -724,20 +794,21 @@
	);
	}
	sack_chg = chg_remembered = 0;
	- } else if (strncmp(buffer, "RXT", 3) == 0) {
	- sack_filter_clear(&sf, snd_una);
	- } else if (strncmp(buffer, "ACK:", 4) == 0) {
	+ } else if (strncmp(buffer, "rxt", 3) == 0) {
	+ sack_filter_clear(&sf, tp.snd_una);
	+ } else if (strncmp(buffer, "ack:", 4) == 0) {
	th_ack = strtoul(&buffer[4], NULL, 0);
	if (snd_una_set == 0) {
	- snd_una = th_ack;
	+ tp.snd_una = th_ack;
	snd_una_set = 1;
	- } else if (SEQ_GT(th_ack, snd_una)) {
	- snd_una = th_ack;
	+ } else if (SEQ_GT(th_ack, tp.snd_una)) {
	+ tp.snd_una = th_ack;
	}
	- } else if (strncmp(buffer, "EXIT", 4) == 0) {
	- sack_filter_clear(&sf, snd_una);
	+ sack_filter_blks(&tp, &sf, NULL, 0, th_ack);
	+ } else if (strncmp(buffer, "exit", 4) == 0) {
	+ sack_filter_clear(&sf, tp.snd_una);
	sack_chg = chg_remembered = 0;
	- } else if (strncmp(buffer, "SACK:", 5) == 0) {
	+ } else if (strncmp(buffer, "sack:", 5) == 0) {
	char *end=NULL;
	uint32_t start;
	uint32_t endv;
	@@ -749,8 +820,8 @@
	fprintf(out, "--Sack invalid skip 0 start:%u : ??\n", start);
	continue;
	}
	- if (SEQ_GT(endv, snd_max))
	- snd_max = endv;
	+ if (SEQ_GT(endv, tp.snd_max))
	+ tp.snd_max = endv;
	if (SEQ_LT(endv, start)) {
	fprintf(out, "--Sack invalid skip 1 endv:%u < start:%u\n", endv, start);
	continue;
	@@ -762,7 +833,7 @@
	blks[numblks].start = start;
	blks[numblks].end = endv;
	numblks++;
	- } else if (strncmp(buffer, "REJ:n:n", 4) == 0) {
	+ } else if (strncmp(buffer, "rej:n:n", 4) == 0) {
	struct sackblk in;
	char *end=NULL;

	@@ -772,18 +843,63 @@
	sack_filter_reject(&sf, &in);
	} else
	fprintf(out, "Invalid input END:A:B\n");
	- } else if (strncmp(buffer, "HELP", 4) == 0) {
	+ } else if (strncmp(buffer, "save", 4) == 0) {
	+ FILE *io;
	+
	+ io = fopen("sack_setup.bin", "w+");
	+ if (io != NULL) {
	+ if (fwrite(&sf, sizeof(sf), 1, io) != 1) {
	+ printf("Failed to write out sf data\n");
	+ unlink("sack_setup.bin");
	+ goto outwrite;
	+ }
	+ if (fwrite(&tp, sizeof(tp), 1, io) != 1) {
	+ printf("Failed to write out tp data\n");
	+ unlink("sack_setup.bin");
	+ } else
	+ printf("Save completed\n");
	+ outwrite:
	+ fclose(io);
	+ } else {
	+ printf("failed to open sack_setup.bin for writting .. sorry\n");
	+ }
	+ } else if (strncmp(buffer, "restore", 7) == 0) {
	+ FILE *io;
	+
	+ io = fopen("sack_setup.bin", "r");
	+ if (io != NULL) {
	+ if (fread(&sf, sizeof(sf), 1, io) != 1) {
	+ printf("Failed to read out sf data\n");
	+ goto outread;
	+ }
	+ if (fread(&tp, sizeof(tp), 1, io) != 1) {
	+ printf("Failed to read out tp data\n");
	+ } else {
	+ printf("Restore completed\n");
	+ sack_filter_dump(out, &sf);
	+ }
	+ outread:
	+ fclose(io);
	+ } else {
	+ printf("can't open sack_setup.bin -- sorry no load\n");
	+ }
	+
	+ } else if (strncmp(buffer, "help", 4) == 0) {
	+help:
	fprintf(out, "You can input:\n");
	- fprintf(out, "SACK:S:E -- to define a sack block\n");
	- fprintf(out, "RXT -- to clear the filter without changing the remembered\n");
	- fprintf(out, "EXIT -- To clear the sack filter and start all fresh\n");
	- fprintf(out, "ACK:N -- To advance the cum-ack to N\n");
	- fprintf(out, "MAX:N -- To set send-max to N\n");
	- fprintf(out, "COMMIT -- To apply the sack you built to the filter and dump the filter\n");
	- fprintf(out, "DUMP -- To display the current contents of the sack filter\n");
	- fprintf(out, "QUIT -- To exit this program\n");
	+ fprintf(out, "sack:S:E -- to define a sack block\n");
	+ fprintf(out, "rxt -- to clear the filter without changing the remembered\n");
	+ fprintf(out, "save -- save current state to sack_setup.bin\n");
	+ fprintf(out, "restore -- restore state from sack_setup.bin\n");
	+ fprintf(out, "exit -- To clear the sack filter and start all fresh\n");
	+ fprintf(out, "ack:N -- To advance the cum-ack to N\n");
	+ fprintf(out, "max:N -- To set send-max to N\n");
	+ fprintf(out, "commit -- To apply the sack you built to the filter and dump the filter\n");
	+ fprintf(out, "dump -- To display the current contents of the sack filter\n");
	+ fprintf(out, "quit -- To exit this program\n");
	} else {
	fprintf(out, "Command %s unknown\n", buffer);
	+ goto help;
	}
	memset(buffer, 0, sizeof(buffer));
	}
	Index: sys/netinet/tcp_subr.c
	===================================================================
	--- sys/netinet/tcp_subr.c
	+++ sys/netinet/tcp_subr.c
	@@ -1793,6 +1793,7 @@
	*
	* NOTE: If m != NULL, then th must point to inside the mbuf.
	*/
	+
	void
	tcp_respond(struct tcpcb tp, void ipgen, struct tcphdr th, struct mbuf m,
	tcp_seq ack, tcp_seq seq, uint16_t flags)
	@@ -3320,8 +3321,19 @@
	so = inp->inp_socket;
	SOCKBUF_LOCK(&so->so_snd);
	/* If the mss is larger than the socket buffer, decrease the mss. */
	- if (so->so_snd.sb_hiwat < tp->t_maxseg)
	+ if (so->so_snd.sb_hiwat < tp->t_maxseg) {
	tp->t_maxseg = so->so_snd.sb_hiwat;
	+ if (tp->t_maxseg < V_tcp_mssdflt) {
	+ /*
	+ * The MSS is so small we should not process incoming
	+ * SACK's since we are subject to attack in such a
	+ * case.
	+ */
	+ tp->t_flags2 \|= TF2_PROC_SACK_PROHIBIT;
	+ } else {
	+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
	+ }
	+ }
	SOCKBUF_UNLOCK(&so->so_snd);

	TCPSTAT_INC(tcps_mturesent);
	@@ -3454,8 +3466,19 @@

	opt = inp->in6p_outputopts;
	if (opt != NULL && opt->ip6po_minmtu == IP6PO_MINMTU_ALL &&
	- tp->t_maxseg > TCP6_MSS)
	+ tp->t_maxseg > TCP6_MSS) {
	tp->t_maxseg = TCP6_MSS;
	+ if (tp->t_maxseg < V_tcp_mssdflt) {
	+ /*
	+ * The MSS is so small we should not process incoming
	+ * SACK's since we are subject to attack in such a
	+ * case.
	+ */
	+ tp->t_flags2 \|= TF2_PROC_SACK_PROHIBIT;
	+ } else {
	+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
	+ }
	+ }
	}
	}
	#endif /* INET6 */
	Index: sys/netinet/tcp_timer.c
	===================================================================
	--- sys/netinet/tcp_timer.c
	+++ sys/netinet/tcp_timer.c
	@@ -756,6 +756,16 @@
	tp->t_flags2 \|= TF2_PLPMTU_PMTUD;
	tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
	tp->t_maxseg = tp->t_pmtud_saved_maxseg;
	+ if (tp->t_maxseg < V_tcp_mssdflt) {
	+ /*
	+ * The MSS is so small we should not
	+ * process incoming SACK's since we are
	+ * subject to attack in such a case.
	+ */
	+ tp->t_flags2 \|= TF2_PROC_SACK_PROHIBIT;
	+ } else {
	+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
	+ }
	TCPSTAT_INC(tcps_pmtud_blackhole_failed);
	/*
	* Reset the slow-start flight size as it
	Index: sys/netinet/tcp_usrreq.c
	===================================================================
	--- sys/netinet/tcp_usrreq.c
	+++ sys/netinet/tcp_usrreq.c
	@@ -2203,9 +2203,19 @@

	INP_WLOCK_RECHECK(inp);
	if (optval > 0 && optval <= tp->t_maxseg &&
	- optval + 40 >= V_tcp_minmss)
	+ optval + 40 >= V_tcp_minmss) {
	tp->t_maxseg = optval;
	- else
	+ if (tp->t_maxseg < V_tcp_mssdflt) {
	+ /*
	+ * The MSS is so small we should not process incoming
	+ * SACK's since we are subject to attack in such a
	+ * case.
	+ */
	+ tp->t_flags2 \|= TF2_PROC_SACK_PROHIBIT;
	+ } else {
	+ tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
	+ }
	+ } else
	error = EINVAL;
	goto unlock_and_done;

	Index: sys/netinet/tcp_var.h
	===================================================================
	--- sys/netinet/tcp_var.h
	+++ sys/netinet/tcp_var.h
	@@ -846,6 +846,7 @@
	#define TF2_MBUF_QUEUE_READY 0x00020000 /* Inputs can be queued */
	#define TF2_DONT_SACK_QUEUE 0x00040000 /* Don't wake on sack */
	#define TF2_CANNOT_DO_ECN 0x00080000 /* The stack does not do ECN */
	+#define TF2_PROC_SACK_PROHIBIT 0x00100000 /* Due to small MSS size do not process sack's */

	/*
	* Structure to hold TCP options that are only used during segment

File Metadata

Mime Type: text/plain
Expires: Wed, Dec 18, 1:46 AM (1 h, 9 m)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 15475269
Default Alt Text: D44903.id137896.diff (33 KB)

D44903.id137896.diffNo OneTemporaryActions

D44903.id137896.diffView Options

File Metadata

Event Timeline

D44903.id137896.diff
No OneTemporary
Actions

D44903.id137896.diff
View Options