Index: sys/conf/options =================================================================== --- sys/conf/options +++ sys/conf/options @@ -229,6 +229,12 @@ TCPHPTS opt_inet.h TCP_REQUEST_TRK opt_global.h TCP_ACCOUNTING opt_inet.h +# +# TCP SaD Detection is an experimental Sack attack Detection (SaD) +# algorithm that uses "normal" behaviour with SACK's to detect +# a possible attack. It is strictly experimental at this point. +# +TCP_SAD_DETECTION opt_inet.h TURNSTILE_PROFILING UMTX_PROFILING UMTX_CHAINS opt_global.h Index: sys/netinet/tcp.h =================================================================== --- sys/netinet/tcp.h +++ sys/netinet/tcp.h @@ -424,6 +424,9 @@ u_int32_t __tcpi_received_e0_bytes; u_int32_t __tcpi_received_ce_bytes; + u_int32_t tcpi_total_tlp; /* tail loss probes sent */ + u_int64_t tcpi_total_tlp_bytes; /* tail loss probe bytes sent */ + /* Padding to grow without breaking ABI. */ u_int32_t __tcpi_pad[19]; /* Padding. */ }; Index: sys/netinet/tcp_log_buf.h =================================================================== --- sys/netinet/tcp_log_buf.h +++ sys/netinet/tcp_log_buf.h @@ -255,7 +255,7 @@ TCP_LOG_CONNEND, /* End of connection 54 */ TCP_LOG_LRO, /* LRO entry 55 */ TCP_SACK_FILTER_RES, /* Results of SACK Filter 56 */ - TCP_SAD_DETECTION, /* Sack Attack Detection 57 */ + TCP_SAD_DETECT, /* Sack Attack Detection 57 */ TCP_TIMELY_WORK, /* Logs regarding Timely CC tweaks 58 */ TCP_LOG_USER_EVENT, /* User space event data 59 */ TCP_LOG_SENDFILE, /* sendfile() logging for TCP connections 60 */ Index: sys/netinet/tcp_stacks/bbr.c =================================================================== --- sys/netinet/tcp_stacks/bbr.c +++ sys/netinet/tcp_stacks/bbr.c @@ -2991,13 +2991,6 @@ bw = bbr->r_ctl.red_bw; else bw = get_filter_value(&bbr->r_ctl.rc_delrate); - if (bbr->rc_tp->t_peakrate_thr && (bbr->rc_use_google == 0)) { - /* - * Enforce user set rate limit, keep in mind that - * t_peakrate_thr is in B/s already - */ - bw = 
uqmin((uint64_t)bbr->rc_tp->t_peakrate_thr, bw); - } if (bw == 0) { /* We should not be at 0, go to the initial window then */ goto use_initial_window; @@ -10071,9 +10064,6 @@ bbr->r_ctl.rc_initial_hptsi_bw = bbr_initial_bw_bps; if (bbr_resends_use_tso) bbr->rc_resends_use_tso = 1; -#ifdef NETFLIX_PEAKRATE - tp->t_peakrate_thr = tp->t_maxpeakrate; -#endif if (tp->snd_una != tp->snd_max) { /* Create a send map for the current outstanding data */ struct bbr_sendmap *rsm; @@ -11668,20 +11658,10 @@ return (len); } -static inline void -bbr_do_error_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap *rsm, int32_t len, int32_t error) -{ -#ifdef NETFLIX_STATS - KMOD_TCPSTAT_INC(tcps_sndpack_error); - KMOD_TCPSTAT_ADD(tcps_sndbyte_error, len); -#endif -} - static inline void bbr_do_send_accounting(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap *rsm, int32_t len, int32_t error) { if (error) { - bbr_do_error_accounting(tp, bbr, rsm, len, error); return; } if (rsm) { @@ -11690,10 +11670,8 @@ * TLP should not count in retran count, but in its * own bin */ -#ifdef NETFLIX_STATS KMOD_TCPSTAT_INC(tcps_tlpresends); KMOD_TCPSTAT_ADD(tcps_tlpresend_bytes, len); -#endif } else { /* Retransmit */ tp->t_sndrexmitpack++; @@ -14206,9 +14184,6 @@ case TCP_BBR_PACE_SEG_MIN: case TCP_BBR_PACE_CROSS: case TCP_BBR_PACE_OH: -#ifdef NETFLIX_PEAKRATE - case TCP_MAXPEAKRATE: -#endif case TCP_BBR_TMR_PACE_OH: case TCP_BBR_RACK_RTT_USE: case TCP_BBR_RETRAN_WTSO: @@ -14474,14 +14449,7 @@ BBR_OPTS_INC(tcp_rack_pkt_delay); bbr->r_ctl.rc_pkt_delay = optval; break; -#ifdef NETFLIX_PEAKRATE - case TCP_MAXPEAKRATE: - BBR_OPTS_INC(tcp_maxpeak); - error = tcp_set_maxpeakrate(tp, optval); - if (!error) - tp->t_peakrate_thr = tp->t_maxpeakrate; - break; -#endif + case TCP_BBR_RETRAN_WTSO: BBR_OPTS_INC(tcp_retran_wtso); if (optval) @@ -14553,9 +14521,7 @@ return (tcp_default_ctloutput(inp, sopt)); break; } -#ifdef NETFLIX_STATS tcp_log_socket_option(tp, sopt->sopt_name, optval, 
error); -#endif INP_WUNLOCK(inp); return (error); } Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -746,18 +746,6 @@ } } -#ifdef NETFLIX_PEAKRATE -static inline void -rack_update_peakrate_thr(struct tcpcb *tp) -{ - /* Keep in mind that t_maxpeakrate is in B/s. */ - uint64_t peak; - peak = uqmax((tp->t_maxseg * 2), - (((uint64_t)tp->t_maxpeakrate * (uint64_t)(tp->t_srtt)) / (uint64_t)HPTS_USEC_IN_SEC)); - tp->t_peakrate_thr = (uint32_t)uqmin(peak, UINT32_MAX); -} -#endif - static int sysctl_rack_clear(SYSCTL_HANDLER_ARGS) { @@ -2346,15 +2334,6 @@ return (rack_get_fixed_pacing_bw(rack)); } bw = rack_get_gp_est(rack); -#ifdef NETFLIX_PEAKRATE - if ((rack->rc_tp->t_maxpeakrate) && - (bw > rack->rc_tp->t_maxpeakrate)) { - /* The user has set a peak rate to pace at - * don't allow us to pace faster than that. - */ - return (rack->rc_tp->t_maxpeakrate); - } -#endif return (bw); } @@ -3187,7 +3166,7 @@ } } -#ifdef NETFLIX_EXP_DETECTION +#ifdef TCP_SAD_DETECTION static void rack_log_sad(struct tcp_rack *rack, int event) { @@ -3215,7 +3194,7 @@ TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, - TCP_SAD_DETECTION, 0, + TCP_SAD_DETECT, 0, 0, &log, false, &tv); } } @@ -3358,7 +3337,7 @@ counter_u64_add(rack_alloc_limited_conns, 1); } return (NULL); -#ifdef NETFLIX_EXP_DETECTION +#ifdef TCP_SAD_DETECTION } else if ((tcp_sad_limit != 0) && (rack->do_detection == 1) && (rack->r_ctl.rc_num_split_allocs >= tcp_sad_limit)) { @@ -5274,18 +5253,6 @@ rack_enough_for_measurement(tp, rack, th_ack, &quality)) { /* Measure the Goodput */ rack_do_goodput_measurement(tp, rack, th_ack, __LINE__, quality); -#ifdef NETFLIX_PEAKRATE - if ((type == CC_ACK) && - (tp->t_maxpeakrate)) { - /* - * We update t_peakrate_thr. This gives us roughly - * one update per round trip time. 
Note - * it will only be used if pace_always is off i.e - * we don't do this for paced flows. - */ - rack_update_peakrate_thr(tp); - } -#endif } /* Which way our we limited, if not cwnd limited no advance in CA */ if (tp->snd_cwnd <= tp->snd_wnd) @@ -5366,14 +5333,6 @@ if (rack->r_ctl.rc_rack_largest_cwnd < rack->r_ctl.cwnd_to_use) { rack->r_ctl.rc_rack_largest_cwnd = rack->r_ctl.cwnd_to_use; } -#ifdef NETFLIX_PEAKRATE - /* we enforce max peak rate if it is set and we are not pacing */ - if ((rack->rc_always_pace == 0) && - tp->t_peakrate_thr && - (tp->snd_cwnd > tp->t_peakrate_thr)) { - tp->snd_cwnd = tp->t_peakrate_thr; - } -#endif } static void @@ -5926,11 +5885,6 @@ INP_WLOCK_ASSERT(tptoinpcb(tp)); -#ifdef NETFLIX_STATS - KMOD_TCPSTAT_INC(tcps_idle_restarts); - if (tp->t_state == TCPS_ESTABLISHED) - KMOD_TCPSTAT_INC(tcps_idle_estrestarts); -#endif if (CC_ALGO(tp)->after_idle != NULL) CC_ALGO(tp)->after_idle(&tp->t_ccv); @@ -6744,7 +6698,7 @@ } } hpts_timeout = rack_timer_start(tp, rack, cts, sup_rack); -#ifdef NETFLIX_EXP_DETECTION +#ifdef TCP_SAD_DETECTION if (rack->sack_attack_disable && (rack->r_ctl.ack_during_sd > 0) && (slot < tcp_sad_pacing_interval)) { @@ -7662,7 +7616,7 @@ rack_log_to_prr(rack, 6, 0, __LINE__); rack->r_timer_override = 1; if ((((tp->t_flags & TF_SACK_PERMIT) == 0) -#ifdef NETFLIX_EXP_DETECTION +#ifdef TCP_SAD_DETECTION || (rack->sack_attack_disable != 0) #endif ) && ((tp->t_flags & TF_SENTFIN) == 0)) { @@ -9343,7 +9297,7 @@ int insret __diagused; int32_t used_ref = 1; int moved = 0; -#ifdef NETFLIX_EXP_DETECTION +#ifdef TCP_SAD_DETECTION int allow_segsiz; int first_time_through = 1; #endif @@ -9353,7 +9307,8 @@ start = sack->start; end = sack->end; rsm = *prsm; -#ifdef NETFLIX_EXP_DETECTION + +#ifdef TCP_SAD_DETECTION /* * There are a strange number of proxys and meddle boxes in the world * that seem to cut up segments on different boundaries. 
This gets us @@ -9384,7 +9339,7 @@ /* TSNH */ goto out; } -#ifdef NETFLIX_EXP_DETECTION +#ifdef TCP_SAD_DETECTION /* Now we must check for suspicous activity */ if ((first_time_through == 1) && ((end - start) < min((rsm->r_end - rsm->r_start), allow_segsiz)) && @@ -10252,7 +10207,7 @@ * Current default is 800 so it decays * 80% every second. */ -#ifdef NETFLIX_EXP_DETECTION +#ifdef TCP_SAD_DETECTION uint32_t pkt_delta; pkt_delta = rack->r_ctl.input_pkt - rack->r_ctl.saved_input_pkt; @@ -10261,7 +10216,7 @@ rack->r_ctl.saved_input_pkt = rack->r_ctl.input_pkt; rack->r_ctl.rc_last_time_decay = rack->r_ctl.act_rcv_time; /* Now do we escape without decay? */ -#ifdef NETFLIX_EXP_DETECTION +#ifdef TCP_SAD_DETECTION if (rack->rc_in_persist || (rack->rc_tp->snd_max == rack->rc_tp->snd_una) || (pkt_delta < tcp_sad_low_pps)){ @@ -10706,7 +10661,7 @@ } } -#ifdef NETFLIX_EXP_DETECTION +#ifdef TCP_SAD_DETECTION static void rack_merge_out_sacks(struct tcp_rack *rack) @@ -11384,7 +11339,7 @@ counter_u64_add(rack_move_some, 1); } out: -#ifdef NETFLIX_EXP_DETECTION +#ifdef TCP_SAD_DETECTION rack_do_detection(tp, rack, BYTES_THIS_ACK(tp, th), ctf_fixed_maxseg(rack->rc_tp)); #endif if (changed) { @@ -14275,9 +14230,6 @@ } } else if (rack->rc_always_pace) { if (rack->r_ctl.gp_bw || -#ifdef NETFLIX_PEAKRATE - rack->rc_tp->t_maxpeakrate || -#endif rack->r_ctl.init_rate) { /* We have a rate of some sort set */ uint32_t orig; @@ -15034,7 +14986,7 @@ rack->rack_hdw_pace_ena = 1; if (rack_hw_rate_caps) rack->r_rack_hw_rate_caps = 1; -#ifdef NETFLIX_EXP_DETECTION +#ifdef TCP_SAD_DETECTION rack->do_detection = 1; #else rack->do_detection = 0; @@ -15604,7 +15556,7 @@ uint32_t orig_snd_una; uint8_t xx = 0; -#ifdef NETFLIX_HTTP_LOGGING +#ifdef TCP_REQUEST_TRK struct http_sendfile_track *http_req; if (SEQ_GT(ae->ack, tp->snd_una)) { @@ -15651,7 +15603,7 @@ log.u_bbr.timeStamp = tcp_get_usecs(&ltv); /* Log the rcv time */ log.u_bbr.delRate = ae->timestamp; -#ifdef NETFLIX_HTTP_LOGGING +#ifdef
TCP_REQUEST_TRK log.u_bbr.applimited = tp->t_http_closed; log.u_bbr.applimited <<= 8; log.u_bbr.applimited |= tp->t_http_open; @@ -16163,7 +16115,7 @@ } if (acked > sbavail(&so->so_snd)) acked_amount = sbavail(&so->so_snd); -#ifdef NETFLIX_EXP_DETECTION +#ifdef TCP_SAD_DETECTION /* * We only care on a cum-ack move if we are in a sack-disabled * state. We have already added in to the ack_count, and we never @@ -16641,7 +16593,7 @@ if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval ltv; -#ifdef NETFLIX_HTTP_LOGGING +#ifdef TCP_REQUEST_TRK struct http_sendfile_track *http_req; if (SEQ_GT(th->th_ack, tp->snd_una)) { @@ -16687,7 +16639,7 @@ log.u_bbr.timeStamp = tcp_get_usecs(&ltv); /* Log the rcv time */ log.u_bbr.delRate = m->m_pkthdr.rcv_tstmp; -#ifdef NETFLIX_HTTP_LOGGING +#ifdef TCP_REQUEST_TRK log.u_bbr.applimited = tp->t_http_closed; log.u_bbr.applimited <<= 8; log.u_bbr.applimited |= tp->t_http_open; @@ -17474,9 +17426,6 @@ if (rack->use_fixed_rate) { rate_wanted = bw_est = rack_get_fixed_pacing_bw(rack); } else if ((rack->r_ctl.init_rate == 0) && -#ifdef NETFLIX_PEAKRATE - (rack->rc_tp->t_maxpeakrate == 0) && -#endif (rack->r_ctl.gp_bw == 0)) { /* no way to yet do an estimate */ bw_est = rate_wanted = 0; @@ -17717,9 +17666,6 @@ done_w_hdwr: if (rack_limit_time_with_srtt && (rack->use_fixed_rate == 0) && -#ifdef NETFLIX_PEAKRATE - (rack->rc_tp->t_maxpeakrate == 0) && -#endif (rack->rack_hdrw_pacing == 0)) { /* * Sanity check, we do not allow the pacing delay @@ -23043,9 +22989,6 @@ snt = 0; if ((snt < win) && (tp->t_srtt | -#ifdef NETFLIX_PEAKRATE - tp->t_maxpeakrate | -#endif rack->r_ctl.init_rate)) { /* * We are not past the initial window @@ -23324,9 +23267,7 @@ default: break; } -#ifdef NETFLIX_STATS tcp_log_socket_option(tp, sopt_name, optval, error); -#endif return (error); } @@ -23668,9 +23609,9 @@ ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack; ti->tcpi_rcv_ooopack = tp->t_rcvoopack; ti->tcpi_snd_zerowin = tp->t_sndzerowin;
-#ifdef NETFLIX_STATS ti->tcpi_total_tlp = tp->t_sndtlppack; ti->tcpi_total_tlp_bytes = tp->t_sndtlpbyte; +#ifdef NETFLIX_STATS memcpy(&ti->tcpi_rxsyninfo, &tp->t_rxsyninfo, sizeof(struct tcpsyninfo)); #endif #ifdef TCP_OFFLOAD Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -143,7 +143,7 @@ VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS; #endif -#ifdef NETFLIX_EXP_DETECTION +#ifdef TCP_SAD_DETECTION /* Sack attack detection thresholds and such */ SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack_attack, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, @@ -154,11 +154,6 @@ &tcp_force_detection, 0, "Do we force detection even if the INP has it off?"); int32_t tcp_sad_limit = 10000; -SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, limit, - CTLFLAG_RW, - &tcp_sad_limit, 10000, - "If SaD is enabled, what is the limit to sendmap entries (0 = unlimited)?"); -int32_t tcp_sad_limit = 10000; SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, limit, CTLFLAG_RW, &tcp_sad_limit, 10000, @@ -4579,3 +4574,22 @@ (void)tcp_http_alloc_req_full(tp, &user->http_req, ts, 1); } #endif + +void +tcp_log_socket_option(struct tcpcb *tp, uint32_t option_num, uint32_t option_val, int err) +{ + if (tcp_bblogging_on(tp)) { + struct tcp_log_buffer *l; + + l = tcp_log_event(tp, NULL, + &tptosocket(tp)->so_rcv, + &tptosocket(tp)->so_snd, + TCP_LOG_SOCKET_OPT, + err, 0, NULL, 1, + NULL, NULL, 0, NULL); + if (l) { + l->tlb_flex1 = option_num; + l->tlb_flex2 = option_val; + } + } +} Index: sys/netinet/tcp_usrreq.c =================================================================== --- sys/netinet/tcp_usrreq.c +++ sys/netinet/tcp_usrreq.c @@ -1710,7 +1710,6 @@ * Ensure the new stack takes ownership with a * clean slate on peak rate threshold. 
*/ - tp->t_peakrate_thr = 0; #ifdef TCPHPTS /* Assure that we are not on any hpts */ tcp_hpts_remove(tptoinpcb(tp)); Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -332,7 +332,6 @@ tcp_seq snd_up; /* send urgent pointer */ uint32_t snd_wnd; /* send window */ uint32_t snd_cwnd; /* congestion-controlled window */ - uint32_t t_peakrate_thr; /* pre-calculated peak rate threshold */ uint32_t ts_offset; /* our timestamp offset */ uint32_t rfbuf_ts; /* recv buffer autoscaling timestamp */ int rcv_numsacks; /* # distinct sack blks present */ @@ -1086,6 +1085,16 @@ uint64_t tcps_ecn_sndect0; /* ECN Capable Transport */ uint64_t tcps_ecn_sndect1; /* ECN Capable Transport */ + /* + * BBR and Rack implement TLPs; these values count TLP bytes in + * two categories, bytes that were retransmitted and bytes that + * were newly transmitted. Both types can serve as TLPs but they + * are accounted differently. + */ + uint64_t tcps_tlpresends; /* number of tlp resends */ + uint64_t tcps_tlpresend_bytes; /* number of bytes resent by tlp */ + + uint64_t _pad[4]; /* 4 TBD placeholder for STABLE */ }; @@ -1390,6 +1399,9 @@ find_and_ref_tcp_fb(struct tcp_function_block *fs); int tcp_default_ctloutput(struct inpcb *inp, struct sockopt *sopt); int tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt); +void tcp_log_socket_option(struct tcpcb *tp, uint32_t option_num, + uint32_t option_val, int err); + extern counter_u64_t tcp_inp_lro_direct_queue; extern counter_u64_t tcp_inp_lro_wokeup_queue; @@ -1401,7 +1413,7 @@ extern counter_u64_t tcp_uncomp_total; extern counter_u64_t tcp_bad_csums; -#ifdef NETFLIX_EXP_DETECTION +#ifdef TCP_SAD_DETECTION /* Various SACK attack thresholds */ extern int32_t tcp_force_detection; extern int32_t tcp_sad_limit;