Index: netinet/tcp_stacks/bbr.c
===================================================================
--- netinet/tcp_stacks/bbr.c
+++ netinet/tcp_stacks/bbr.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2016-2019
+ * Copyright (c) 2016-9
  *	Netflix Inc.
  *	All rights reserved.
  *
@@ -42,6 +42,7 @@
 #include "opt_ratelimit.h"
 #include "opt_kern_tls.h"
 #include <sys/param.h>
+#include <sys/arb.h>
 #include <sys/module.h>
 #include <sys/kernel.h>
 #ifdef TCP_HHOOK
@@ -57,9 +58,9 @@
 #endif
 #include <sys/sysctl.h>
 #include <sys/systm.h>
+#ifdef STATS
 #include <sys/qmath.h>
 #include <sys/tree.h>
-#ifdef NETFLIX_STATS
 #include <sys/stats.h> /* Must come after qmath.h and tree.h */
 #endif
 #include <sys/refcount.h>
@@ -161,9 +162,8 @@
 static int32_t bbr_hardware_pacing_limit = 8000;
 static int32_t bbr_quanta = 3;	/* How much extra quanta do we get? */
 static int32_t bbr_no_retran = 0;
-static int32_t bbr_tcp_map_entries_limit = 1500;
-static int32_t bbr_tcp_map_split_limit = 256;
+
 static int32_t bbr_error_base_paceout = 10000; /* usec to pace */
 static int32_t bbr_max_net_error_cnt = 10;
 /* Should the following be dynamic too -- loss wise */
@@ -3381,8 +3381,8 @@
 static struct bbr_sendmap *
 bbr_alloc_full_limit(struct tcp_bbr *bbr)
 {
-	if ((bbr_tcp_map_entries_limit > 0) &&
-	    (bbr->r_ctl.rc_num_maps_alloced >= bbr_tcp_map_entries_limit)) {
+	if ((V_tcp_map_entries_limit > 0) &&
+	    (bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
 		BBR_STAT_INC(bbr_alloc_limited);
 		if (!bbr->alloc_limit_reported) {
 			bbr->alloc_limit_reported = 1;
@@ -3402,8 +3402,8 @@
 
 	if (limit_type) {
 		/* currently there is only one limit type */
-		if (bbr_tcp_map_split_limit > 0 &&
-		    bbr->r_ctl.rc_num_split_allocs >= bbr_tcp_map_split_limit) {
+		if (V_tcp_map_split_limit > 0 &&
+		    bbr->r_ctl.rc_num_split_allocs >= V_tcp_map_split_limit) {
 			BBR_STAT_INC(bbr_split_limited);
 			if (!bbr->alloc_limit_reported) {
 				bbr->alloc_limit_reported = 1;
@@ -3685,7 +3685,7 @@
 	uint32_t cwnd, target_cwnd, saved_bytes, maxseg;
 	int32_t meth;
 
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	if ((tp->t_flags & TF_GPUTINPROG) &&
 	    SEQ_GEQ(th->th_ack, tp->gput_ack)) {
 		/*
@@ -6510,7 +6510,7 @@
 	}
 	TCPSTAT_INC(tcps_rttupdated);
 	tp->t_rttupdated++;
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt_ticks));
 #endif
 	/*
@@ -8490,6 +8490,7 @@
 			return (0);
 		}
 	}
+
 #endif
 	if (DELAY_ACK(tp, bbr, nsegs) || tfo_syn) {
 		bbr->bbr_segs_rcvd += max(1, nsegs);
@@ -8698,6 +8699,7 @@
 	 * reassembly queue and we have enough buffer space to take it.
 	 */
 	nsegs = max(1, m->m_pkthdr.lro_nsegs);
+
 #ifdef NETFLIX_SB_LIMITS
 	if (so->so_rcv.sb_shlim) {
 		mcnt = m_memcnt(m);
@@ -8746,6 +8748,7 @@
 		    newsize, so, NULL))
 			so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
 		m_adj(m, drop_hdrlen);	/* delayed header drop */
+
 #ifdef NETFLIX_SB_LIMITS
 		appended =
 #endif
@@ -11561,7 +11564,7 @@
 	 * the scale is zero.
 	 */
 	tiwin = th->th_win << tp->snd_scale;
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
 #endif
 	/*
@@ -11960,7 +11963,7 @@
 	if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
 		/* Window probe */
 		TCPSTAT_INC(tcps_sndprobe);
-#ifdef NETFLIX_STATS
+#ifdef STATS
 		stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
 		    len);
 #endif
@@ -11981,7 +11984,7 @@
 			tp->t_sndrexmitpack++;
 			TCPSTAT_INC(tcps_sndrexmitpack);
 			TCPSTAT_ADD(tcps_sndrexmitbyte, len);
-#ifdef NETFLIX_STATS
+#ifdef STATS
 		stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
 		    len);
 #endif
@@ -12017,7 +12020,7 @@
 			/* Place in 17's the total sent */
 			counter_u64_add(bbr_state_resend[17], len);
 			counter_u64_add(bbr_state_lost[17], len);
-#ifdef NETFLIX_STATS
+#ifdef STATS
 		stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
 		    len);
 #endif
@@ -12517,8 +12520,8 @@
 	 * as long as we are not retransmitting.
 	 */
 	if ((rsm == NULL) &&
-	    (bbr_tcp_map_entries_limit > 0) &&
-	    (bbr->r_ctl.rc_num_maps_alloced >= bbr_tcp_map_entries_limit)) {
+	    (V_tcp_map_entries_limit > 0) &&
+	    (bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
 		BBR_STAT_INC(bbr_alloc_limited);
 		if (!bbr->alloc_limit_reported) {
 			bbr->alloc_limit_reported = 1;
@@ -13256,7 +13259,6 @@
 			SOCKBUF_UNLOCK(&so->so_snd);
 			return (EHOSTUNREACH);
 		}
-		hdrlen += sizeof(struct udphdr);
 	}
 #endif
@@ -14276,7 +14278,7 @@
 		bbr_start_hpts_timer(bbr, tp, cts, 11, slot, 0);
 		return (error);
 	}
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	} else if (((tp->t_flags & TF_GPUTINPROG) == 0) &&
 	    len &&
 	    (rsm == NULL) &&
Index: netinet/tcp_stacks/rack.c
===================================================================
--- netinet/tcp_stacks/rack.c
+++ netinet/tcp_stacks/rack.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2016-2019 Netflix, Inc.
+ * Copyright (c) 2016-9 Netflix, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,6 +34,7 @@
 #include "opt_ratelimit.h"
 #include "opt_kern_tls.h"
 #include <sys/param.h>
+#include <sys/arb.h>
 #include <sys/module.h>
 #include <sys/kernel.h>
 #ifdef TCP_HHOOK
@@ -52,7 +53,7 @@
 #endif
 #include <sys/sysctl.h>
 #include <sys/systm.h>
-#ifdef NETFLIX_STATS
+#ifdef STATS
 #include <sys/qmath.h>
 #include <sys/tree.h>
 #include <sys/stats.h> /* Must come after qmath.h and tree.h */
@@ -187,21 +188,6 @@
 static int32_t rack_sack_not_required = 0;	/* set to one to allow non-sack to use rack */
 static int32_t rack_hw_tls_max_seg = 0;	/* 0 means use hw-tls single segment */
-/* Sack attack detection thresholds and such */
-static int32_t tcp_force_detection = 0;
-
-#ifdef NETFLIX_EXP_DETECTION
-static int32_t tcp_sack_to_ack_thresh = 700;	/* 70 % */
-static int32_t tcp_sack_to_move_thresh = 600;	/* 60 % */
-static int32_t tcp_restoral_thresh = 650;	/* 65 % (sack:2:ack -5%) */
-static int32_t tcp_attack_on_turns_on_logging = 0;
-static int32_t tcp_map_minimum = 500;
-#endif
-static int32_t tcp_sad_decay_val = 800;
-static int32_t tcp_sad_pacing_interval = 2000;
-static int32_t tcp_sad_low_pps = 100;
-
-
 /*
  * Currently regular tcp has a rto_min of 30ms
  * the backoff goes 12 times so that ends up
@@ -226,10 +212,7 @@
 static int32_t rack_use_sack_filter = 1;
 static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE;
 static int32_t rack_per_of_gp = 50;
-static int32_t rack_tcp_map_entries_limit = 1500;
-static int32_t rack_tcp_map_split_limit = 256;
-
 /* Rack specific counters */
 counter_u64_t rack_badfr;
 counter_u64_t rack_badfr_bytes;
@@ -1577,9 +1560,9 @@
 static struct rack_sendmap *
 rack_alloc_full_limit(struct tcp_rack *rack)
 {
-	if ((rack_tcp_map_entries_limit > 0) &&
+	if ((V_tcp_map_entries_limit > 0) &&
 	    (rack->do_detection == 0) &&
-	    (rack->r_ctl.rc_num_maps_alloced >= rack_tcp_map_entries_limit)) {
+	    (rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
 		counter_u64_add(rack_to_alloc_limited, 1);
 		if (!rack->alloc_limit_reported) {
 			rack->alloc_limit_reported = 1;
@@ -1598,9 +1581,9 @@
 
 	if (limit_type) {
 		/* currently there is only one limit type */
-		if (rack_tcp_map_split_limit > 0 &&
+		if (V_tcp_map_split_limit > 0 &&
 		    (rack->do_detection == 0) &&
-		    rack->r_ctl.rc_num_split_allocs >= rack_tcp_map_split_limit) {
+		    rack->r_ctl.rc_num_split_allocs >= V_tcp_map_split_limit) {
 			counter_u64_add(rack_split_limited, 1);
 			if (!rack->alloc_limit_reported) {
 				rack->alloc_limit_reported = 1;
@@ -1648,7 +1631,7 @@
 rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, uint16_t nsegs,
     uint16_t type, int32_t recovery)
 {
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	int32_t gput;
 #endif
@@ -1669,7 +1652,7 @@
 
 	tp->ccv->flags &= ~CCF_CWND_LIMITED;
 	if (type == CC_ACK) {
-#ifdef NETFLIX_STATS
+#ifdef STATS
 		stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF,
 		    ((int32_t) tp->snd_cwnd) - tp->snd_wnd);
 		if ((tp->t_flags & TF_GPUTINPROG) &&
@@ -1723,7 +1706,7 @@
 		tp->ccv->curack = th->th_ack;
 		CC_ALGO(tp)->ack_received(tp->ccv, type);
 	}
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd);
 #endif
 	if (rack->r_ctl.rc_rack_largest_cwnd < tp->snd_cwnd) {
@@ -2434,6 +2417,7 @@
 		slot = 1;
 	}
 	hpts_timeout = rack_timer_start(tp, rack, cts, sup_rack);
+#ifdef NETFLIX_EXP_DETECTION
 	if (rack->sack_attack_disable &&
 	    (slot < USEC_TO_MSEC(tcp_sad_pacing_interval))) {
 		/*
@@ -2448,6 +2432,7 @@
 		 */
 		slot = USEC_TO_MSEC(tcp_sad_pacing_interval);
 	}
+#endif
 	if (tp->t_flags & TF_DELACK) {
 		delayed_ack = TICKS_2_MSEC(tcp_delacktime);
 		rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK;
@@ -3774,7 +3759,8 @@
 			seq_out = rack_update_entry(tp, rack, nrsm, ts, &len);
 			if (len == 0) {
 				return;
-			}
+			} else if (len > 0)
+				goto refind;
 		}
 	}
 	/*
@@ -3910,7 +3896,7 @@
 	TCPSTAT_INC(tcps_rttupdated);
 	rack_log_rtt_upd(tp, rack, rtt, o_srtt, o_var);
 	tp->t_rttupdated++;
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt));
 #endif
 	tp->t_rxtshift = 0;
@@ -4618,6 +4604,7 @@
 static void
 rack_do_decay(struct tcp_rack *rack)
 {
+#ifdef NETFLIX_EXP_DETECTION
 	struct timeval res;
 
 #define	timersub(tvp, uvp, vvp)		\
@@ -4668,6 +4655,7 @@
 			rack->r_ctl.sack_noextra_move = ctf_decay_count(rack->r_ctl.sack_noextra_move,
 			    tcp_sad_decay_val);
 		}
+#endif
 }
 
 static void
@@ -7404,9 +7392,11 @@
 	rack->r_ctl.rc_last_time_decay = rack->r_ctl.rc_last_ack;
 	rack->r_ctl.rc_tlp_rxt_last_time = tcp_ts_getticks();
 	/* Do we force on detection? */
+#ifdef NETFLIX_EXP_DETECTION
 	if (tcp_force_detection)
 		rack->do_detection = 1;
 	else
+#endif
 		rack->do_detection = 0;
 	if (tp->snd_una != tp->snd_max) {
 		/* Create a send map for the current outstanding data */
@@ -7699,7 +7689,7 @@
 	 * the scale is zero.
 	 */
 	tiwin = th->th_win << tp->snd_scale;
-#ifdef NETFLIX_STATS
+#ifdef STATS
 	stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
 #endif
 	if (tiwin > rack->r_ctl.rc_high_rwnd)
@@ -8388,8 +8378,8 @@
 	 */
 	if ((rsm == NULL) &&
 	    (rack->do_detection == 0) &&
-	    (rack_tcp_map_entries_limit > 0) &&
-	    (rack->r_ctl.rc_num_maps_alloced >= rack_tcp_map_entries_limit)) {
+	    (V_tcp_map_entries_limit > 0) &&
+	    (rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
 		counter_u64_add(rack_to_alloc_limited, 1);
 		if (!rack->alloc_limit_reported) {
 			rack->alloc_limit_reported = 1;
@@ -9316,7 +9306,7 @@
 	}
 	if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
 		TCPSTAT_INC(tcps_sndprobe);
-#ifdef NETFLIX_STATS
+#ifdef STATS
 		if (SEQ_LT(tp->snd_nxt, tp->snd_max))
 			stats_voi_update_abs_u32(tp->t_stats,
 			    VOI_TCP_RETXPB, len);
@@ -9337,7 +9327,7 @@
 			TCPSTAT_INC(tcps_sndrexmitpack);
 			TCPSTAT_ADD(tcps_sndrexmitbyte, len);
 		}
-#ifdef NETFLIX_STATS
+#ifdef STATS
 		stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
 		    len);
 #endif
@@ -9344,7 +9334,7 @@
 	} else {
 		TCPSTAT_INC(tcps_sndpack);
 		TCPSTAT_ADD(tcps_sndbyte, len);
-#ifdef NETFLIX_STATS
+#ifdef STATS
 		stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
 		    len);
 #endif
@@ -9927,7 +9917,7 @@
 			tp->t_rtseq = startseq;
 			TCPSTAT_INC(tcps_segstimed);
 		}
-#ifdef NETFLIX_STATS
+#ifdef STATS
 		if (!(tp->t_flags & TF_GPUTINPROG) && len) {
 			tp->t_flags |= TF_GPUTINPROG;
 			tp->gput_seq = startseq;
@@ -10140,7 +10130,7 @@
 	rack = (struct tcp_rack *)tp->t_fb_ptr;
 	switch (sopt->sopt_name) {
 	case TCP_RACK_DO_DETECTION:
-		RACK_OPTS_INC(tcp_rack_no_sack);
+		RACK_OPTS_INC(tcp_rack_do_detection);
 		if (optval == 0)
 			rack->do_detection = 0;
 		else
Index: netinet/tcp_stacks/rack_bbr_common.h
===================================================================
--- netinet/tcp_stacks/rack_bbr_common.h
+++ netinet/tcp_stacks/rack_bbr_common.h
@@ -1,5 +1,5 @@
-#ifndef __pacer_timer_h__
-#define __pacer_timer_h__
+#ifndef __rack_bbr_common_h__
+#define __rack_bbr_common_h__
 /*-
  * Copyright (c) 2017-9 Netflix, Inc.
  *
@@ -26,6 +26,12 @@
  *
  * __FBSDID("$FreeBSD$");
  */
+
+/* XXXLAS: Couple STATS to NETFLIX_STATS until stats(3) is fully upstreamed.
+ */
+#ifndef NETFLIX_STATS
+#undef STATS
+#endif
+
 /* Common defines and such used by both RACK and BBR */
 /* Special values for mss accounting array */
 #define TCP_MSS_ACCT_JUSTRET 0
@@ -46,7 +52,24 @@
 #define PROGRESS_CLEAR	3
 #define PROGRESS_START	4
 
+/* codes for just-return */
+#define CTF_JR_SENT_DATA	0
+#define CTF_JR_CWND_LIMITED	1
+#define CTF_JR_RWND_LIMITED	2
+#define CTF_JR_APP_LIMITED	3
+#define CTF_JR_ASSESSING	4
+#define CTF_JR_PERSISTS		5
+#define CTF_JR_PRR		6
+/* Compat. */
+#define BBR_JR_SENT_DATA	CTF_JR_SENT_DATA
+#define BBR_JR_CWND_LIMITED	CTF_JR_CWND_LIMITED
+#define BBR_JR_RWND_LIMITED	CTF_JR_RWND_LIMITED
+#define BBR_JR_APP_LIMITED	CTF_JR_APP_LIMITED
+#define BBR_JR_ASSESSING	CTF_JR_ASSESSING
+#define BBR_JR_PERSISTS		CTF_JR_PERSISTS
+#define BBR_JR_PRR		CTF_JR_PRR
+
 /* RTT sample methods */
 #define USE_RTT_HIGH	0
 #define USE_RTT_LOW	1
@@ -59,6 +82,13 @@
 #define USEC_TO_MSEC(x) (x / MS_IN_USEC)
 #define TCP_TS_OVERHEAD 12	/* Overhead of having Timestamps on */
 
+/* Bits per second in bytes per second */
+#define FORTY_EIGHT_MBPS 6000000	/* 48 megabits in bytes */
+#define THIRTY_MBPS 3750000		/* 30 megabits in bytes */
+#define TWENTY_THREE_MBPS 2896000
+#define FIVETWELVE_MBPS 64000000	/* 512 megabits in bytes */
+#define ONE_POINT_TWO_MEG 150000	/* 1.2 megabits in bytes */
+
 #ifdef _KERNEL
 /* We have only 7 bits in rack so assert its true */
 CTASSERT((PACE_TMR_MASK & 0x80) == 0);
Index: netinet/tcp_stacks/rack_bbr_common.c
===================================================================
--- netinet/tcp_stacks/rack_bbr_common.c
+++ netinet/tcp_stacks/rack_bbr_common.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2016-2018
+ * Copyright (c) 2016-9
  *	Netflix Inc.
  *	All rights reserved.
  *
@@ -42,6 +42,7 @@
 #include "opt_ratelimit.h"
 #include "opt_kern_tls.h"
 #include <sys/param.h>
+#include <sys/arb.h>
 #include <sys/module.h>
 #include <sys/kernel.h>
 #ifdef TCP_HHOOK
@@ -133,8 +134,6 @@
  * Common TCP Functions - These are shared by both
  * rack and BBR.
  */
-
-
 #ifdef KERN_TLS
 uint32_t
 ctf_get_opt_tls_size(struct socket *so, uint32_t rwnd)
Index: netinet/tcp_stacks/sack_filter.c
===================================================================
--- netinet/tcp_stacks/sack_filter.c
+++ netinet/tcp_stacks/sack_filter.c
@@ -25,11 +25,16 @@
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
+#ifndef _KERNEL
+#define _WANT_TCPCB 1
+#endif
 #include <sys/types.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
+#ifdef _KERNEL
 #include <sys/socketvar.h>
 #include <sys/kernel.h>
+#endif
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/tcp.h>
Index: netinet/tcp_stacks/tcp_bbr.h
===================================================================
--- netinet/tcp_stacks/tcp_bbr.h
+++ netinet/tcp_stacks/tcp_bbr.h
@@ -128,12 +128,6 @@
 					 * an clear to start measuring */
 #define BBR_RED_BW_PE_NOEARLY_OUT 7	/* Set pkt epoch judged that we do not
 					 * get out of jail early */
-/* codes for just-return */
-#define BBR_JR_SENT_DATA 0
-#define BBR_JR_CWND_LIMITED 1
-#define BBR_JR_RWND_LIMITED 2
-#define BBR_JR_APP_LIMITED 3
-#define BBR_JR_ASSESSING 4
 /* For calculating a rate */
 #define BBR_CALC_BW 1
 #define BBR_CALC_LOSS 2
@@ -385,13 +379,6 @@
 
 #define BBR_BIG_LOG_SIZE 300000
 
-/* Bits per second in bytes per second */
-#define FORTY_EIGHT_MBPS 6000000	/* 48 megabits in bytes */
-#define THIRTY_MBPS 3750000		/* 30 megabits in bytes */
-#define TWENTY_THREE_MBPS 2896000
-#define FIVETWELVE_MBPS 64000000	/* 512 megabits in bytes */
-#define ONE_POINT_TWO_MEG 150000	/* 1.2 megabits in bytes */
-
 struct bbr_stats {
 	uint64_t bbr_badfr;		/* 0 */
 	uint64_t bbr_badfr_bytes;	/* 1 */
Index: netinet/tcp_stacks/tcp_rack.h
===================================================================
--- netinet/tcp_stacks/tcp_rack.h
+++ netinet/tcp_stacks/tcp_rack.h
@@ -137,7 +137,7 @@
 	uint64_t tcp_rack_min_pace_seg;
 	uint64_t tcp_rack_min_pace;
 	uint64_t tcp_rack_cheat;
-	uint64_t tcp_rack_no_sack;
+	uint64_t tcp_rack_do_detection;
 };
 
 #define TLP_USE_ID	1	/* Internet draft behavior */
Index: netinet/tcp_subr.c
===================================================================
--- netinet/tcp_subr.c
+++ netinet/tcp_subr.c
@@ -135,6 +135,58 @@
 VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
 #endif
 
+#ifdef NETFLIX_EXP_DETECTION
+/* Sack attack detection thresholds and such */
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack_attack, CTLFLAG_RW, 0,
+    "Sack Attack detection thresholds");
+int32_t tcp_force_detection = 0;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, force_detection,
+    CTLFLAG_RW,
+    &tcp_force_detection, 0,
+    "Do we force detection even if the INP has it off?");
+int32_t tcp_sack_to_ack_thresh = 700;	/* 70 % */
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sack_to_ack_thresh,
+    CTLFLAG_RW,
+    &tcp_sack_to_ack_thresh, 700,
+    "Percentage of sacks to acks we must see above (10.1 percent is 101)");
+int32_t tcp_sack_to_move_thresh = 600;	/* 60 % */
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, move_thresh,
+    CTLFLAG_RW,
+    &tcp_sack_to_move_thresh, 600,
+    "Percentage of sack moves we must see above (10.1 percent is 101)");
+int32_t tcp_restoral_thresh = 650;	/* 65 % (sack:2:ack -5%) */
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, restore_thresh,
+    CTLFLAG_RW,
+    &tcp_restoral_thresh, 550,
+    "Percentage of sack to ack percentage we must see below to restore (10.1 percent is 101)");
+int32_t tcp_sad_decay_val = 800;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, decay_per,
+    CTLFLAG_RW,
+    &tcp_sad_decay_val, 800,
+    "The decay percentage (10.1 percent equals 101)");
+int32_t tcp_map_minimum = 500;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, nummaps,
+    CTLFLAG_RW,
+    &tcp_map_minimum, 500,
+    "Number of Map entries before we start detection");
+int32_t tcp_attack_on_turns_on_logging = 0;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, attacks_logged,
+    CTLFLAG_RW,
+    &tcp_attack_on_turns_on_logging, 0,
+    "When we have a positive hit on attack, do we turn on logging?");
+int32_t tcp_sad_pacing_interval = 2000;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_pacing_int,
+    CTLFLAG_RW,
+    &tcp_sad_pacing_interval, 2000,
+    "What is the minimum pacing interval for a classified attacker?");
+
+int32_t tcp_sad_low_pps = 100;
+SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_low_pps,
+    CTLFLAG_RW,
+    &tcp_sad_low_pps, 100,
+    "What is the input pps below which we do not decay?");
+#endif
+
 struct rwlock tcp_function_lock;
 
 static int
@@ -237,7 +289,35 @@
 VNET_DEFINE(uma_zone_t, sack_hole_zone);
 #define	V_sack_hole_zone	VNET(sack_hole_zone)
 
+VNET_DEFINE(uint32_t, tcp_map_entries_limit) = 0;	/* unlimited */
+static int
+sysctl_net_inet_tcp_map_limit_check(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	uint32_t new;
+
+	new = V_tcp_map_entries_limit;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error == 0 && req->newptr) {
+		/* only allow "0" and value > minimum */
+		if (new > 0 && new < TCP_MIN_MAP_ENTRIES_LIMIT)
+			error = EINVAL;
+		else
+			V_tcp_map_entries_limit = new;
+	}
+	return (error);
+}
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, map_limit,
+    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
+    &VNET_NAME(tcp_map_entries_limit), 0,
+    &sysctl_net_inet_tcp_map_limit_check, "IU",
+    "Total sendmap entries limit");
+
+VNET_DEFINE(uint32_t, tcp_map_split_limit) = 0;	/* unlimited */
+SYSCTL_UINT(_net_inet_tcp, OID_AUTO, split_limit, CTLFLAG_VNET | CTLFLAG_RW,
+    &VNET_NAME(tcp_map_split_limit), 0,
+    "Total sendmap split entries limit");
+
 #ifdef TCP_HHOOK
 VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]);
 #endif
Index: netinet/tcp_var.h
===================================================================
--- netinet/tcp_var.h
+++ netinet/tcp_var.h
@@ -230,6 +230,9 @@
 	struct tcphdr tt_t;
 };
 
+/* Minimum map entries limit value, if set */
+#define TCP_MIN_MAP_ENTRIES_LIMIT	128
+
 /*
  * TODO: We yet need to brave plowing in
  * to tcp_input() and the pru_usrreq() block.
@@ -778,6 +781,8 @@
 VNET_DECLARE(int, tcp_initcwnd_segments);
 VNET_DECLARE(int, tcp_insecure_rst);
 VNET_DECLARE(int, tcp_insecure_syn);
+VNET_DECLARE(uint32_t, tcp_map_entries_limit);
+VNET_DECLARE(uint32_t, tcp_map_split_limit);
 VNET_DECLARE(int, tcp_minmss);
 VNET_DECLARE(int, tcp_mssdflt);
 VNET_DECLARE(int, tcp_recvspace);
@@ -813,6 +818,8 @@
 #define	V_tcp_initcwnd_segments	VNET(tcp_initcwnd_segments)
 #define	V_tcp_insecure_rst	VNET(tcp_insecure_rst)
 #define	V_tcp_insecure_syn	VNET(tcp_insecure_syn)
+#define	V_tcp_map_entries_limit	VNET(tcp_map_entries_limit)
+#define	V_tcp_map_split_limit	VNET(tcp_map_split_limit)
 #define	V_tcp_minmss		VNET(tcp_minmss)
 #define	V_tcp_mssdflt		VNET(tcp_mssdflt)
 #define	V_tcp_recvspace		VNET(tcp_recvspace)
@@ -824,7 +831,6 @@
 #define	V_tcp_udp_tunneling_overhead	VNET(tcp_udp_tunneling_overhead)
 #define	V_tcp_udp_tunneling_port	VNET(tcp_udp_tunneling_port)
 
-
 #ifdef TCP_HHOOK
 VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
 #define	V_tcp_hhh		VNET(tcp_hhh)
@@ -894,6 +900,19 @@
 extern counter_u64_t tcp_inp_lro_locks_taken;
 extern counter_u64_t tcp_inp_lro_sack_wake;
 
+#ifdef NETFLIX_EXP_DETECTION
+/* Various SACK attack thresholds */
+extern int32_t tcp_force_detection;
+extern int32_t tcp_sack_to_ack_thresh;
+extern int32_t tcp_sack_to_move_thresh;
+extern int32_t tcp_restoral_thresh;
+extern int32_t tcp_sad_decay_val;
+extern int32_t tcp_sad_pacing_interval;
+extern int32_t tcp_sad_low_pps;
+extern int32_t tcp_map_minimum;
+extern int32_t tcp_attack_on_turns_on_logging;
+#endif
+
 uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
 uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
 u_int	 tcp_maxseg(const struct tcpcb *);
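
Usage note (illustrative, not part of the change): the net.inet.tcp.map_limit OID added in tcp_subr.c above is guarded by sysctl_net_inet_tcp_map_limit_check(), which accepts 0 (unlimited) or any value >= TCP_MIN_MAP_ENTRIES_LIMIT (128) and rejects anything in between with EINVAL. A minimal userland sketch of exercising it via sysctlbyname(3) follows; the limit value 8192 is an arbitrary example, not a recommended setting.

/* Read and (as root) set net.inet.tcp.map_limit on FreeBSD. */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t cur, new = 8192;	/* example value; 0 means unlimited */
	size_t len = sizeof(cur);

	/* Read the current per-VNET sendmap entries limit. */
	if (sysctlbyname("net.inet.tcp.map_limit", &cur, &len, NULL, 0) == -1) {
		perror("read net.inet.tcp.map_limit");
		return (1);
	}
	printf("sendmap entries limit: %u\n", cur);

	/*
	 * Write a new limit.  The handler returns EINVAL for any
	 * non-zero value below TCP_MIN_MAP_ENTRIES_LIMIT (128).
	 */
	if (sysctlbyname("net.inet.tcp.map_limit", NULL, NULL, &new,
	    sizeof(new)) == -1)
		perror("write net.inet.tcp.map_limit");
	return (0);
}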