Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_stacks/bbr.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 437 Lines • ▼ Show 20 Lines | |||||
static inline uint64_t bbr_get_bw(struct tcp_bbr *bbr); | static inline uint64_t bbr_get_bw(struct tcp_bbr *bbr); | ||||
/* | /* | ||||
* Static definitions we need for forward declarations. | * Static definitions we need for forward declarations. | ||||
*/ | */ | ||||
static uint32_t | static uint32_t | ||||
bbr_get_pacing_length(struct tcp_bbr *bbr, uint16_t gain, | bbr_get_pacing_length(struct tcp_bbr *bbr, uint16_t gain, | ||||
uint32_t useconds_time, uint64_t bw); | uint32_t useconds_time, uint64_t bw); | ||||
static uint32_t | static uint32_t | ||||
bbr_get_a_state_target(struct tcp_bbr *bbr, uint32_t gain); | bbr_get_a_state_target(struct tcp_bbr *bbr, uint32_t gain); | ||||
static void | static void | ||||
bbr_set_state(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t win); | bbr_set_state(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t win); | ||||
static void | static void | ||||
bbr_set_probebw_gains(struct tcp_bbr *bbr, uint32_t cts, uint32_t losses); | bbr_set_probebw_gains(struct tcp_bbr *bbr, uint32_t cts, uint32_t losses); | ||||
static void | static void | ||||
bbr_substate_change(struct tcp_bbr *bbr, uint32_t cts, int line, | bbr_substate_change(struct tcp_bbr *bbr, uint32_t cts, int line, | ||||
int dolog); | int dolog); | ||||
static uint32_t | static uint32_t | ||||
bbr_get_target_cwnd(struct tcp_bbr *bbr, uint64_t bw, uint32_t gain); | bbr_get_target_cwnd(struct tcp_bbr *bbr, uint64_t bw, uint32_t gain); | ||||
static void | static void | ||||
bbr_state_change(struct tcp_bbr *bbr, uint32_t cts, int32_t epoch, | bbr_state_change(struct tcp_bbr *bbr, uint32_t cts, int32_t epoch, | ||||
int32_t pkt_epoch, uint32_t losses); | int32_t pkt_epoch, uint32_t losses); | ||||
static uint32_t | static uint32_t | ||||
bbr_calc_thresh_rack(struct tcp_bbr *bbr, uint32_t srtt, uint32_t cts, struct bbr_sendmap *rsm); | bbr_calc_thresh_rack(struct tcp_bbr *bbr, uint32_t srtt, uint32_t cts, | ||||
static uint32_t bbr_initial_cwnd(struct tcp_bbr *bbr, struct tcpcb *tp); | struct bbr_sendmap *rsm); | ||||
static uint32_t | static uint32_t | ||||
bbr_initial_cwnd(struct tcp_bbr *bbr, struct tcpcb *tp); | |||||
static uint32_t | |||||
bbr_calc_thresh_tlp(struct tcpcb *tp, struct tcp_bbr *bbr, | bbr_calc_thresh_tlp(struct tcpcb *tp, struct tcp_bbr *bbr, | ||||
struct bbr_sendmap *rsm, uint32_t srtt, | struct bbr_sendmap *rsm, uint32_t srtt, uint32_t cts); | ||||
uint32_t cts); | |||||
static void | static void | ||||
bbr_exit_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts, | bbr_exit_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts, | ||||
int32_t line); | int32_t line); | ||||
static void | static void | ||||
bbr_set_state_target(struct tcp_bbr *bbr, int line); | bbr_set_state_target(struct tcp_bbr *bbr, int line); | ||||
static void | static void | ||||
bbr_enter_probe_rtt(struct tcp_bbr *bbr, uint32_t cts, int32_t line); | bbr_enter_probe_rtt(struct tcp_bbr *bbr, uint32_t cts, int32_t line); | ||||
static void | static void | ||||
bbr_log_progress_event(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t tick, int event, int line); | bbr_log_progress_event(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t tick, | ||||
int event, int line); | |||||
static void | static void | ||||
tcp_bbr_tso_size_check(struct tcp_bbr *bbr, uint32_t cts); | tcp_bbr_tso_size_check(struct tcp_bbr *bbr, uint32_t cts); | ||||
static void | static void | ||||
bbr_setup_red_bw(struct tcp_bbr *bbr, uint32_t cts); | bbr_setup_red_bw(struct tcp_bbr *bbr, uint32_t cts); | ||||
static void | static void | ||||
bbr_log_rtt_shrinks(struct tcp_bbr *bbr, uint32_t cts, uint32_t applied, uint32_t rtt, | bbr_log_rtt_shrinks(struct tcp_bbr *bbr, uint32_t cts, uint32_t applied, | ||||
uint32_t line, uint8_t is_start, uint16_t set); | uint32_t rtt, uint32_t line, uint8_t is_start, uint16_t set); | ||||
static struct bbr_sendmap * | static struct bbr_sendmap * | ||||
bbr_find_lowest_rsm(struct tcp_bbr *bbr); | bbr_find_lowest_rsm(struct tcp_bbr *bbr); | ||||
static __inline uint32_t | static __inline uint32_t | ||||
bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type); | bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type); | ||||
static void | static void | ||||
bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t slot, uint8_t which); | bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t slot, | ||||
uint8_t which); | |||||
static void | static void | ||||
bbr_log_timer_var(struct tcp_bbr *bbr, int mode, uint32_t cts, uint32_t time_since_sent, uint32_t srtt, | bbr_log_timer_var(struct tcp_bbr *bbr, int mode, uint32_t cts, | ||||
uint32_t time_since_sent, uint32_t srtt, | |||||
uint32_t thresh, uint32_t to); | uint32_t thresh, uint32_t to); | ||||
static void | static void | ||||
bbr_log_hpts_diag(struct tcp_bbr *bbr, uint32_t cts, struct hpts_diag *diag); | bbr_log_hpts_diag(struct tcp_bbr *bbr, uint32_t cts, struct hpts_diag *diag); | ||||
static void | static void | ||||
bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t slot, | bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t slot, | ||||
uint32_t del_by, uint32_t cts, uint32_t sloton, uint32_t prev_delay); | uint32_t del_by, uint32_t cts, uint32_t sloton, | ||||
uint32_t prev_delay); | |||||
static void | static void | ||||
bbr_enter_persist(struct tcpcb *tp, struct tcp_bbr *bbr, | bbr_enter_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts, | ||||
uint32_t cts, int32_t line); | int32_t line); | ||||
static void | static void | ||||
bbr_stop_all_timers(struct tcpcb *tp); | bbr_stop_all_timers(struct tcpcb *tp); | ||||
static void | static void | ||||
bbr_exit_probe_rtt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts); | bbr_exit_probe_rtt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts); | ||||
static void | static void | ||||
bbr_check_probe_rtt_limits(struct tcp_bbr *bbr, uint32_t cts); | bbr_check_probe_rtt_limits(struct tcp_bbr *bbr, uint32_t cts); | ||||
static void | static void | ||||
bbr_timer_cancel(struct tcp_bbr *bbr, int32_t line, uint32_t cts); | bbr_timer_cancel(struct tcp_bbr *bbr, int32_t line, uint32_t cts); | ||||
static void | static void | ||||
bbr_log_pacing_delay_calc(struct tcp_bbr *bbr, uint16_t gain, uint32_t len, | bbr_log_pacing_delay_calc(struct tcp_bbr *bbr, uint16_t gain, uint32_t len, | ||||
uint32_t cts, uint32_t usecs, uint64_t bw, uint32_t override, int mod); | uint32_t cts, uint32_t usecs, uint64_t bw, | ||||
uint32_t override, int mod); | |||||
/* Accessor: returns the rc_bbr_substate field of the given tcp_bbr control block. */
static inline uint8_t | static inline uint8_t | ||||
bbr_state_val(struct tcp_bbr *bbr) | bbr_state_val(struct tcp_bbr *bbr) | ||||
{ | { | ||||
return(bbr->rc_bbr_substate); | return(bbr->rc_bbr_substate); | ||||
} | } | ||||
static inline uint32_t | static inline uint32_t | ||||
▲ Show 20 Lines • Show All 1,082 Lines • ▼ Show 20 Lines | SYSCTL_ADD_S32(&bbr_sysctl_ctx, | ||||
OID_AUTO, "target_is_unit", CTLFLAG_RW, | OID_AUTO, "target_is_unit", CTLFLAG_RW, | ||||
&bbr_target_is_bbunit, 0, | &bbr_target_is_bbunit, 0, | ||||
"Is the state target the pacing_gain or BBR_UNIT?"); | "Is the state target the pacing_gain or BBR_UNIT?"); | ||||
SYSCTL_ADD_S32(&bbr_sysctl_ctx, | SYSCTL_ADD_S32(&bbr_sysctl_ctx, | ||||
SYSCTL_CHILDREN(bbr_cwnd), | SYSCTL_CHILDREN(bbr_cwnd), | ||||
OID_AUTO, "drop_limit", CTLFLAG_RW, | OID_AUTO, "drop_limit", CTLFLAG_RW, | ||||
&bbr_drop_limit, 0, | &bbr_drop_limit, 0, | ||||
"Number of segments limit for drop (0=use min_cwnd w/flight)?"); | "Number of segments limit for drop (0=use min_cwnd w/flight)?"); | ||||
/* Timeout controls */ | /* Timeout controls */ | ||||
bbr_timeout = SYSCTL_ADD_NODE(&bbr_sysctl_ctx, | bbr_timeout = SYSCTL_ADD_NODE(&bbr_sysctl_ctx, | ||||
SYSCTL_CHILDREN(bbr_sysctl_root), | SYSCTL_CHILDREN(bbr_sysctl_root), | ||||
OID_AUTO, | OID_AUTO, | ||||
"timeout", | "timeout", | ||||
CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | ||||
"Time out controls"); | "Time out controls"); | ||||
SYSCTL_ADD_S32(&bbr_sysctl_ctx, | SYSCTL_ADD_S32(&bbr_sysctl_ctx, | ||||
SYSCTL_CHILDREN(bbr_timeout), | SYSCTL_CHILDREN(bbr_timeout), | ||||
▲ Show 20 Lines • Show All 4,109 Lines • ▼ Show 20 Lines | tcp_bbr_tso_size_check(struct tcp_bbr *bbr, uint32_t cts) | ||||
* | * | ||||
* if ( bw <= per-tcb-cross-over) | * if ( bw <= per-tcb-cross-over) | ||||
* goal_tso = calculate how much with this bw we | * goal_tso = calculate how much with this bw we | ||||
* can send in goal-time seconds. | * can send in goal-time seconds. | ||||
* if (goal_tso > mss) | * if (goal_tso > mss) | ||||
* seg = goal_tso / mss | * seg = goal_tso / mss | ||||
* tso = seg * mss | * tso = seg * mss | ||||
* else | * else | ||||
* tso = mss | * tso = mss | ||||
* if (tso > per-tcb-max) | * if (tso > per-tcb-max) | ||||
* tso = per-tcb-max | * tso = per-tcb-max | ||||
* else if ( bw > 512Mbps) | * else if ( bw > 512Mbps) | ||||
* tso = max-tso (64k/mss) | * tso = max-tso (64k/mss) | ||||
* else | * else | ||||
* goal_tso = bw / per-tcb-divisor | * goal_tso = bw / per-tcb-divisor | ||||
* seg = (goal_tso + mss-1)/mss | * seg = (goal_tso + mss-1)/mss | ||||
* tso = seg * mss | * tso = seg * mss | ||||
▲ Show 20 Lines • Show All 1,028 Lines • ▼ Show 20 Lines | bbr_update_bbr_info(struct tcp_bbr *bbr, struct bbr_sendmap *rsm, uint32_t rtt, uint32_t cts, uint32_t tsin, | ||||
} | } | ||||
/* Only update srtt if we know by exact match */ | /* Only update srtt if we know by exact match */ | ||||
tcp_bbr_xmit_timer(bbr, rtt, rsm_send_time, rsm->r_start, tsin); | tcp_bbr_xmit_timer(bbr, rtt, rsm_send_time, rsm->r_start, tsin); | ||||
if (ack_type == BBR_CUM_ACKED) | if (ack_type == BBR_CUM_ACKED) | ||||
bbr->rc_ack_is_cumack = 1; | bbr->rc_ack_is_cumack = 1; | ||||
else | else | ||||
bbr->rc_ack_is_cumack = 0; | bbr->rc_ack_is_cumack = 0; | ||||
old_rttprop = bbr_get_rtt(bbr, BBR_RTT_PROP); | old_rttprop = bbr_get_rtt(bbr, BBR_RTT_PROP); | ||||
/* | /* | ||||
* Note the following code differs to the original | * Note the following code differs to the original | ||||
* BBR spec. It calls for <= not <. However after a | * BBR spec. It calls for <= not <. However after a | ||||
* long discussion in email with Neal, he acknowledged | * long discussion in email with Neal, he acknowledged | ||||
* that it should be < so that we will have flows | * that it should be < so that we will have flows | ||||
* going into probe-rtt (we were seeing cases where that | * going into probe-rtt (we were seeing cases where that | ||||
* did not happen and caused ugly things to occur). We | * did not happen and caused ugly things to occur). We | ||||
* have added this agreed upon fix to our code base. | * have added this agreed upon fix to our code base. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 1,003 Lines • ▼ Show 20 Lines | if (SEQ_GT(th->th_ack, tp->snd_max)) { | ||||
bbr->r_wanted_output = 1; | bbr->r_wanted_output = 1; | ||||
return (1); | return (1); | ||||
} | } | ||||
if (SEQ_GEQ(th->th_ack, tp->snd_una) || to->to_nsacks) { | if (SEQ_GEQ(th->th_ack, tp->snd_una) || to->to_nsacks) { | ||||
/* Process the ack */ | /* Process the ack */ | ||||
if (bbr->rc_in_persist) | if (bbr->rc_in_persist) | ||||
tp->t_rxtshift = 0; | tp->t_rxtshift = 0; | ||||
if ((th->th_ack == tp->snd_una) && (tiwin == tp->snd_wnd)) | if ((th->th_ack == tp->snd_una) && (tiwin == tp->snd_wnd)) | ||||
bbr_strike_dupack(bbr); | bbr_strike_dupack(bbr); | ||||
sack_changed = bbr_log_ack(tp, to, th, &prev_acked); | sack_changed = bbr_log_ack(tp, to, th, &prev_acked); | ||||
} | } | ||||
bbr_lt_bw_sampling(bbr, bbr->r_ctl.rc_rcvtime, (bbr->r_ctl.rc_lost > lost)); | bbr_lt_bw_sampling(bbr, bbr->r_ctl.rc_rcvtime, (bbr->r_ctl.rc_lost > lost)); | ||||
if (__predict_false(SEQ_LEQ(th->th_ack, tp->snd_una))) { | if (__predict_false(SEQ_LEQ(th->th_ack, tp->snd_una))) { | ||||
/* | /* | ||||
* Old ack, behind the last one rcv'd or a duplicate ack | * Old ack, behind the last one rcv'd or a duplicate ack | ||||
* with SACK info. | * with SACK info. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 3,782 Lines • ▼ Show 20 Lines | #endif | ||||
} else { | } else { | ||||
bbr->r_ctl.rc_ack_hdwr_delay = 0; | bbr->r_ctl.rc_ack_hdwr_delay = 0; | ||||
bbr->rc_ack_was_delayed = 0; | bbr->rc_ack_was_delayed = 0; | ||||
} | } | ||||
bbr_log_ack_event(bbr, th, &to, tlen, nsegs, cts, nxt_pkt, m); | bbr_log_ack_event(bbr, th, &to, tlen, nsegs, cts, nxt_pkt, m); | ||||
if ((thflags & TH_SYN) && (thflags & TH_FIN) && V_drop_synfin) { | if ((thflags & TH_SYN) && (thflags & TH_FIN) && V_drop_synfin) { | ||||
retval = 0; | retval = 0; | ||||
m_freem(m); | m_freem(m); | ||||
goto done_with_input; | goto done_with_input; | ||||
} | } | ||||
/* | /* | ||||
* If a segment with the ACK-bit set arrives in the SYN-SENT state | * If a segment with the ACK-bit set arrives in the SYN-SENT state | ||||
* check SEQ.ACK first as described on page 66 of RFC 793, section 3.9. | * check SEQ.ACK first as described on page 66 of RFC 793, section 3.9. | ||||
*/ | */ | ||||
if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && | if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && | ||||
(SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { | (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); | tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); | ||||
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ||||
return (1); | return (1); | ||||
} | } | ||||
in_recovery = IN_RECOVERY(tp->t_flags); | in_recovery = IN_RECOVERY(tp->t_flags); | ||||
if (tiwin > bbr->r_ctl.rc_high_rwnd) | if (tiwin > bbr->r_ctl.rc_high_rwnd) | ||||
bbr->r_ctl.rc_high_rwnd = tiwin; | bbr->r_ctl.rc_high_rwnd = tiwin; | ||||
#ifdef BBR_INVARIANTS | #ifdef BBR_INVARIANTS | ||||
if ((tp->t_inpcb->inp_flags & INP_DROPPED) || | if ((tp->t_inpcb->inp_flags & INP_DROPPED) || | ||||
(tp->t_inpcb->inp_flags2 & INP_FREED)) { | (tp->t_inpcb->inp_flags2 & INP_FREED)) { | ||||
panic("tp:%p bbr:%p given a dropped inp:%p", | panic("tp:%p bbr:%p given a dropped inp:%p", | ||||
tp, bbr, tp->t_inpcb); | tp, bbr, tp->t_inpcb); | ||||
▲ Show 20 Lines • Show All 3,324 Lines • Show Last 20 Lines |