Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_stacks/rack.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 1,063 Lines • ▼ Show 20 Lines | SYSCTL_ADD_PROC(&rack_sysctl_ctx, | ||||
&rack_clear_counter, 0, sysctl_rack_clear, "IU", "Clear counters"); | &rack_clear_counter, 0, sysctl_rack_clear, "IU", "Clear counters"); | ||||
} | } | ||||
static __inline int | static __inline int | ||||
rb_map_cmp(struct rack_sendmap *b, struct rack_sendmap *a) | rb_map_cmp(struct rack_sendmap *b, struct rack_sendmap *a) | ||||
{ | { | ||||
if (SEQ_GEQ(b->r_start, a->r_start) && | if (SEQ_GEQ(b->r_start, a->r_start) && | ||||
SEQ_LT(b->r_start, a->r_end)) { | SEQ_LT(b->r_start, a->r_end)) { | ||||
/* | /* | ||||
* The entry b is within the | * The entry b is within the | ||||
* block a. i.e.: | * block a. i.e.: | ||||
* a -- |-------------| | * a -- |-------------| | ||||
* b -- |----| | * b -- |----| | ||||
* <or> | * <or> | ||||
* b -- |------| | * b -- |------| | ||||
* <or> | * <or> | ||||
* b -- |-----------| | * b -- |-----------| | ||||
*/ | */ | ||||
return (0); | return (0); | ||||
} else if (SEQ_GEQ(b->r_start, a->r_end)) { | } else if (SEQ_GEQ(b->r_start, a->r_end)) { | ||||
/* | /* | ||||
* b falls as either the next | * b falls as either the next | ||||
* sequence block after a so a | * sequence block after a so a | ||||
* is said to be smaller than b. | * is said to be smaller than b. | ||||
* i.e: | * i.e: | ||||
* a -- |------| | * a -- |------| | ||||
* b -- |--------| | * b -- |--------| | ||||
* or | * or | ||||
* b -- |-----| | * b -- |-----| | ||||
*/ | */ | ||||
return (1); | return (1); | ||||
} | } | ||||
/* | /* | ||||
* Whats left is where a is | * Whats left is where a is | ||||
* larger than b. i.e: | * larger than b. i.e: | ||||
* a -- |-------| | * a -- |-------| | ||||
* b -- |---| | * b -- |---| | ||||
▲ Show 20 Lines • Show All 123 Lines • ▼ Show 20 Lines | if (tp->t_logstate != TCP_LOG_STATE_OFF) { | ||||
memset(&log.u_bbr, 0, sizeof(log.u_bbr)); | memset(&log.u_bbr, 0, sizeof(log.u_bbr)); | ||||
log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; | log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; | ||||
log.u_bbr.ininput = rack->rc_inp->inp_in_input; | log.u_bbr.ininput = rack->rc_inp->inp_in_input; | ||||
log.u_bbr.flex1 = t; | log.u_bbr.flex1 = t; | ||||
log.u_bbr.flex2 = o_srtt; | log.u_bbr.flex2 = o_srtt; | ||||
log.u_bbr.flex3 = o_var; | log.u_bbr.flex3 = o_var; | ||||
log.u_bbr.flex4 = rack->r_ctl.rack_rs.rs_rtt_lowest; | log.u_bbr.flex4 = rack->r_ctl.rack_rs.rs_rtt_lowest; | ||||
log.u_bbr.flex5 = rack->r_ctl.rack_rs.rs_rtt_highest; | log.u_bbr.flex5 = rack->r_ctl.rack_rs.rs_rtt_highest; | ||||
log.u_bbr.flex6 = rack->r_ctl.rack_rs.rs_rtt_cnt; | log.u_bbr.flex6 = rack->r_ctl.rack_rs.rs_rtt_cnt; | ||||
log.u_bbr.rttProp = rack->r_ctl.rack_rs.rs_rtt_tot; | log.u_bbr.rttProp = rack->r_ctl.rack_rs.rs_rtt_tot; | ||||
log.u_bbr.flex8 = rack->r_ctl.rc_rate_sample_method; | log.u_bbr.flex8 = rack->r_ctl.rc_rate_sample_method; | ||||
log.u_bbr.pkts_out = rack->r_ctl.rc_prr_sndcnt; | log.u_bbr.pkts_out = rack->r_ctl.rc_prr_sndcnt; | ||||
log.u_bbr.timeStamp = tcp_get_usecs(&tv); | log.u_bbr.timeStamp = tcp_get_usecs(&tv); | ||||
log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); | log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); | ||||
TCP_LOG_EVENTP(tp, NULL, | TCP_LOG_EVENTP(tp, NULL, | ||||
&rack->rc_inp->inp_socket->so_rcv, | &rack->rc_inp->inp_socket->so_rcv, | ||||
&rack->rc_inp->inp_socket->so_snd, | &rack->rc_inp->inp_socket->so_snd, | ||||
BBR_LOG_BBRRTT, 0, | BBR_LOG_BBRRTT, 0, | ||||
0, &log, false, &tv); | 0, &log, false, &tv); | ||||
} | } | ||||
} | } | ||||
static void | static void | ||||
rack_log_rtt_sample(struct tcp_rack *rack, uint32_t rtt) | rack_log_rtt_sample(struct tcp_rack *rack, uint32_t rtt) | ||||
{ | { | ||||
/* | /* | ||||
* Log the rtt sample we are | * Log the rtt sample we are | ||||
* applying to the srtt algorithm in | * applying to the srtt algorithm in | ||||
* useconds. | * useconds. | ||||
*/ | */ | ||||
if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { | if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { | ||||
union tcp_log_stackspecific log; | union tcp_log_stackspecific log; | ||||
struct timeval tv; | struct timeval tv; | ||||
▲ Show 20 Lines • Show All 612 Lines • ▼ Show 20 Lines | #ifdef NETFLIX_STATS | ||||
if (tp->t_state == TCPS_ESTABLISHED) | if (tp->t_state == TCPS_ESTABLISHED) | ||||
TCPSTAT_INC(tcps_idle_estrestarts); | TCPSTAT_INC(tcps_idle_estrestarts); | ||||
#endif | #endif | ||||
if (CC_ALGO(tp)->after_idle != NULL) | if (CC_ALGO(tp)->after_idle != NULL) | ||||
CC_ALGO(tp)->after_idle(tp->ccv); | CC_ALGO(tp)->after_idle(tp->ccv); | ||||
if (tp->snd_cwnd == 1) | if (tp->snd_cwnd == 1) | ||||
i_cwnd = tp->t_maxseg; /* SYN(-ACK) lost */ | i_cwnd = tp->t_maxseg; /* SYN(-ACK) lost */ | ||||
else | else | ||||
i_cwnd = tcp_compute_initwnd(tcp_maxseg(tp)); | i_cwnd = tcp_compute_initwnd(tcp_maxseg(tp)); | ||||
/* | /* | ||||
* Being idle is no differnt than the initial window. If the cc | * Being idle is no differnt than the initial window. If the cc | ||||
* clamps it down below the initial window raise it to the initial | * clamps it down below the initial window raise it to the initial | ||||
* window. | * window. | ||||
*/ | */ | ||||
if (tp->snd_cwnd < i_cwnd) { | if (tp->snd_cwnd < i_cwnd) { | ||||
▲ Show 20 Lines • Show All 329 Lines • ▼ Show 20 Lines | rack_timer_start(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, int sup_rack) | ||||
rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); | rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); | ||||
if ((rsm == NULL) || sup_rack) { | if ((rsm == NULL) || sup_rack) { | ||||
/* Nothing on the send map */ | /* Nothing on the send map */ | ||||
activate_rxt: | activate_rxt: | ||||
time_since_sent = 0; | time_since_sent = 0; | ||||
rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); | rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); | ||||
if (rsm) { | if (rsm) { | ||||
idx = rsm->r_rtr_cnt - 1; | idx = rsm->r_rtr_cnt - 1; | ||||
if (TSTMP_GEQ(rsm->r_tim_lastsent[idx], rack->r_ctl.rc_tlp_rxt_last_time)) | if (TSTMP_GEQ(rsm->r_tim_lastsent[idx], rack->r_ctl.rc_tlp_rxt_last_time)) | ||||
tstmp_touse = rsm->r_tim_lastsent[idx]; | tstmp_touse = rsm->r_tim_lastsent[idx]; | ||||
else | else | ||||
tstmp_touse = rack->r_ctl.rc_tlp_rxt_last_time; | tstmp_touse = rack->r_ctl.rc_tlp_rxt_last_time; | ||||
if (TSTMP_GT(tstmp_touse, cts)) | if (TSTMP_GT(tstmp_touse, cts)) | ||||
time_since_sent = cts - tstmp_touse; | time_since_sent = cts - tstmp_touse; | ||||
} | } | ||||
if (SEQ_LT(tp->snd_una, tp->snd_max) || sbavail(&(tp->t_inpcb->inp_socket->so_snd))) { | if (SEQ_LT(tp->snd_una, tp->snd_max) || sbavail(&(tp->t_inpcb->inp_socket->so_snd))) { | ||||
rack->r_ctl.rc_hpts_flags |= PACE_TMR_RXT; | rack->r_ctl.rc_hpts_flags |= PACE_TMR_RXT; | ||||
to = TICKS_2_MSEC(tp->t_rxtcur); | to = TICKS_2_MSEC(tp->t_rxtcur); | ||||
if (to > time_since_sent) | if (to > time_since_sent) | ||||
Show All 32 Lines | if ((tp->t_flags & TF_SENTFIN) && | ||||
* We don't start a rack timer if all we have is a | * We don't start a rack timer if all we have is a | ||||
* FIN outstanding. | * FIN outstanding. | ||||
*/ | */ | ||||
goto activate_rxt; | goto activate_rxt; | ||||
} | } | ||||
if ((rack->use_rack_cheat == 0) && | if ((rack->use_rack_cheat == 0) && | ||||
(IN_RECOVERY(tp->t_flags)) && | (IN_RECOVERY(tp->t_flags)) && | ||||
(rack->r_ctl.rc_prr_sndcnt < ctf_fixed_maxseg(tp))) { | (rack->r_ctl.rc_prr_sndcnt < ctf_fixed_maxseg(tp))) { | ||||
/* | /* | ||||
* We are not cheating, in recovery and | * We are not cheating, in recovery and | ||||
* not enough ack's to yet get our next | * not enough ack's to yet get our next | ||||
* retransmission out. | * retransmission out. | ||||
* | * | ||||
* Note that classified attackers do not | * Note that classified attackers do not | ||||
* get to use the rack-cheat. | * get to use the rack-cheat. | ||||
*/ | */ | ||||
goto activate_tlp; | goto activate_tlp; | ||||
Show All 28 Lines | activate_tlp: | ||||
} | } | ||||
if (rsm->r_flags & RACK_HAS_FIN) { | if (rsm->r_flags & RACK_HAS_FIN) { | ||||
/* If its a FIN we dont do TLP */ | /* If its a FIN we dont do TLP */ | ||||
rsm = NULL; | rsm = NULL; | ||||
goto activate_rxt; | goto activate_rxt; | ||||
} | } | ||||
idx = rsm->r_rtr_cnt - 1; | idx = rsm->r_rtr_cnt - 1; | ||||
time_since_sent = 0; | time_since_sent = 0; | ||||
if (TSTMP_GEQ(rsm->r_tim_lastsent[idx], rack->r_ctl.rc_tlp_rxt_last_time)) | if (TSTMP_GEQ(rsm->r_tim_lastsent[idx], rack->r_ctl.rc_tlp_rxt_last_time)) | ||||
tstmp_touse = rsm->r_tim_lastsent[idx]; | tstmp_touse = rsm->r_tim_lastsent[idx]; | ||||
else | else | ||||
tstmp_touse = rack->r_ctl.rc_tlp_rxt_last_time; | tstmp_touse = rack->r_ctl.rc_tlp_rxt_last_time; | ||||
if (TSTMP_GT(tstmp_touse, cts)) | if (TSTMP_GT(tstmp_touse, cts)) | ||||
time_since_sent = cts - tstmp_touse; | time_since_sent = cts - tstmp_touse; | ||||
is_tlp_timer = 1; | is_tlp_timer = 1; | ||||
if (tp->t_srtt) { | if (tp->t_srtt) { | ||||
srtt_cur = (tp->t_srtt >> TCP_RTT_SHIFT); | srtt_cur = (tp->t_srtt >> TCP_RTT_SHIFT); | ||||
srtt = TICKS_2_MSEC(srtt_cur); | srtt = TICKS_2_MSEC(srtt_cur); | ||||
} else | } else | ||||
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | rack_exit_persist(struct tcpcb *tp, struct tcp_rack *rack) | ||||
} | } | ||||
rack->rc_in_persist = 0; | rack->rc_in_persist = 0; | ||||
rack->r_ctl.rc_went_idle_time = 0; | rack->r_ctl.rc_went_idle_time = 0; | ||||
tp->t_flags &= ~TF_FORCEDATA; | tp->t_flags &= ~TF_FORCEDATA; | ||||
tp->t_rxtshift = 0; | tp->t_rxtshift = 0; | ||||
} | } | ||||
static void | static void | ||||
rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, | rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, | ||||
int32_t slot, uint32_t tot_len_this_send, int sup_rack) | int32_t slot, uint32_t tot_len_this_send, int sup_rack) | ||||
{ | { | ||||
struct inpcb *inp; | struct inpcb *inp; | ||||
uint32_t delayed_ack = 0; | uint32_t delayed_ack = 0; | ||||
uint32_t hpts_timeout; | uint32_t hpts_timeout; | ||||
uint8_t stopped; | uint8_t stopped; | ||||
uint32_t left = 0; | uint32_t left = 0; | ||||
Show All 9 Lines | rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, | ||||
stopped = rack->rc_tmr_stopped; | stopped = rack->rc_tmr_stopped; | ||||
if (stopped && TSTMP_GT(rack->r_ctl.rc_timer_exp, cts)) { | if (stopped && TSTMP_GT(rack->r_ctl.rc_timer_exp, cts)) { | ||||
left = rack->r_ctl.rc_timer_exp - cts; | left = rack->r_ctl.rc_timer_exp - cts; | ||||
} | } | ||||
rack->tlp_timer_up = 0; | rack->tlp_timer_up = 0; | ||||
rack->r_ctl.rc_timer_exp = 0; | rack->r_ctl.rc_timer_exp = 0; | ||||
if (rack->rc_inp->inp_in_hpts == 0) { | if (rack->rc_inp->inp_in_hpts == 0) { | ||||
rack->r_ctl.rc_hpts_flags = 0; | rack->r_ctl.rc_hpts_flags = 0; | ||||
} | } | ||||
if (slot) { | if (slot) { | ||||
/* We are hptsi too */ | /* We are hptsi too */ | ||||
rack->r_ctl.rc_hpts_flags |= PACE_PKT_OUTPUT; | rack->r_ctl.rc_hpts_flags |= PACE_PKT_OUTPUT; | ||||
} else if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) { | } else if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) { | ||||
/* | /* | ||||
* We are still left on the hpts when the to goes | * We are still left on the hpts when the to goes | ||||
* it will be for output. | * it will be for output. | ||||
*/ | */ | ||||
if (TSTMP_GT(rack->r_ctl.rc_last_output_to, cts)) | if (TSTMP_GT(rack->r_ctl.rc_last_output_to, cts)) | ||||
slot = rack->r_ctl.rc_last_output_to - cts; | slot = rack->r_ctl.rc_last_output_to - cts; | ||||
else | else | ||||
slot = 1; | slot = 1; | ||||
} | } | ||||
hpts_timeout = rack_timer_start(tp, rack, cts, sup_rack); | hpts_timeout = rack_timer_start(tp, rack, cts, sup_rack); | ||||
if (rack->sack_attack_disable && | if (rack->sack_attack_disable && | ||||
(slot < USEC_TO_MSEC(tcp_sad_pacing_interval))) { | (slot < USEC_TO_MSEC(tcp_sad_pacing_interval))) { | ||||
/* | /* | ||||
* We have a potential attacker on | * We have a potential attacker on | ||||
* the line. We have possibly some | * the line. We have possibly some | ||||
* (or now) pacing time set. We want to | * (or now) pacing time set. We want to | ||||
* slow down the processing of sacks by some | * slow down the processing of sacks by some | ||||
* amount (if it is an attacker). Set the default | * amount (if it is an attacker). Set the default | ||||
* slot for attackers in place (unless the orginal | * slot for attackers in place (unless the orginal | ||||
* interval is longer). Its stored in | * interval is longer). Its stored in | ||||
* micro-seconds, so lets convert to msecs. | * micro-seconds, so lets convert to msecs. | ||||
*/ | */ | ||||
slot = USEC_TO_MSEC(tcp_sad_pacing_interval); | slot = USEC_TO_MSEC(tcp_sad_pacing_interval); | ||||
} | } | ||||
if (tp->t_flags & TF_DELACK) { | if (tp->t_flags & TF_DELACK) { | ||||
delayed_ack = TICKS_2_MSEC(tcp_delacktime); | delayed_ack = TICKS_2_MSEC(tcp_delacktime); | ||||
rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK; | rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK; | ||||
} | } | ||||
if (delayed_ack && ((hpts_timeout == 0) || | if (delayed_ack && ((hpts_timeout == 0) || | ||||
(delayed_ack < hpts_timeout))) | (delayed_ack < hpts_timeout))) | ||||
hpts_timeout = delayed_ack; | hpts_timeout = delayed_ack; | ||||
else | else | ||||
rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_DELACK; | rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_DELACK; | ||||
/* | /* | ||||
* If no timers are going to run and we will fall off the hptsi | * If no timers are going to run and we will fall off the hptsi | ||||
* wheel, we resort to a keep-alive timer if its configured. | * wheel, we resort to a keep-alive timer if its configured. | ||||
*/ | */ | ||||
if ((hpts_timeout == 0) && | if ((hpts_timeout == 0) && | ||||
(slot == 0)) { | (slot == 0)) { | ||||
if ((tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && | if ((tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && | ||||
Show All 33 Lines | if (hpts_timeout) { | ||||
* :). | * :). | ||||
*/ | */ | ||||
if (hpts_timeout > 0x7ffffffe) | if (hpts_timeout > 0x7ffffffe) | ||||
hpts_timeout = 0x7ffffffe; | hpts_timeout = 0x7ffffffe; | ||||
rack->r_ctl.rc_timer_exp = cts + hpts_timeout; | rack->r_ctl.rc_timer_exp = cts + hpts_timeout; | ||||
} | } | ||||
if (slot) { | if (slot) { | ||||
rack->rc_inp->inp_flags2 |= INP_MBUF_QUEUE_READY; | rack->rc_inp->inp_flags2 |= INP_MBUF_QUEUE_READY; | ||||
if (rack->r_ctl.rc_hpts_flags & PACE_TMR_RACK) | if (rack->r_ctl.rc_hpts_flags & PACE_TMR_RACK) | ||||
inp->inp_flags2 |= INP_DONT_SACK_QUEUE; | inp->inp_flags2 |= INP_DONT_SACK_QUEUE; | ||||
else | else | ||||
inp->inp_flags2 &= ~INP_DONT_SACK_QUEUE; | inp->inp_flags2 &= ~INP_DONT_SACK_QUEUE; | ||||
rack->r_ctl.rc_last_output_to = cts + slot; | rack->r_ctl.rc_last_output_to = cts + slot; | ||||
if ((hpts_timeout == 0) || (hpts_timeout > slot)) { | if ((hpts_timeout == 0) || (hpts_timeout > slot)) { | ||||
if (rack->rc_inp->inp_in_hpts == 0) | if (rack->rc_inp->inp_in_hpts == 0) | ||||
tcp_hpts_insert(tp->t_inpcb, HPTS_MS_TO_SLOTS(slot)); | tcp_hpts_insert(tp->t_inpcb, HPTS_MS_TO_SLOTS(slot)); | ||||
rack_log_to_start(rack, cts, hpts_timeout, slot, 1); | rack_log_to_start(rack, cts, hpts_timeout, slot, 1); | ||||
} else { | } else { | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 123 Lines • ▼ Show 20 Lines | rack_clone_rsm(struct tcp_rack *rack, struct rack_sendmap *nrsm, | ||||
} | } | ||||
} | } | ||||
static struct rack_sendmap * | static struct rack_sendmap * | ||||
rack_merge_rsm(struct tcp_rack *rack, | rack_merge_rsm(struct tcp_rack *rack, | ||||
struct rack_sendmap *l_rsm, | struct rack_sendmap *l_rsm, | ||||
struct rack_sendmap *r_rsm) | struct rack_sendmap *r_rsm) | ||||
{ | { | ||||
/* | /* | ||||
* We are merging two ack'd RSM's, | * We are merging two ack'd RSM's, | ||||
* the l_rsm is on the left (lower seq | * the l_rsm is on the left (lower seq | ||||
* values) and the r_rsm is on the right | * values) and the r_rsm is on the right | ||||
* (higher seq value). The simplest way | * (higher seq value). The simplest way | ||||
* to merge these is to move the right | * to merge these is to move the right | ||||
* one into the left. I don't think there | * one into the left. I don't think there | ||||
* is any reason we need to try to find | * is any reason we need to try to find | ||||
* the oldest (or last oldest retransmitted). | * the oldest (or last oldest retransmitted). | ||||
▲ Show 20 Lines • Show All 143 Lines • ▼ Show 20 Lines | if (collapsed_win == 0) { | ||||
if (rsm == NULL) { | if (rsm == NULL) { | ||||
counter_u64_add(rack_tlp_does_nada, 1); | counter_u64_add(rack_tlp_does_nada, 1); | ||||
#ifdef TCP_BLACKBOX | #ifdef TCP_BLACKBOX | ||||
tcp_log_dump_tp_logbuf(tp, "nada counter trips", M_NOWAIT, true); | tcp_log_dump_tp_logbuf(tp, "nada counter trips", M_NOWAIT, true); | ||||
#endif | #endif | ||||
goto out; | goto out; | ||||
} | } | ||||
} else { | } else { | ||||
/* | /* | ||||
* We must find the last segment | * We must find the last segment | ||||
* that was acceptable by the client. | * that was acceptable by the client. | ||||
*/ | */ | ||||
RB_FOREACH_REVERSE(rsm, rack_rb_tree_head, &rack->r_ctl.rc_mtree) { | RB_FOREACH_REVERSE(rsm, rack_rb_tree_head, &rack->r_ctl.rc_mtree) { | ||||
if ((rsm->r_flags & RACK_RWND_COLLAPSED) == 0) { | if ((rsm->r_flags & RACK_RWND_COLLAPSED) == 0) { | ||||
/* Found one */ | /* Found one */ | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
▲ Show 20 Lines • Show All 1,209 Lines • ▼ Show 20 Lines | if ((rsm->r_flags & RACK_TLP) && | ||||
rack->r_ctl.rc_ssthresh_at = tp->snd_ssthresh; | rack->r_ctl.rc_ssthresh_at = tp->snd_ssthresh; | ||||
rack_cong_signal(tp, NULL, CC_NDUPACK); | rack_cong_signal(tp, NULL, CC_NDUPACK); | ||||
/* | /* | ||||
* When we enter recovery we need to assure | * When we enter recovery we need to assure | ||||
* we send one packet. | * we send one packet. | ||||
*/ | */ | ||||
rack->r_ctl.rc_prr_sndcnt = ctf_fixed_maxseg(tp); | rack->r_ctl.rc_prr_sndcnt = ctf_fixed_maxseg(tp); | ||||
rack_log_to_prr(rack, 7); | rack_log_to_prr(rack, 7); | ||||
} | } | ||||
} | } | ||||
if (SEQ_LT(rack->r_ctl.rc_rack_tmit_time, rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)])) { | if (SEQ_LT(rack->r_ctl.rc_rack_tmit_time, rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)])) { | ||||
/* New more recent rack_tmit_time */ | /* New more recent rack_tmit_time */ | ||||
rack->r_ctl.rc_rack_tmit_time = rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)]; | rack->r_ctl.rc_rack_tmit_time = rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)]; | ||||
rack->rc_rack_rtt = t; | rack->rc_rack_rtt = t; | ||||
} | } | ||||
return (1); | return (1); | ||||
} | } | ||||
/* | /* | ||||
* We clear the soft/rxtshift since we got an ack. | * We clear the soft/rxtshift since we got an ack. | ||||
* There is no assurance we will call the commit() function | * There is no assurance we will call the commit() function | ||||
* so we need to clear these to avoid incorrect handling. | * so we need to clear these to avoid incorrect handling. | ||||
*/ | */ | ||||
tp->t_rxtshift = 0; | tp->t_rxtshift = 0; | ||||
tp->t_softerror = 0; | tp->t_softerror = 0; | ||||
if ((to->to_flags & TOF_TS) && | if ((to->to_flags & TOF_TS) && | ||||
(ack_type == CUM_ACKED) && | (ack_type == CUM_ACKED) && | ||||
(to->to_tsecr) && | (to->to_tsecr) && | ||||
Show All 19 Lines | for (i = 0; i < rsm->r_rtr_cnt; i++) { | ||||
rack->r_ctl.rc_rack_min_rtt = 1; | rack->r_ctl.rc_rack_min_rtt = 1; | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Note the following calls to | * Note the following calls to | ||||
* tcp_rack_xmit_timer() are being commented | * tcp_rack_xmit_timer() are being commented | ||||
* out for now. They give us no more accuracy | * out for now. They give us no more accuracy | ||||
* and often lead to a wrong choice. We have | * and often lead to a wrong choice. We have | ||||
* enough samples that have not been | * enough samples that have not been | ||||
* retransmitted. I leave the commented out | * retransmitted. I leave the commented out | ||||
* code in here in case in the future we | * code in here in case in the future we | ||||
* decide to add it back (though I can't forsee | * decide to add it back (though I can't forsee | ||||
* doing that). That way we will easily see | * doing that). That way we will easily see | ||||
* where they need to be placed. | * where they need to be placed. | ||||
*/ | */ | ||||
if (SEQ_LT(rack->r_ctl.rc_rack_tmit_time, | if (SEQ_LT(rack->r_ctl.rc_rack_tmit_time, | ||||
rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)])) { | rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)])) { | ||||
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines | rack_log_sack_passed(struct tcpcb *tp, | ||||
nrsm = rsm; | nrsm = rsm; | ||||
TAILQ_FOREACH_REVERSE_FROM(nrsm, &rack->r_ctl.rc_tmap, | TAILQ_FOREACH_REVERSE_FROM(nrsm, &rack->r_ctl.rc_tmap, | ||||
rack_head, r_tnext) { | rack_head, r_tnext) { | ||||
if (nrsm == rsm) { | if (nrsm == rsm) { | ||||
/* Skip orginal segment he is acked */ | /* Skip orginal segment he is acked */ | ||||
continue; | continue; | ||||
} | } | ||||
if (nrsm->r_flags & RACK_ACKED) { | if (nrsm->r_flags & RACK_ACKED) { | ||||
/* | /* | ||||
* Skip ack'd segments, though we | * Skip ack'd segments, though we | ||||
* should not see these, since tmap | * should not see these, since tmap | ||||
* should not have ack'd segments. | * should not have ack'd segments. | ||||
*/ | */ | ||||
continue; | continue; | ||||
} | } | ||||
if (nrsm->r_flags & RACK_SACK_PASSED) { | if (nrsm->r_flags & RACK_SACK_PASSED) { | ||||
/* | /* | ||||
* We found one that is already marked | * We found one that is already marked | ||||
* passed, we have been here before and | * passed, we have been here before and | ||||
* so all others below this are marked. | * so all others below this are marked. | ||||
*/ | */ | ||||
break; | break; | ||||
} | } | ||||
nrsm->r_flags |= RACK_SACK_PASSED; | nrsm->r_flags |= RACK_SACK_PASSED; | ||||
nrsm->r_flags &= ~RACK_WAS_SACKPASS; | nrsm->r_flags &= ~RACK_WAS_SACKPASS; | ||||
Show All 14 Lines | rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack, struct sackblk *sack, | ||||
end = sack->end; | end = sack->end; | ||||
rsm = *prsm; | rsm = *prsm; | ||||
memset(&fe, 0, sizeof(fe)); | memset(&fe, 0, sizeof(fe)); | ||||
do_rest_ofb: | do_rest_ofb: | ||||
if ((rsm == NULL) || | if ((rsm == NULL) || | ||||
(SEQ_LT(end, rsm->r_start)) || | (SEQ_LT(end, rsm->r_start)) || | ||||
(SEQ_GEQ(start, rsm->r_end)) || | (SEQ_GEQ(start, rsm->r_end)) || | ||||
(SEQ_LT(start, rsm->r_start))) { | (SEQ_LT(start, rsm->r_start))) { | ||||
/* | /* | ||||
* We are not in the right spot, | * We are not in the right spot, | ||||
* find the correct spot in the tree. | * find the correct spot in the tree. | ||||
*/ | */ | ||||
used_ref = 0; | used_ref = 0; | ||||
fe.r_start = start; | fe.r_start = start; | ||||
rsm = RB_FIND(rack_rb_tree_head, &rack->r_ctl.rc_mtree, &fe); | rsm = RB_FIND(rack_rb_tree_head, &rack->r_ctl.rc_mtree, &fe); | ||||
moved++; | moved++; | ||||
} | } | ||||
Show All 11 Lines | if ((rsm->r_flags & RACK_ACKED) == 0) { | ||||
* rsm |--------------| | * rsm |--------------| | ||||
* sackblk |-------> | * sackblk |-------> | ||||
* rsm will become | * rsm will become | ||||
* rsm |---| | * rsm |---| | ||||
* and nrsm will be the sacked piece | * and nrsm will be the sacked piece | ||||
* nrsm |----------| | * nrsm |----------| | ||||
* | * | ||||
* But before we start down that path lets | * But before we start down that path lets | ||||
* see if the sack spans over on top of | * see if the sack spans over on top of | ||||
* the next guy and it is already sacked. | * the next guy and it is already sacked. | ||||
*/ | */ | ||||
next = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | next = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | ||||
if (next && (next->r_flags & RACK_ACKED) && | if (next && (next->r_flags & RACK_ACKED) && | ||||
SEQ_GEQ(end, next->r_start)) { | SEQ_GEQ(end, next->r_start)) { | ||||
/** | /** | ||||
* So the next one is already acked, and | * So the next one is already acked, and | ||||
* we can thus by hookery use our stack_map | * we can thus by hookery use our stack_map | ||||
Show All 24 Lines | if ((rsm->r_flags & RACK_ACKED) == 0) { | ||||
/* Now lets update all the stats and such */ | /* Now lets update all the stats and such */ | ||||
rack_update_rtt(tp, rack, nrsm, to, cts, SACKED); | rack_update_rtt(tp, rack, nrsm, to, cts, SACKED); | ||||
changed += (nrsm->r_end - nrsm->r_start); | changed += (nrsm->r_end - nrsm->r_start); | ||||
rack->r_ctl.rc_sacked += (nrsm->r_end - nrsm->r_start); | rack->r_ctl.rc_sacked += (nrsm->r_end - nrsm->r_start); | ||||
if (nrsm->r_flags & RACK_SACK_PASSED) { | if (nrsm->r_flags & RACK_SACK_PASSED) { | ||||
counter_u64_add(rack_reorder_seen, 1); | counter_u64_add(rack_reorder_seen, 1); | ||||
rack->r_ctl.rc_reorder_ts = cts; | rack->r_ctl.rc_reorder_ts = cts; | ||||
} | } | ||||
/* | /* | ||||
* Now we want to go up from rsm (the | * Now we want to go up from rsm (the | ||||
* one left un-acked) to the next one | * one left un-acked) to the next one | ||||
* in the tmap. We do this so when | * in the tmap. We do this so when | ||||
* we walk backwards we include marking | * we walk backwards we include marking | ||||
* sack-passed on rsm (The one passed in | * sack-passed on rsm (The one passed in | ||||
* is skipped since it is generally called | * is skipped since it is generally called | ||||
* on something sacked before removing it | * on something sacked before removing it | ||||
* from the tmap). | * from the tmap). | ||||
▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines | #endif | ||||
counter_u64_add(rack_sack_skipped_acked, 1); | counter_u64_add(rack_sack_skipped_acked, 1); | ||||
moved++; | moved++; | ||||
if (end == rsm->r_end) { | if (end == rsm->r_end) { | ||||
/* Done with block */ | /* Done with block */ | ||||
rsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | rsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | ||||
goto out; | goto out; | ||||
} else if (SEQ_LT(end, rsm->r_end)) { | } else if (SEQ_LT(end, rsm->r_end)) { | ||||
/* A partial sack to a already sacked block */ | /* A partial sack to a already sacked block */ | ||||
moved++; | moved++; | ||||
rsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | rsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | ||||
goto out; | goto out; | ||||
} else { | } else { | ||||
/* | /* | ||||
* The end goes beyond this guy | * The end goes beyond this guy | ||||
* repostion the start to the | * repostion the start to the | ||||
* next block. | * next block. | ||||
*/ | */ | ||||
start = rsm->r_end; | start = rsm->r_end; | ||||
rsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | rsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | ||||
if (rsm == NULL) | if (rsm == NULL) | ||||
goto out; | goto out; | ||||
} | } | ||||
Show All 31 Lines | if (SEQ_GEQ(end, rsm->r_end)) { | ||||
} else { | } else { | ||||
counter_u64_add(rack_sack_skipped_acked, 1); | counter_u64_add(rack_sack_skipped_acked, 1); | ||||
moved++; | moved++; | ||||
} | } | ||||
if (end == rsm->r_end) { | if (end == rsm->r_end) { | ||||
/* This block only - done, setup for next */ | /* This block only - done, setup for next */ | ||||
goto out; | goto out; | ||||
} | } | ||||
/* | /* | ||||
* There is more not coverend by this rsm move on | * There is more not coverend by this rsm move on | ||||
* to the next block in the RB tree. | * to the next block in the RB tree. | ||||
*/ | */ | ||||
nrsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | nrsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | ||||
start = rsm->r_end; | start = rsm->r_end; | ||||
rsm = nrsm; | rsm = nrsm; | ||||
if (rsm == NULL) | if (rsm == NULL) | ||||
goto out; | goto out; | ||||
goto do_rest_ofb; | goto do_rest_ofb; | ||||
Show All 20 Lines | if (prev && (prev->r_flags & RACK_ACKED)) { | ||||
* prev |----------| (acked) | * prev |----------| (acked) | ||||
* rsm |-----| (non-acked) | * rsm |-----| (non-acked) | ||||
* nrsm |-| (temporary) | * nrsm |-| (temporary) | ||||
*/ | */ | ||||
nrsm = &stack_map; | nrsm = &stack_map; | ||||
memcpy(nrsm, rsm, sizeof(struct rack_sendmap)); | memcpy(nrsm, rsm, sizeof(struct rack_sendmap)); | ||||
prev->r_end = end; | prev->r_end = end; | ||||
rsm->r_start = end; | rsm->r_start = end; | ||||
/* Now adjust nrsm (stack copy) to be | /* | ||||
* Now adjust nrsm (stack copy) to be | |||||
* the one that is the small | * the one that is the small | ||||
* piece that was "sacked". | * piece that was "sacked". | ||||
*/ | */ | ||||
nrsm->r_end = end; | nrsm->r_end = end; | ||||
rsm->r_dupack = 0; | rsm->r_dupack = 0; | ||||
rack_log_retran_reason(rack, rsm, __LINE__, 0, 2); | rack_log_retran_reason(rack, rsm, __LINE__, 0, 2); | ||||
/* | /* | ||||
* Now nrsm is our new little piece | * Now nrsm is our new little piece | ||||
* that is acked (which was merged | * that is acked (which was merged | ||||
* to prev). Update the rtt and changed | * to prev). Update the rtt and changed | ||||
* based on that. Also check for reordering. | * based on that. Also check for reordering. | ||||
*/ | */ | ||||
rack_update_rtt(tp, rack, nrsm, to, cts, SACKED); | rack_update_rtt(tp, rack, nrsm, to, cts, SACKED); | ||||
changed += (nrsm->r_end - nrsm->r_start); | changed += (nrsm->r_end - nrsm->r_start); | ||||
rack->r_ctl.rc_sacked += (nrsm->r_end - nrsm->r_start); | rack->r_ctl.rc_sacked += (nrsm->r_end - nrsm->r_start); | ||||
Show All 10 Lines | if (prev && (prev->r_flags & RACK_ACKED)) { | ||||
* split the block in two. | * split the block in two. | ||||
*/ | */ | ||||
nrsm = rack_alloc_limit(rack, RACK_LIMIT_TYPE_SPLIT); | nrsm = rack_alloc_limit(rack, RACK_LIMIT_TYPE_SPLIT); | ||||
if (nrsm == NULL) { | if (nrsm == NULL) { | ||||
/* failed rrs what can we do but loose the sack info? */ | /* failed rrs what can we do but loose the sack info? */ | ||||
goto out; | goto out; | ||||
} | } | ||||
/** | /** | ||||
* In this case nrsm becomes | * In this case nrsm becomes | ||||
* nrsm->r_start = end; | * nrsm->r_start = end; | ||||
* nrsm->r_end = rsm->r_end; | * nrsm->r_end = rsm->r_end; | ||||
* which is un-acked. | * which is un-acked. | ||||
* <and> | * <and> | ||||
* rsm->r_end = nrsm->r_start; | * rsm->r_end = nrsm->r_start; | ||||
* i.e. the remaining un-acked | * i.e. the remaining un-acked | ||||
* piece is left on the left | * piece is left on the left | ||||
* hand side. | * hand side. | ||||
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | #endif | ||||
/* | /* | ||||
* The block was already acked. | * The block was already acked. | ||||
*/ | */ | ||||
counter_u64_add(rack_sack_skipped_acked, 1); | counter_u64_add(rack_sack_skipped_acked, 1); | ||||
moved++; | moved++; | ||||
} | } | ||||
out: | out: | ||||
if (rsm && (rsm->r_flags & RACK_ACKED)) { | if (rsm && (rsm->r_flags & RACK_ACKED)) { | ||||
/* | /* | ||||
* Now can we merge where we worked | * Now can we merge where we worked | ||||
* with either the previous or | * with either the previous or | ||||
* next block? | * next block? | ||||
*/ | */ | ||||
next = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | next = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | ||||
while (next) { | while (next) { | ||||
if (next->r_flags & RACK_ACKED) { | if (next->r_flags & RACK_ACKED) { | ||||
/* yep this and next can be merged */ | /* yep this and next can be merged */ | ||||
rsm = rack_merge_rsm(rack, rsm, next); | rsm = rack_merge_rsm(rack, rsm, next); | ||||
Show All 13 Lines | if (rsm && (rsm->r_flags & RACK_ACKED)) { | ||||
} | } | ||||
} | } | ||||
if (used_ref == 0) { | if (used_ref == 0) { | ||||
counter_u64_add(rack_sack_proc_all, 1); | counter_u64_add(rack_sack_proc_all, 1); | ||||
} else { | } else { | ||||
counter_u64_add(rack_sack_proc_short, 1); | counter_u64_add(rack_sack_proc_short, 1); | ||||
} | } | ||||
/* Save off the next one for quick reference. */ | /* Save off the next one for quick reference. */ | ||||
if (rsm) | if (rsm) | ||||
nrsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | nrsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | ||||
else | else | ||||
nrsm = NULL; | nrsm = NULL; | ||||
*prsm = rack->r_ctl.rc_sacklast = nrsm; | *prsm = rack->r_ctl.rc_sacklast = nrsm; | ||||
/* Pass back the moved. */ | /* Pass back the moved. */ | ||||
*moved_two = moved; | *moved_two = moved; | ||||
return (changed); | return (changed); | ||||
} | } | ||||
static void inline | static void inline | ||||
rack_peer_reneges(struct tcp_rack *rack, struct rack_sendmap *rsm, tcp_seq th_ack) | rack_peer_reneges(struct tcp_rack *rack, struct rack_sendmap *rsm, tcp_seq th_ack) | ||||
{ | { | ||||
struct rack_sendmap *tmap; | struct rack_sendmap *tmap; | ||||
tmap = NULL; | tmap = NULL; | ||||
while (rsm && (rsm->r_flags & RACK_ACKED)) { | while (rsm && (rsm->r_flags & RACK_ACKED)) { | ||||
/* Its no longer sacked, mark it so */ | /* Its no longer sacked, mark it so */ | ||||
rack->r_ctl.rc_sacked -= (rsm->r_end - rsm->r_start); | rack->r_ctl.rc_sacked -= (rsm->r_end - rsm->r_start); | ||||
Show All 10 Lines | if (tmap == NULL) { | ||||
tmap = rsm; | tmap = rsm; | ||||
} else { | } else { | ||||
TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, tmap, rsm, r_tnext); | TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, tmap, rsm, r_tnext); | ||||
tmap = rsm; | tmap = rsm; | ||||
} | } | ||||
tmap->r_in_tmap = 1; | tmap->r_in_tmap = 1; | ||||
rsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | rsm = RB_NEXT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, rsm); | ||||
} | } | ||||
/* | /* | ||||
* Now lets possibly clear the sack filter so we start | * Now lets possibly clear the sack filter so we start | ||||
* recognizing sacks that cover this area. | * recognizing sacks that cover this area. | ||||
*/ | */ | ||||
if (rack_use_sack_filter) | if (rack_use_sack_filter) | ||||
sack_filter_clear(&rack->r_ctl.rack_sf, th_ack); | sack_filter_clear(&rack->r_ctl.rack_sf, th_ack); | ||||
} | } | ||||
static void | static void | ||||
rack_do_decay(struct tcp_rack *rack) | rack_do_decay(struct tcp_rack *rack) | ||||
{ | { | ||||
struct timeval res; | struct timeval res; | ||||
#define timersub(tvp, uvp, vvp) \ | #define timersub(tvp, uvp, vvp) \ | ||||
do { \ | do { \ | ||||
(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ | (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ | ||||
(vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ | (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ | ||||
if ((vvp)->tv_usec < 0) { \ | if ((vvp)->tv_usec < 0) { \ | ||||
(vvp)->tv_sec--; \ | (vvp)->tv_sec--; \ | ||||
(vvp)->tv_usec += 1000000; \ | (vvp)->tv_usec += 1000000; \ | ||||
} \ | } \ | ||||
} while (0) | } while (0) | ||||
timersub(&rack->r_ctl.rc_last_ack, &rack->r_ctl.rc_last_time_decay, &res); | timersub(&rack->r_ctl.rc_last_ack, &rack->r_ctl.rc_last_time_decay, &res); | ||||
#undef timersub | #undef timersub | ||||
rack->r_ctl.input_pkt++; | rack->r_ctl.input_pkt++; | ||||
if ((rack->rc_in_persist) || | if ((rack->rc_in_persist) || | ||||
(res.tv_sec >= 1) || | (res.tv_sec >= 1) || | ||||
(rack->rc_tp->snd_max == rack->rc_tp->snd_una)) { | (rack->rc_tp->snd_max == rack->rc_tp->snd_una)) { | ||||
/* | /* | ||||
* Check for decay of non-SAD, | * Check for decay of non-SAD, | ||||
* we want all SAD detection metrics to | * we want all SAD detection metrics to | ||||
* decay 1/4 per second (or more) passed. | * decay 1/4 per second (or more) passed. | ||||
*/ | */ | ||||
uint32_t pkt_delta; | uint32_t pkt_delta; | ||||
pkt_delta = rack->r_ctl.input_pkt - rack->r_ctl.saved_input_pkt; | pkt_delta = rack->r_ctl.input_pkt - rack->r_ctl.saved_input_pkt; | ||||
/* Update our saved tracking values */ | /* Update our saved tracking values */ | ||||
rack->r_ctl.saved_input_pkt = rack->r_ctl.input_pkt; | rack->r_ctl.saved_input_pkt = rack->r_ctl.input_pkt; | ||||
rack->r_ctl.rc_last_time_decay = rack->r_ctl.rc_last_ack; | rack->r_ctl.rc_last_time_decay = rack->r_ctl.rc_last_ack; | ||||
/* Now do we escape without decay? */ | /* Now do we escape without decay? */ | ||||
if (rack->rc_in_persist || | if (rack->rc_in_persist || | ||||
(rack->rc_tp->snd_max == rack->rc_tp->snd_una) || | (rack->rc_tp->snd_max == rack->rc_tp->snd_una) || | ||||
(pkt_delta < tcp_sad_low_pps)){ | (pkt_delta < tcp_sad_low_pps)){ | ||||
/* | /* | ||||
* We don't decay idle connections | * We don't decay idle connections | ||||
* or ones that have a low input pps. | * or ones that have a low input pps. | ||||
*/ | */ | ||||
return; | return; | ||||
} | } | ||||
/* Decay the counters */ | /* Decay the counters */ | ||||
rack->r_ctl.ack_count = ctf_decay_count(rack->r_ctl.ack_count, | rack->r_ctl.ack_count = ctf_decay_count(rack->r_ctl.ack_count, | ||||
tcp_sad_decay_val); | tcp_sad_decay_val); | ||||
rack->r_ctl.sack_count = ctf_decay_count(rack->r_ctl.sack_count, | rack->r_ctl.sack_count = ctf_decay_count(rack->r_ctl.sack_count, | ||||
Show All 25 Lines | rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th) | ||||
rack = (struct tcp_rack *)tp->t_fb_ptr; | rack = (struct tcp_rack *)tp->t_fb_ptr; | ||||
cts = tcp_ts_getticks(); | cts = tcp_ts_getticks(); | ||||
rsm = RB_MIN(rack_rb_tree_head, &rack->r_ctl.rc_mtree); | rsm = RB_MIN(rack_rb_tree_head, &rack->r_ctl.rc_mtree); | ||||
changed = 0; | changed = 0; | ||||
th_ack = th->th_ack; | th_ack = th->th_ack; | ||||
if (rack->sack_attack_disable == 0) | if (rack->sack_attack_disable == 0) | ||||
rack_do_decay(rack); | rack_do_decay(rack); | ||||
if (BYTES_THIS_ACK(tp, th) >= ctf_fixed_maxseg(rack->rc_tp)) { | if (BYTES_THIS_ACK(tp, th) >= ctf_fixed_maxseg(rack->rc_tp)) { | ||||
/* | /* | ||||
* You only get credit for | * You only get credit for | ||||
* MSS and greater (and you get extra | * MSS and greater (and you get extra | ||||
* credit for larger cum-ack moves). | * credit for larger cum-ack moves). | ||||
*/ | */ | ||||
int ac; | int ac; | ||||
ac = BYTES_THIS_ACK(tp, th) / ctf_fixed_maxseg(rack->rc_tp); | ac = BYTES_THIS_ACK(tp, th) / ctf_fixed_maxseg(rack->rc_tp); | ||||
rack->r_ctl.ack_count += ac; | rack->r_ctl.ack_count += ac; | ||||
counter_u64_add(rack_ack_total, ac); | counter_u64_add(rack_ack_total, ac); | ||||
} | } | ||||
if (rack->r_ctl.ack_count > 0xfff00000) { | if (rack->r_ctl.ack_count > 0xfff00000) { | ||||
/* | /* | ||||
* reduce the number to keep us under | * reduce the number to keep us under | ||||
* a uint32_t. | * a uint32_t. | ||||
*/ | */ | ||||
rack->r_ctl.ack_count /= 2; | rack->r_ctl.ack_count /= 2; | ||||
rack->r_ctl.sack_count /= 2; | rack->r_ctl.sack_count /= 2; | ||||
} | } | ||||
if (SEQ_GT(th_ack, tp->snd_una)) { | if (SEQ_GT(th_ack, tp->snd_una)) { | ||||
rack_log_progress_event(rack, tp, ticks, PROGRESS_UPDATE, __LINE__); | rack_log_progress_event(rack, tp, ticks, PROGRESS_UPDATE, __LINE__); | ||||
tp->t_acktime = ticks; | tp->t_acktime = ticks; | ||||
▲ Show 20 Lines • Show All 100 Lines • ▼ Show 20 Lines | #endif | ||||
} | } | ||||
if (rsm->r_flags & RACK_ACKED) { | if (rsm->r_flags & RACK_ACKED) { | ||||
/* | /* | ||||
* It was acked on the scoreboard -- remove it from | * It was acked on the scoreboard -- remove it from | ||||
* total for the part being cum-acked. | * total for the part being cum-acked. | ||||
*/ | */ | ||||
rack->r_ctl.rc_sacked -= (th_ack - rsm->r_start); | rack->r_ctl.rc_sacked -= (th_ack - rsm->r_start); | ||||
} | } | ||||
/* | /* | ||||
* Clear the dup ack count for | * Clear the dup ack count for | ||||
* the piece that remains. | * the piece that remains. | ||||
*/ | */ | ||||
rsm->r_dupack = 0; | rsm->r_dupack = 0; | ||||
rack_log_retran_reason(rack, rsm, __LINE__, 0, 2); | rack_log_retran_reason(rack, rsm, __LINE__, 0, 2); | ||||
if (rsm->r_rtr_bytes) { | if (rsm->r_rtr_bytes) { | ||||
/* | /* | ||||
* It was retransmitted adjust the | * It was retransmitted adjust the | ||||
* sack holes for what was acked. | * sack holes for what was acked. | ||||
*/ | */ | ||||
int ack_am; | int ack_am; | ||||
ack_am = (th_ack - rsm->r_start); | ack_am = (th_ack - rsm->r_start); | ||||
if (ack_am >= rsm->r_rtr_bytes) { | if (ack_am >= rsm->r_rtr_bytes) { | ||||
rack->r_ctl.rc_holes_rxt -= ack_am; | rack->r_ctl.rc_holes_rxt -= ack_am; | ||||
rsm->r_rtr_bytes -= ack_am; | rsm->r_rtr_bytes -= ack_am; | ||||
} | } | ||||
} | } | ||||
/* Update where the piece starts */ | /* Update where the piece starts */ | ||||
rsm->r_start = th_ack; | rsm->r_start = th_ack; | ||||
} | } | ||||
proc_sack: | proc_sack: | ||||
/* Check for reneging */ | /* Check for reneging */ | ||||
rsm = RB_MIN(rack_rb_tree_head, &rack->r_ctl.rc_mtree); | rsm = RB_MIN(rack_rb_tree_head, &rack->r_ctl.rc_mtree); | ||||
if (rsm && (rsm->r_flags & RACK_ACKED) && (th_ack == rsm->r_start)) { | if (rsm && (rsm->r_flags & RACK_ACKED) && (th_ack == rsm->r_start)) { | ||||
/* | /* | ||||
* The peer has moved snd_una up to | * The peer has moved snd_una up to | ||||
* the edge of this send, i.e. one | * the edge of this send, i.e. one | ||||
* that it had previously acked. The only | * that it had previously acked. The only | ||||
* way that can be true is if the peer threw | * way that can be true is if the peer threw | ||||
* away data (space issues) that it had | * away data (space issues) that it had | ||||
* previously sacked (else it would have | * previously sacked (else it would have | ||||
* given us snd_una up to (rsm->r_end). | * given us snd_una up to (rsm->r_end). | ||||
* We need to undo the acked markings here. | * We need to undo the acked markings here. | ||||
* | * | ||||
* Note we have to look to make sure th_ack is | * Note we have to look to make sure th_ack is | ||||
* our rsm->r_start in case we get an old ack | * our rsm->r_start in case we get an old ack | ||||
* where th_ack is behind snd_una. | * where th_ack is behind snd_una. | ||||
*/ | */ | ||||
rack_peer_reneges(rack, rsm, th->th_ack); | rack_peer_reneges(rack, rsm, th->th_ack); | ||||
▲ Show 20 Lines • Show All 92 Lines • ▼ Show 20 Lines | for (i = 0; i < num_sack_blks; i++) { | ||||
} | } | ||||
num_sack_blks--; | num_sack_blks--; | ||||
goto again; | goto again; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
} | } | ||||
do_sack_work: | do_sack_work: | ||||
/* | /* | ||||
* First lets look to see if | * First lets look to see if | ||||
* we have retransmitted and | * we have retransmitted and | ||||
* can use the transmit next? | * can use the transmit next? | ||||
*/ | */ | ||||
rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); | rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); | ||||
if (rsm && | if (rsm && | ||||
SEQ_GT(sack_blocks[0].end, rsm->r_start) && | SEQ_GT(sack_blocks[0].end, rsm->r_start) && | ||||
SEQ_LT(sack_blocks[0].start, rsm->r_end)) { | SEQ_LT(sack_blocks[0].start, rsm->r_end)) { | ||||
/* | /* | ||||
Show All 16 Lines | if (num_sack_blks == 1) { | ||||
* is the sequence in the sack block (has more | * is the sequence in the sack block (has more | ||||
* are acked). Count this as ACK'd data to boost | * are acked). Count this as ACK'd data to boost | ||||
* up the chances of recovering any false positives. | * up the chances of recovering any false positives. | ||||
*/ | */ | ||||
rack->r_ctl.ack_count += (acked / ctf_fixed_maxseg(rack->rc_tp)); | rack->r_ctl.ack_count += (acked / ctf_fixed_maxseg(rack->rc_tp)); | ||||
counter_u64_add(rack_ack_total, (acked / ctf_fixed_maxseg(rack->rc_tp))); | counter_u64_add(rack_ack_total, (acked / ctf_fixed_maxseg(rack->rc_tp))); | ||||
counter_u64_add(rack_express_sack, 1); | counter_u64_add(rack_express_sack, 1); | ||||
if (rack->r_ctl.ack_count > 0xfff00000) { | if (rack->r_ctl.ack_count > 0xfff00000) { | ||||
/* | /* | ||||
* reduce the number to keep us under | * reduce the number to keep us under | ||||
* a uint32_t. | * a uint32_t. | ||||
*/ | */ | ||||
rack->r_ctl.ack_count /= 2; | rack->r_ctl.ack_count /= 2; | ||||
rack->r_ctl.sack_count /= 2; | rack->r_ctl.sack_count /= 2; | ||||
} | } | ||||
goto out_with_totals; | goto out_with_totals; | ||||
} else { | } else { | ||||
/* | /* | ||||
* Start the loop through the | * Start the loop through the | ||||
* rest of blocks, past the first block. | * rest of blocks, past the first block. | ||||
*/ | */ | ||||
moved_two = 0; | moved_two = 0; | ||||
loop_start = 1; | loop_start = 1; | ||||
} | } | ||||
} | } | ||||
/* Its a sack of some sort */ | /* Its a sack of some sort */ | ||||
rack->r_ctl.sack_count++; | rack->r_ctl.sack_count++; | ||||
if (rack->r_ctl.sack_count > 0xfff00000) { | if (rack->r_ctl.sack_count > 0xfff00000) { | ||||
/* | /* | ||||
* reduce the number to keep us under | * reduce the number to keep us under | ||||
* a uint32_t. | * a uint32_t. | ||||
*/ | */ | ||||
rack->r_ctl.ack_count /= 2; | rack->r_ctl.ack_count /= 2; | ||||
rack->r_ctl.sack_count /= 2; | rack->r_ctl.sack_count /= 2; | ||||
} | } | ||||
counter_u64_add(rack_sack_total, 1); | counter_u64_add(rack_sack_total, 1); | ||||
if (rack->sack_attack_disable) { | if (rack->sack_attack_disable) { | ||||
/* An attacker disablement is in place */ | /* An attacker disablement is in place */ | ||||
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines | for (i = loop_start; i < num_sack_blks; i++) { | ||||
if (rack->r_ctl.sack_count > 0xfff00000) { | if (rack->r_ctl.sack_count > 0xfff00000) { | ||||
rack->r_ctl.ack_count /= 2; | rack->r_ctl.ack_count /= 2; | ||||
rack->r_ctl.sack_count /= 2; | rack->r_ctl.sack_count /= 2; | ||||
} | } | ||||
moved_two = 0; | moved_two = 0; | ||||
} | } | ||||
out_with_totals: | out_with_totals: | ||||
if (num_sack_blks > 1) { | if (num_sack_blks > 1) { | ||||
/* | /* | ||||
* You get an extra stroke if | * You get an extra stroke if | ||||
* you have more than one sack-blk, this | * you have more than one sack-blk, this | ||||
* could be where we are skipping forward | * could be where we are skipping forward | ||||
* and the sack-filter is still working, or | * and the sack-filter is still working, or | ||||
* it could be an attacker constantly | * it could be an attacker constantly | ||||
* moving us. | * moving us. | ||||
*/ | */ | ||||
rack->r_ctl.sack_moved_extra++; | rack->r_ctl.sack_moved_extra++; | ||||
counter_u64_add(rack_move_some, 1); | counter_u64_add(rack_move_some, 1); | ||||
} | } | ||||
out: | out: | ||||
#ifdef NETFLIX_EXP_DETECTION | #ifdef NETFLIX_EXP_DETECTION | ||||
if ((rack->do_detection || tcp_force_detection) && | if ((rack->do_detection || tcp_force_detection) && | ||||
tcp_sack_to_ack_thresh && | tcp_sack_to_ack_thresh && | ||||
tcp_sack_to_move_thresh && | tcp_sack_to_move_thresh && | ||||
((rack->r_ctl.rc_num_maps_alloced > tcp_map_minimum) || rack->sack_attack_disable)) { | ((rack->r_ctl.rc_num_maps_alloced > tcp_map_minimum) || rack->sack_attack_disable)) { | ||||
/* | /* | ||||
* We have thresholds set to find | * We have thresholds set to find | ||||
* possible attackers and disable sack. | * possible attackers and disable sack. | ||||
* Check them. | * Check them. | ||||
*/ | */ | ||||
uint64_t ackratio, moveratio, movetotal; | uint64_t ackratio, moveratio, movetotal; | ||||
/* Log detecting */ | /* Log detecting */ | ||||
rack_log_sad(rack, 1); | rack_log_sad(rack, 1); | ||||
Show All 16 Lines | if (movetotal) | ||||
moveratio /= movetotal; | moveratio /= movetotal; | ||||
else { | else { | ||||
/* No moves, thats pretty good */ | /* No moves, thats pretty good */ | ||||
moveratio = 0; | moveratio = 0; | ||||
} | } | ||||
if ((rack->sack_attack_disable == 0) && | if ((rack->sack_attack_disable == 0) && | ||||
(moveratio > rack_highest_move_thresh_seen)) | (moveratio > rack_highest_move_thresh_seen)) | ||||
rack_highest_move_thresh_seen = (uint32_t)moveratio; | rack_highest_move_thresh_seen = (uint32_t)moveratio; | ||||
if (rack->sack_attack_disable == 0) { | if (rack->sack_attack_disable == 0) { | ||||
if ((ackratio > tcp_sack_to_ack_thresh) && | if ((ackratio > tcp_sack_to_ack_thresh) && | ||||
(moveratio > tcp_sack_to_move_thresh)) { | (moveratio > tcp_sack_to_move_thresh)) { | ||||
/* Disable sack processing */ | /* Disable sack processing */ | ||||
rack->sack_attack_disable = 1; | rack->sack_attack_disable = 1; | ||||
if (rack->r_rep_attack == 0) { | if (rack->r_rep_attack == 0) { | ||||
rack->r_rep_attack = 1; | rack->r_rep_attack = 1; | ||||
counter_u64_add(rack_sack_attacks_detected, 1); | counter_u64_add(rack_sack_attacks_detected, 1); | ||||
} | } | ||||
if (tcp_attack_on_turns_on_logging) { | if (tcp_attack_on_turns_on_logging) { | ||||
/* | /* | ||||
* Turn on logging, used for debugging | * Turn on logging, used for debugging | ||||
* false positives. | * false positives. | ||||
*/ | */ | ||||
rack->rc_tp->t_logstate = tcp_attack_on_turns_on_logging; | rack->rc_tp->t_logstate = tcp_attack_on_turns_on_logging; | ||||
} | } | ||||
/* Clamp the cwnd at flight size */ | /* Clamp the cwnd at flight size */ | ||||
rack->r_ctl.rc_saved_cwnd = rack->rc_tp->snd_cwnd; | rack->r_ctl.rc_saved_cwnd = rack->rc_tp->snd_cwnd; | ||||
rack->rc_tp->snd_cwnd = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); | rack->rc_tp->snd_cwnd = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); | ||||
▲ Show 20 Lines • Show All 286 Lines • ▼ Show 20 Lines | if (tp->snd_una == tp->snd_max) { | ||||
rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); | rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); | ||||
/* Set need output so persist might get set */ | /* Set need output so persist might get set */ | ||||
rack->r_wanted_output++; | rack->r_wanted_output++; | ||||
if (rack_use_sack_filter) | if (rack_use_sack_filter) | ||||
sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una); | sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una); | ||||
if ((tp->t_state >= TCPS_FIN_WAIT_1) && | if ((tp->t_state >= TCPS_FIN_WAIT_1) && | ||||
(sbavail(&so->so_snd) == 0) && | (sbavail(&so->so_snd) == 0) && | ||||
(tp->t_flags2 & TF2_DROP_AF_DATA)) { | (tp->t_flags2 & TF2_DROP_AF_DATA)) { | ||||
/* | /* | ||||
* The socket was gone and the | * The socket was gone and the | ||||
* peer sent data, time to | * peer sent data, time to | ||||
* reset him. | * reset him. | ||||
*/ | */ | ||||
*ret_val = 1; | *ret_val = 1; | ||||
tp = tcp_close(tp); | tp = tcp_close(tp); | ||||
ctf_do_dropwithreset(m, tp, th, BANDLIM_UNLIMITED, tlen); | ctf_do_dropwithreset(m, tp, th, BANDLIM_UNLIMITED, tlen); | ||||
return (1); | return (1); | ||||
} | } | ||||
} | } | ||||
if (ofia) | if (ofia) | ||||
*ofia = ourfinisacked; | *ofia = ourfinisacked; | ||||
return (0); | return (0); | ||||
} | } | ||||
static void | static void | ||||
rack_collapsed_window(struct tcp_rack *rack) | rack_collapsed_window(struct tcp_rack *rack) | ||||
{ | { | ||||
/* | /* | ||||
* Now we must walk the | * Now we must walk the | ||||
* send map and divide the | * send map and divide the | ||||
* ones left stranded. These | * ones left stranded. These | ||||
* guys can't cause us to abort | * guys can't cause us to abort | ||||
* the connection and are really | * the connection and are really | ||||
* "unsent". However if a buggy | * "unsent". However if a buggy | ||||
* client actually did keep some | * client actually did keep some | ||||
* of the data i.e. collapsed the win | * of the data i.e. collapsed the win | ||||
* and refused to ack and then opened | * and refused to ack and then opened | ||||
* the win and acked that data. We would | * the win and acked that data. We would | ||||
* get into an ack war, the simpler | * get into an ack war, the simpler | ||||
* method then of just pretending we | * method then of just pretending we | ||||
* did not send those segments something | * did not send those segments something | ||||
* won't work. | * won't work. | ||||
*/ | */ | ||||
struct rack_sendmap *rsm, *nrsm, fe, *insret; | struct rack_sendmap *rsm, *nrsm, fe, *insret; | ||||
tcp_seq max_seq; | tcp_seq max_seq; | ||||
uint32_t maxseg; | uint32_t maxseg; | ||||
max_seq = rack->rc_tp->snd_una + rack->rc_tp->snd_wnd; | max_seq = rack->rc_tp->snd_una + rack->rc_tp->snd_wnd; | ||||
maxseg = ctf_fixed_maxseg(rack->rc_tp); | maxseg = ctf_fixed_maxseg(rack->rc_tp); | ||||
memset(&fe, 0, sizeof(fe)); | memset(&fe, 0, sizeof(fe)); | ||||
fe.r_start = max_seq; | fe.r_start = max_seq; | ||||
/* Find the first seq past or at maxseq */ | /* Find the first seq past or at maxseq */ | ||||
rsm = RB_FIND(rack_rb_tree_head, &rack->r_ctl.rc_mtree, &fe); | rsm = RB_FIND(rack_rb_tree_head, &rack->r_ctl.rc_mtree, &fe); | ||||
if (rsm == NULL) { | if (rsm == NULL) { | ||||
/* Nothing to do strange */ | /* Nothing to do strange */ | ||||
rack->rc_has_collapsed = 0; | rack->rc_has_collapsed = 0; | ||||
return; | return; | ||||
} | } | ||||
/* | /* | ||||
* Now do we need to split at | * Now do we need to split at | ||||
* the collapse point? | * the collapse point? | ||||
*/ | */ | ||||
if (SEQ_GT(max_seq, rsm->r_start)) { | if (SEQ_GT(max_seq, rsm->r_start)) { | ||||
nrsm = rack_alloc_limit(rack, RACK_LIMIT_TYPE_SPLIT); | nrsm = rack_alloc_limit(rack, RACK_LIMIT_TYPE_SPLIT); | ||||
if (nrsm == NULL) { | if (nrsm == NULL) { | ||||
/* We can't get a rsm, mark all? */ | /* We can't get a rsm, mark all? */ | ||||
nrsm = rsm; | nrsm = rsm; | ||||
goto no_split; | goto no_split; | ||||
} | } | ||||
/* Clone it */ | /* Clone it */ | ||||
rack_clone_rsm(rack, nrsm, rsm, max_seq); | rack_clone_rsm(rack, nrsm, rsm, max_seq); | ||||
insret = RB_INSERT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, nrsm); | insret = RB_INSERT(rack_rb_tree_head, &rack->r_ctl.rc_mtree, nrsm); | ||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
if (insret != NULL) { | if (insret != NULL) { | ||||
panic("Insert in rb tree of %p fails ret:%p rack:%p rsm:%p", | panic("Insert in rb tree of %p fails ret:%p rack:%p rsm:%p", | ||||
nrsm, insret, rack, rsm); | nrsm, insret, rack, rsm); | ||||
} | } | ||||
#endif | #endif | ||||
if (rsm->r_in_tmap) { | if (rsm->r_in_tmap) { | ||||
TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext); | TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext); | ||||
nrsm->r_in_tmap = 1; | nrsm->r_in_tmap = 1; | ||||
} | } | ||||
/* | /* | ||||
* Set in the new RSM as the | * Set in the new RSM as the | ||||
* collapsed starting point | * collapsed starting point | ||||
*/ | */ | ||||
rsm = nrsm; | rsm = nrsm; | ||||
} | } | ||||
no_split: | no_split: | ||||
counter_u64_add(rack_collapsed_win, 1); | counter_u64_add(rack_collapsed_win, 1); | ||||
RB_FOREACH_FROM(nrsm, rack_rb_tree_head, rsm) { | RB_FOREACH_FROM(nrsm, rack_rb_tree_head, rsm) { | ||||
nrsm->r_flags |= RACK_RWND_COLLAPSED; | nrsm->r_flags |= RACK_RWND_COLLAPSED; | ||||
▲ Show 20 Lines • Show All 546 Lines • ▼ Show 20 Lines | #endif | ||||
} | } | ||||
/* Ok if we reach here, we can process a fast-ack */ | /* Ok if we reach here, we can process a fast-ack */ | ||||
nsegs = max(1, m->m_pkthdr.lro_nsegs); | nsegs = max(1, m->m_pkthdr.lro_nsegs); | ||||
rack_log_ack(tp, to, th); | rack_log_ack(tp, to, th); | ||||
/* | /* | ||||
* We made progress, clear the tlp | * We made progress, clear the tlp | ||||
* out flag so we could start a TLP | * out flag so we could start a TLP | ||||
* again. | * again. | ||||
*/ | */ | ||||
rack->r_ctl.rc_tlp_rtx_out = 0; | rack->r_ctl.rc_tlp_rtx_out = 0; | ||||
/* Did the window get updated? */ | /* Did the window get updated? */ | ||||
if (tiwin != tp->snd_wnd) { | if (tiwin != tp->snd_wnd) { | ||||
tp->snd_wnd = tiwin; | tp->snd_wnd = tiwin; | ||||
tp->snd_wl1 = th->th_seq; | tp->snd_wl1 = th->th_seq; | ||||
if (tp->snd_wnd > tp->max_sndwnd) | if (tp->snd_wnd > tp->max_sndwnd) | ||||
tp->max_sndwnd = tp->snd_wnd; | tp->max_sndwnd = tp->snd_wnd; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 198 Lines • ▼ Show 20 Lines | #endif | ||||
} | } | ||||
if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) && | if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) && | ||||
V_tcp_do_ecn) { | V_tcp_do_ecn) { | ||||
tp->t_flags2 |= TF2_ECN_PERMIT; | tp->t_flags2 |= TF2_ECN_PERMIT; | ||||
TCPSTAT_INC(tcps_ecn_shs); | TCPSTAT_INC(tcps_ecn_shs); | ||||
} | } | ||||
if (SEQ_GT(th->th_ack, tp->snd_una)) { | if (SEQ_GT(th->th_ack, tp->snd_una)) { | ||||
/* | /* | ||||
* We advance snd_una for the | * We advance snd_una for the | ||||
* fast open case. If th_ack is | * fast open case. If th_ack is | ||||
* acknowledging data beyond | * acknowledging data beyond | ||||
* snd_una we can't just call | * snd_una we can't just call | ||||
* ack-processing since the | * ack-processing since the | ||||
* data stream in our send-map | * data stream in our send-map | ||||
* will start at snd_una + 1 (one | * will start at snd_una + 1 (one | ||||
* beyond the SYN). If its just | * beyond the SYN). If its just | ||||
* equal we don't need to do that | * equal we don't need to do that | ||||
* and there is no send_map. | * and there is no send_map. | ||||
*/ | */ | ||||
tp->snd_una++; | tp->snd_una++; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | if (thflags & TH_ACK) { | ||||
if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) { | if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) { | ||||
uint32_t t; | uint32_t t; | ||||
t = tcp_ts_getticks() - to->to_tsecr; | t = tcp_ts_getticks() - to->to_tsecr; | ||||
if (!tp->t_rttlow || tp->t_rttlow > t) | if (!tp->t_rttlow || tp->t_rttlow > t) | ||||
tp->t_rttlow = t; | tp->t_rttlow = t; | ||||
tcp_rack_xmit_timer(rack, t + 1); | tcp_rack_xmit_timer(rack, t + 1); | ||||
tcp_rack_xmit_timer_commit(rack, tp); | tcp_rack_xmit_timer_commit(rack, tp); | ||||
} | } | ||||
if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) | if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) | ||||
return (ret_val); | return (ret_val); | ||||
/* We may have changed to FIN_WAIT_1 above */ | /* We may have changed to FIN_WAIT_1 above */ | ||||
if (tp->t_state == TCPS_FIN_WAIT_1) { | if (tp->t_state == TCPS_FIN_WAIT_1) { | ||||
/* | /* | ||||
* In FIN_WAIT_1 STATE in addition to the processing | * In FIN_WAIT_1 STATE in addition to the processing | ||||
* for the ESTABLISHED state if our FIN is now | * for the ESTABLISHED state if our FIN is now | ||||
* acknowledged then enter FIN_WAIT_2. | * acknowledged then enter FIN_WAIT_2. | ||||
▲ Show 20 Lines • Show All 144 Lines • ▼ Show 20 Lines | rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, | ||||
tp->t_starttime = ticks; | tp->t_starttime = ticks; | ||||
if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) { | if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) { | ||||
tcp_fastopen_decrement_counter(tp->t_tfo_pending); | tcp_fastopen_decrement_counter(tp->t_tfo_pending); | ||||
tp->t_tfo_pending = NULL; | tp->t_tfo_pending = NULL; | ||||
/* | /* | ||||
* Account for the ACK of our SYN prior to | * Account for the ACK of our SYN prior to | ||||
* regular ACK processing below. | * regular ACK processing below. | ||||
*/ | */ | ||||
tp->snd_una++; | tp->snd_una++; | ||||
} | } | ||||
if (tp->t_flags & TF_NEEDFIN) { | if (tp->t_flags & TF_NEEDFIN) { | ||||
tcp_state_change(tp, TCPS_FIN_WAIT_1); | tcp_state_change(tp, TCPS_FIN_WAIT_1); | ||||
tp->t_flags &= ~TF_NEEDFIN; | tp->t_flags &= ~TF_NEEDFIN; | ||||
} else { | } else { | ||||
tcp_state_change(tp, TCPS_ESTABLISHED); | tcp_state_change(tp, TCPS_ESTABLISHED); | ||||
TCP_PROBE5(accept__established, NULL, tp, | TCP_PROBE5(accept__established, NULL, tp, | ||||
Show All 19 Lines | rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, | ||||
if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) { | if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) { | ||||
uint32_t t; | uint32_t t; | ||||
t = tcp_ts_getticks() - to->to_tsecr; | t = tcp_ts_getticks() - to->to_tsecr; | ||||
if (!tp->t_rttlow || tp->t_rttlow > t) | if (!tp->t_rttlow || tp->t_rttlow > t) | ||||
tp->t_rttlow = t; | tp->t_rttlow = t; | ||||
tcp_rack_xmit_timer(rack, t + 1); | tcp_rack_xmit_timer(rack, t + 1); | ||||
tcp_rack_xmit_timer_commit(rack, tp); | tcp_rack_xmit_timer_commit(rack, tp); | ||||
} | } | ||||
if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { | if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { | ||||
return (ret_val); | return (ret_val); | ||||
} | } | ||||
if (tp->t_state == TCPS_FIN_WAIT_1) { | if (tp->t_state == TCPS_FIN_WAIT_1) { | ||||
/* We could have gone to FIN_WAIT_1 (or EST) above */ | /* We could have gone to FIN_WAIT_1 (or EST) above */ | ||||
/* | /* | ||||
* In FIN_WAIT_1 STATE in addition to the processing for the | * In FIN_WAIT_1 STATE in addition to the processing for the | ||||
* ESTABLISHED state if our FIN is now acknowledged then | * ESTABLISHED state if our FIN is now acknowledged then | ||||
▲ Show 20 Lines • Show All 242 Lines • ▼ Show 20 Lines | if (rack_progress_timeout_check(tp)) { | ||||
return (1); | return (1); | ||||
} | } | ||||
} | } | ||||
return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, | return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, | ||||
tiwin, thflags, nxt_pkt)); | tiwin, thflags, nxt_pkt)); | ||||
} | } | ||||
static int | static int | ||||
rack_check_data_after_close(struct mbuf *m, | rack_check_data_after_close(struct mbuf *m, | ||||
struct tcpcb *tp, int32_t *tlen, struct tcphdr *th, struct socket *so) | struct tcpcb *tp, int32_t *tlen, struct tcphdr *th, struct socket *so) | ||||
{ | { | ||||
struct tcp_rack *rack; | struct tcp_rack *rack; | ||||
rack = (struct tcp_rack *)tp->t_fb_ptr; | rack = (struct tcp_rack *)tp->t_fb_ptr; | ||||
if (rack->rc_allow_data_af_clo == 0) { | if (rack->rc_allow_data_af_clo == 0) { | ||||
close_now: | close_now: | ||||
tp = tcp_close(tp); | tp = tcp_close(tp); | ||||
▲ Show 20 Lines • Show All 464 Lines • ▼ Show 20 Lines | |||||
rack_set_pace_segments(struct tcpcb *tp, struct tcp_rack *rack) | rack_set_pace_segments(struct tcpcb *tp, struct tcp_rack *rack) | ||||
{ | { | ||||
uint32_t tls_seg = 0; | uint32_t tls_seg = 0; | ||||
#ifdef KERN_TLS | #ifdef KERN_TLS | ||||
if (rack->rc_inp->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) { | if (rack->rc_inp->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) { | ||||
tls_seg = ctf_get_opt_tls_size(rack->rc_inp->inp_socket, rack->rc_tp->snd_wnd); | tls_seg = ctf_get_opt_tls_size(rack->rc_inp->inp_socket, rack->rc_tp->snd_wnd); | ||||
rack->r_ctl.rc_pace_min_segs = tls_seg; | rack->r_ctl.rc_pace_min_segs = tls_seg; | ||||
} else | } else | ||||
#endif | #endif | ||||
rack->r_ctl.rc_pace_min_segs = ctf_fixed_maxseg(tp); | rack->r_ctl.rc_pace_min_segs = ctf_fixed_maxseg(tp); | ||||
rack->r_ctl.rc_pace_max_segs = ctf_fixed_maxseg(tp) * rack->rc_pace_max_segs; | rack->r_ctl.rc_pace_max_segs = ctf_fixed_maxseg(tp) * rack->rc_pace_max_segs; | ||||
if (rack->r_ctl.rc_pace_max_segs > PACE_MAX_IP_BYTES) | if (rack->r_ctl.rc_pace_max_segs > PACE_MAX_IP_BYTES) | ||||
rack->r_ctl.rc_pace_max_segs = PACE_MAX_IP_BYTES; | rack->r_ctl.rc_pace_max_segs = PACE_MAX_IP_BYTES; | ||||
#ifdef KERN_TLS | #ifdef KERN_TLS | ||||
if (tls_seg != 0) { | if (tls_seg != 0) { | ||||
if (rack_hw_tls_max_seg > 1) { | if (rack_hw_tls_max_seg > 1) { | ||||
▲ Show 20 Lines • Show All 241 Lines • ▼ Show 20 Lines | rack_timer_audit(struct tcpcb *tp, struct tcp_rack *rack, struct sockbuf *sb) | ||||
} | } | ||||
if (rsm == NULL) { | if (rsm == NULL) { | ||||
/* Nothing outstanding? */ | /* Nothing outstanding? */ | ||||
if (tp->t_flags & TF_DELACK) { | if (tp->t_flags & TF_DELACK) { | ||||
if (tmr_up == PACE_TMR_DELACK) | if (tmr_up == PACE_TMR_DELACK) | ||||
/* We are supposed to have delayed ack up and we do */ | /* We are supposed to have delayed ack up and we do */ | ||||
return; | return; | ||||
} else if (sbavail(&tp->t_inpcb->inp_socket->so_snd) && (tmr_up == PACE_TMR_RXT)) { | } else if (sbavail(&tp->t_inpcb->inp_socket->so_snd) && (tmr_up == PACE_TMR_RXT)) { | ||||
/* | /* | ||||
* if we hit enobufs then we would expect the possibility | * if we hit enobufs then we would expect the possibility | ||||
* of nothing outstanding and the RXT up (and the hptsi timer). | * of nothing outstanding and the RXT up (and the hptsi timer). | ||||
*/ | */ | ||||
return; | return; | ||||
} else if (((tcp_always_keepalive || | } else if (((tcp_always_keepalive || | ||||
rack->rc_inp->inp_socket->so_options & SO_KEEPALIVE) && | rack->rc_inp->inp_socket->so_options & SO_KEEPALIVE) && | ||||
(tp->t_state <= TCPS_CLOSING)) && | (tp->t_state <= TCPS_CLOSING)) && | ||||
(tmr_up == PACE_TMR_KEEP) && | (tmr_up == PACE_TMR_KEEP) && | ||||
(tp->snd_max == tp->snd_una)) { | (tp->snd_max == tp->snd_una)) { | ||||
/* We should have keep alive up and we do */ | /* We should have keep alive up and we do */ | ||||
return; | return; | ||||
} | } | ||||
} | } | ||||
if (SEQ_GT(tp->snd_max, tp->snd_una) && | if (SEQ_GT(tp->snd_max, tp->snd_una) && | ||||
((tmr_up == PACE_TMR_TLP) || | ((tmr_up == PACE_TMR_TLP) || | ||||
(tmr_up == PACE_TMR_RACK) || | (tmr_up == PACE_TMR_RACK) || | ||||
(tmr_up == PACE_TMR_RXT))) { | (tmr_up == PACE_TMR_RXT))) { | ||||
/* | /* | ||||
* Either a Rack, TLP or RXT is fine if we | * Either a Rack, TLP or RXT is fine if we | ||||
* have outstanding data. | * have outstanding data. | ||||
*/ | */ | ||||
return; | return; | ||||
} else if (tmr_up == PACE_TMR_DELACK) { | } else if (tmr_up == PACE_TMR_DELACK) { | ||||
/* | /* | ||||
* If the delayed ack was going to go off | * If the delayed ack was going to go off | ||||
* before the rtx/tlp/rack timer were going to | * before the rtx/tlp/rack timer were going to | ||||
* expire, then that would be the timer in control. | * expire, then that would be the timer in control. | ||||
* Note we don't check the time here trusting the | * Note we don't check the time here trusting the | ||||
* code is correct. | * code is correct. | ||||
*/ | */ | ||||
return; | return; | ||||
} | } | ||||
/* | /* | ||||
* Ok the timer originally started is not what we want now. | * Ok the timer originally started is not what we want now. | ||||
* We will force the hpts to be stopped if any, and restart | * We will force the hpts to be stopped if any, and restart | ||||
* with the slot set to what was in the saved slot. | * with the slot set to what was in the saved slot. | ||||
*/ | */ | ||||
rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); | rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); | ||||
rack_start_hpts_timer(rack, tp, tcp_ts_getticks(), 0, 0, 0); | rack_start_hpts_timer(rack, tp, tcp_ts_getticks(), 0, 0, 0); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 427 Lines • ▼ Show 20 Lines | old_method: | ||||
bw_est = 0; | bw_est = 0; | ||||
for (cnt=0; cnt<RACK_GP_HIST; cnt++) { | for (cnt=0; cnt<RACK_GP_HIST; cnt++) { | ||||
if ((rack->r_ctl.rc_gp_hist_filled == 0) && | if ((rack->r_ctl.rc_gp_hist_filled == 0) && | ||||
(rack->r_ctl.rc_gp_history[cnt] == 0)) | (rack->r_ctl.rc_gp_history[cnt] == 0)) | ||||
break; | break; | ||||
bw_est += rack->r_ctl.rc_gp_history[cnt]; | bw_est += rack->r_ctl.rc_gp_history[cnt]; | ||||
} | } | ||||
if (bw_est == 0) { | if (bw_est == 0) { | ||||
/* | /* | ||||
* No way yet to make a b/w estimate | * No way yet to make a b/w estimate | ||||
* (no goodput est yet). | * (no goodput est yet). | ||||
*/ | */ | ||||
goto old_method; | goto old_method; | ||||
} | } | ||||
/* Convert to bytes per second */ | /* Convert to bytes per second */ | ||||
bw_est *= MSEC_IN_SECOND; | bw_est *= MSEC_IN_SECOND; | ||||
/* | /* | ||||
* Now ratchet it up by our percentage. Note | * Now ratchet it up by our percentage. Note | ||||
* that the minimum you can do is 1 which would | * that the minimum you can do is 1 which would | ||||
* get you 101% of the average last N goodput estimates. | * get you 101% of the average last N goodput estimates. | ||||
* The max you can do is 256 which would yield you | * The max you can do is 256 which would yield you | ||||
* 356% of the last N goodput estimates. | * 356% of the last N goodput estimates. | ||||
*/ | */ | ||||
bw_raise = bw_est * (uint64_t)rack->rack_per_of_gp; | bw_raise = bw_est * (uint64_t)rack->rack_per_of_gp; | ||||
bw_est += bw_raise; | bw_est += bw_raise; | ||||
/* average by the number we added */ | /* average by the number we added */ | ||||
bw_est /= cnt; | bw_est /= cnt; | ||||
/* Now calculate a rate based on this b/w */ | /* Now calculate a rate based on this b/w */ | ||||
lentim = (uint64_t) len * (uint64_t)MSEC_IN_SECOND; | lentim = (uint64_t) len * (uint64_t)MSEC_IN_SECOND; | ||||
res = lentim / bw_est; | res = lentim / bw_est; | ||||
slot = (uint32_t)res; | slot = (uint32_t)res; | ||||
} | } | ||||
if (rack->r_enforce_min_pace && | if (rack->r_enforce_min_pace && | ||||
(slot == 0)) { | (slot == 0)) { | ||||
/* We are enforcing a minimum pace time of 1ms */ | /* We are enforcing a minimum pace time of 1ms */ | ||||
slot = rack->r_enforce_min_pace; | slot = rack->r_enforce_min_pace; | ||||
} | } | ||||
if (slot) | if (slot) | ||||
counter_u64_add(rack_calc_nonzero, 1); | counter_u64_add(rack_calc_nonzero, 1); | ||||
else | else | ||||
counter_u64_add(rack_calc_zero, 1); | counter_u64_add(rack_calc_zero, 1); | ||||
return (slot); | return (slot); | ||||
} | } | ||||
static int | static int | ||||
rack_output(struct tcpcb *tp) | rack_output(struct tcpcb *tp) | ||||
▲ Show 20 Lines • Show All 183 Lines • ▼ Show 20 Lines | if (flags & TH_RST) { | ||||
goto send; | goto send; | ||||
} | } | ||||
if (rack->r_ctl.rc_tlpsend) { | if (rack->r_ctl.rc_tlpsend) { | ||||
/* Tail loss probe */ | /* Tail loss probe */ | ||||
long cwin; | long cwin; | ||||
long tlen; | long tlen; | ||||
doing_tlp = 1; | doing_tlp = 1; | ||||
/* | /* | ||||
* Check if we can do a TLP with a RACK'd packet | * Check if we can do a TLP with a RACK'd packet | ||||
* this can happen if we are not doing the rack | * this can happen if we are not doing the rack | ||||
* cheat and we skipped to a TLP and it | * cheat and we skipped to a TLP and it | ||||
* went off. | * went off. | ||||
*/ | */ | ||||
rsm = tcp_rack_output(tp, rack, cts); | rsm = tcp_rack_output(tp, rack, cts); | ||||
if (rsm == NULL) | if (rsm == NULL) | ||||
rsm = rack->r_ctl.rc_tlpsend; | rsm = rack->r_ctl.rc_tlpsend; | ||||
rack->r_ctl.rc_tlpsend = NULL; | rack->r_ctl.rc_tlpsend = NULL; | ||||
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines | #endif | ||||
sb_offset = rsm->r_start - tp->snd_una; | sb_offset = rsm->r_start - tp->snd_una; | ||||
/* Can we send it within the PRR boundary? */ | /* Can we send it within the PRR boundary? */ | ||||
if ((rack->use_rack_cheat == 0) && (len > rack->r_ctl.rc_prr_sndcnt)) { | if ((rack->use_rack_cheat == 0) && (len > rack->r_ctl.rc_prr_sndcnt)) { | ||||
/* It does not fit */ | /* It does not fit */ | ||||
if ((ctf_flight_size(tp, rack->r_ctl.rc_sacked) > len) && | if ((ctf_flight_size(tp, rack->r_ctl.rc_sacked) > len) && | ||||
(rack->r_ctl.rc_prr_sndcnt < maxseg)) { | (rack->r_ctl.rc_prr_sndcnt < maxseg)) { | ||||
/* | /* | ||||
* prr is less than a segment, we | * prr is less than a segment, we | ||||
* have more acks due in besides | * have more acks due in besides | ||||
* what we need to resend. Lets not send | * what we need to resend. Lets not send | ||||
* to avoid sending small pieces of | * to avoid sending small pieces of | ||||
* what we need to retransmit. | * what we need to retransmit. | ||||
*/ | */ | ||||
len = 0; | len = 0; | ||||
goto just_return_nolock; | goto just_return_nolock; | ||||
} | } | ||||
len = rack->r_ctl.rc_prr_sndcnt; | len = rack->r_ctl.rc_prr_sndcnt; | ||||
} | } | ||||
sendalot = 0; | sendalot = 0; | ||||
if (len >= maxseg) { | if (len >= maxseg) { | ||||
len = maxseg; | len = maxseg; | ||||
} | } | ||||
if (len > 0) { | if (len > 0) { | ||||
sub_from_prr = 1; | sub_from_prr = 1; | ||||
sack_rxmit = 1; | sack_rxmit = 1; | ||||
TCPSTAT_INC(tcps_sack_rexmits); | TCPSTAT_INC(tcps_sack_rexmits); | ||||
TCPSTAT_ADD(tcps_sack_rexmit_bytes, | TCPSTAT_ADD(tcps_sack_rexmit_bytes, | ||||
min(len, ctf_fixed_maxseg(tp))); | min(len, ctf_fixed_maxseg(tp))); | ||||
counter_u64_add(rack_rtm_prr_retran, 1); | counter_u64_add(rack_rtm_prr_retran, 1); | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Enforce a connection sendmap count limit if set | * Enforce a connection sendmap count limit if set | ||||
* as long as we are not retransmitting. | * as long as we are not retransmitting. | ||||
*/ | */ | ||||
if ((rsm == NULL) && | if ((rsm == NULL) && | ||||
(rack->do_detection == 0) && | (rack->do_detection == 0) && | ||||
(rack_tcp_map_entries_limit > 0) && | (rack_tcp_map_entries_limit > 0) && | ||||
(rack->r_ctl.rc_num_maps_alloced >= rack_tcp_map_entries_limit)) { | (rack->r_ctl.rc_num_maps_alloced >= rack_tcp_map_entries_limit)) { | ||||
counter_u64_add(rack_to_alloc_limited, 1); | counter_u64_add(rack_to_alloc_limited, 1); | ||||
if (!rack->alloc_limit_reported) { | if (!rack->alloc_limit_reported) { | ||||
▲ Show 20 Lines • Show All 257 Lines • ▼ Show 20 Lines | if ((tp->snd_wnd == 0) && | ||||
(tp->snd_una == tp->snd_max) && | (tp->snd_una == tp->snd_max) && | ||||
(sb_offset < (int)sbavail(sb))) { | (sb_offset < (int)sbavail(sb))) { | ||||
tp->snd_nxt = tp->snd_una; | tp->snd_nxt = tp->snd_una; | ||||
rack_enter_persist(tp, rack, cts); | rack_enter_persist(tp, rack, cts); | ||||
} | } | ||||
} else if ((rsm == NULL) && | } else if ((rsm == NULL) && | ||||
((doing_tlp == 0) || (new_data_tlp == 1)) && | ((doing_tlp == 0) || (new_data_tlp == 1)) && | ||||
(len < rack->r_ctl.rc_pace_max_segs)) { | (len < rack->r_ctl.rc_pace_max_segs)) { | ||||
/* | /* | ||||
* We are not sending a full segment for | * We are not sending a full segment for | ||||
* some reason. Should we not send anything (think | * some reason. Should we not send anything (think | ||||
* sws or persists)? | * sws or persists)? | ||||
*/ | */ | ||||
if ((tp->snd_wnd < min((rack->r_ctl.rc_high_rwnd/2), rack->r_ctl.rc_pace_min_segs)) && | if ((tp->snd_wnd < min((rack->r_ctl.rc_high_rwnd/2), rack->r_ctl.rc_pace_min_segs)) && | ||||
(TCPS_HAVEESTABLISHED(tp->t_state)) && | (TCPS_HAVEESTABLISHED(tp->t_state)) && | ||||
(len < (int)(sbavail(sb) - sb_offset))) { | (len < (int)(sbavail(sb) - sb_offset))) { | ||||
/* | /* | ||||
* Here the rwnd is less than | * Here the rwnd is less than | ||||
* the pacing size, this is not a retransmit, | * the pacing size, this is not a retransmit, | ||||
* we are established and | * we are established and | ||||
* the send is not the last in the socket buffer | * the send is not the last in the socket buffer | ||||
* we send nothing, and may enter persists. | * we send nothing, and may enter persists. | ||||
*/ | */ | ||||
len = 0; | len = 0; | ||||
if (tp->snd_max == tp->snd_una) { | if (tp->snd_max == tp->snd_una) { | ||||
/* | /* | ||||
* Nothing out we can | * Nothing out we can | ||||
* go into persists. | * go into persists. | ||||
*/ | */ | ||||
rack_enter_persist(tp, rack, cts); | rack_enter_persist(tp, rack, cts); | ||||
tp->snd_nxt = tp->snd_una; | tp->snd_nxt = tp->snd_una; | ||||
} | } | ||||
} else if ((tp->snd_cwnd >= max(rack->r_ctl.rc_pace_min_segs, (maxseg * 4))) && | } else if ((tp->snd_cwnd >= max(rack->r_ctl.rc_pace_min_segs, (maxseg * 4))) && | ||||
(ctf_flight_size(tp, rack->r_ctl.rc_sacked) > (2 * maxseg)) && | (ctf_flight_size(tp, rack->r_ctl.rc_sacked) > (2 * maxseg)) && | ||||
(len < (int)(sbavail(sb) - sb_offset)) && | (len < (int)(sbavail(sb) - sb_offset)) && | ||||
(len < rack->r_ctl.rc_pace_min_segs)) { | (len < rack->r_ctl.rc_pace_min_segs)) { | ||||
/* | /* | ||||
* Here we are not retransmitting, and | * Here we are not retransmitting, and | ||||
* the cwnd is not so small that we could | * the cwnd is not so small that we could | ||||
* not send at least a min size (rxt timer | * not send at least a min size (rxt timer | ||||
* not having gone off), We have 2 segments or | * not having gone off), We have 2 segments or | ||||
* more already in flight, its not the tail end | * more already in flight, its not the tail end | ||||
* of the socket buffer and the cwnd is blocking | * of the socket buffer and the cwnd is blocking | ||||
* us from sending out a minimum pacing segment size. | * us from sending out a minimum pacing segment size. | ||||
* Lets not send anything. | * Lets not send anything. | ||||
*/ | */ | ||||
len = 0; | len = 0; | ||||
} else if (((tp->snd_wnd - ctf_outstanding(tp)) < | } else if (((tp->snd_wnd - ctf_outstanding(tp)) < | ||||
min((rack->r_ctl.rc_high_rwnd/2), rack->r_ctl.rc_pace_min_segs)) && | min((rack->r_ctl.rc_high_rwnd/2), rack->r_ctl.rc_pace_min_segs)) && | ||||
(ctf_flight_size(tp, rack->r_ctl.rc_sacked) > (2 * maxseg)) && | (ctf_flight_size(tp, rack->r_ctl.rc_sacked) > (2 * maxseg)) && | ||||
(len < (int)(sbavail(sb) - sb_offset)) && | (len < (int)(sbavail(sb) - sb_offset)) && | ||||
(TCPS_HAVEESTABLISHED(tp->t_state))) { | (TCPS_HAVEESTABLISHED(tp->t_state))) { | ||||
/* | /* | ||||
* Here we have a send window but we have | * Here we have a send window but we have | ||||
* filled it up and we can't send another pacing segment. | * filled it up and we can't send another pacing segment. | ||||
* We also have in flight more than 2 segments | * We also have in flight more than 2 segments | ||||
* and we are not completing the sb i.e. we allow | * and we are not completing the sb i.e. we allow | ||||
* the last bytes of the sb to go out even if | * the last bytes of the sb to go out even if | ||||
* its not a full pacing segment. | * its not a full pacing segment. | ||||
*/ | */ | ||||
len = 0; | len = 0; | ||||
} | } | ||||
} | } | ||||
/* len will be >= 0 after this point. */ | /* len will be >= 0 after this point. */ | ||||
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines | if (len) { | ||||
/* | /* | ||||
* NOTE! on localhost connections an 'ack' from the remote | * NOTE! on localhost connections an 'ack' from the remote | ||||
* end may occur synchronously with the output and cause us | * end may occur synchronously with the output and cause us | ||||
* to flush a buffer queued with moretocome. XXX | * to flush a buffer queued with moretocome. XXX | ||||
* | * | ||||
*/ | */ | ||||
if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */ | if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */ | ||||
(idle || (tp->t_flags & TF_NODELAY)) && | (idle || (tp->t_flags & TF_NODELAY)) && | ||||
((uint32_t)len + (uint32_t)sb_offset >= sbavail(&so->so_snd)) && | ((uint32_t)len + (uint32_t)sb_offset >= sbavail(&so->so_snd)) && | ||||
(tp->t_flags & TF_NOPUSH) == 0) { | (tp->t_flags & TF_NOPUSH) == 0) { | ||||
pass = 2; | pass = 2; | ||||
goto send; | goto send; | ||||
} | } | ||||
if (tp->t_flags & TF_FORCEDATA) { /* typ. timeout case */ | if (tp->t_flags & TF_FORCEDATA) { /* typ. timeout case */ | ||||
pass = 3; | pass = 3; | ||||
goto send; | goto send; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 130 Lines • ▼ Show 20 Lines | just_return_nolock: | ||||
rack_start_hpts_timer(rack, tp, cts, slot, tot_len_this_send, sup_rack); | rack_start_hpts_timer(rack, tp, cts, slot, tot_len_this_send, sup_rack); | ||||
rack_log_type_just_return(rack, cts, tot_len_this_send, slot, hpts_calling); | rack_log_type_just_return(rack, cts, tot_len_this_send, slot, hpts_calling); | ||||
tp->t_flags &= ~TF_FORCEDATA; | tp->t_flags &= ~TF_FORCEDATA; | ||||
return (0); | return (0); | ||||
send: | send: | ||||
if ((flags & TH_FIN) && | if ((flags & TH_FIN) && | ||||
sbavail(&tp->t_inpcb->inp_socket->so_snd)) { | sbavail(&tp->t_inpcb->inp_socket->so_snd)) { | ||||
/* | /* | ||||
* We do not transmit a FIN | * We do not transmit a FIN | ||||
* with data outstanding. We | * with data outstanding. We | ||||
* need to make it so all data | * need to make it so all data | ||||
* is acked first. | * is acked first. | ||||
*/ | */ | ||||
flags &= ~TH_FIN; | flags &= ~TH_FIN; | ||||
} | } | ||||
if (doing_tlp == 0) { | if (doing_tlp == 0) { | ||||
▲ Show 20 Lines • Show All 313 Lines • ▼ Show 20 Lines | if (len <= MHLEN - hdrlen - max_linkhdr && !hw_tls) { | ||||
msb = NULL; | msb = NULL; | ||||
else | else | ||||
msb = sb; | msb = sb; | ||||
m->m_next = tcp_m_copym( | m->m_next = tcp_m_copym( | ||||
#ifdef NETFLIX_COPY_ARGS | #ifdef NETFLIX_COPY_ARGS | ||||
tp, | tp, | ||||
#endif | #endif | ||||
mb, moff, &len, | mb, moff, &len, | ||||
if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb, | if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb, | ||||
((rsm == NULL) ? hw_tls : 0) | ((rsm == NULL) ? hw_tls : 0) | ||||
#ifdef NETFLIX_COPY_ARGS | #ifdef NETFLIX_COPY_ARGS | ||||
, &filled_all | , &filled_all | ||||
#endif | #endif | ||||
); | ); | ||||
if (len <= (tp->t_maxseg - optlen)) { | if (len <= (tp->t_maxseg - optlen)) { | ||||
/* | /* | ||||
* Must have ran out of mbufs for the copy | * Must have ran out of mbufs for the copy | ||||
* shorten it to no longer need tso. Lets | * shorten it to no longer need tso. Lets | ||||
* not put on sendalot since we are low on | * not put on sendalot since we are low on | ||||
* mbufs. | * mbufs. | ||||
*/ | */ | ||||
tso = 0; | tso = 0; | ||||
} | } | ||||
if (m->m_next == NULL) { | if (m->m_next == NULL) { | ||||
▲ Show 20 Lines • Show All 739 Lines • ▼ Show 20 Lines | nomore: | ||||
if (recwin > 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv)) | if (recwin > 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv)) | ||||
tp->rcv_adv = tp->rcv_nxt + recwin; | tp->rcv_adv = tp->rcv_nxt + recwin; | ||||
tp->last_ack_sent = tp->rcv_nxt; | tp->last_ack_sent = tp->rcv_nxt; | ||||
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); | tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); | ||||
enobufs: | enobufs: | ||||
rack->r_tlp_running = 0; | rack->r_tlp_running = 0; | ||||
if (flags & TH_RST) { | if (flags & TH_RST) { | ||||
/* | /* | ||||
* We don't send again after sending a RST. | * We don't send again after sending a RST. | ||||
*/ | */ | ||||
slot = 0; | slot = 0; | ||||
sendalot = 0; | sendalot = 0; | ||||
} | } | ||||
if (rsm && (slot == 0)) { | if (rsm && (slot == 0)) { | ||||
/* | /* | ||||
* Dup ack retransmission possibly, so | * Dup ack retransmission possibly, so | ||||
* lets assure we have at least min rack | * lets assure we have at least min rack | ||||
* time, if its a rack resend then the rack | * time, if its a rack resend then the rack | ||||
* to will also be set to this. | * to will also be set to this. | ||||
*/ | */ | ||||
slot = rack->r_ctl.rc_min_to; | slot = rack->r_ctl.rc_min_to; | ||||
} | } | ||||
if (slot) { | if (slot) { | ||||
▲ Show 20 Lines • Show All 198 Lines • ▼ Show 20 Lines | case TCP_RACK_MIN_PACE: | ||||
RACK_OPTS_INC(tcp_rack_min_pace); | RACK_OPTS_INC(tcp_rack_min_pace); | ||||
if (optval > 3) | if (optval > 3) | ||||
rack->r_enforce_min_pace = 3; | rack->r_enforce_min_pace = 3; | ||||
else | else | ||||
rack->r_enforce_min_pace = optval; | rack->r_enforce_min_pace = optval; | ||||
break; | break; | ||||
case TCP_RACK_GP_INCREASE: | case TCP_RACK_GP_INCREASE: | ||||
if ((optval >= 0) && | if ((optval >= 0) && | ||||
(optval <= 256)) | (optval <= 256)) | ||||
rack->rack_per_of_gp = optval; | rack->rack_per_of_gp = optval; | ||||
else | else | ||||
error = EINVAL; | error = EINVAL; | ||||
break; | break; | ||||
case TCP_BBR_RACK_RTT_USE: | case TCP_BBR_RACK_RTT_USE: | ||||
if ((optval != USE_RTT_HIGH) && | if ((optval != USE_RTT_HIGH) && | ||||
(optval != USE_RTT_LOW) && | (optval != USE_RTT_LOW) && | ||||
▲ Show 20 Lines • Show All 279 Lines • Show Last 20 Lines |