Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_stacks/rack.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 199 Lines • ▼ Show 20 Lines | |||||
static int32_t rack_hw_pace_extra_slots = 2; /* 2 extra MSS time betweens */ | static int32_t rack_hw_pace_extra_slots = 2; /* 2 extra MSS time betweens */ | ||||
static int32_t rack_hw_rate_caps = 1; /* 1; */ | static int32_t rack_hw_rate_caps = 1; /* 1; */ | ||||
static int32_t rack_hw_rate_min = 0; /* 1500000;*/ | static int32_t rack_hw_rate_min = 0; /* 1500000;*/ | ||||
static int32_t rack_hw_rate_to_low = 0; /* 1200000; */ | static int32_t rack_hw_rate_to_low = 0; /* 1200000; */ | ||||
static int32_t rack_hw_up_only = 1; | static int32_t rack_hw_up_only = 1; | ||||
static int32_t rack_stats_gets_ms_rtt = 1; | static int32_t rack_stats_gets_ms_rtt = 1; | ||||
/*
 * Maximum number of MSS allowed to be added back if PRR can't send all
 * of its data (sysctl "prr_addback_max").
 */
static int32_t rack_prr_addbackmax = 2; | static int32_t rack_prr_addbackmax = 2; | ||||
static int32_t rack_do_hystart = 0; | static int32_t rack_do_hystart = 0; | ||||
/*
 * Sysctl "apply_rtt_with_low_conf".  When a persist or keep-alive probe
 * went unanswered and a later probe is answered: non-zero applies the
 * resulting RTT sample with confidence 0, zero discards the sample.
 */
static int32_t rack_apply_rtt_with_reduced_conf = 0; | |||||
static int32_t rack_pkt_delay = 1000; | static int32_t rack_pkt_delay = 1000; | ||||
static int32_t rack_send_a_lot_in_prr = 1; | static int32_t rack_send_a_lot_in_prr = 1; | ||||
static int32_t rack_min_to = 1000; /* Number of microsecond min timeout */ | static int32_t rack_min_to = 1000; /* Number of microsecond min timeout */ | ||||
static int32_t rack_verbose_logging = 0; | static int32_t rack_verbose_logging = 0; | ||||
static int32_t rack_ignore_data_after_close = 1; | static int32_t rack_ignore_data_after_close = 1; | ||||
static int32_t rack_enable_shared_cwnd = 1; | static int32_t rack_enable_shared_cwnd = 1; | ||||
static int32_t rack_use_cmp_acks = 1; | static int32_t rack_use_cmp_acks = 1; | ||||
▲ Show 20 Lines • Show All 122 Lines • ▼ Show 20 Lines | |||||
counter_u64_t rack_calc_zero; | counter_u64_t rack_calc_zero; | ||||
counter_u64_t rack_calc_nonzero; | counter_u64_t rack_calc_nonzero; | ||||
counter_u64_t rack_saw_enobuf; | counter_u64_t rack_saw_enobuf; | ||||
counter_u64_t rack_saw_enobuf_hw; | counter_u64_t rack_saw_enobuf_hw; | ||||
counter_u64_t rack_saw_enetunreach; | counter_u64_t rack_saw_enetunreach; | ||||
counter_u64_t rack_per_timer_hole; | counter_u64_t rack_per_timer_hole; | ||||
/*
 * Connections using large / small mbufs for compressed acks
 * (sysctls "cmp_large_mbufs" and "cmp_small_mbufs").
 */
counter_u64_t rack_large_ackcmp; | counter_u64_t rack_large_ackcmp; | ||||
counter_u64_t rack_small_ackcmp; | counter_u64_t rack_small_ackcmp; | ||||
/*
 * Persist-probe statistics, exported read-only under the rack counters
 * sysctl node:
 *   rack_persists_sends     - persist probes sent
 *   rack_persists_acks      - persist probes that were acked
 *   rack_persists_loss      - persist probes detected as lost (no ack)
 *   rack_persists_lost_ends - lost probes accumulated in a persist run
 *                             that ended with the connection being dropped
 */
counter_u64_t rack_persists_sends; | |||||
counter_u64_t rack_persists_acks; | |||||
counter_u64_t rack_persists_loss; | |||||
counter_u64_t rack_persists_lost_ends; | |||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
counter_u64_t rack_adjust_map_bw; | counter_u64_t rack_adjust_map_bw; | ||||
#endif | #endif | ||||
/* Tail loss probe counters */ | /* Tail loss probe counters */ | ||||
counter_u64_t rack_tlp_tot; | counter_u64_t rack_tlp_tot; | ||||
counter_u64_t rack_tlp_newdata; | counter_u64_t rack_tlp_newdata; | ||||
counter_u64_t rack_tlp_retran; | counter_u64_t rack_tlp_retran; | ||||
counter_u64_t rack_tlp_retran_bytes; | counter_u64_t rack_tlp_retran_bytes; | ||||
▲ Show 20 Lines • Show All 413 Lines • ▼ Show 20 Lines | #endif | ||||
counter_u64_zero(rack_calc_nonzero); | counter_u64_zero(rack_calc_nonzero); | ||||
counter_u64_zero(rack_unpaced_segments); | counter_u64_zero(rack_unpaced_segments); | ||||
counter_u64_zero(rack_saw_enobuf); | counter_u64_zero(rack_saw_enobuf); | ||||
counter_u64_zero(rack_saw_enobuf_hw); | counter_u64_zero(rack_saw_enobuf_hw); | ||||
counter_u64_zero(rack_saw_enetunreach); | counter_u64_zero(rack_saw_enetunreach); | ||||
counter_u64_zero(rack_per_timer_hole); | counter_u64_zero(rack_per_timer_hole); | ||||
counter_u64_zero(rack_large_ackcmp); | counter_u64_zero(rack_large_ackcmp); | ||||
counter_u64_zero(rack_small_ackcmp); | counter_u64_zero(rack_small_ackcmp); | ||||
counter_u64_zero(rack_persists_sends); | |||||
counter_u64_zero(rack_persists_acks); | |||||
counter_u64_zero(rack_persists_loss); | |||||
counter_u64_zero(rack_persists_lost_ends); | |||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
counter_u64_zero(rack_adjust_map_bw); | counter_u64_zero(rack_adjust_map_bw); | ||||
#endif | #endif | ||||
counter_u64_zero(rack_to_alloc_hard); | counter_u64_zero(rack_to_alloc_hard); | ||||
counter_u64_zero(rack_to_alloc_emerg); | counter_u64_zero(rack_to_alloc_emerg); | ||||
counter_u64_zero(rack_sack_proc_all); | counter_u64_zero(rack_sack_proc_all); | ||||
counter_u64_zero(rack_fto_send); | counter_u64_zero(rack_fto_send); | ||||
counter_u64_zero(rack_fto_rsm_send); | counter_u64_zero(rack_fto_rsm_send); | ||||
▲ Show 20 Lines • Show All 626 Lines • ▼ Show 20 Lines | #ifdef TCP_ACCOUNTING | ||||
SYSCTL_ADD_S32(&rack_sysctl_ctx, | SYSCTL_ADD_S32(&rack_sysctl_ctx, | ||||
SYSCTL_CHILDREN(rack_misc), | SYSCTL_CHILDREN(rack_misc), | ||||
OID_AUTO, "tcp_acct", CTLFLAG_RW, | OID_AUTO, "tcp_acct", CTLFLAG_RW, | ||||
&rack_tcp_accounting, 0, | &rack_tcp_accounting, 0, | ||||
"Should we turn on TCP accounting for all rack sessions?"); | "Should we turn on TCP accounting for all rack sessions?"); | ||||
#endif | #endif | ||||
SYSCTL_ADD_S32(&rack_sysctl_ctx, | SYSCTL_ADD_S32(&rack_sysctl_ctx, | ||||
SYSCTL_CHILDREN(rack_misc), | SYSCTL_CHILDREN(rack_misc), | ||||
OID_AUTO, "apply_rtt_with_low_conf", CTLFLAG_RW, | |||||
&rack_apply_rtt_with_reduced_conf, 0, | |||||
"When a persist or keep-alive probe is not answered do we calculate rtt on subsequent answers?"); | |||||
SYSCTL_ADD_S32(&rack_sysctl_ctx, | |||||
SYSCTL_CHILDREN(rack_misc), | |||||
OID_AUTO, "rack_dsack_ctl", CTLFLAG_RW, | OID_AUTO, "rack_dsack_ctl", CTLFLAG_RW, | ||||
&rack_dsack_std_based, 3, | &rack_dsack_std_based, 3, | ||||
"How do we process dsack with respect to rack timers, bit field, 3 is standards based?"); | "How do we process dsack with respect to rack timers, bit field, 3 is standards based?"); | ||||
SYSCTL_ADD_S32(&rack_sysctl_ctx, | SYSCTL_ADD_S32(&rack_sysctl_ctx, | ||||
SYSCTL_CHILDREN(rack_misc), | SYSCTL_CHILDREN(rack_misc), | ||||
OID_AUTO, "prr_addback_max", CTLFLAG_RW, | OID_AUTO, "prr_addback_max", CTLFLAG_RW, | ||||
&rack_prr_addbackmax, 2, | &rack_prr_addbackmax, 2, | ||||
"What is the maximum number of MSS we allow to be added back if prr can't send all its data?"); | "What is the maximum number of MSS we allow to be added back if prr can't send all its data?"); | ||||
▲ Show 20 Lines • Show All 344 Lines • ▼ Show 20 Lines | SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | ||||
"Number of compressed acks we processed"); | "Number of compressed acks we processed"); | ||||
} | } | ||||
rack_large_ackcmp = counter_u64_alloc(M_WAITOK); | rack_large_ackcmp = counter_u64_alloc(M_WAITOK); | ||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | ||||
SYSCTL_CHILDREN(rack_counters), | SYSCTL_CHILDREN(rack_counters), | ||||
OID_AUTO, "cmp_large_mbufs", CTLFLAG_RD, | OID_AUTO, "cmp_large_mbufs", CTLFLAG_RD, | ||||
&rack_large_ackcmp, | &rack_large_ackcmp, | ||||
"Number of TCP connections with large mbuf's for compressed acks"); | "Number of TCP connections with large mbuf's for compressed acks"); | ||||
rack_persists_sends = counter_u64_alloc(M_WAITOK); | |||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | |||||
SYSCTL_CHILDREN(rack_counters), | |||||
OID_AUTO, "persist_sends", CTLFLAG_RD, | |||||
&rack_persists_sends, | |||||
"Number of times we sent a persist probe"); | |||||
rack_persists_acks = counter_u64_alloc(M_WAITOK); | |||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | |||||
SYSCTL_CHILDREN(rack_counters), | |||||
OID_AUTO, "persist_acks", CTLFLAG_RD, | |||||
&rack_persists_acks, | |||||
"Number of times a persist probe was acked"); | |||||
rack_persists_loss = counter_u64_alloc(M_WAITOK); | |||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | |||||
SYSCTL_CHILDREN(rack_counters), | |||||
OID_AUTO, "persist_loss", CTLFLAG_RD, | |||||
&rack_persists_loss, | |||||
"Number of times we detected a lost persist probe (no ack)"); | |||||
rack_persists_lost_ends = counter_u64_alloc(M_WAITOK); | |||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | |||||
SYSCTL_CHILDREN(rack_counters), | |||||
OID_AUTO, "persist_loss_ends", CTLFLAG_RD, | |||||
&rack_persists_lost_ends, | |||||
"Number of lost persist probe (no ack) that the run ended with a PERSIST abort"); | |||||
rack_small_ackcmp = counter_u64_alloc(M_WAITOK); | rack_small_ackcmp = counter_u64_alloc(M_WAITOK); | ||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | ||||
SYSCTL_CHILDREN(rack_counters), | SYSCTL_CHILDREN(rack_counters), | ||||
OID_AUTO, "cmp_small_mbufs", CTLFLAG_RD, | OID_AUTO, "cmp_small_mbufs", CTLFLAG_RD, | ||||
&rack_small_ackcmp, | &rack_small_ackcmp, | ||||
"Number of TCP connections with small mbuf's for compressed acks"); | "Number of TCP connections with small mbuf's for compressed acks"); | ||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
rack_adjust_map_bw = counter_u64_alloc(M_WAITOK); | rack_adjust_map_bw = counter_u64_alloc(M_WAITOK); | ||||
▲ Show 20 Lines • Show All 1,148 Lines • ▼ Show 20 Lines | rack_counter_destroy(void) | ||||
counter_u64_free(rack_progress_drops); | counter_u64_free(rack_progress_drops); | ||||
counter_u64_free(rack_input_idle_reduces); | counter_u64_free(rack_input_idle_reduces); | ||||
counter_u64_free(rack_collapsed_win); | counter_u64_free(rack_collapsed_win); | ||||
counter_u64_free(rack_tlp_does_nada); | counter_u64_free(rack_tlp_does_nada); | ||||
counter_u64_free(rack_try_scwnd); | counter_u64_free(rack_try_scwnd); | ||||
counter_u64_free(rack_per_timer_hole); | counter_u64_free(rack_per_timer_hole); | ||||
counter_u64_free(rack_large_ackcmp); | counter_u64_free(rack_large_ackcmp); | ||||
counter_u64_free(rack_small_ackcmp); | counter_u64_free(rack_small_ackcmp); | ||||
counter_u64_free(rack_persists_sends); | |||||
counter_u64_free(rack_persists_acks); | |||||
counter_u64_free(rack_persists_loss); | |||||
counter_u64_free(rack_persists_lost_ends); | |||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
counter_u64_free(rack_adjust_map_bw); | counter_u64_free(rack_adjust_map_bw); | ||||
#endif | #endif | ||||
COUNTER_ARRAY_FREE(rack_out_size, TCP_MSS_ACCT_SIZE); | COUNTER_ARRAY_FREE(rack_out_size, TCP_MSS_ACCT_SIZE); | ||||
COUNTER_ARRAY_FREE(rack_opts_arry, RACK_OPTS_SIZE); | COUNTER_ARRAY_FREE(rack_opts_arry, RACK_OPTS_SIZE); | ||||
} | } | ||||
static struct rack_sendmap * | static struct rack_sendmap * | ||||
▲ Show 20 Lines • Show All 2,669 Lines • ▼ Show 20 Lines | if (rack->r_ctl.rc_scw) { | ||||
tcp_shared_cwnd_idle(rack->r_ctl.rc_scw, rack->r_ctl.rc_scw_index); | tcp_shared_cwnd_idle(rack->r_ctl.rc_scw, rack->r_ctl.rc_scw_index); | ||||
rack->rack_scwnd_is_idle = 1; | rack->rack_scwnd_is_idle = 1; | ||||
} | } | ||||
#endif | #endif | ||||
rack->r_ctl.rc_went_idle_time = tcp_get_usecs(NULL); | rack->r_ctl.rc_went_idle_time = tcp_get_usecs(NULL); | ||||
if (rack->r_ctl.rc_went_idle_time == 0) | if (rack->r_ctl.rc_went_idle_time == 0) | ||||
rack->r_ctl.rc_went_idle_time = 1; | rack->r_ctl.rc_went_idle_time = 1; | ||||
rack_timer_cancel(tp, rack, cts, __LINE__); | rack_timer_cancel(tp, rack, cts, __LINE__); | ||||
rack->r_ctl.persist_lost_ends = 0; | |||||
rack->probe_not_answered = 0; | |||||
rack->forced_ack = 0; | |||||
tp->t_rxtshift = 0; | tp->t_rxtshift = 0; | ||||
RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp), | RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp), | ||||
rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop); | rack_rto_min, rack_rto_max, rack->r_ctl.timer_slop); | ||||
rack->rc_in_persist = 1; | rack->rc_in_persist = 1; | ||||
} | } | ||||
} | } | ||||
static void | static void | ||||
▲ Show 20 Lines • Show All 855 Lines • ▼ Show 20 Lines | if (tp->t_timers->tt_flags & TT_STOPPED) { | ||||
return (1); | return (1); | ||||
} | } | ||||
if (rack->rc_in_persist == 0) | if (rack->rc_in_persist == 0) | ||||
return (0); | return (0); | ||||
if (ctf_progress_timeout_check(tp, false)) { | if (ctf_progress_timeout_check(tp, false)) { | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); | tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); | ||||
rack_log_progress_event(rack, tp, tick, PROGRESS_DROP, __LINE__); | rack_log_progress_event(rack, tp, tick, PROGRESS_DROP, __LINE__); | ||||
tcp_set_inp_to_drop(inp, ETIMEDOUT); | tcp_set_inp_to_drop(inp, ETIMEDOUT); | ||||
counter_u64_add(rack_persists_lost_ends, rack->r_ctl.persist_lost_ends); | |||||
return (1); | return (1); | ||||
} | } | ||||
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); | KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); | ||||
/* | /* | ||||
* Persistence timer into zero window. Force a byte to be output, if | * Persistence timer into zero window. Force a byte to be output, if | ||||
* possible. | * possible. | ||||
*/ | */ | ||||
KMOD_TCPSTAT_INC(tcps_persisttimeo); | KMOD_TCPSTAT_INC(tcps_persisttimeo); | ||||
/* | /* | ||||
* Hack: if the peer is dead/unreachable, we do not time out if the | * Hack: if the peer is dead/unreachable, we do not time out if the | ||||
* window is closed. After a full backoff, drop the connection if | * window is closed. After a full backoff, drop the connection if | ||||
* the idle time (no responses to probes) reaches the maximum | * the idle time (no responses to probes) reaches the maximum | ||||
* backoff that we would use if retransmitting. | * backoff that we would use if retransmitting. | ||||
*/ | */ | ||||
if (tp->t_rxtshift == TCP_MAXRXTSHIFT && | if (tp->t_rxtshift == TCP_MAXRXTSHIFT && | ||||
(ticks - tp->t_rcvtime >= tcp_maxpersistidle || | (ticks - tp->t_rcvtime >= tcp_maxpersistidle || | ||||
TICKS_2_USEC(ticks - tp->t_rcvtime) >= RACK_REXMTVAL(tp) * tcp_totbackoff)) { | TICKS_2_USEC(ticks - tp->t_rcvtime) >= RACK_REXMTVAL(tp) * tcp_totbackoff)) { | ||||
KMOD_TCPSTAT_INC(tcps_persistdrop); | KMOD_TCPSTAT_INC(tcps_persistdrop); | ||||
retval = 1; | retval = 1; | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); | tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); | ||||
tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT); | tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT); | ||||
counter_u64_add(rack_persists_lost_ends, rack->r_ctl.persist_lost_ends); | |||||
goto out; | goto out; | ||||
} | } | ||||
if ((sbavail(&rack->rc_inp->inp_socket->so_snd) == 0) && | if ((sbavail(&rack->rc_inp->inp_socket->so_snd) == 0) && | ||||
tp->snd_una == tp->snd_max) | tp->snd_una == tp->snd_max) | ||||
rack_exit_persist(tp, rack, cts); | rack_exit_persist(tp, rack, cts); | ||||
rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_PERSIT; | rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_PERSIT; | ||||
/* | /* | ||||
* If the user has closed the socket then drop a persisting | * If the user has closed the socket then drop a persisting | ||||
* connection after a much reduced timeout. | * connection after a much reduced timeout. | ||||
*/ | */ | ||||
if (tp->t_state > TCPS_CLOSE_WAIT && | if (tp->t_state > TCPS_CLOSE_WAIT && | ||||
(ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { | (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { | ||||
retval = 1; | retval = 1; | ||||
KMOD_TCPSTAT_INC(tcps_persistdrop); | KMOD_TCPSTAT_INC(tcps_persistdrop); | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); | tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); | ||||
tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT); | tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT); | ||||
counter_u64_add(rack_persists_lost_ends, rack->r_ctl.persist_lost_ends); | |||||
goto out; | goto out; | ||||
} | } | ||||
t_template = tcpip_maketemplate(rack->rc_inp); | t_template = tcpip_maketemplate(rack->rc_inp); | ||||
if (t_template) { | if (t_template) { | ||||
/* only set it if we were answered */ | /* only set it if we were answered */ | ||||
if (rack->forced_ack == 0) { | if (rack->forced_ack == 0) { | ||||
rack->forced_ack = 1; | rack->forced_ack = 1; | ||||
rack->r_ctl.forced_ack_ts = tcp_get_usecs(NULL); | rack->r_ctl.forced_ack_ts = tcp_get_usecs(NULL); | ||||
} else { | |||||
rack->probe_not_answered = 1; | |||||
counter_u64_add(rack_persists_loss, 1); | |||||
rack->r_ctl.persist_lost_ends++; | |||||
} | } | ||||
counter_u64_add(rack_persists_sends, 1); | |||||
tcp_respond(tp, t_template->tt_ipgen, | tcp_respond(tp, t_template->tt_ipgen, | ||||
&t_template->tt_t, (struct mbuf *)NULL, | &t_template->tt_t, (struct mbuf *)NULL, | ||||
tp->rcv_nxt, tp->snd_una - 1, 0); | tp->rcv_nxt, tp->snd_una - 1, 0); | ||||
/* This sends an ack */ | /* This sends an ack */ | ||||
if (tp->t_flags & TF_DELACK) | if (tp->t_flags & TF_DELACK) | ||||
tp->t_flags &= ~TF_DELACK; | tp->t_flags &= ~TF_DELACK; | ||||
free(t_template, M_TEMP); | free(t_template, M_TEMP); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | if ((V_tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && | ||||
* respond. | * respond. | ||||
*/ | */ | ||||
KMOD_TCPSTAT_INC(tcps_keepprobe); | KMOD_TCPSTAT_INC(tcps_keepprobe); | ||||
t_template = tcpip_maketemplate(inp); | t_template = tcpip_maketemplate(inp); | ||||
if (t_template) { | if (t_template) { | ||||
if (rack->forced_ack == 0) { | if (rack->forced_ack == 0) { | ||||
rack->forced_ack = 1; | rack->forced_ack = 1; | ||||
rack->r_ctl.forced_ack_ts = tcp_get_usecs(NULL); | rack->r_ctl.forced_ack_ts = tcp_get_usecs(NULL); | ||||
} else { | |||||
rack->probe_not_answered = 1; | |||||
} | } | ||||
tcp_respond(tp, t_template->tt_ipgen, | tcp_respond(tp, t_template->tt_ipgen, | ||||
&t_template->tt_t, (struct mbuf *)NULL, | &t_template->tt_t, (struct mbuf *)NULL, | ||||
tp->rcv_nxt, tp->snd_una - 1, 0); | tp->rcv_nxt, tp->snd_una - 1, 0); | ||||
free(t_template, M_TEMP); | free(t_template, M_TEMP); | ||||
} | } | ||||
} | } | ||||
rack_start_hpts_timer(rack, tp, cts, 0, 0, 0); | rack_start_hpts_timer(rack, tp, cts, 0, 0, 0); | ||||
▲ Show 20 Lines • Show All 3,683 Lines • ▼ Show 20 Lines | if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == | ||||
tp->rcv_scale = tp->request_r_scale; | tp->rcv_scale = tp->request_r_scale; | ||||
/* Send window already scaled. */ | /* Send window already scaled. */ | ||||
} | } | ||||
} | } | ||||
nsegs = max(1, m->m_pkthdr.lro_nsegs); | nsegs = max(1, m->m_pkthdr.lro_nsegs); | ||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
acked = BYTES_THIS_ACK(tp, th); | acked = BYTES_THIS_ACK(tp, th); | ||||
if (acked) { | |||||
/* | |||||
* Any time the cum-ack moves forward, clear the | |||||
* probe-not-answered flag set by keep-alive probes. | |||||
* Persist mode clears the flag itself on entry. | |||||
*/ | |||||
rack->probe_not_answered = 0; | |||||
} | |||||
KMOD_TCPSTAT_ADD(tcps_rcvackpack, nsegs); | KMOD_TCPSTAT_ADD(tcps_rcvackpack, nsegs); | ||||
KMOD_TCPSTAT_ADD(tcps_rcvackbyte, acked); | KMOD_TCPSTAT_ADD(tcps_rcvackbyte, acked); | ||||
/* | /* | ||||
* If we just performed our first retransmit, and the ACK arrives | * If we just performed our first retransmit, and the ACK arrives | ||||
* within our recovery window, then it was a mistake to do the | * within our recovery window, then it was a mistake to do the | ||||
* retransmit in the first place. Recover our original cwnd and | * retransmit in the first place. Recover our original cwnd and | ||||
* ssthresh, and proceed to transmit where we left off. | * ssthresh, and proceed to transmit where we left off. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 3,057 Lines • ▼ Show 20 Lines | TCP_LOG_EVENTP(tp, th, | ||||
0, &log, true, &ltv); | 0, &log, true, &ltv); | ||||
if (xx) { | if (xx) { | ||||
tp->snd_una = orig_snd_una; | tp->snd_una = orig_snd_una; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
/*
 * Process an ACK that arrives while a forced probe (persist or
 * keep-alive) is outstanding, i.e. while rack->forced_ack is set.
 * Always clears forced_ack and resets t_rxtshift; when the ACK
 * qualifies as an answer to the probe, the elapsed time since
 * forced_ack_ts is fed to the RTT estimator.
 *
 * rack   - connection state for this session.
 * tiwin  - window advertised by the ACK being processed; compared
 *          against snd_wnd to recognize a window-probe answer.
 * us_cts - time of the ACK, in the same units as forced_ack_ts
 *          (which the timers set from tcp_get_usecs()).
 */
static void | |||||
rack_handle_probe_response(struct tcp_rack *rack, uint32_t tiwin, uint32_t us_cts) | |||||
{ | |||||
uint32_t us_rtt; | |||||
/* | |||||
* A persist or keep-alive was forced out, update our | |||||
* min rtt time. Note that we now worry about lost responses. | |||||
* When a subsequent keep-alive or persist times out | |||||
* and forced_ack is still on, then the last probe | |||||
* was not responded to. In such cases we have a | |||||
* sysctl that controls the behavior. Either we apply | |||||
* the rtt but with reduced confidence (0). Or we just | |||||
* plain don't apply the rtt estimate. Having data flow | |||||
* will clear the probe_not_answered flag, i.e. the cum-ack | |||||
* moving forward or exiting and re-entering persist mode. | |||||
*/ | |||||
rack->forced_ack = 0; | |||||
rack->rc_tp->t_rxtshift = 0; | |||||
if ((rack->rc_in_persist && | |||||
(tiwin == rack->rc_tp->snd_wnd)) || | |||||
(rack->rc_in_persist == 0)) { | |||||
/* | |||||
* In persists only apply the RTT update if this is | |||||
* a response to our window probe. And that | |||||
* means the rwnd sent must match the current | |||||
* snd_wnd. If it does not, then we got a | |||||
* window update ack instead. For keepalive | |||||
* we allow the answer no matter what the window. | |||||
* | |||||
* Note that if the probe_not_answered is set then | |||||
* the forced_ack_ts is the oldest one i.e. the first | |||||
* probe sent that might have been lost. This assures | |||||
* us that if we do calculate an RTT it errs on the | |||||
* long side rather than being some short value. | |||||
*/ | |||||
if (rack->rc_in_persist) | |||||
counter_u64_add(rack_persists_acks, 1); | |||||
us_rtt = us_cts - rack->r_ctl.forced_ack_ts; | |||||
/* Never report a zero RTT sample. */
if (us_rtt == 0) | |||||
us_rtt = 1; | |||||
if (rack->probe_not_answered == 0) { | |||||
rack_apply_updated_usrtt(rack, us_rtt, us_cts); | |||||
tcp_rack_xmit_timer(rack, us_rtt, 0, us_rtt, 3, NULL, 1); | |||||
} else { | |||||
/* The probe was retransmitted; apply the sample only if the sysctl allows, with confidence 0 */ | |||||
if (rack_apply_rtt_with_reduced_conf) { | |||||
rack_apply_updated_usrtt(rack, us_rtt, us_cts); | |||||
tcp_rack_xmit_timer(rack, us_rtt, 0, us_rtt, 0, NULL, 1); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
static int | static int | ||||
rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mbuf *m, int nxt_pkt, struct timeval *tv) | rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mbuf *m, int nxt_pkt, struct timeval *tv) | ||||
{ | { | ||||
/* | /* | ||||
* Handle a "special" compressed ack mbuf. Each incoming | * Handle a "special" compressed ack mbuf. Each incoming | ||||
* ack has only four possible dispositions: | * ack has only four possible dispositions: | ||||
* | * | ||||
* A) It moves the cum-ack forward | * A) It moves the cum-ack forward | ||||
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines | #endif | ||||
tiwin = ae->win << tp->snd_scale; | tiwin = ae->win << tp->snd_scale; | ||||
/* figure out the type of ack */ | /* figure out the type of ack */ | ||||
if (SEQ_LT(ae->ack, high_seq)) { | if (SEQ_LT(ae->ack, high_seq)) { | ||||
/* Case B*/ | /* Case B*/ | ||||
ae->ack_val_set = ACK_BEHIND; | ae->ack_val_set = ACK_BEHIND; | ||||
} else if (SEQ_GT(ae->ack, high_seq)) { | } else if (SEQ_GT(ae->ack, high_seq)) { | ||||
/* Case A */ | /* Case A */ | ||||
ae->ack_val_set = ACK_CUMACK; | ae->ack_val_set = ACK_CUMACK; | ||||
} else if (tiwin == the_win) { | } else if ((tiwin == the_win) && (rack->rc_in_persist == 0)){ | ||||
/* Case D */ | /* Case D */ | ||||
ae->ack_val_set = ACK_DUPACK; | ae->ack_val_set = ACK_DUPACK; | ||||
} else { | } else { | ||||
/* Case C */ | /* Case C */ | ||||
ae->ack_val_set = ACK_RWND; | ae->ack_val_set = ACK_RWND; | ||||
} | } | ||||
rack_log_input_packet(tp, rack, ae, ae->ack_val_set, high_seq); | rack_log_input_packet(tp, rack, ae, ae->ack_val_set, high_seq); | ||||
/* Validate timestamp */ | /* Validate timestamp */ | ||||
▲ Show 20 Lines • Show All 96 Lines • ▼ Show 20 Lines | if (ae->ack_val_set == ACK_BEHIND) { | ||||
counter_u64_add(rack_reorder_seen, 1); | counter_u64_add(rack_reorder_seen, 1); | ||||
rack->r_ctl.rc_reorder_ts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time); | rack->r_ctl.rc_reorder_ts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time); | ||||
} | } | ||||
} else if (ae->ack_val_set == ACK_DUPACK) { | } else if (ae->ack_val_set == ACK_DUPACK) { | ||||
/* Case D */ | /* Case D */ | ||||
rack_strike_dupack(rack); | rack_strike_dupack(rack); | ||||
} else if (ae->ack_val_set == ACK_RWND) { | } else if (ae->ack_val_set == ACK_RWND) { | ||||
/* Case C */ | /* Case C */ | ||||
if ((ae->flags & TSTMP_LRO) || (ae->flags & TSTMP_HDWR)) { | |||||
ts.tv_sec = ae->timestamp / 1000000000; | |||||
ts.tv_nsec = ae->timestamp % 1000000000; | |||||
rack->r_ctl.act_rcv_time.tv_sec = ts.tv_sec; | |||||
rack->r_ctl.act_rcv_time.tv_usec = ts.tv_nsec/1000; | |||||
} else { | |||||
rack->r_ctl.act_rcv_time = *tv; | |||||
} | |||||
if (rack->forced_ack) { | |||||
rack_handle_probe_response(rack, tiwin, | |||||
tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time)); | |||||
} | |||||
win_up_req = 1; | win_up_req = 1; | ||||
win_upd_ack = ae->ack; | win_upd_ack = ae->ack; | ||||
win_seq = ae->seq; | win_seq = ae->seq; | ||||
the_win = tiwin; | the_win = tiwin; | ||||
rack_do_win_updates(tp, rack, the_win, win_seq, win_upd_ack, cts, high_seq); | rack_do_win_updates(tp, rack, the_win, win_seq, win_upd_ack, cts, high_seq); | ||||
} else { | } else { | ||||
/* Case A */ | /* Case A */ | ||||
if (SEQ_GT(ae->ack, tp->snd_max)) { | if (SEQ_GT(ae->ack, tp->snd_max)) { | ||||
▲ Show 20 Lines • Show All 65 Lines • ▼ Show 20 Lines | #ifdef TCP_ACCOUNTING | ||||
} | } | ||||
#endif | #endif | ||||
} | } | ||||
#ifdef TCP_ACCOUNTING | #ifdef TCP_ACCOUNTING | ||||
ts_val = get_cyclecount(); | ts_val = get_cyclecount(); | ||||
#endif | #endif | ||||
acked_amount = acked = (high_seq - tp->snd_una); | acked_amount = acked = (high_seq - tp->snd_una); | ||||
if (acked) { | if (acked) { | ||||
/* | |||||
* Clear the probe not answered flag | |||||
* since cum-ack moved forward. | |||||
*/ | |||||
rack->probe_not_answered = 0; | |||||
if (rack->sack_attack_disable == 0) | if (rack->sack_attack_disable == 0) | ||||
rack_do_decay(rack); | rack_do_decay(rack); | ||||
if (acked >= segsiz) { | if (acked >= segsiz) { | ||||
/* | /* | ||||
* You only get credit for | * You only get credit for | ||||
* MSS and greater (and you get extra | * MSS and greater (and you get extra | ||||
* credit for larger cum-ack moves). | * credit for larger cum-ack moves). | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 739 Lines • ▼ Show 20 Lines | #endif | ||||
if ((rack->rc_gp_dyn_mul) && | if ((rack->rc_gp_dyn_mul) && | ||||
(rack->use_fixed_rate == 0) && | (rack->use_fixed_rate == 0) && | ||||
(rack->rc_always_pace)) { | (rack->rc_always_pace)) { | ||||
/* Check in on probertt */ | /* Check in on probertt */ | ||||
rack_check_probe_rtt(rack, us_cts); | rack_check_probe_rtt(rack, us_cts); | ||||
} | } | ||||
rack_clear_rate_sample(rack); | rack_clear_rate_sample(rack); | ||||
if (rack->forced_ack) { | if (rack->forced_ack) { | ||||
uint32_t us_rtt; | rack_handle_probe_response(rack, tiwin, us_cts); | ||||
/* | |||||
* A persist or keep-alive was forced out, update our | |||||
* min rtt time. Note we do not worry about lost | |||||
* retransmissions since KEEP-ALIVES and persists | |||||
* are usually way long on times of sending (though | |||||
* if we were really paranoid or worried we could | |||||
* at least use timestamps if available to validate). | |||||
*/ | |||||
rack->forced_ack = 0; | |||||
if (tiwin == tp->snd_wnd) { | |||||
/* | |||||
* Only apply the RTT update if this is | |||||
* a response to our window probe. And that | |||||
* means the rwnd sent must match the current | |||||
* snd_wnd. If it does not, then we got a | |||||
* window update ack instead. | |||||
*/ | |||||
us_rtt = us_cts - rack->r_ctl.forced_ack_ts; | |||||
if (us_rtt == 0) | |||||
us_rtt = 1; | |||||
rack_apply_updated_usrtt(rack, us_rtt, us_cts); | |||||
tcp_rack_xmit_timer(rack, us_rtt, 0, us_rtt, 3, NULL, 1); | |||||
} | |||||
} | } | ||||
/* | /* | ||||
* This is the one exception case where we set the rack state | * This is the one exception case where we set the rack state | ||||
* always. All other times (timers etc) we must have a rack-state | * always. All other times (timers etc) we must have a rack-state | ||||
* set (so we assure we have done the checks above for SACK). | * set (so we assure we have done the checks above for SACK). | ||||
*/ | */ | ||||
rack->r_ctl.rc_rcvtime = cts; | rack->r_ctl.rc_rcvtime = cts; | ||||
if (rack->r_state != tp->t_state) | if (rack->r_state != tp->t_state) | ||||
▲ Show 20 Lines • Show All 6,422 Lines • Show Last 20 Lines |