Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_stacks/rack.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 486 Lines • ▼ Show 20 Lines | |||||
static int32_t rack_handoff_ok(struct tcpcb *tp); | static int32_t rack_handoff_ok(struct tcpcb *tp); | ||||
static int32_t rack_init(struct tcpcb *tp); | static int32_t rack_init(struct tcpcb *tp); | ||||
static void rack_init_sysctls(void); | static void rack_init_sysctls(void); | ||||
static void | static void | ||||
rack_log_ack(struct tcpcb *tp, struct tcpopt *to, | rack_log_ack(struct tcpcb *tp, struct tcpopt *to, | ||||
struct tcphdr *th, int entered_rec, int dup_ack_struck); | struct tcphdr *th, int entered_rec, int dup_ack_struck); | ||||
static void | static void | ||||
rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len, | rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len, | ||||
uint32_t seq_out, uint8_t th_flags, int32_t err, uint64_t ts, | uint32_t seq_out, uint16_t th_flags, int32_t err, uint64_t ts, | ||||
struct rack_sendmap *hintrsm, uint16_t add_flags, struct mbuf *s_mb, uint32_t s_moff, int hw_tls); | struct rack_sendmap *hintrsm, uint16_t add_flags, struct mbuf *s_mb, uint32_t s_moff, int hw_tls); | ||||
static void | static void | ||||
rack_log_sack_passed(struct tcpcb *tp, struct tcp_rack *rack, | rack_log_sack_passed(struct tcpcb *tp, struct tcp_rack *rack, | ||||
struct rack_sendmap *rsm); | struct rack_sendmap *rsm); | ||||
static void rack_log_to_event(struct tcp_rack *rack, int32_t to_num, struct rack_sendmap *rsm); | static void rack_log_to_event(struct tcp_rack *rack, int32_t to_num, struct rack_sendmap *rsm); | ||||
static int32_t rack_output(struct tcpcb *tp); | static int32_t rack_output(struct tcpcb *tp); | ||||
▲ Show 20 Lines • Show All 6,909 Lines • ▼ Show 20 Lines | #endif | ||||
/* Log a split of rsm into rsm and nrsm */ | /* Log a split of rsm into rsm and nrsm */ | ||||
rack_log_map_chg(tp, rack, NULL, rsm, nrsm, MAP_SPLIT, 0, __LINE__); | rack_log_map_chg(tp, rack, NULL, rsm, nrsm, MAP_SPLIT, 0, __LINE__); | ||||
*lenp = 0; | *lenp = 0; | ||||
return (0); | return (0); | ||||
} | } | ||||
static void | static void | ||||
rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len, | rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len, | ||||
uint32_t seq_out, uint8_t th_flags, int32_t err, uint64_t cts, | uint32_t seq_out, uint16_t th_flags, int32_t err, uint64_t cts, | ||||
struct rack_sendmap *hintrsm, uint16_t add_flag, struct mbuf *s_mb, uint32_t s_moff, int hw_tls) | struct rack_sendmap *hintrsm, uint16_t add_flag, struct mbuf *s_mb, uint32_t s_moff, int hw_tls) | ||||
{ | { | ||||
struct tcp_rack *rack; | struct tcp_rack *rack; | ||||
struct rack_sendmap *rsm, *nrsm, fe; | struct rack_sendmap *rsm, *nrsm, fe; | ||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
struct rack_sendmap *insret; | struct rack_sendmap *insret; | ||||
#endif | #endif | ||||
register uint32_t snd_max, snd_una; | register uint32_t snd_max, snd_una; | ||||
▲ Show 20 Lines • Show All 2,218 Lines • ▼ Show 20 Lines | rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered_recovery, int dup_ack_struck) | ||||
register uint32_t th_ack; | register uint32_t th_ack; | ||||
int32_t i, j, k, num_sack_blks = 0; | int32_t i, j, k, num_sack_blks = 0; | ||||
uint32_t cts, acked, ack_point; | uint32_t cts, acked, ack_point; | ||||
int loop_start = 0, moved_two = 0; | int loop_start = 0, moved_two = 0; | ||||
uint32_t tsused; | uint32_t tsused; | ||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
if (th->th_flags & TH_RST) { | if (tcp_get_flags(th) & TH_RST) { | ||||
/* We don't log resets */ | /* We don't log resets */ | ||||
return; | return; | ||||
} | } | ||||
rack = (struct tcp_rack *)tp->t_fb_ptr; | rack = (struct tcp_rack *)tp->t_fb_ptr; | ||||
cts = tcp_get_usecs(NULL); | cts = tcp_get_usecs(NULL); | ||||
rsm = RB_MIN(rack_rb_tree_head, &rack->r_ctl.rc_mtree); | rsm = RB_MIN(rack_rb_tree_head, &rack->r_ctl.rc_mtree); | ||||
changed = 0; | changed = 0; | ||||
th_ack = th->th_ack; | th_ack = th->th_ack; | ||||
▲ Show 20 Lines • Show All 1,135 Lines • ▼ Show 20 Lines | #endif | ||||
((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) && | ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) && | ||||
(tp->t_fbyte_in == 0)) { | (tp->t_fbyte_in == 0)) { | ||||
tp->t_fbyte_in = ticks; | tp->t_fbyte_in = ticks; | ||||
if (tp->t_fbyte_in == 0) | if (tp->t_fbyte_in == 0) | ||||
tp->t_fbyte_in = 1; | tp->t_fbyte_in = 1; | ||||
if (tp->t_fbyte_out && tp->t_fbyte_in) | if (tp->t_fbyte_out && tp->t_fbyte_in) | ||||
tp->t_flags2 |= TF2_FBYTES_COMPLETE; | tp->t_flags2 |= TF2_FBYTES_COMPLETE; | ||||
} | } | ||||
thflags = th->th_flags & TH_FIN; | thflags = tcp_get_flags(th) & TH_FIN; | ||||
KMOD_TCPSTAT_ADD(tcps_rcvpack, nsegs); | KMOD_TCPSTAT_ADD(tcps_rcvpack, nsegs); | ||||
KMOD_TCPSTAT_ADD(tcps_rcvbyte, tlen); | KMOD_TCPSTAT_ADD(tcps_rcvbyte, tlen); | ||||
SOCKBUF_LOCK(&so->so_rcv); | SOCKBUF_LOCK(&so->so_rcv); | ||||
if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { | if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { | ||||
m_freem(m); | m_freem(m); | ||||
} else | } else | ||||
#ifdef NETFLIX_SB_LIMITS | #ifdef NETFLIX_SB_LIMITS | ||||
appended = | appended = | ||||
▲ Show 20 Lines • Show All 2,592 Lines • ▼ Show 20 Lines | #endif | ||||
memset(tcp_hdr_buf, 0, sizeof(tcp_hdr_buf)); | memset(tcp_hdr_buf, 0, sizeof(tcp_hdr_buf)); | ||||
th = (struct tcphdr *)tcp_hdr_buf; | th = (struct tcphdr *)tcp_hdr_buf; | ||||
th->th_seq = ae->seq; | th->th_seq = ae->seq; | ||||
th->th_ack = ae->ack; | th->th_ack = ae->ack; | ||||
th->th_win = ae->win; | th->th_win = ae->win; | ||||
/* Now fill in the ports */ | /* Now fill in the ports */ | ||||
th->th_sport = tp->t_inpcb->inp_fport; | th->th_sport = tp->t_inpcb->inp_fport; | ||||
th->th_dport = tp->t_inpcb->inp_lport; | th->th_dport = tp->t_inpcb->inp_lport; | ||||
th->th_flags = ae->flags & 0xff; | tcp_set_flags(th, ae->flags); | ||||
/* Now do we have a timestamp option? */ | /* Now do we have a timestamp option? */ | ||||
if (ae->flags & HAS_TSTMP) { | if (ae->flags & HAS_TSTMP) { | ||||
u_char *cp; | u_char *cp; | ||||
uint32_t val; | uint32_t val; | ||||
th->th_off = ((sizeof(struct tcphdr) + TCPOLEN_TSTAMP_APPA) >> 2); | th->th_off = ((sizeof(struct tcphdr) + TCPOLEN_TSTAMP_APPA) >> 2); | ||||
cp = (u_char *)(th + 1); | cp = (u_char *)(th + 1); | ||||
*cp = TCPOPT_NOP; | *cp = TCPOPT_NOP; | ||||
▲ Show 20 Lines • Show All 816 Lines • ▼ Show 20 Lines | #endif | ||||
} | } | ||||
if (m->m_flags & M_ACKCMP) { | if (m->m_flags & M_ACKCMP) { | ||||
panic("Impossible reach m has ackcmp? m:%p tp:%p", m, tp); | panic("Impossible reach m has ackcmp? m:%p tp:%p", m, tp); | ||||
} | } | ||||
cts = tcp_tv_to_usectick(tv); | cts = tcp_tv_to_usectick(tv); | ||||
ms_cts = tcp_tv_to_mssectick(tv); | ms_cts = tcp_tv_to_mssectick(tv); | ||||
nsegs = m->m_pkthdr.lro_nsegs; | nsegs = m->m_pkthdr.lro_nsegs; | ||||
counter_u64_add(rack_proc_non_comp_ack, 1); | counter_u64_add(rack_proc_non_comp_ack, 1); | ||||
thflags = th->th_flags; | thflags = tcp_get_flags(th); | ||||
#ifdef TCP_ACCOUNTING | #ifdef TCP_ACCOUNTING | ||||
sched_pin(); | sched_pin(); | ||||
if (thflags & TH_ACK) | if (thflags & TH_ACK) | ||||
ts_val = get_cyclecount(); | ts_val = get_cyclecount(); | ||||
#endif | #endif | ||||
if ((m->m_flags & M_TSTMP) || | if ((m->m_flags & M_TSTMP) || | ||||
(m->m_flags & M_TSTMP_LRO)) { | (m->m_flags & M_TSTMP_LRO)) { | ||||
mbuf_tstmp2timespec(m, &ts); | mbuf_tstmp2timespec(m, &ts); | ||||
▲ Show 20 Lines • Show All 339 Lines • ▼ Show 20 Lines | #endif | ||||
if ((rack->rc_gp_dyn_mul) && | if ((rack->rc_gp_dyn_mul) && | ||||
(rack->use_fixed_rate == 0) && | (rack->use_fixed_rate == 0) && | ||||
(rack->rc_always_pace)) { | (rack->rc_always_pace)) { | ||||
/* Check in on probertt */ | /* Check in on probertt */ | ||||
rack_check_probe_rtt(rack, us_cts); | rack_check_probe_rtt(rack, us_cts); | ||||
} | } | ||||
rack_clear_rate_sample(rack); | rack_clear_rate_sample(rack); | ||||
if ((rack->forced_ack) && | if ((rack->forced_ack) && | ||||
((th->th_flags & TH_RST) == 0)) { | ((tcp_get_flags(th) & TH_RST) == 0)) { | ||||
rack_handle_probe_response(rack, tiwin, us_cts); | rack_handle_probe_response(rack, tiwin, us_cts); | ||||
} | } | ||||
/* | /* | ||||
* This is the one exception case where we set the rack state | * This is the one exception case where we set the rack state | ||||
* always. All other times (timers etc) we must have a rack-state | * always. All other times (timers etc) we must have a rack-state | ||||
* set (so we assure we have done the checks above for SACK). | * set (so we assure we have done the checks above for SACK). | ||||
*/ | */ | ||||
rack->r_ctl.rc_rcvtime = cts; | rack->r_ctl.rc_rcvtime = cts; | ||||
▲ Show 20 Lines • Show All 1,391 Lines • ▼ Show 20 Lines | #endif | ||||
* if the full retransmission is made. If | * if the full retransmission is made. If | ||||
* we are sending less than this is the | * we are sending less than this is the | ||||
* left hand edge and should not have | * left hand edge and should not have | ||||
* the PUSH bit. | * the PUSH bit. | ||||
*/ | */ | ||||
if ((rsm->r_flags & RACK_HAD_PUSH) && | if ((rsm->r_flags & RACK_HAD_PUSH) && | ||||
(len == (rsm->r_end - rsm->r_start))) | (len == (rsm->r_end - rsm->r_start))) | ||||
flags |= TH_PUSH; | flags |= TH_PUSH; | ||||
th->th_flags = flags; | tcp_set_flags(th, flags); | ||||
th->th_win = htons((u_short)(rack->r_ctl.fsb.recwin >> tp->rcv_scale)); | th->th_win = htons((u_short)(rack->r_ctl.fsb.recwin >> tp->rcv_scale)); | ||||
if (th->th_win == 0) { | if (th->th_win == 0) { | ||||
tp->t_sndzerowin++; | tp->t_sndzerowin++; | ||||
tp->t_flags |= TF_RXWIN0SENT; | tp->t_flags |= TF_RXWIN0SENT; | ||||
} else | } else | ||||
tp->t_flags &= ~TF_RXWIN0SENT; | tp->t_flags &= ~TF_RXWIN0SENT; | ||||
if (rsm->r_flags & RACK_TLP) { | if (rsm->r_flags & RACK_TLP) { | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 460 Lines • ▼ Show 20 Lines | if ((tso == 0) && (len > segsiz)) | ||||
len = segsiz; | len = segsiz; | ||||
if ((len == 0) || | if ((len == 0) || | ||||
(len <= MHLEN - hdrlen - max_linkhdr)) { | (len <= MHLEN - hdrlen - max_linkhdr)) { | ||||
goto failed; | goto failed; | ||||
} | } | ||||
sb_offset = tp->snd_max - tp->snd_una; | sb_offset = tp->snd_max - tp->snd_una; | ||||
th->th_seq = htonl(tp->snd_max); | th->th_seq = htonl(tp->snd_max); | ||||
th->th_ack = htonl(tp->rcv_nxt); | th->th_ack = htonl(tp->rcv_nxt); | ||||
th->th_flags = flags; | tcp_set_flags(th, flags); | ||||
th->th_win = htons((u_short)(rack->r_ctl.fsb.recwin >> tp->rcv_scale)); | th->th_win = htons((u_short)(rack->r_ctl.fsb.recwin >> tp->rcv_scale)); | ||||
if (th->th_win == 0) { | if (th->th_win == 0) { | ||||
tp->t_sndzerowin++; | tp->t_sndzerowin++; | ||||
tp->t_flags |= TF_RXWIN0SENT; | tp->t_flags |= TF_RXWIN0SENT; | ||||
} else | } else | ||||
tp->t_flags &= ~TF_RXWIN0SENT; | tp->t_flags &= ~TF_RXWIN0SENT; | ||||
tp->snd_up = tp->snd_una; /* drag it along, its deprecated */ | tp->snd_up = tp->snd_una; /* drag it along, its deprecated */ | ||||
KMOD_TCPSTAT_INC(tcps_sndpack); | KMOD_TCPSTAT_INC(tcps_sndpack); | ||||
Show All 14 Lines | if (len <= segsiz) { | ||||
* shorten it to no longer need tso. Lets | * shorten it to no longer need tso. Lets | ||||
* not put on sendalot since we are low on | * not put on sendalot since we are low on | ||||
* mbufs. | * mbufs. | ||||
*/ | */ | ||||
tso = 0; | tso = 0; | ||||
} | } | ||||
if (rack->r_ctl.fsb.rfo_apply_push && | if (rack->r_ctl.fsb.rfo_apply_push && | ||||
(len == rack->r_ctl.fsb.left_to_send)) { | (len == rack->r_ctl.fsb.left_to_send)) { | ||||
th->th_flags |= TH_PUSH; | tcp_set_flags(th, flags | TH_PUSH); | ||||
add_flag |= RACK_HAD_PUSH; | add_flag |= RACK_HAD_PUSH; | ||||
} | } | ||||
if ((m->m_next == NULL) || (len <= 0)){ | if ((m->m_next == NULL) || (len <= 0)){ | ||||
goto failed; | goto failed; | ||||
} | } | ||||
if (udp) { | if (udp) { | ||||
if (rack->r_is_v6) | if (rack->r_is_v6) | ||||
ulen = hdrlen + len - sizeof(struct ip6_hdr); | ulen = hdrlen + len - sizeof(struct ip6_hdr); | ||||
▲ Show 20 Lines • Show All 2,125 Lines • ▼ Show 20 Lines | if (len || (flags & (TH_SYN | TH_FIN))) { | ||||
th->th_seq = htonl(tp->snd_max); | th->th_seq = htonl(tp->snd_max); | ||||
rack_seq = tp->snd_max; | rack_seq = tp->snd_max; | ||||
} | } | ||||
} else { | } else { | ||||
th->th_seq = htonl(rsm->r_start); | th->th_seq = htonl(rsm->r_start); | ||||
rack_seq = rsm->r_start; | rack_seq = rsm->r_start; | ||||
} | } | ||||
th->th_ack = htonl(tp->rcv_nxt); | th->th_ack = htonl(tp->rcv_nxt); | ||||
th->th_flags = flags; | tcp_set_flags(th, flags); | ||||
/* | /* | ||||
* Calculate receive window. Don't shrink window, but avoid silly | * Calculate receive window. Don't shrink window, but avoid silly | ||||
* window syndrome. | * window syndrome. | ||||
* If a RST segment is sent, advertise a window of zero. | * If a RST segment is sent, advertise a window of zero. | ||||
*/ | */ | ||||
if (flags & TH_RST) { | if (flags & TH_RST) { | ||||
recwin = 0; | recwin = 0; | ||||
} else { | } else { | ||||
▲ Show 20 Lines • Show All 2,370 Lines • Show Last 20 Lines |