Changeset View
Changeset View
Standalone View
Standalone View
head/sys/netinet/tcp_input.c
Show First 20 Lines • Show All 284 Lines • ▼ Show 20 Lines | cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type) | ||||
if (tp->snd_cwnd <= tp->snd_wnd) | if (tp->snd_cwnd <= tp->snd_wnd) | ||||
tp->ccv->flags |= CCF_CWND_LIMITED; | tp->ccv->flags |= CCF_CWND_LIMITED; | ||||
else | else | ||||
tp->ccv->flags &= ~CCF_CWND_LIMITED; | tp->ccv->flags &= ~CCF_CWND_LIMITED; | ||||
if (type == CC_ACK) { | if (type == CC_ACK) { | ||||
if (tp->snd_cwnd > tp->snd_ssthresh) { | if (tp->snd_cwnd > tp->snd_ssthresh) { | ||||
tp->t_bytes_acked += min(tp->ccv->bytes_this_ack, | tp->t_bytes_acked += min(tp->ccv->bytes_this_ack, | ||||
V_tcp_abc_l_var * tp->t_maxseg); | V_tcp_abc_l_var * tcp_maxseg(tp)); | ||||
if (tp->t_bytes_acked >= tp->snd_cwnd) { | if (tp->t_bytes_acked >= tp->snd_cwnd) { | ||||
tp->t_bytes_acked -= tp->snd_cwnd; | tp->t_bytes_acked -= tp->snd_cwnd; | ||||
tp->ccv->flags |= CCF_ABC_SENTAWND; | tp->ccv->flags |= CCF_ABC_SENTAWND; | ||||
} | } | ||||
} else { | } else { | ||||
tp->ccv->flags &= ~CCF_ABC_SENTAWND; | tp->ccv->flags &= ~CCF_ABC_SENTAWND; | ||||
tp->t_bytes_acked = 0; | tp->t_bytes_acked = 0; | ||||
} | } | ||||
} | } | ||||
if (CC_ALGO(tp)->ack_received != NULL) { | if (CC_ALGO(tp)->ack_received != NULL) { | ||||
/* XXXLAS: Find a way to live without this */ | /* XXXLAS: Find a way to live without this */ | ||||
tp->ccv->curack = th->th_ack; | tp->ccv->curack = th->th_ack; | ||||
CC_ALGO(tp)->ack_received(tp->ccv, type); | CC_ALGO(tp)->ack_received(tp->ccv, type); | ||||
} | } | ||||
} | } | ||||
void | void | ||||
cc_conn_init(struct tcpcb *tp) | cc_conn_init(struct tcpcb *tp) | ||||
{ | { | ||||
struct hc_metrics_lite metrics; | struct hc_metrics_lite metrics; | ||||
struct inpcb *inp = tp->t_inpcb; | struct inpcb *inp = tp->t_inpcb; | ||||
u_int maxseg; | |||||
int rtt; | int rtt; | ||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
tcp_hc_get(&inp->inp_inc, &metrics); | tcp_hc_get(&inp->inp_inc, &metrics); | ||||
maxseg = tcp_maxseg(tp); | |||||
if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) { | if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) { | ||||
tp->t_srtt = rtt; | tp->t_srtt = rtt; | ||||
tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE; | tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE; | ||||
TCPSTAT_INC(tcps_usedrtt); | TCPSTAT_INC(tcps_usedrtt); | ||||
if (metrics.rmx_rttvar) { | if (metrics.rmx_rttvar) { | ||||
tp->t_rttvar = metrics.rmx_rttvar; | tp->t_rttvar = metrics.rmx_rttvar; | ||||
TCPSTAT_INC(tcps_usedrttvar); | TCPSTAT_INC(tcps_usedrttvar); | ||||
} else { | } else { | ||||
/* default variation is +- 1 rtt */ | /* default variation is +- 1 rtt */ | ||||
tp->t_rttvar = | tp->t_rttvar = | ||||
tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; | tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; | ||||
} | } | ||||
TCPT_RANGESET(tp->t_rxtcur, | TCPT_RANGESET(tp->t_rxtcur, | ||||
((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, | ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, | ||||
tp->t_rttmin, TCPTV_REXMTMAX); | tp->t_rttmin, TCPTV_REXMTMAX); | ||||
} | } | ||||
if (metrics.rmx_ssthresh) { | if (metrics.rmx_ssthresh) { | ||||
/* | /* | ||||
* There's some sort of gateway or interface | * There's some sort of gateway or interface | ||||
* buffer limit on the path. Use this to set | * buffer limit on the path. Use this to set | ||||
* the slow start threshold, but set the | * the slow start threshold, but set the | ||||
* threshold to no less than 2*mss. | * threshold to no less than 2*mss. | ||||
*/ | */ | ||||
tp->snd_ssthresh = max(2 * tp->t_maxseg, metrics.rmx_ssthresh); | tp->snd_ssthresh = max(2 * maxseg, metrics.rmx_ssthresh); | ||||
TCPSTAT_INC(tcps_usedssthresh); | TCPSTAT_INC(tcps_usedssthresh); | ||||
} | } | ||||
/* | /* | ||||
* Set the initial slow-start flight size. | * Set the initial slow-start flight size. | ||||
* | * | ||||
* RFC5681 Section 3.1 specifies the default conservative values. | * RFC5681 Section 3.1 specifies the default conservative values. | ||||
* RFC3390 specifies slightly more aggressive values. | * RFC3390 specifies slightly more aggressive values. | ||||
* RFC6928 increases it to ten segments. | * RFC6928 increases it to ten segments. | ||||
* Support for user specified value for initial flight size. | * Support for user specified value for initial flight size. | ||||
* | * | ||||
* If a SYN or SYN/ACK was lost and retransmitted, we have to | * If a SYN or SYN/ACK was lost and retransmitted, we have to | ||||
* reduce the initial CWND to one segment as congestion is likely | * reduce the initial CWND to one segment as congestion is likely | ||||
* requiring us to be cautious. | * requiring us to be cautious. | ||||
*/ | */ | ||||
if (tp->snd_cwnd == 1) | if (tp->snd_cwnd == 1) | ||||
tp->snd_cwnd = tp->t_maxseg; /* SYN(-ACK) lost */ | tp->snd_cwnd = maxseg; /* SYN(-ACK) lost */ | ||||
else if (V_tcp_initcwnd_segments) | else if (V_tcp_initcwnd_segments) | ||||
tp->snd_cwnd = min(V_tcp_initcwnd_segments * tp->t_maxseg, | tp->snd_cwnd = min(V_tcp_initcwnd_segments * maxseg, | ||||
max(2 * tp->t_maxseg, V_tcp_initcwnd_segments * 1460)); | max(2 * maxseg, V_tcp_initcwnd_segments * 1460)); | ||||
else if (V_tcp_do_rfc3390) | else if (V_tcp_do_rfc3390) | ||||
tp->snd_cwnd = min(4 * tp->t_maxseg, | tp->snd_cwnd = min(4 * maxseg, max(2 * maxseg, 4380)); | ||||
max(2 * tp->t_maxseg, 4380)); | |||||
else { | else { | ||||
/* Per RFC5681 Section 3.1 */ | /* Per RFC5681 Section 3.1 */ | ||||
if (tp->t_maxseg > 2190) | if (maxseg > 2190) | ||||
tp->snd_cwnd = 2 * tp->t_maxseg; | tp->snd_cwnd = 2 * maxseg; | ||||
else if (tp->t_maxseg > 1095) | else if (maxseg > 1095) | ||||
tp->snd_cwnd = 3 * tp->t_maxseg; | tp->snd_cwnd = 3 * maxseg; | ||||
else | else | ||||
tp->snd_cwnd = 4 * tp->t_maxseg; | tp->snd_cwnd = 4 * maxseg; | ||||
} | } | ||||
if (CC_ALGO(tp)->conn_init != NULL) | if (CC_ALGO(tp)->conn_init != NULL) | ||||
CC_ALGO(tp)->conn_init(tp->ccv); | CC_ALGO(tp)->conn_init(tp->ccv); | ||||
} | } | ||||
void inline | void inline | ||||
cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) | cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) | ||||
{ | { | ||||
u_int maxseg; | |||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
switch(type) { | switch(type) { | ||||
case CC_NDUPACK: | case CC_NDUPACK: | ||||
if (!IN_FASTRECOVERY(tp->t_flags)) { | if (!IN_FASTRECOVERY(tp->t_flags)) { | ||||
tp->snd_recover = tp->snd_max; | tp->snd_recover = tp->snd_max; | ||||
if (tp->t_flags & TF_ECN_PERMIT) | if (tp->t_flags & TF_ECN_PERMIT) | ||||
tp->t_flags |= TF_ECN_SND_CWR; | tp->t_flags |= TF_ECN_SND_CWR; | ||||
} | } | ||||
break; | break; | ||||
case CC_ECN: | case CC_ECN: | ||||
if (!IN_CONGRECOVERY(tp->t_flags)) { | if (!IN_CONGRECOVERY(tp->t_flags)) { | ||||
TCPSTAT_INC(tcps_ecn_rcwnd); | TCPSTAT_INC(tcps_ecn_rcwnd); | ||||
tp->snd_recover = tp->snd_max; | tp->snd_recover = tp->snd_max; | ||||
if (tp->t_flags & TF_ECN_PERMIT) | if (tp->t_flags & TF_ECN_PERMIT) | ||||
tp->t_flags |= TF_ECN_SND_CWR; | tp->t_flags |= TF_ECN_SND_CWR; | ||||
} | } | ||||
break; | break; | ||||
case CC_RTO: | case CC_RTO: | ||||
maxseg = tcp_maxseg(tp); | |||||
tp->t_dupacks = 0; | tp->t_dupacks = 0; | ||||
tp->t_bytes_acked = 0; | tp->t_bytes_acked = 0; | ||||
EXIT_RECOVERY(tp->t_flags); | EXIT_RECOVERY(tp->t_flags); | ||||
tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 / | tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 / | ||||
tp->t_maxseg) * tp->t_maxseg; | maxseg) * maxseg; | ||||
tp->snd_cwnd = tp->t_maxseg; | tp->snd_cwnd = maxseg; | ||||
break; | break; | ||||
case CC_RTO_ERR: | case CC_RTO_ERR: | ||||
TCPSTAT_INC(tcps_sndrexmitbad); | TCPSTAT_INC(tcps_sndrexmitbad); | ||||
/* RTO was unnecessary, so reset everything. */ | /* RTO was unnecessary, so reset everything. */ | ||||
tp->snd_cwnd = tp->snd_cwnd_prev; | tp->snd_cwnd = tp->snd_cwnd_prev; | ||||
tp->snd_ssthresh = tp->snd_ssthresh_prev; | tp->snd_ssthresh = tp->snd_ssthresh_prev; | ||||
tp->snd_recover = tp->snd_recover_prev; | tp->snd_recover = tp->snd_recover_prev; | ||||
if (tp->t_flags & TF_WASFRECOVERY) | if (tp->t_flags & TF_WASFRECOVERY) | ||||
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | |||||
/* | /* | ||||
* Indicate whether this ack should be delayed. We can delay the ack if the | * Indicate whether this ack should be delayed. We can delay the ack if the | ||||
* following conditions are met: | * following conditions are met: | ||||
* - There is no delayed ack timer in progress. | * - There is no delayed ack timer in progress. | ||||
* - Our last ack wasn't a 0-sized window. We never want to delay | * - Our last ack wasn't a 0-sized window. We never want to delay | ||||
* the ack that opens up a 0-sized window. | * the ack that opens up a 0-sized window. | ||||
* - LRO wasn't used for this segment. We make sure by checking that the | * - LRO wasn't used for this segment. We make sure by checking that the | ||||
* segment size is not larger than the MSS. | * segment size is not larger than the MSS. | ||||
* - Delayed acks are enabled or this is a half-synchronized T/TCP | |||||
* connection. | |||||
*/ | */ | ||||
#define DELAY_ACK(tp, tlen) \ | #define DELAY_ACK(tp, tlen) \ | ||||
((!tcp_timer_active(tp, TT_DELACK) && \ | ((!tcp_timer_active(tp, TT_DELACK) && \ | ||||
(tp->t_flags & TF_RXWIN0SENT) == 0) && \ | (tp->t_flags & TF_RXWIN0SENT) == 0) && \ | ||||
(tlen <= tp->t_maxopd) && \ | (tlen <= tp->t_maxseg) && \ | ||||
(V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN))) | (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN))) | ||||
static void inline | static void inline | ||||
cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos) | cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos) | ||||
{ | { | ||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
if (CC_ALGO(tp)->ecnpkt_handler != NULL) { | if (CC_ALGO(tp)->ecnpkt_handler != NULL) { | ||||
▲ Show 20 Lines • Show All 1,989 Lines • ▼ Show 20 Lines | else | ||||
* from the last ack with SACK doesn't get used. | * from the last ack with SACK doesn't get used. | ||||
*/ | */ | ||||
tp->sackhint.sacked_bytes = 0; | tp->sackhint.sacked_bytes = 0; | ||||
/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ | /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ | ||||
hhook_run_tcp_est_in(tp, th, &to); | hhook_run_tcp_est_in(tp, th, &to); | ||||
if (SEQ_LEQ(th->th_ack, tp->snd_una)) { | if (SEQ_LEQ(th->th_ack, tp->snd_una)) { | ||||
u_int maxseg; | |||||
maxseg = tcp_maxseg(tp); | |||||
if (tlen == 0 && | if (tlen == 0 && | ||||
(tiwin == tp->snd_wnd || | (tiwin == tp->snd_wnd || | ||||
(tp->t_flags & TF_SACK_PERMIT))) { | (tp->t_flags & TF_SACK_PERMIT))) { | ||||
/* | /* | ||||
* If this is the first time we've seen a | * If this is the first time we've seen a | ||||
* FIN from the remote, this is not a | * FIN from the remote, this is not a | ||||
* duplicate and it needs to be processed | * duplicate and it needs to be processed | ||||
* normally. This happens during a | * normally. This happens during a | ||||
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines | if (SEQ_LEQ(th->th_ack, tp->snd_una)) { | ||||
*/ | */ | ||||
if (V_tcp_do_rfc6675_pipe) | if (V_tcp_do_rfc6675_pipe) | ||||
awnd = tcp_compute_pipe(tp); | awnd = tcp_compute_pipe(tp); | ||||
else | else | ||||
awnd = (tp->snd_nxt - tp->snd_fack) + | awnd = (tp->snd_nxt - tp->snd_fack) + | ||||
tp->sackhint.sack_bytes_rexmit; | tp->sackhint.sack_bytes_rexmit; | ||||
if (awnd < tp->snd_ssthresh) { | if (awnd < tp->snd_ssthresh) { | ||||
tp->snd_cwnd += tp->t_maxseg; | tp->snd_cwnd += maxseg; | ||||
if (tp->snd_cwnd > tp->snd_ssthresh) | if (tp->snd_cwnd > tp->snd_ssthresh) | ||||
tp->snd_cwnd = tp->snd_ssthresh; | tp->snd_cwnd = tp->snd_ssthresh; | ||||
} | } | ||||
} else | } else | ||||
tp->snd_cwnd += tp->t_maxseg; | tp->snd_cwnd += maxseg; | ||||
(void) tp->t_fb->tfb_tcp_output(tp); | (void) tp->t_fb->tfb_tcp_output(tp); | ||||
goto drop; | goto drop; | ||||
} else if (tp->t_dupacks == tcprexmtthresh) { | } else if (tp->t_dupacks == tcprexmtthresh) { | ||||
tcp_seq onxt = tp->snd_nxt; | tcp_seq onxt = tp->snd_nxt; | ||||
/* | /* | ||||
* If we're doing sack, check to | * If we're doing sack, check to | ||||
* see if we're already in sack | * see if we're already in sack | ||||
Show All 17 Lines | if (SEQ_LEQ(th->th_ack, tp->snd_una)) { | ||||
cc_cong_signal(tp, th, CC_NDUPACK); | cc_cong_signal(tp, th, CC_NDUPACK); | ||||
cc_ack_received(tp, th, CC_DUPACK); | cc_ack_received(tp, th, CC_DUPACK); | ||||
tcp_timer_activate(tp, TT_REXMT, 0); | tcp_timer_activate(tp, TT_REXMT, 0); | ||||
tp->t_rtttime = 0; | tp->t_rtttime = 0; | ||||
if (tp->t_flags & TF_SACK_PERMIT) { | if (tp->t_flags & TF_SACK_PERMIT) { | ||||
TCPSTAT_INC( | TCPSTAT_INC( | ||||
tcps_sack_recovery_episode); | tcps_sack_recovery_episode); | ||||
tp->sack_newdata = tp->snd_nxt; | tp->sack_newdata = tp->snd_nxt; | ||||
tp->snd_cwnd = tp->t_maxseg; | tp->snd_cwnd = maxseg; | ||||
(void) tp->t_fb->tfb_tcp_output(tp); | (void) tp->t_fb->tfb_tcp_output(tp); | ||||
goto drop; | goto drop; | ||||
} | } | ||||
tp->snd_nxt = th->th_ack; | tp->snd_nxt = th->th_ack; | ||||
tp->snd_cwnd = tp->t_maxseg; | tp->snd_cwnd = maxseg; | ||||
(void) tp->t_fb->tfb_tcp_output(tp); | (void) tp->t_fb->tfb_tcp_output(tp); | ||||
KASSERT(tp->snd_limited <= 2, | KASSERT(tp->snd_limited <= 2, | ||||
("%s: tp->snd_limited too big", | ("%s: tp->snd_limited too big", | ||||
__func__)); | __func__)); | ||||
tp->snd_cwnd = tp->snd_ssthresh + | tp->snd_cwnd = tp->snd_ssthresh + | ||||
tp->t_maxseg * | maxseg * | ||||
(tp->t_dupacks - tp->snd_limited); | (tp->t_dupacks - tp->snd_limited); | ||||
if (SEQ_GT(onxt, tp->snd_nxt)) | if (SEQ_GT(onxt, tp->snd_nxt)) | ||||
tp->snd_nxt = onxt; | tp->snd_nxt = onxt; | ||||
goto drop; | goto drop; | ||||
} else if (V_tcp_do_rfc3042) { | } else if (V_tcp_do_rfc3042) { | ||||
/* | /* | ||||
* Process first and second duplicate | * Process first and second duplicate | ||||
* ACKs. Each indicates a segment | * ACKs. Each indicates a segment | ||||
Show All 14 Lines | if (SEQ_LEQ(th->th_ack, tp->snd_una)) { | ||||
tp->t_dupacks == 2, | tp->t_dupacks == 2, | ||||
("%s: dupacks not 1 or 2", | ("%s: dupacks not 1 or 2", | ||||
__func__)); | __func__)); | ||||
if (tp->t_dupacks == 1) | if (tp->t_dupacks == 1) | ||||
tp->snd_limited = 0; | tp->snd_limited = 0; | ||||
tp->snd_cwnd = | tp->snd_cwnd = | ||||
(tp->snd_nxt - tp->snd_una) + | (tp->snd_nxt - tp->snd_una) + | ||||
(tp->t_dupacks - tp->snd_limited) * | (tp->t_dupacks - tp->snd_limited) * | ||||
tp->t_maxseg; | maxseg; | ||||
/* | /* | ||||
* Only call tcp_output when there | * Only call tcp_output when there | ||||
* is new data available to be sent. | * is new data available to be sent. | ||||
* Otherwise we would send pure ACKs. | * Otherwise we would send pure ACKs. | ||||
*/ | */ | ||||
SOCKBUF_LOCK(&so->so_snd); | SOCKBUF_LOCK(&so->so_snd); | ||||
avail = sbavail(&so->so_snd) - | avail = sbavail(&so->so_snd) - | ||||
(tp->snd_nxt - tp->snd_una); | (tp->snd_nxt - tp->snd_una); | ||||
SOCKBUF_UNLOCK(&so->so_snd); | SOCKBUF_UNLOCK(&so->so_snd); | ||||
if (avail > 0) | if (avail > 0) | ||||
(void) tp->t_fb->tfb_tcp_output(tp); | (void) tp->t_fb->tfb_tcp_output(tp); | ||||
sent = tp->snd_max - oldsndmax; | sent = tp->snd_max - oldsndmax; | ||||
if (sent > tp->t_maxseg) { | if (sent > maxseg) { | ||||
KASSERT((tp->t_dupacks == 2 && | KASSERT((tp->t_dupacks == 2 && | ||||
tp->snd_limited == 0) || | tp->snd_limited == 0) || | ||||
(sent == tp->t_maxseg + 1 && | (sent == maxseg + 1 && | ||||
tp->t_flags & TF_SENTFIN), | tp->t_flags & TF_SENTFIN), | ||||
("%s: sent too much", | ("%s: sent too much", | ||||
__func__)); | __func__)); | ||||
tp->snd_limited = 2; | tp->snd_limited = 2; | ||||
} else if (sent > 0) | } else if (sent > 0) | ||||
++tp->snd_limited; | ++tp->snd_limited; | ||||
tp->snd_cwnd = oldcwnd; | tp->snd_cwnd = oldcwnd; | ||||
goto drop; | goto drop; | ||||
▲ Show 20 Lines • Show All 836 Lines • ▼ Show 20 Lines | |||||
* or the destination isn't local, use a default, hopefully conservative | * or the destination isn't local, use a default, hopefully conservative | ||||
* size (usually 512 or the default IP max size, but no more than the mtu | * size (usually 512 or the default IP max size, but no more than the mtu | ||||
* of the interface), as we can't discover anything about intervening | * of the interface), as we can't discover anything about intervening | ||||
* gateways or networks. We also initialize the congestion/slow start | * gateways or networks. We also initialize the congestion/slow start | ||||
* window to be a single segment if the destination isn't local. | * window to be a single segment if the destination isn't local. | ||||
* While looking at the routing entry, we also initialize other path-dependent | * While looking at the routing entry, we also initialize other path-dependent | ||||
* parameters from pre-set or cached values in the routing entry. | * parameters from pre-set or cached values in the routing entry. | ||||
* | * | ||||
* Also take into account the space needed for options that we | * NOTE that resulting t_maxseg doesn't include space for TCP options or | ||||
* send regularly. Make maxseg shorter by that amount to assure | * IP options, e.g. IPSEC data, since length of this data may vary, and | ||||
* that we can send maxseg amount of data even when the options | * thus it is calculated for every segment separately in tcp_output(). | ||||
* are present. Store the upper limit of the length of options plus | |||||
* data in maxopd. | |||||
* | * | ||||
* NOTE that this routine is only called when we process an incoming | * NOTE that this routine is only called when we process an incoming | ||||
* segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS | * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS | ||||
* settings are handled in tcp_mssopt(). | * settings are handled in tcp_mssopt(). | ||||
*/ | */ | ||||
void | void | ||||
tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, | tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, | ||||
struct hc_metrics_lite *metricptr, struct tcp_ifcap *cap) | struct hc_metrics_lite *metricptr, struct tcp_ifcap *cap) | ||||
{ | { | ||||
int mss = 0; | int mss = 0; | ||||
u_long maxmtu = 0; | u_long maxmtu = 0; | ||||
struct inpcb *inp = tp->t_inpcb; | struct inpcb *inp = tp->t_inpcb; | ||||
struct hc_metrics_lite metrics; | struct hc_metrics_lite metrics; | ||||
int origoffer; | |||||
#ifdef INET6 | #ifdef INET6 | ||||
int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; | int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; | ||||
size_t min_protoh = isipv6 ? | size_t min_protoh = isipv6 ? | ||||
sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : | sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : | ||||
sizeof (struct tcpiphdr); | sizeof (struct tcpiphdr); | ||||
#else | #else | ||||
const size_t min_protoh = sizeof(struct tcpiphdr); | const size_t min_protoh = sizeof(struct tcpiphdr); | ||||
#endif | #endif | ||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
if (mtuoffer != -1) { | if (mtuoffer != -1) { | ||||
KASSERT(offer == -1, ("%s: conflict", __func__)); | KASSERT(offer == -1, ("%s: conflict", __func__)); | ||||
offer = mtuoffer - min_protoh; | offer = mtuoffer - min_protoh; | ||||
} | } | ||||
origoffer = offer; | |||||
/* Initialize. */ | /* Initialize. */ | ||||
#ifdef INET6 | #ifdef INET6 | ||||
if (isipv6) { | if (isipv6) { | ||||
maxmtu = tcp_maxmtu6(&inp->inp_inc, cap); | maxmtu = tcp_maxmtu6(&inp->inp_inc, cap); | ||||
tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt; | tp->t_maxseg = V_tcp_v6mssdflt; | ||||
} | } | ||||
#endif | #endif | ||||
#if defined(INET) && defined(INET6) | #if defined(INET) && defined(INET6) | ||||
else | else | ||||
#endif | #endif | ||||
#ifdef INET | #ifdef INET | ||||
{ | { | ||||
maxmtu = tcp_maxmtu(&inp->inp_inc, cap); | maxmtu = tcp_maxmtu(&inp->inp_inc, cap); | ||||
tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt; | tp->t_maxseg = V_tcp_mssdflt; | ||||
} | } | ||||
#endif | #endif | ||||
/* | /* | ||||
* No route to sender, stay with default mss and return. | * No route to sender, stay with default mss and return. | ||||
*/ | */ | ||||
if (maxmtu == 0) { | if (maxmtu == 0) { | ||||
/* | /* | ||||
* In case we return early we need to initialize metrics | * In case we return early we need to initialize metrics | ||||
* to a defined state as tcp_hc_get() would do for us | * to a defined state as tcp_hc_get() would do for us | ||||
* if there was no cache hit. | * if there was no cache hit. | ||||
*/ | */ | ||||
if (metricptr != NULL) | if (metricptr != NULL) | ||||
bzero(metricptr, sizeof(struct hc_metrics_lite)); | bzero(metricptr, sizeof(struct hc_metrics_lite)); | ||||
return; | return; | ||||
} | } | ||||
/* What have we got? */ | /* What have we got? */ | ||||
switch (offer) { | switch (offer) { | ||||
case 0: | case 0: | ||||
/* | /* | ||||
* Offer == 0 means that there was no MSS on the SYN | * Offer == 0 means that there was no MSS on the SYN | ||||
* segment, in this case we use tcp_mssdflt as | * segment, in this case we use tcp_mssdflt as | ||||
* already assigned to t_maxopd above. | * already assigned to t_maxseg above. | ||||
*/ | */ | ||||
offer = tp->t_maxopd; | offer = tp->t_maxseg; | ||||
break; | break; | ||||
case -1: | case -1: | ||||
/* | /* | ||||
* Offer == -1 means that we didn't receive SYN yet. | * Offer == -1 means that we didn't receive SYN yet. | ||||
*/ | */ | ||||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||||
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | #endif | ||||
* to before. Then, they will reply with an MSS value which | * to before. Then, they will reply with an MSS value which | ||||
* will get recorded and the new parameters should get | * will get recorded and the new parameters should get | ||||
* recomputed. For Further Study. | * recomputed. For Further Study. | ||||
*/ | */ | ||||
} | } | ||||
mss = min(mss, offer); | mss = min(mss, offer); | ||||
/* | /* | ||||
* Sanity check: make sure that maxopd will be large | * Sanity check: make sure that maxseg will be large | ||||
* enough to allow some data on segments even if | * enough to allow some data on segments even if | ||||
* all the option space is used (40 bytes). Otherwise | * all the option space is used (40 bytes). Otherwise | ||||
* funny things may happen in tcp_output. | * funny things may happen in tcp_output. | ||||
* | |||||
* XXXGL: shouldn't we reserve space for IP/IPv6 options? | |||||
*/ | */ | ||||
mss = max(mss, 64); | mss = max(mss, 64); | ||||
/* | |||||
* maxopd stores the maximum length of data AND options | |||||
* in a segment; maxseg is the amount of data in a normal | |||||
* segment. We need to store this value (maxopd) apart | |||||
* from maxseg, because now every segment carries options | |||||
* and thus we normally have somewhat less data in segments. | |||||
*/ | |||||
tp->t_maxopd = mss; | |||||
/* | |||||
* origoffer==-1 indicates that no segments were received yet. | |||||
* In this case we just guess. | |||||
*/ | |||||
if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && | |||||
(origoffer == -1 || | |||||
(tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)) | |||||
mss -= TCPOLEN_TSTAMP_APPA; | |||||
tp->t_maxseg = mss; | tp->t_maxseg = mss; | ||||
} | } | ||||
void | void | ||||
tcp_mss(struct tcpcb *tp, int offer) | tcp_mss(struct tcpcb *tp, int offer) | ||||
{ | { | ||||
int mss; | int mss; | ||||
u_long bufsize; | u_long bufsize; | ||||
▲ Show 20 Lines • Show All 106 Lines • ▼ Show 20 Lines | |||||
* next unacknowledged segment. Do not clear tp->t_dupacks. | * next unacknowledged segment. Do not clear tp->t_dupacks. | ||||
* By setting snd_nxt to ti_ack, this forces retransmission timer to | * By setting snd_nxt to ti_ack, this forces retransmission timer to | ||||
* be started again. | * be started again. | ||||
*/ | */ | ||||
void | void | ||||
tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th) | tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th) | ||||
{ | { | ||||
tcp_seq onxt = tp->snd_nxt; | tcp_seq onxt = tp->snd_nxt; | ||||
u_long ocwnd = tp->snd_cwnd; | u_long ocwnd = tp->snd_cwnd; | ||||
u_int maxseg = tcp_maxseg(tp); | |||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
tcp_timer_activate(tp, TT_REXMT, 0); | tcp_timer_activate(tp, TT_REXMT, 0); | ||||
tp->t_rtttime = 0; | tp->t_rtttime = 0; | ||||
tp->snd_nxt = th->th_ack; | tp->snd_nxt = th->th_ack; | ||||
/* | /* | ||||
* Set snd_cwnd to one segment beyond acknowledged offset. | * Set snd_cwnd to one segment beyond acknowledged offset. | ||||
* (tp->snd_una has not yet been updated when this function is called.) | * (tp->snd_una has not yet been updated when this function is called.) | ||||
*/ | */ | ||||
tp->snd_cwnd = tp->t_maxseg + BYTES_THIS_ACK(tp, th); | tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th); | ||||
tp->t_flags |= TF_ACKNOW; | tp->t_flags |= TF_ACKNOW; | ||||
(void) tp->t_fb->tfb_tcp_output(tp); | (void) tp->t_fb->tfb_tcp_output(tp); | ||||
tp->snd_cwnd = ocwnd; | tp->snd_cwnd = ocwnd; | ||||
if (SEQ_GT(onxt, tp->snd_nxt)) | if (SEQ_GT(onxt, tp->snd_nxt)) | ||||
tp->snd_nxt = onxt; | tp->snd_nxt = onxt; | ||||
/* | /* | ||||
* Partial window deflation. Relies on fact that tp->snd_una | * Partial window deflation. Relies on fact that tp->snd_una | ||||
* not updated yet. | * not updated yet. | ||||
*/ | */ | ||||
if (tp->snd_cwnd > BYTES_THIS_ACK(tp, th)) | if (tp->snd_cwnd > BYTES_THIS_ACK(tp, th)) | ||||
tp->snd_cwnd -= BYTES_THIS_ACK(tp, th); | tp->snd_cwnd -= BYTES_THIS_ACK(tp, th); | ||||
else | else | ||||
tp->snd_cwnd = 0; | tp->snd_cwnd = 0; | ||||
tp->snd_cwnd += tp->t_maxseg; | tp->snd_cwnd += maxseg; | ||||
} | } | ||||
int | int | ||||
tcp_compute_pipe(struct tcpcb *tp) | tcp_compute_pipe(struct tcpcb *tp) | ||||
{ | { | ||||
return (tp->snd_max - tp->snd_una + | return (tp->snd_max - tp->snd_una + | ||||
tp->sackhint.sack_bytes_rexmit - | tp->sackhint.sack_bytes_rexmit - | ||||
tp->sackhint.sacked_bytes); | tp->sackhint.sacked_bytes); | ||||
} | } |