Index: sys/netinet/cc/cc_cubic.h
===================================================================
--- sys/netinet/cc/cc_cubic.h
+++ sys/netinet/cc/cc_cubic.h
@@ -42,6 +42,8 @@
 /* Number of bits of precision for fixed point math calcs. */
 #define CUBIC_SHIFT 8
+#define CUBIC_SCALE (1<> CUBIC_SHIFT);
+	    smss) << CUBIC_SHIFT) / (TWO_SUB_PT2 * rtt_ticks))) >> CUBIC_SHIFT);
 }
 
 #endif /* _NETINET_CC_CUBIC_H_ */
Index: sys/netinet/cc/cc_cubic.c
===================================================================
--- sys/netinet/cc/cc_cubic.c
+++ sys/netinet/cc/cc_cubic.c
@@ -96,11 +96,23 @@
 	int epoch_ack_count;
 	/* Time of last congestion event in ticks. */
 	int t_last_cong;
+	/* Time of last congestion event in ticks for previous event. */
+	int t_last_cong_prev;
 };
 
 static MALLOC_DEFINE(M_CUBIC, "cubic data",
     "Per connection data required for the CUBIC congestion control algorithm");
+
+/* Declare sysctl tree and populate it. */
+SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, cubic, CTLFLAG_RW, NULL,
+    "cubic congestion control related settings");
+
+static int fast_convergence = 1;
+SYSCTL_INT(_net_inet_tcp_cc_cubic, OID_AUTO, fast_convergence, CTLFLAG_RWTUN,
+    &fast_convergence, 0, "Enable fast convergence as specified in Section 3.6.");
+
+
 struct cc_algo cubic_cc_algo = {
 	.name = "cubic",
 	.ack_received = cubic_ack_received,
@@ -243,7 +255,6 @@
 		if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
 			cubic_ssthresh_update(ccv);
 			cubic_data->num_cong_events++;
-			cubic_data->prev_max_cwnd = cubic_data->max_cwnd;
 			cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
 		}
 		ENTER_RECOVERY(CCV(ccv, t_flags));
@@ -254,25 +265,31 @@
 		if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
 			cubic_ssthresh_update(ccv);
 			cubic_data->num_cong_events++;
-			cubic_data->prev_max_cwnd = cubic_data->max_cwnd;
 			cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
 			cubic_data->t_last_cong = ticks;
+			/*
+			 * cubic_ssthresh_update(ccv) sets ssthresh to cwnd scaled by beta
+			 * so this assignment is equivalent
+			 */
 			CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
 			ENTER_CONGRECOVERY(CCV(ccv, t_flags));
 		}
 		break;
 
 	case CC_RTO:
-		/*
-		 * Grab the current time and record it so we know when the
-		 * most recent congestion event was. Only record it when the
-		 * timeout has fired more than once, as there is a reasonable
-		 * chance the first one is a false alarm and may not indicate
-		 * congestion.
-		 */
-		if (CCV(ccv, t_rxtshift) >= 2)
-			cubic_data->num_cong_events++;
-		cubic_data->t_last_cong = ticks;
+		if (CCV(ccv, t_rxtshift) == 1)
+			cubic_data->t_last_cong_prev = cubic_data->t_last_cong;
+		cubic_ssthresh_update(ccv);
+		cubic_data->num_cong_events++;
+		cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
+		cubic_data->t_last_cong = ticks;
+		CCV(ccv, snd_cwnd) = CCV(ccv, t_maxseg);
+		break;
+
+	case CC_RTO_ERR:
+		cubic_data->num_cong_events--;
+		cubic_data->t_last_cong = cubic_data->t_last_cong_prev;
+		cubic_data->max_cwnd = max(cubic_data->max_cwnd, cubic_data->prev_max_cwnd);
 		break;
 	}
 }
@@ -401,19 +418,52 @@
 static void
 cubic_ssthresh_update(struct cc_var *ccv)
 {
-	struct cubic *cubic_data;
+	struct cubic *cd;
+	u_long snd_ssthresh, snd_cwnd;
+
+	cd = ccv->cc_data;
+
+/* 3.5. Multiplicative decrease
+ *
+ * When a packet loss occurs, CUBIC reduces its window size by a factor
+ * of beta. Parameter beta_cubic SHOULD be set to 0.7.
+ *
+ *   W_max = cwnd;              // save window size before reduction
+ *   cwnd = cwnd * beta_cubic;  // window reduction
+ */
+	snd_ssthresh = (CCV(ccv, snd_cwnd) * CUBIC_BETA) >> CUBIC_SHIFT;
+	CCV(ccv, snd_ssthresh) = max(snd_ssthresh, 2*CCV(ccv, t_maxseg));
+
+/* 3.6. Fast convergence
+ *
+ * To improve the convergence speed of CUBIC, we add a heuristic in the
+ * protocol. When a new flow joins the network, existing flows in the
+ * network need to give up their bandwidth shares to allow the flow some
+ * room for growth if the existing flows have been using all the
+ * bandwidth of the network. To increase this release of bandwidth by
+ * existing flows, the following mechanism called fast convergence
+ * SHOULD be implemented.
+ *
+ * With fast convergence, when a loss event occurs, before a window
+ * reduction of congestion window, a flow remembers the last value of
+ * W_max before it updates W_max for the current loss event. Let us
+ * call the last value of W_max to be W_last_max.
+ *
+ *   if (W_max < W_last_max){ // check downward trend
+ *       W_last_max = W_max;  // remember the last W_max
+ *       W_max = W_max*(1+beta_cubic)/2; // further reduce W_max
+ *   } else { // check upward trend
+ *       W_last_max = W_max // remember the last W_max
+ *   }
+ */
-	cubic_data = ccv->cc_data;
+	snd_cwnd = CCV(ccv, snd_cwnd);
 
-	/*
-	 * On the first congestion event, set ssthresh to cwnd * 0.5, on
-	 * subsequent congestion events, set it to cwnd * beta.
-	 */
-	if (cubic_data->num_cong_events == 0)
-		CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd) >> 1;
+	if (snd_cwnd < cd->prev_max_cwnd && fast_convergence)
+		cd->prev_max_cwnd = (snd_cwnd * (CUBIC_SCALE + CUBIC_BETA))
+		    / (2 * CUBIC_SCALE);
 	else
-		CCV(ccv, snd_ssthresh) = (CCV(ccv, snd_cwnd) * CUBIC_BETA)
-		    >> CUBIC_SHIFT;
+		cd->prev_max_cwnd = snd_cwnd;
 }
Index: sys/netinet/cc/cc_dctcp.c
===================================================================
--- sys/netinet/cc/cc_dctcp.c
+++ sys/netinet/cc/cc_dctcp.c
@@ -232,7 +232,7 @@
 dctcp_cong_signal(struct cc_var *ccv, uint32_t type)
 {
 	struct dctcp *dctcp_data;
-	u_int win, mss;
+	u_long win, mss;
 
 	dctcp_data = ccv->cc_data;
 	win = CCV(ccv, snd_cwnd);
@@ -287,6 +287,8 @@
 			dctcp_data->save_sndnxt += CCV(ccv, t_maxseg);
 			dctcp_data->num_cong_events++;
 		}
+		CCV(ccv, snd_ssthresh) = max(2*mss, win/2);
+		CCV(ccv, snd_cwnd) = mss;
 		break;
 	}
 }
Index: sys/netinet/cc/cc_htcp.c
===================================================================
--- sys/netinet/cc/cc_htcp.c
+++ sys/netinet/cc/cc_htcp.c
@@ -271,7 +271,10 @@
 htcp_cong_signal(struct cc_var *ccv, uint32_t type)
 {
 	struct htcp *htcp_data;
+	u_long win, mss;
+
+	win = CCV(ccv, snd_cwnd);
+	mss = CCV(ccv, t_maxseg);
 	htcp_data = ccv->cc_data;
 
 	switch (type) {
@@ -320,6 +323,8 @@
 		 */
 		if (CCV(ccv, t_rxtshift) >= 2)
 			htcp_data->t_last_cong = ticks;
+		CCV(ccv, snd_ssthresh) = max(2*mss, win/2);
+		CCV(ccv, snd_cwnd) = mss;
 		break;
 	}
 }
Index: sys/netinet/cc/cc_newreno.c
===================================================================
--- sys/netinet/cc/cc_newreno.c
+++ sys/netinet/cc/cc_newreno.c
@@ -184,14 +184,16 @@
 static void
 newreno_cong_signal(struct cc_var *ccv, uint32_t type)
 {
-	u_int win;
+	u_long win, mss;
+
+	win = CCV(ccv, snd_cwnd);
+	mss = CCV(ccv, t_maxseg);
 
 	/* Catch algos which mistakenly leak private signal types. */
 	KASSERT((type & CC_SIGPRIVMASK) == 0,
 	    ("%s: congestion signal type 0x%08x is private\n", __func__, type));
 
-	win = max(CCV(ccv, snd_cwnd) / 2 / CCV(ccv, t_maxseg), 2) *
-	    CCV(ccv, t_maxseg);
+	win = max(2*mss, win/2);
 
 	switch (type) {
 	case CC_NDUPACK:
@@ -208,6 +210,10 @@
 			ENTER_CONGRECOVERY(CCV(ccv, t_flags));
 		}
 		break;
+	case CC_RTO:
+		CCV(ccv, snd_ssthresh) = win;
+		CCV(ccv, snd_cwnd) = mss;
+		break;
 	}
 }
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -321,8 +321,10 @@
 	if (type == CC_ACK) {
 		if (tp->snd_cwnd > tp->snd_ssthresh) {
+			/* Do we really need to filter out stretch ACKs? */
 			tp->t_bytes_acked += min(tp->ccv->bytes_this_ack,
 			     V_tcp_abc_l_var * tp->t_maxseg);
+
 			if (tp->t_bytes_acked >= tp->snd_cwnd) {
 				tp->t_bytes_acked -= tp->snd_cwnd;
 				tp->ccv->flags |= CCF_ABC_SENTAWND;
@@ -436,9 +438,11 @@
 		tp->t_dupacks = 0;
 		tp->t_bytes_acked = 0;
 		EXIT_RECOVERY(tp->t_flags);
-		tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 /
-		    tp->t_maxseg) * tp->t_maxseg;
-		tp->snd_cwnd = tp->t_maxseg;
+		if (CC_ALGO(tp)->cong_signal == NULL) {
+			tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 /
+			    tp->t_maxseg) * tp->t_maxseg;
+			tp->snd_cwnd = tp->t_maxseg;
+		}
 		break;
 	case CC_RTO_ERR:
 		TCPSTAT_INC(tcps_sndrexmitbad);