diff --git a/sys/netinet/cc/cc.h b/sys/netinet/cc/cc.h
--- a/sys/netinet/cc/cc.h
+++ b/sys/netinet/cc/cc.h
@@ -231,6 +231,9 @@
 void newreno_cc_after_idle(struct cc_var *);
 void newreno_cc_cong_signal(struct cc_var *, ccsignal_t);
 void newreno_cc_ack_received(struct cc_var *, ccsignal_t);
+u_int newreno_new_cw_on_multi_decrease(struct cc_var *ccv, uint32_t mss);
+u_int newreno_new_cw_in_cong_avoid(struct cc_var *ccv);
+u_int newreno_new_cw_in_slow_start(struct cc_var *ccv);
 
 /* Called to temporarily keep an algo from going away during change */
 void cc_refer(struct cc_algo *algo);
diff --git a/sys/netinet/cc/cc.c b/sys/netinet/cc/cc.c
--- a/sys/netinet/cc/cc.c
+++ b/sys/netinet/cc/cc.c
@@ -449,15 +449,14 @@
 }
 
 /*
- * Perform any necessary tasks before we enter congestion recovery.
- */
-void
-newreno_cc_cong_signal(struct cc_var *ccv, ccsignal_t type)
+ * Get a new congestion window size on a multiplicative decrease event.
+ */
+u_int
+newreno_new_cw_on_multi_decrease(struct cc_var *ccv, uint32_t mss)
 {
-	uint32_t cwin, factor, mss, pipe;
+	uint32_t cwin, factor;
 
 	cwin = CCV(ccv, snd_cwnd);
-	mss = tcp_fixed_maxseg(ccv->tp);
 	/*
 	 * Other TCP congestion controls use newreno_cong_signal(), but
 	 * with their own private cc_data. Make sure the cc_data is used
@@ -465,12 +464,24 @@
 	 */
 	factor = V_newreno_beta;
 
+	return max(((uint64_t)cwin * (uint64_t)factor) / (100ULL * (uint64_t)mss), 2) * mss;
+}
+
+/*
+ * Perform any necessary tasks before we enter congestion recovery.
+ */
+void
+newreno_cc_cong_signal(struct cc_var *ccv, ccsignal_t type)
+{
+	uint32_t cwin, mss, pipe;
+
+	mss = tcp_fixed_maxseg(ccv->tp);
+
 	/* Catch algos which mistakenly leak private signal types. */
 	KASSERT((type & CC_SIGPRIVMASK) == 0,
 	    ("%s: congestion signal type 0x%08x is private\n", __func__, type));
 
-	cwin = max(((uint64_t)cwin * (uint64_t)factor) / (100ULL * (uint64_t)mss),
-	    2) * mss;
+	cwin = newreno_new_cw_on_multi_decrease(ccv, mss);
 
 	switch (type) {
 	case CC_NDUPACK:
@@ -506,78 +517,109 @@
 	}
 }
 
-void
-newreno_cc_ack_received(struct cc_var *ccv, ccsignal_t type)
+u_int
+newreno_new_cw_in_cong_avoid(struct cc_var *ccv)
 {
-	if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
-	    (ccv->flags & CCF_CWND_LIMITED)) {
-		u_int cw = CCV(ccv, snd_cwnd);
-		u_int incr = CCV(ccv, t_maxseg);
+	u_int cw = CCV(ccv, snd_cwnd);
+	u_int incr = CCV(ccv, t_maxseg);
+
+	KASSERT(cw > CCV(ccv, snd_ssthresh),
+	    ("congestion control state not in congestion avoidance\n"));
 
+	/*
+	 * Regular in-order ACK, open the congestion window.
+	 * The congestion control state we're in is congestion avoidance.
+	 *
+	 * Check if ABC (RFC 3465) is enabled.
+	 * cong avoid: cwnd > ssthresh
+	 *
+	 * cong avoid and ABC (RFC 3465):
+	 *   Grow cwnd linearly by maxseg per RTT for each
+	 *   cwnd worth of ACKed data.
+	 *
+	 * cong avoid without ABC (RFC 5681):
+	 *   Grow cwnd linearly by approximately maxseg per RTT using
+	 *   maxseg^2 / cwnd per ACK as the increment.
+	 *   If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
+	 *   avoid capping cwnd.
+	 */
+	if (V_tcp_do_rfc3465) {
+		if (ccv->flags & CCF_ABC_SENTAWND)
+			ccv->flags &= ~CCF_ABC_SENTAWND;
+		else
+			incr = 0;
+	} else
+		incr = max((incr * incr / cw), 1);
+	/* ABC is on by default, so incr equals 0 frequently. */
+	if (incr > 0)
+		return min(cw + incr, TCP_MAXWIN << CCV(ccv, snd_scale));
+	else
+		return cw;
+}
+
+u_int
+newreno_new_cw_in_slow_start(struct cc_var *ccv)
+{
+	u_int cw = CCV(ccv, snd_cwnd);
+	u_int incr = CCV(ccv, t_maxseg);
+
+	KASSERT(cw <= CCV(ccv, snd_ssthresh),
+	    ("congestion control state not in slow start\n"));
+
+	/*
+	 * Regular in-order ACK, open the congestion window.
+	 * The congestion control state we're in is slow start.
+	 *
+	 * slow start: cwnd <= ssthresh
+	 *
+	 * slow start and ABC (RFC 3465):
+	 *   Grow cwnd exponentially by the amount of data
+	 *   ACKed capping the max increment per ACK to
+	 *   (abc_l_var * maxseg) bytes.
+	 *
+	 * slow start without ABC (RFC 5681):
+	 *   Grow cwnd exponentially by maxseg per ACK.
+	 */
+	if (V_tcp_do_rfc3465) {
 		/*
-		 * Regular in-order ACK, open the congestion window.
-		 * Method depends on which congestion control state we're
-		 * in (slow start or cong avoid) and if ABC (RFC 3465) is
-		 * enabled.
-		 *
-		 * slow start: cwnd <= ssthresh
-		 * cong avoid: cwnd > ssthresh
-		 *
-		 * slow start and ABC (RFC 3465):
-		 *   Grow cwnd exponentially by the amount of data
-		 *   ACKed capping the max increment per ACK to
-		 *   (abc_l_var * maxseg) bytes.
-		 *
-		 * slow start without ABC (RFC 5681):
-		 *   Grow cwnd exponentially by maxseg per ACK.
-		 *
-		 * cong avoid and ABC (RFC 3465):
-		 *   Grow cwnd linearly by maxseg per RTT for each
-		 *   cwnd worth of ACKed data.
+		 * In slow-start with ABC enabled and no RTO in sight?
+		 * (Must not use abc_l_var > 1 if slow starting after
+		 * an RTO. On RTO, snd_nxt = snd_una, so the
+		 * snd_nxt == snd_max check is sufficient to
+		 * handle this).
 		 *
-		 * cong avoid without ABC (RFC 5681):
-		 *   Grow cwnd linearly by approximately maxseg per RTT using
-		 *   maxseg^2 / cwnd per ACK as the increment.
-		 *   If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
-		 *   avoid capping cwnd.
+		 * XXXLAS: Find a way to signal SS after RTO that
+		 * doesn't rely on tcpcb vars.
 		 */
-		if (cw > CCV(ccv, snd_ssthresh)) {
-			if (V_tcp_do_rfc3465) {
-				if (ccv->flags & CCF_ABC_SENTAWND)
-					ccv->flags &= ~CCF_ABC_SENTAWND;
-				else
-					incr = 0;
-			} else
-				incr = max((incr * incr / cw), 1);
-		} else if (V_tcp_do_rfc3465) {
-			/*
-			 * In slow-start with ABC enabled and no RTO in sight?
-			 * (Must not use abc_l_var > 1 if slow starting after
-			 * an RTO. On RTO, snd_nxt = snd_una, so the
-			 * snd_nxt == snd_max check is sufficient to
-			 * handle this).
-			 *
-			 * XXXLAS: Find a way to signal SS after RTO that
-			 * doesn't rely on tcpcb vars.
-			 */
-			uint16_t abc_val;
-
-			if (ccv->flags & CCF_USE_LOCAL_ABC)
-				abc_val = ccv->labc;
-			else
-				abc_val = V_tcp_abc_l_var;
-			if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
-				incr = min(ccv->bytes_this_ack,
-				    ccv->nsegs * abc_val *
-				    CCV(ccv, t_maxseg));
-			else
-				incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg));
+		uint16_t abc_val;
+		if (ccv->flags & CCF_USE_LOCAL_ABC)
+			abc_val = ccv->labc;
+		else
+			abc_val = V_tcp_abc_l_var;
+		if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
+			incr = min(ccv->bytes_this_ack,
+			    ccv->nsegs * abc_val * CCV(ccv, t_maxseg));
+		else
+			incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg));
+	}
+	/* ABC is on by default, so incr equals 0 frequently. */
+	if (incr > 0)
+		return min(cw + incr, TCP_MAXWIN << CCV(ccv, snd_scale));
+	else
+		return cw;
+}
+
+void
+newreno_cc_ack_received(struct cc_var *ccv, ccsignal_t type)
+{
+	if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
+	    (ccv->flags & CCF_CWND_LIMITED)) {
+		if (CCV(ccv, snd_cwnd) > CCV(ccv, snd_ssthresh)) {
+			CCV(ccv, snd_cwnd) = newreno_new_cw_in_cong_avoid(ccv);
+		} else {
+			CCV(ccv, snd_cwnd) = newreno_new_cw_in_slow_start(ccv);
 		}
-		/* ABC is on by default, so incr equals 0 frequently. */
-		if (incr > 0)
-			CCV(ccv, snd_cwnd) = min(cw + incr,
-			    TCP_MAXWIN << CCV(ccv, snd_scale));
 	}
 }
diff --git a/sys/netinet/cc/cc_cubic.h b/sys/netinet/cc/cc_cubic.h
--- a/sys/netinet/cc/cc_cubic.h
+++ b/sys/netinet/cc/cc_cubic.h
@@ -83,6 +83,7 @@
 #define CUBICFLAG_RTO_EVENT		0x00000008	/* RTO experienced */
 #define CUBICFLAG_HYSTART_ENABLED	0x00000010	/* Hystart++ is enabled */
 #define CUBICFLAG_HYSTART_IN_CSS	0x00000020	/* We are in Hystart++ CSS */
+#define CUBICFLAG_IN_TF			0x00000040	/* We are in TCP friendly region */
 
 /* Kernel only bits */
 #ifdef _KERNEL
@@ -286,22 +287,13 @@
 }
 
 /*
- * Compute an approximation of the "TCP friendly" cwnd some number of usecs
- * after a congestion event that is designed to yield the same average cwnd as
- * NewReno while using CUBIC's beta of 0.7. RTT should be the average RTT
- * estimate for the path measured over the previous congestion epoch and wmax is
- * the value of cwnd at the last congestion event.
+ * Compute the "TCP friendly" cwnd as NewReno would in congestion avoidance.
  */
 static __inline unsigned long
-tf_cwnd(int usecs_since_epoch, int rtt_usecs, unsigned long wmax,
-    uint32_t smss)
+tf_cwnd(struct cc_var *ccv)
 {
-
-	/* Equation 4 of I-D. */
-	return (((wmax * CUBIC_BETA) +
-	    (((THREE_X_PT3 * (unsigned long)usecs_since_epoch *
-	    (unsigned long)smss) << CUBIC_SHIFT) / (TWO_SUB_PT3 * rtt_usecs)))
-	    >> CUBIC_SHIFT);
+	/* newreno is "TCP friendly" */
+	return newreno_new_cw_in_cong_avoid(ccv);
 }
 
 #endif /* _NETINET_CC_CUBIC_H_ */
diff --git a/sys/netinet/cc/cc_cubic.c b/sys/netinet/cc/cc_cubic.c
--- a/sys/netinet/cc/cc_cubic.c
+++ b/sys/netinet/cc/cc_cubic.c
@@ -288,31 +288,26 @@
 				usecs_since_epoch = INT_MAX;
 				cubic_data->t_epoch = ticks - INT_MAX;
 			}
+
+			W_est = tf_cwnd(ccv);
+
 			/*
 			 * The mean RTT is used to best reflect the equations in
-			 * the I-D. Using min_rtt in the tf_cwnd calculation
-			 * causes W_est to grow much faster than it should if the
-			 * RTT is dominated by network buffering rather than
-			 * propagation delay.
+			 * the I-D.
 			 */
-			W_est = tf_cwnd(usecs_since_epoch, cubic_data->mean_rtt_usecs,
-			    cubic_data->W_max, CCV(ccv, t_maxseg));
-
 			W_cubic = cubic_cwnd(usecs_since_epoch +
 			    cubic_data->mean_rtt_usecs, cubic_data->W_max,
 			    CCV(ccv, t_maxseg), cubic_data->K);
 
-			ccv->flags &= ~CCF_ABC_SENTAWND;
-
 			if (W_cubic < W_est) {
 				/*
 				 * TCP-friendly region, follow tf
 				 * cwnd growth.
 				 */
-				if (CCV(ccv, snd_cwnd) < W_est)
-					CCV(ccv, snd_cwnd) = ulmin(W_est, INT_MAX);
+				CCV(ccv, snd_cwnd) = ulmin(W_est, INT_MAX);
+				cubic_data->flags |= CUBICFLAG_IN_TF;
 			} else if (CCV(ccv, snd_cwnd) < W_cubic) {
 				/*
 				 * Concave or convex region, follow CUBIC
 				 * cwnd growth.
 				 * Only update snd_cwnd, if it doesn't shrink.
 				 */
 				CCV(ccv, snd_cwnd) = ulmin(W_cubic, INT_MAX);
+				cubic_data->flags &= ~CUBICFLAG_IN_TF;
 			}
 
 			/*
@@ -644,19 +640,23 @@
 	cubic_data->undo_W_max = cubic_data->W_max;
 	cubic_data->W_max = cwnd;
 
-	/*
-	 * On the first congestion event, set ssthresh to cwnd * 0.5
-	 * and reduce W_max to cwnd * beta. This aligns the cubic concave
-	 * region appropriately. On subsequent congestion events, set
-	 * ssthresh to cwnd * beta.
-	 */
-	if ((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) {
+	if (cubic_data->flags & CUBICFLAG_IN_TF) {
+		/* If in the TCP friendly region, follow what newreno does */
+		ssthresh = newreno_new_cw_on_multi_decrease(ccv, maxseg);
+
+	} else if ((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) {
+		/*
+		 * On the first congestion event, set ssthresh to cwnd * 0.5
+		 * and reduce W_max to cwnd * beta. This aligns the cubic
+		 * concave region appropriately.
+		 */
 		ssthresh = cwnd >> 1;
-		cubic_data->W_max = ((uint64_t)cwnd *
-		    CUBIC_BETA) >> CUBIC_SHIFT;
+		cubic_data->W_max = ((uint64_t)cwnd * CUBIC_BETA) >> CUBIC_SHIFT;
 	} else {
-		ssthresh = ((uint64_t)cwnd *
-		    CUBIC_BETA) >> CUBIC_SHIFT;
+		/*
+		 * On subsequent congestion events, set ssthresh to cwnd * beta.
+		 */
+		ssthresh = ((uint64_t)cwnd * CUBIC_BETA) >> CUBIC_SHIFT;
 	}
 	CCV(ccv, snd_ssthresh) = max(ssthresh, 2 * maxseg);
 }
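
For reference, the arithmetic that the new newreno_new_cw_on_multi_decrease() helper encapsulates is the same multiplicative decrease the old inline code performed: scale cwnd by the beta percentage, floor the result at two segments, and round down to a whole number of segments. The stand-alone user-space sketch below only illustrates that calculation; it is not part of the diff, every name in it is local to the example, and the beta value of 50 is assumed here as NewReno's conventional halving.

/*
 * Stand-alone sketch (not part of the patch) of the arithmetic inside
 * newreno_new_cw_on_multi_decrease().  All names are local to this example;
 * beta is expressed as a percentage, the way V_newreno_beta is.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
example_cw_on_multi_decrease(uint32_t cwnd, uint32_t mss, uint32_t beta_percent)
{
	/* Scale cwnd by beta, working in whole segments. */
	uint64_t segs = ((uint64_t)cwnd * beta_percent) / (100ULL * mss);

	/* Never come out of a decrease with fewer than two segments. */
	if (segs < 2)
		segs = 2;

	/* Return a cwnd that is a whole multiple of the segment size. */
	return ((uint32_t)(segs * mss));
}

int
main(void)
{
	/* Example: 100 segments of 1448 bytes, beta = 50 -> 50 segments (72400). */
	printf("%u\n", example_cw_on_multi_decrease(100 * 1448, 1448, 50));
	return (0);
}

The two-segment floor and the final multiplication by mss keep the returned window segment-aligned and stop it from collapsing toward zero across repeated decrease events.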