Index: sys/modules/cc/Makefile =================================================================== --- sys/modules/cc/Makefile +++ sys/modules/cc/Makefile @@ -1,6 +1,7 @@ # $FreeBSD$ SUBDIR= cc_cubic \ + cc_cubic_8312bis \ cc_dctcp \ cc_htcp Index: sys/modules/cc/cc_cubic_8312bis/Makefile =================================================================== --- /dev/null +++ sys/modules/cc/cc_cubic_8312bis/Makefile @@ -0,0 +1,7 @@ +# $FreeBSD$ + +.PATH: ${SRCTOP}/sys/netinet/cc +KMOD= cc_cubic_8312bis +SRCS= cc_cubic_8312bis.c + +.include Index: sys/netinet/cc/cc_cubic_8312bis.h =================================================================== --- /dev/null +++ sys/netinet/cc/cc_cubic_8312bis.h @@ -0,0 +1,249 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2008-2010 Lawrence Stewart + * Copyright (c) 2010 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Lawrence Stewart while studying at the Centre + * for Advanced Internet Architectures, Swinburne University of Technology, made + * possible in part by a grant from the Cisco University Research Program Fund + * at Community Foundation Silicon Valley. + * + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by David Hayes under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NETINET_CC_CUBIC_H_ +#define _NETINET_CC_CUBIC_H_ + +#include + +/* Number of bits of precision for fixed point math calcs. */ +#define CUBIC_SHIFT 8 + +#define CUBIC_SHIFT_4 32 + +/* 0.5 << CUBIC_SHIFT. */ +#define RENO_BETA 128 + +/* ~0.7 << CUBIC_SHIFT. */ +#define CUBIC_BETA 179 + +/* ~0.3 << CUBIC_SHIFT. */ +#define ONE_SUB_CUBIC_BETA 77 + +/* 3 * ONE_SUB_CUBIC_BETA. */ +#define THREE_X_PT3 231 + +/* (2 << CUBIC_SHIFT) - ONE_SUB_CUBIC_BETA. */ +#define TWO_SUB_PT3 435 + +/* ~0.4 << CUBIC_SHIFT. */ +#define CUBIC_C_FACTOR 102 + +/* CUBIC fast convergence factor: (1+beta_cubic)/2. */ +#define CUBIC_FC_FACTOR 217 + +/* Don't trust s_rtt until this many rtt samples have been taken. */ +#define CUBIC_MIN_RTT_SAMPLES 8 + +/* + * (2^21)^3 is long max. Dividing (2^63) by Cubic_C_factor + * and taking cube-root yields 448845 as the effective useful limit + */ +#define CUBED_ROOT_MAX_ULONG 448845 + +/* Userland only bits. */ +#ifndef _KERNEL + +extern int hz; + +/* + * Implementation based on the formulae found in the CUBIC Internet Draft + * "draft-ietf-tcpm-cubic-04". 
+ * + */ + +static __inline float +theoretical_cubic_k(double wmax_pkts) +{ + double C; + + C = 0.4; + + return (pow((wmax_pkts * 0.3) / C, (1.0 / 3.0)) * pow(2, CUBIC_SHIFT)); +} + +static __inline unsigned long +theoretical_cubic_cwnd(int ticks_since_cong, unsigned long wmax, uint32_t smss) +{ + double C, wmax_pkts; + + C = 0.4; + wmax_pkts = wmax / (double)smss; + + return (smss * (wmax_pkts + + (C * pow(ticks_since_cong / (double)hz - + theoretical_cubic_k(wmax_pkts) / pow(2, CUBIC_SHIFT), 3.0)))); +} + +static __inline unsigned long +theoretical_reno_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax, + uint32_t smss) +{ + + return ((wmax * 0.5) + ((ticks_since_cong / (float)rtt_ticks) * smss)); +} + +static __inline unsigned long +theoretical_tf_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax, + uint32_t smss) +{ + + return ((wmax * 0.7) + ((3 * 0.3) / (2 - 0.3) * + (ticks_since_cong / (float)rtt_ticks) * smss)); +} + +#endif /* !_KERNEL */ + +/* + * Compute the CUBIC K value used in the cwnd calculation, using an + * implementation of eqn 2 in the I-D. The method used + * here is adapted from Apple Computer Technical Report #KT-32. + */ +static __inline int64_t +cubic_8312bis_k(unsigned long wmax_pkts) +{ + int64_t s, K; + uint16_t p; + + K = s = 0; + p = 0; + + /* (wmax * beta)/C with CUBIC_SHIFT worth of precision. */ + s = ((wmax_pkts * ONE_SUB_CUBIC_BETA) << CUBIC_SHIFT) / CUBIC_C_FACTOR; + + /* Rebase s to be between 1 and 1/8 with a shift of CUBIC_SHIFT. */ + while (s >= 256) { + s >>= 3; + p++; + } + + /* + * Some magic constants taken from the Apple TR with appropriate + * shifts: 275 == 1.072302 << CUBIC_SHIFT, 98 == 0.3812513 << + * CUBIC_SHIFT, 120 == 0.46946116 << CUBIC_SHIFT. + */ + K = (((s * 275) >> CUBIC_SHIFT) + 98) - + (((s * s * 120) >> CUBIC_SHIFT) >> CUBIC_SHIFT); + + /* Multiply by 2^p to undo the rebasing of s from above. 
*/ + return (K <<= p); +} + +/* + * Compute the new cwnd value using an implementation of eqn 1 from the I-D. + * Thanks to Kip Macy for help debugging this function. + * + * XXXLAS: Characterise bounds for overflow. + */ +static __inline unsigned long +cubic_cwnd(int ticks_since_cong, unsigned long wmax, uint32_t smss, int64_t K) +{ + int64_t cwnd; + + /* K is in fixed point form with CUBIC_SHIFT worth of precision. */ + + /* t - K, with CUBIC_SHIFT worth of precision. */ + cwnd = (((int64_t)ticks_since_cong << CUBIC_SHIFT) - (K * hz)) / hz; + + if (cwnd > CUBED_ROOT_MAX_ULONG) + return INT_MAX; + if (cwnd < -CUBED_ROOT_MAX_ULONG) + return 0; + + /* (t - K)^3, with CUBIC_SHIFT^3 worth of precision. */ + cwnd *= (cwnd * cwnd); + + /* + * C(t - K)^3 + wmax + * The down shift by CUBIC_SHIFT_4 is because cwnd has 4 lots of + * CUBIC_SHIFT included in the value. 3 from the cubing of cwnd above, + * and an extra from multiplying through by CUBIC_C_FACTOR. + */ + + cwnd = ((cwnd * CUBIC_C_FACTOR) >> CUBIC_SHIFT_4) * smss + wmax; + + /* + * for negative cwnd, limiting to zero as lower bound + */ + return (lmax(0,cwnd)); +} + +/* + * Compute an approximation of the NewReno cwnd some number of ticks after a + * congestion event. RTT should be the average RTT estimate for the path + * measured over the previous congestion epoch and wmax is the value of cwnd at + * the last congestion event. The "TCP friendly" concept in the CUBIC I-D is + * rather tricky to understand and it turns out this function is not required. + * It is left here for reference. + */ +static __inline unsigned long +reno_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax, + uint32_t smss) +{ + + /* + * For NewReno, beta = 0.5, therefore: W_tcp(t) = wmax*0.5 + t/RTT + * W_tcp(t) deals with cwnd/wmax in pkts, so because our cwnd is in + * bytes, we have to multiply by smss. 
+ */ + return (((wmax * RENO_BETA) + (((ticks_since_cong * smss) + << CUBIC_SHIFT) / rtt_ticks)) >> CUBIC_SHIFT); +} + +/* + * Compute an approximation of the "TCP friendly" cwnd some number of ticks + * after a congestion event that is designed to yield the same average cwnd as + * NewReno while using CUBIC's beta of 0.7. RTT should be the average RTT + * estimate for the path measured over the previous congestion epoch and wmax is + * the value of cwnd at the last congestion event. + */ +static __inline unsigned long +tf_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax, + uint32_t smss) +{ + + /* Equation 4 of I-D. */ + return (((wmax * CUBIC_BETA) + + (((THREE_X_PT3 * (unsigned long)ticks_since_cong * + (unsigned long)smss) << CUBIC_SHIFT) / (TWO_SUB_PT3 * rtt_ticks))) + >> CUBIC_SHIFT); +} + +#endif /* _NETINET_CC_CUBIC_H_ */ Index: sys/netinet/cc/cc_cubic_8312bis.c =================================================================== --- /dev/null +++ sys/netinet/cc/cc_cubic_8312bis.c @@ -0,0 +1,487 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2008-2010 Lawrence Stewart + * Copyright (c) 2010 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Lawrence Stewart while studying at the Centre + * for Advanced Internet Architectures, Swinburne University of Technology, made + * possible in part by a grant from the Cisco University Research Program Fund + * at Community Foundation Silicon Valley. + * + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by David Hayes under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * An implementation of the CUBIC congestion control algorithm for FreeBSD, + * based on the Internet Draft "draft-rhee-tcpm-cubic-02" by Rhee, Xu and Ha. + * Originally released as part of the NewTCP research project at Swinburne + * University of Technology's Centre for Advanced Internet Architectures, + * Melbourne, Australia, which was made possible in part by a grant from the + * Cisco University Research Program Fund at Community Foundation Silicon + * Valley. 
More details are available at: + * http://caia.swin.edu.au/urp/newtcp/ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +static void cubic_8312bis_ack_received(struct cc_var *ccv, uint16_t type); +static void cubic_8312bis_cb_destroy(struct cc_var *ccv); +static int cubic_8312bis_cb_init(struct cc_var *ccv); +static void cubic_8312bis_cong_signal(struct cc_var *ccv, uint32_t type); +static void cubic_8312bis_conn_init(struct cc_var *ccv); +static int cubic_8312bis_mod_init(void); +static void cubic_8312bis_post_recovery(struct cc_var *ccv); +static void cubic_8312bis_record_rtt(struct cc_var *ccv); +static void cubic_8312bis_ssthresh_update(struct cc_var *ccv, uint32_t maxseg); +static void cubic_8312bis_after_idle(struct cc_var *ccv); + +struct cubic_8312bis { + /* Cubic K in fixed point form with CUBIC_SHIFT worth of precision. */ + int64_t K; + /* Sum of RTT samples across an epoch in ticks. */ + int64_t sum_rtt_ticks; + /* cwnd at the most recent congestion event. */ + unsigned long max_cwnd; + /* cwnd at the previous congestion event. */ + unsigned long prev_max_cwnd; + /* A copy of prev_max_cwnd. Used for CC_RTO_ERR */ + unsigned long prev_max_cwnd_cp; + /* various flags */ + uint32_t flags; +#define CUBICFLAG_CONG_EVENT 0x00000001 /* congestion experienced */ +#define CUBICFLAG_IN_SLOWSTART 0x00000002 /* in slow start */ +#define CUBICFLAG_IN_APPLIMIT 0x00000004 /* application limited */ +#define CUBICFLAG_RTO_EVENT 0x00000008 /* RTO experienced */ + /* Minimum observed rtt in ticks. */ + int min_rtt_ticks; + /* Mean observed rtt between congestion epochs. */ + int mean_rtt_ticks; + /* ACKs since last congestion event. */ + int epoch_ack_count; + /* Timestamp (in ticks) of arriving in congestion avoidance from last + * congestion event. 
+ */ + int t_last_cong; + /* Timestamp (in ticks) of a previous congestion event. Used for + * CC_RTO_ERR. + */ + int t_last_cong_prev; +}; + +static MALLOC_DEFINE(M_CUBIC, "cubic data", + "Per connection data required for the CUBIC congestion control algorithm"); + +struct cc_algo cubic_8312bis_cc_algo = { + .name = "cubic_8312bis", + .ack_received = cubic_8312bis_ack_received, + .cb_destroy = cubic_8312bis_cb_destroy, + .cb_init = cubic_8312bis_cb_init, + .cong_signal = cubic_8312bis_cong_signal, + .conn_init = cubic_8312bis_conn_init, + .mod_init = cubic_8312bis_mod_init, + .post_recovery = cubic_8312bis_post_recovery, + .after_idle = cubic_8312bis_after_idle, +}; + +static void +cubic_8312bis_ack_received(struct cc_var *ccv, uint16_t type) +{ + struct cubic_8312bis *cubic_data; + unsigned long w_tf, w_cubic_next; + int ticks_since_cong; + + cubic_data = ccv->cc_data; + cubic_8312bis_record_rtt(ccv); + + /* + * For a regular ACK and we're not in cong/fast recovery and + * we're cwnd limited, always recalculate cwnd. + */ + if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && + (ccv->flags & CCF_CWND_LIMITED)) { + /* Use the logic in NewReno ack_received() for slow start. 
*/ + if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) || + cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) { + cubic_data->flags |= CUBICFLAG_IN_SLOWSTART; + newreno_cc_algo.ack_received(ccv, type); + } else { + if ((cubic_data->flags & CUBICFLAG_RTO_EVENT) && + (cubic_data->flags & CUBICFLAG_IN_SLOWSTART)) { + /* RFC8312 Section 4.7 */ + cubic_data->flags &= ~(CUBICFLAG_RTO_EVENT | + CUBICFLAG_IN_SLOWSTART); + cubic_data->max_cwnd = CCV(ccv, snd_cwnd); + cubic_data->K = 0; + } else if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | + CUBICFLAG_IN_APPLIMIT)) { + cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART | + CUBICFLAG_IN_APPLIMIT); + cubic_data->t_last_cong = ticks; + cubic_data->K = cubic_8312bis_k(cubic_data->max_cwnd / + CCV(ccv, t_maxseg)); + } + if ((ticks_since_cong = + ticks - cubic_data->t_last_cong) < 0) { + /* + * dragging t_last_cong along + */ + ticks_since_cong = INT_MAX; + cubic_data->t_last_cong = ticks - INT_MAX; + } + /* + * The mean RTT is used to best reflect the equations in + * the I-D. Using min_rtt in the tf_cwnd calculation + * causes w_tf to grow much faster than it should if the + * RTT is dominated by network buffering rather than + * propagation delay. + */ + w_tf = tf_cwnd(ticks_since_cong, + cubic_data->mean_rtt_ticks, cubic_data->max_cwnd, + CCV(ccv, t_maxseg)); + + w_cubic_next = cubic_cwnd(ticks_since_cong + + cubic_data->mean_rtt_ticks, cubic_data->max_cwnd, + CCV(ccv, t_maxseg), cubic_data->K); + + ccv->flags &= ~CCF_ABC_SENTAWND; + + if (w_cubic_next < w_tf) { + /* + * TCP-friendly region, follow tf + * cwnd growth. + */ + if (CCV(ccv, snd_cwnd) < w_tf) + CCV(ccv, snd_cwnd) = ulmin(w_tf, INT_MAX); + } else if (CCV(ccv, snd_cwnd) < w_cubic_next) { + /* + * Concave or convex region, follow CUBIC + * cwnd growth. + * Only update snd_cwnd, if it doesn't shrink. 
+ */ + CCV(ccv, snd_cwnd) = ulmin(w_cubic_next, + INT_MAX); + } + + /* + * If we're not in slow start and we're probing for a + * new cwnd limit at the start of a connection + * (happens when hostcache has a relevant entry), + * keep updating our current estimate of the + * max_cwnd. + */ + if (((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) && + cubic_data->max_cwnd < CCV(ccv, snd_cwnd)) { + cubic_data->max_cwnd = CCV(ccv, snd_cwnd); + cubic_data->K = cubic_8312bis_k(cubic_data->max_cwnd / + CCV(ccv, t_maxseg)); + } + } + } else if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && + !(ccv->flags & CCF_CWND_LIMITED)) { + cubic_data->flags |= CUBICFLAG_IN_APPLIMIT; + } +} + +/* + * This is a Cubic specific implementation of after_idle. + * - Reset cwnd by calling New Reno implementation of after_idle. + * - Reset t_last_cong. + */ +static void +cubic_8312bis_after_idle(struct cc_var *ccv) +{ + struct cubic_8312bis *cubic_data; + + cubic_data = ccv->cc_data; + + cubic_data->max_cwnd = ulmax(cubic_data->max_cwnd, CCV(ccv, snd_cwnd)); + cubic_data->K = cubic_8312bis_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); + + newreno_cc_algo.after_idle(ccv); + cubic_data->t_last_cong = ticks; +} + +static void +cubic_8312bis_cb_destroy(struct cc_var *ccv) +{ + free(ccv->cc_data, M_CUBIC); +} + +static int +cubic_8312bis_cb_init(struct cc_var *ccv) +{ + struct cubic_8312bis *cubic_data; + + cubic_data = malloc(sizeof(struct cubic_8312bis), M_CUBIC, M_NOWAIT|M_ZERO); + + if (cubic_data == NULL) + return (ENOMEM); + + /* Init some key variables with sensible defaults. */ + cubic_data->t_last_cong = ticks; + cubic_data->min_rtt_ticks = TCPTV_SRTTBASE; + cubic_data->mean_rtt_ticks = 1; + + ccv->cc_data = cubic_data; + + return (0); +} + +/* + * Perform any necessary tasks before we enter congestion recovery. 
+ */
+static void
+cubic_8312bis_cong_signal(struct cc_var *ccv, uint32_t type)
+{
+	struct cubic_8312bis *cubic_data;
+	u_int mss;
+
+	cubic_data = ccv->cc_data;
+	mss = tcp_maxseg(ccv->ccvc.tcp);
+
+	switch (type) {
+	case CC_NDUPACK:
+		if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) {
+			if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
+				cubic_8312bis_ssthresh_update(ccv, mss);
+				cubic_data->flags |= CUBICFLAG_CONG_EVENT;
+				cubic_data->t_last_cong = ticks;
+				cubic_data->K = cubic_8312bis_k(cubic_data->max_cwnd / mss);
+			}
+			ENTER_RECOVERY(CCV(ccv, t_flags));
+		}
+		break;
+
+	case CC_ECN:
+		if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
+			cubic_8312bis_ssthresh_update(ccv, mss);
+			cubic_data->flags |= CUBICFLAG_CONG_EVENT;
+			cubic_data->t_last_cong = ticks;
+			cubic_data->K = cubic_8312bis_k(cubic_data->max_cwnd / mss);
+			CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
+			ENTER_CONGRECOVERY(CCV(ccv, t_flags));
+		}
+		break;
+
+	case CC_RTO:
+		/* RFC8312 Section 4.7 */
+		/*
+		 * Snapshot state on the first retransmit so CC_RTO_ERR can
+		 * roll it back if the RTO turns out to be spurious.
+		 */
+		if (CCV(ccv, t_rxtshift) == 1) {
+			cubic_data->t_last_cong_prev = cubic_data->t_last_cong;
+			cubic_data->prev_max_cwnd_cp = cubic_data->prev_max_cwnd;
+		}
+		cubic_data->flags |= CUBICFLAG_CONG_EVENT | CUBICFLAG_RTO_EVENT;
+		cubic_data->prev_max_cwnd = cubic_data->max_cwnd;
+		CCV(ccv, snd_ssthresh) = ((uint64_t)CCV(ccv, snd_cwnd) *
+		    CUBIC_BETA) >> CUBIC_SHIFT;
+		CCV(ccv, snd_cwnd) = mss;
+		break;
+
+	case CC_RTO_ERR:
+		/* Spurious RTO: restore the state saved under CC_RTO. */
+		cubic_data->flags &= ~(CUBICFLAG_CONG_EVENT | CUBICFLAG_RTO_EVENT);
+		cubic_data->max_cwnd = cubic_data->prev_max_cwnd;
+		cubic_data->prev_max_cwnd = cubic_data->prev_max_cwnd_cp;
+		cubic_data->t_last_cong = cubic_data->t_last_cong_prev;
+		cubic_data->K = cubic_8312bis_k(cubic_data->max_cwnd / mss);
+		break;
+	}
+}
+
+static void
+cubic_8312bis_conn_init(struct cc_var *ccv)
+{
+	struct cubic_8312bis *cubic_data;
+
+	cubic_data = ccv->cc_data;
+
+	/*
+	 * Ensure we have a sane initial value for max_cwnd recorded. Without
+	 * this here bad things happen when entries from the TCP hostcache
+	 * get used.
+	 */
+	cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
+}
+
+static int
+cubic_8312bis_mod_init(void)
+{
+	return (0);
+}
+
+/*
+ * Perform any necessary tasks before we exit congestion recovery.
+ */
+static void
+cubic_8312bis_post_recovery(struct cc_var *ccv)
+{
+	struct cubic_8312bis *cubic_data;
+	int pipe;
+
+	cubic_data = ccv->cc_data;
+	pipe = 0;
+
+	if (IN_FASTRECOVERY(CCV(ccv, t_flags))) {
+		/*
+		 * If inflight data is less than ssthresh, set cwnd
+		 * conservatively to avoid a burst of data, as suggested in
+		 * the NewReno RFC. Otherwise, use the CUBIC method.
+		 *
+		 * XXXLAS: Find a way to do this without needing curack
+		 */
+		if (V_tcp_do_rfc6675_pipe)
+			pipe = tcp_compute_pipe(ccv->ccvc.tcp);
+		else
+			pipe = CCV(ccv, snd_max) - ccv->curack;
+
+		if (pipe < CCV(ccv, snd_ssthresh))
+			/*
+			 * Ensure that cwnd does not collapse to 1 MSS under
+			 * adverse conditions. Implements RFC6582
+			 */
+			CCV(ccv, snd_cwnd) = max(pipe, CCV(ccv, t_maxseg)) +
+			    CCV(ccv, t_maxseg);
+		else
+			/* Update cwnd based on beta and adjusted max_cwnd. */
+			CCV(ccv, snd_cwnd) = max(((uint64_t)cubic_data->max_cwnd *
+			    CUBIC_BETA) >> CUBIC_SHIFT,
+			    2 * CCV(ccv, t_maxseg));
+	}
+
+	/* Calculate the average RTT between congestion epochs. */
+	if (cubic_data->epoch_ack_count > 0 &&
+	    cubic_data->sum_rtt_ticks >= cubic_data->epoch_ack_count) {
+		cubic_data->mean_rtt_ticks = (int)(cubic_data->sum_rtt_ticks /
+		    cubic_data->epoch_ack_count);
+	}
+
+	cubic_data->epoch_ack_count = 0;
+	cubic_data->sum_rtt_ticks = 0;
+}
+
+/*
+ * Record the min RTT and sum samples for the epoch average RTT calculation.
+ */
+static void
+cubic_8312bis_record_rtt(struct cc_var *ccv)
+{
+	struct cubic_8312bis *cubic_data;
+	int t_srtt_ticks;
+
+	/* Ignore srtt until a min number of samples have been taken.
*/
+	if (CCV(ccv, t_rttupdated) >= CUBIC_MIN_RTT_SAMPLES) {
+		cubic_data = ccv->cc_data;
+		t_srtt_ticks = CCV(ccv, t_srtt) / TCP_RTT_SCALE;
+
+		/*
+		 * Record the current SRTT as our minrtt if it's the smallest
+		 * we've seen or minrtt is currently equal to its initialised
+		 * value.
+		 *
+		 * XXXLAS: Should there be some hysteresis for minrtt?
+		 */
+		if ((t_srtt_ticks < cubic_data->min_rtt_ticks ||
+		    cubic_data->min_rtt_ticks == TCPTV_SRTTBASE)) {
+			cubic_data->min_rtt_ticks = max(1, t_srtt_ticks);
+
+			/*
+			 * If the connection is within its first congestion
+			 * epoch, ensure we prime mean_rtt_ticks with a
+			 * reasonable value until the epoch average RTT is
+			 * calculated in cubic_post_recovery().
+			 */
+			if (cubic_data->min_rtt_ticks >
+			    cubic_data->mean_rtt_ticks)
+				cubic_data->mean_rtt_ticks =
+				    cubic_data->min_rtt_ticks;
+		}
+
+		/* Sum samples for epoch average RTT calculation. */
+		cubic_data->sum_rtt_ticks += t_srtt_ticks;
+		cubic_data->epoch_ack_count++;
+	}
+}
+
+/*
+ * Update the ssthresh in the event of congestion.
+ */
+static void
+cubic_8312bis_ssthresh_update(struct cc_var *ccv, uint32_t maxseg)
+{
+	struct cubic_8312bis *cubic_data;
+	uint32_t ssthresh;
+	uint32_t cwnd;
+
+	cubic_data = ccv->cc_data;
+	cwnd = CCV(ccv, snd_cwnd);
+
+	/* Fast convergence heuristic. */
+	if (cwnd < cubic_data->max_cwnd) {
+		cwnd = ((uint64_t)cwnd * CUBIC_FC_FACTOR) >> CUBIC_SHIFT;
+	}
+	cubic_data->prev_max_cwnd = cubic_data->max_cwnd;
+	cubic_data->max_cwnd = cwnd;
+
+	/*
+	 * On the first congestion event, set ssthresh to cwnd * 0.5
+	 * and reduce max_cwnd to cwnd * beta. This aligns the cubic concave
+	 * region appropriately. On subsequent congestion events, set
+	 * ssthresh to cwnd * beta.
+	 */
+	if ((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) {
+		ssthresh = cwnd >> 1;
+		cubic_data->max_cwnd = ((uint64_t)cwnd *
+		    CUBIC_BETA) >> CUBIC_SHIFT;
+	} else {
+		ssthresh = ((uint64_t)cwnd *
+		    CUBIC_BETA) >> CUBIC_SHIFT;
+	}
+	/* Never let ssthresh fall below 2 segments (RFC 5681 floor). */
+	CCV(ccv, snd_ssthresh) = max(ssthresh, 2 * maxseg);
+}
+
+DECLARE_CC_MODULE(cubic_8312bis, &cubic_8312bis_cc_algo);
+MODULE_VERSION(cubic_8312bis, 1);