Index: share/man/man4/cc_newreno.4 =================================================================== --- share/man/man4/cc_newreno.4 +++ share/man/man4/cc_newreno.4 @@ -30,17 +30,71 @@ .\" .\" $FreeBSD$ .\" -.Dd September 15, 2011 +.Dd August 2, 2017 .Dt CC_NEWRENO 4 .Os .Sh NAME .Nm cc_newreno .Nd NewReno Congestion Control Algorithm +.Sh SYNOPSIS +.In netinet/cc/cc_newreno.h .Sh DESCRIPTION The NewReno congestion control algorithm is the default for TCP. Details about the algorithm can be found in RFC5681. +.Sh Socket Options +The +.Nm +module supports a number of socket options under TCP_CCALGOOPT (refer to +.Xr tcp 4 +and +.Xr mod_cc 9 for details) +which can +be set with +.Xr setsockopt 2 +and tested with +.Xr getsockopt 2 . +The +.Nm +socket options use the following structure defined in +.In netinet/cc/cc_newreno.h : +.Bd -literal +struct cc_newreno_opts { + int name; + uint32_t val; +}; +.Ed +.Bl -tag -width ".Va CC_NEWRENO_BETA_ECN" +.It Va CC_NEWRENO_BETA +Multiplicative window decrease factor, specified as a percentage, applied to +the congestion window in response to a congestion signal per: cwnd = (cwnd * +CC_NEWRENO_BETA) / 100. +Default is 50. +.It Va CC_NEWRENO_BETA_ECN +Multiplicative window decrease factor, specified as a percentage, applied to +the congestion window in response to an ECN congestion signal when +.Va net.inet.tcp.abe=1 +per: cwnd = (cwnd * CC_NEWRENO_BETA_ECN) / 100. +Default is 80. +.El .Sh MIB Variables -There are currently no tunable MIB variables. +The algorithm exposes the following variables in the +.Va net.inet.tcp.cc.newreno +branch of the +.Xr sysctl 3 +MIB: +.Bl -tag -width ".Va beta_ecn" +.It Va beta +Multiplicative window decrease factor, specified as a percentage, applied to +the congestion window in response to a congestion signal per: cwnd = (cwnd * +beta) / 100. +Default is 50. 
+.It Va beta_ecn +Multiplicative window decrease factor, specified as a percentage, applied to +the congestion window in response to an ECN congestion signal when +.Va net.inet.tcp.abe=1 +per: cwnd = (cwnd * beta_ecn) / 100. +Default is 80. +.El .Sh SEE ALSO .Xr cc_chd 4 , .Xr cc_cubic 4 , @@ -50,6 +104,24 @@ .Xr mod_cc 4 , .Xr tcp 4 , .Xr mod_cc 9 +.Rs +.%A "Mark Allman" +.%A "Vern Paxson" +.%A "Ethan Blanton" +.%T "TCP Congestion Control" +.%O "RFC 5681" +.Re +.Rs +.%A "Naeem Khademi" +.%A "Michael Welzl" +.%A "Grenville Armitage" +.%A "Gorry Fairhurst" +.%T "TCP Alternative Backoff with ECN (ABE)" +.%R "internet draft" +.%D "May 2017" +.%N "draft-ietf-tcpm-alternativebackoff-ecn" +.%O "work in progress" +.Re .Sh ACKNOWLEDGEMENTS Development and testing of this software were made possible in part by grants from the FreeBSD Foundation and Cisco University Research Program Fund at @@ -78,5 +150,8 @@ and .An David Hayes Aq Mt david.hayes@ieee.org . .Pp +Support for TCP ABE was added by +.An Tom Jones Aq Mt tj@enoti.me . +.Pp This manual page was written by .An Lawrence Stewart Aq Mt lstewart@FreeBSD.org . Index: share/man/man4/tcp.4 =================================================================== --- share/man/man4/tcp.4 +++ share/man/man4/tcp.4 @@ -34,7 +34,7 @@ .\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd February 6, 2017 +.Dd August 2, 2017 .Dt TCP 4 .Os .Sh NAME @@ -489,6 +489,14 @@ This helps throughput in general, but particularly affects short transfers and high-bandwidth large propagation-delay connections. +.It Va abe +Enable support for draft-ietf-tcpm-alternativebackoff-ecn, +which alters the window decrease factor applied to the congestion window in +response to an ECN congestion signal. +Refer to +.Xr mod_cc 4 +and individual congestion control man pages to determine if they implement +support for ABE and for configuration details. 
.It Va sack.enable Enable support for RFC 2018, TCP Selective Acknowledgment option, which allows the receiver to inform the sender about all successfully Index: sys/netinet/cc/cc_newreno.h =================================================================== --- sys/netinet/cc/cc_newreno.h +++ sys/netinet/cc/cc_newreno.h @@ -0,0 +1,42 @@ +/*- + * Copyright (c) 2017 Tom Jones + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _CC_NEWRENO_H +#define _CC_NEWRENO_H + +#define CCALGONAME_NEWRENO "newreno" + +struct cc_newreno_opts { + int name; /* Option selector: CC_NEWRENO_BETA or CC_NEWRENO_BETA_ECN. */ + uint32_t val; /* Value read from or written to the selected option. */ +}; + +#define CC_NEWRENO_BETA 1 /* cc_newreno_opts.name value selecting beta. */ +#define CC_NEWRENO_BETA_ECN 2 /* cc_newreno_opts.name value selecting beta_ecn. */ + +#endif /* _CC_NEWRENO_H */ Index: sys/netinet/cc/cc_newreno.c =================================================================== --- sys/netinet/cc/cc_newreno.c +++ sys/netinet/cc/cc_newreno.c @@ -1,7 +1,7 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 * The Regents of the University of California. - * Copyright (c) 2007-2008,2010 + * Copyright (c) 2007-2008,2010,2014 * Swinburne University of Technology, Melbourne, Australia. * Copyright (c) 2009-2010 Lawrence Stewart * Copyright (c) 2010 The FreeBSD Foundation @@ -46,6 +46,11 @@ * University Research Program Fund at Community Foundation Silicon Valley. * More details are available at: * http://caia.swin.edu.au/urp/newtcp/ + * + * Dec 2014 garmitage@swin.edu.au + * Borrowed code fragments from cc_cdg.c to add modifiable beta + * via sysctls. 
+ * */ #include @@ -67,20 +72,68 @@ #include #include #include +#include <netinet/cc/cc_newreno.h> +static MALLOC_DEFINE(M_NEWRENO, "newreno data", + "newreno beta values"); + +static int newreno_cb_init(struct cc_var *ccv); +static void newreno_cb_destroy(struct cc_var *ccv); static void newreno_ack_received(struct cc_var *ccv, uint16_t type); static void newreno_after_idle(struct cc_var *ccv); static void newreno_cong_signal(struct cc_var *ccv, uint32_t type); static void newreno_post_recovery(struct cc_var *ccv); +static int newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf); +static VNET_DEFINE(uint32_t, newreno_beta) = 50; +static VNET_DEFINE(uint32_t, newreno_beta_ecn) = 80; +#define V_newreno_beta VNET(newreno_beta) +#define V_newreno_beta_ecn VNET(newreno_beta_ecn) + struct cc_algo newreno_cc_algo = { .name = "newreno", + .cb_init = newreno_cb_init, + .cb_destroy = newreno_cb_destroy, .ack_received = newreno_ack_received, .after_idle = newreno_after_idle, .cong_signal = newreno_cong_signal, .post_recovery = newreno_post_recovery, + .ctl_output = newreno_ctl_output, }; +struct newreno { + uint32_t beta; /* Percentage of cwnd kept on a congestion signal. */ + uint32_t beta_ecn; /* As beta, used for CC_ECN signals when ABE is enabled. */ +}; + +/* + * Allocate per-connection state seeded from the sysctl defaults. Allocation + * failure is tolerated: cc_data stays NULL and newreno_cong_signal() then + * backs off by V_newreno_beta. + */ +static int +newreno_cb_init(struct cc_var *ccv) +{ + struct newreno *nreno; + + nreno = malloc(sizeof(*nreno), M_NEWRENO, M_NOWAIT | M_ZERO); + if (nreno != NULL) { + nreno->beta = V_newreno_beta; + nreno->beta_ecn = V_newreno_beta_ecn; + } + + ccv->cc_data = nreno; + + return (0); +} + +/* Release the per-connection state allocated by newreno_cb_init(). */ +static void +newreno_cb_destroy(struct cc_var *ccv) +{ + free(ccv->cc_data, M_NEWRENO); +} + static void newreno_ack_received(struct cc_var *ccv, uint16_t type) { @@ -182,27 +235,41 @@ static void newreno_cong_signal(struct cc_var *ccv, uint32_t type) { - u_int win; + struct newreno *nreno; + uint32_t cwin, factor; + u_int mss; + factor = V_newreno_beta; + nreno = ccv->cc_data; + if (nreno != NULL) { + if (V_tcp_do_abe) + factor = (type == CC_ECN ? nreno->beta_ecn: nreno->beta); + else + factor = nreno->beta; + } + + cwin = CCV(ccv, snd_cwnd); + mss = CCV(ccv, t_maxseg); + /* Catch algos which mistakenly leak private signal types. 
*/ KASSERT((type & CC_SIGPRIVMASK) == 0, ("%s: congestion signal type 0x%08x is private\n", __func__, type)); - win = max(CCV(ccv, snd_cwnd) / 2 / CCV(ccv, t_maxseg), 2) * - CCV(ccv, t_maxseg); + cwin = max(((uint64_t)cwin * (uint64_t)factor) / (100ULL * (uint64_t)mss), + 2) * mss; switch (type) { case CC_NDUPACK: if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) { if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) - CCV(ccv, snd_ssthresh) = win; + CCV(ccv, snd_ssthresh) = cwin; ENTER_RECOVERY(CCV(ccv, t_flags)); } break; case CC_ECN: if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { - CCV(ccv, snd_ssthresh) = win; - CCV(ccv, snd_cwnd) = win; + CCV(ccv, snd_ssthresh) = cwin; + CCV(ccv, snd_cwnd) = cwin; ENTER_CONGRECOVERY(CCV(ccv, t_flags)); } break; @@ -240,5 +307,101 @@ } } +/* + * Get or set the per-connection beta values through a cc_newreno_opts buffer. + * New values must be percentages in the range 1-100, the same limits the + * sysctl handler applies to the defaults; anything else returns EINVAL. + */ +static int +newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf) +{ + struct newreno *nreno; + struct cc_newreno_opts *opt; + if (sopt->sopt_valsize != sizeof(struct cc_newreno_opts)) + return (EMSGSIZE); + + nreno = ccv->cc_data; + if (nreno == NULL) + return (ENOPROTOOPT); + opt = buf; + + switch (sopt->sopt_dir) { + case SOPT_SET: + switch (opt->name) { + case CC_NEWRENO_BETA: + if (opt->val == 0 || opt->val > 100) + return (EINVAL); + nreno->beta = opt->val; + break; + case CC_NEWRENO_BETA_ECN: + if (!V_tcp_do_abe) + return (EACCES); + if (opt->val == 0 || opt->val > 100) + return (EINVAL); + nreno->beta_ecn = opt->val; + break; + default: + return (ENOPROTOOPT); + } + break; + case SOPT_GET: + switch (opt->name) { + case CC_NEWRENO_BETA: + opt->val = nreno->beta; + break; + case CC_NEWRENO_BETA_ECN: + opt->val = nreno->beta_ecn; + break; + default: + return (ENOPROTOOPT); + } + break; + default: + return (EINVAL); + } + + return (0); +} + +/* + * Sysctl handler for the beta and beta_ecn defaults. The request is first + * copied into a local through sysctl_handle_int(), so the caller-supplied + * req->newptr is never dereferenced directly; the default is only updated + * once the new value has passed the ABE and 1-100 range checks. + */ +static int +newreno_beta_handler(SYSCTL_HANDLER_ARGS) +{ + int error; + uint32_t new; + + new = *(uint32_t *)arg1; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr != NULL) { + if (arg1 == &VNET_NAME(newreno_beta_ecn) && !V_tcp_do_abe) + error = EACCES; + else if (new == 0 || new > 100) + error = EINVAL; + else + *(uint32_t *)arg1 = new; + } + + return (error); +} + +SYSCTL_DECL(_net_inet_tcp_cc_newreno); 
+SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, newreno, CTLFLAG_RW, NULL, + "New Reno related settings"); + +SYSCTL_PROC(_net_inet_tcp_cc_newreno, OID_AUTO, beta, + CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, + &VNET_NAME(newreno_beta), 3, &newreno_beta_handler, "IU", + "New Reno beta, specified as number between 1 and 100"); + +SYSCTL_PROC(_net_inet_tcp_cc_newreno, OID_AUTO, beta_ecn, + CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, + &VNET_NAME(newreno_beta_ecn), 3, &newreno_beta_handler, "IU", + "New Reno beta ecn, specified as number between 1 and 100"); + DECLARE_CC_MODULE(newreno, &newreno_cc_algo); Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c +++ sys/netinet/tcp_input.c @@ -181,6 +181,11 @@ &VNET_NAME(tcp_abc_l_var), 2, "Cap the max cwnd increment during slow-start to this number of segments"); +VNET_DEFINE(int, tcp_do_abe) = 0; /* ABE off by default; set via net.inet.tcp.abe */ +SYSCTL_INT(_net_inet_tcp, OID_AUTO, abe, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(tcp_do_abe), 0, + "Enable draft-ietf-tcpm-alternativebackoff-ecn (TCP Alternative Backoff with ECN) "); + static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN"); VNET_DEFINE(int, tcp_do_ecn) = 2; Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -714,6 +714,7 @@ VNET_DECLARE(int, path_mtu_discovery); VNET_DECLARE(int, tcp_do_rfc3465); VNET_DECLARE(int, tcp_abc_l_var); +VNET_DECLARE(int, tcp_do_abe); /* ABE enabled/disabled */ #define V_tcb VNET(tcb) #define V_tcbinfo VNET(tcbinfo) #define V_tcp_mssdflt VNET(tcp_mssdflt) @@ -726,6 +727,7 @@ #define V_path_mtu_discovery VNET(path_mtu_discovery) #define V_tcp_do_rfc3465 VNET(tcp_do_rfc3465) #define V_tcp_abc_l_var VNET(tcp_abc_l_var) +#define V_tcp_do_abe VNET(tcp_do_abe) VNET_DECLARE(int, tcp_do_sack); /* SACK enabled/disabled */ VNET_DECLARE(int, tcp_sc_rst_sock_fail); /* RST on sock alloc failure */