Page MenuHomeFreeBSD

D711.id1325.diff
No OneTemporary

D711.id1325.diff

Index: etc/mtree/BSD.include.dist
===================================================================
--- etc/mtree/BSD.include.dist
+++ etc/mtree/BSD.include.dist
@@ -270,6 +270,8 @@
..
..
netinet
+ cc
+ ..
..
netinet6
..
Index: include/Makefile
===================================================================
--- include/Makefile
+++ include/Makefile
@@ -51,7 +51,7 @@
geom/cache geom/concat geom/eli geom/gate geom/journal geom/label \
geom/mirror geom/mountver geom/multipath geom/nop \
geom/raid geom/raid3 geom/shsec geom/stripe geom/virstor \
- netgraph/atm netgraph/netflow \
+ netgraph/atm netgraph/netflow netinet/cc\
security/audit \
security/mac_biba security/mac_bsdextended security/mac_lomac \
security/mac_mls security/mac_partition \
Index: share/man/man4/mod_cc.4
===================================================================
--- share/man/man4/mod_cc.4
+++ share/man/man4/mod_cc.4
@@ -30,12 +30,16 @@
.\"
.\" $FreeBSD$
.\"
-.Dd September 15, 2011
+.Dd September 2, 2014
.Dt MOD_CC 4
.Os
.Sh NAME
.Nm mod_cc
.Nd Modular congestion control
+.Sh SYNOPSIS
+.In netinet/cc.h
+.Fn CC_SOCKOPT_DEFINE "ccsopt" "valsize"
+.Fn CC_SOCKOPT_RESET "ccsopt"
.Sh DESCRIPTION
The modular congestion control framework allows the TCP implementation to
dynamically change the congestion control algorithm used by new and existing
@@ -57,6 +61,56 @@
MIB variable detailed in the
.Sx MIB Variables
section below.
+.Pp
+Algorithm-specific parameters can be set or queried using the TCP_CCALGOOPT
+socket option (see
+.Xr tcp 4
+for details).
+Callers must pass a pointer to a
+.Vt struct cc_sockopt
+as
+.Va optval ,
+which has the following members:
+.Bd -literal -offset indent
+struct cc_sockopt {
+ char cc_name[TCP_CA_NAME_MAX];
+ socklen_t size;
+ int sopt_dir;
+ int sopt_name;
+ size_t sopt_valsize;
+ uint8_t sopt_val[];
+};
+.Ed
+.Pp
+The
+.Fn CC_SOCKOPT_DEFINE
+and
+.Fn CC_SOCKOPT_RESET
+macros provide convenience wrappers for working with
+.Vt struct cc_sockopt .
+Only the
+.Va cc_name ,
+.Va sopt_name ,
+.Va sopt_valsize
+and
+.Va sopt_val
+fields of
+.Vt struct cc_sockopt
+should be set explicitly by callers as required.
+If reusing a
+.Vt struct cc_sockopt
+within a scoped area of code, the
+.Fn CC_SOCKOPT_RESET
+macro should be used to restore the
+.Va size
+and
+.Va sopt_valsize
+members to the values initially set by
+.Fn CC_SOCKOPT_DEFINE .
+.Pp
+The maximum amount of data that can be passed to or from the algorithm
+module's
+.Va ctl_output
+hook is dictated by the
+.Va valsize
+used in
+.Pa sys/netinet/tcp_usrreq.c
+for the temporary ccsopt variable (currently sizeof(uint32_t)).
.Sh MIB Variables
The framework exposes the following variables in the
.Va net.inet.tcp.cc
Index: share/man/man4/tcp.4
===================================================================
--- share/man/man4/tcp.4
+++ share/man/man4/tcp.4
@@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd November 8, 2013
+.Dd September 2, 2014
.Dt TCP 4
.Os
.Sh NAME
@@ -137,6 +137,11 @@
receive window size,
and
bandwidth-controlled window space.
+.It Dv TCP_CCALGOOPT
+Set or query congestion control algorithm-specific parameters.
+See
+.Xr mod_cc 4
+for details.
.It Dv TCP_CONGESTION
Select or query the congestion control algorithm that TCP will use for the
connection.
Index: share/man/man9/mod_cc.9
===================================================================
--- share/man/man9/mod_cc.9
+++ share/man/man9/mod_cc.9
@@ -31,7 +31,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd September 15, 2011
+.Dd September 2, 2014
.Dt MOD_CC 9
.Os
.Sh NAME
@@ -74,6 +74,7 @@
void (*cong_signal) (struct cc_var *ccv, uint32_t type);
void (*post_recovery) (struct cc_var *ccv);
void (*after_idle) (struct cc_var *ccv);
+ int (*ctl_output)(struct cc_var *ccv, struct cc_sockopt *ccsopt);
};
.Ed
.Pp
@@ -166,6 +167,23 @@
It should be implemented to adjust state as required.
.Pp
The
+.Va ctl_output
+function is called when
+.Xr getsockopt 2
+or
+.Xr setsockopt 2
+is called on a
+.Xr tcp 4
+socket with the
+.Va optname
+argument set to TCP_CCALGOOPT and allows the caller to query or set
+algorithm-specific parameters.
+See
+.Xr mod_cc 4
+for information on
+.Vt struct cc_sockopt .
+.Pp
+The
.Fn DECLARE_CC_MODULE
macro provides a convenient wrapper around the
.Xr DECLARE_MODULE 9
Index: sys/netinet/cc.h
===================================================================
--- sys/netinet/cc.h
+++ sys/netinet/cc.h
@@ -54,6 +54,31 @@
/* XXX: TCP_CA_NAME_MAX define lives in tcp.h for compat reasons. */
#include <netinet/tcp.h>
+/*
+ * Shared structure with userspace for CC algorithm sockopt manipulation.
+ * A pointer to this structure is passed as optval for the TCP_CCALGOOPT
+ * socket option; sopt_val is a flexible array member, so the structure is
+ * always followed by sopt_valsize bytes of algorithm specific data.
+ */
+struct cc_sockopt {
+ char cc_name[TCP_CA_NAME_MAX]; /* CC algo the sockopt applies to. */
+ socklen_t size; /* Total struct size, including the variable length sopt_val. */
+ int sopt_dir; /* SOPT_GET or SOPT_SET; filled in by the kernel before ctl_output. */
+ int sopt_name; /* CC algo specific sockopt identifier. */
+ size_t sopt_valsize; /* Available space or data in sopt_val. */
+ uint8_t sopt_val[]; /* Variable length data. */
+};
+
+/*
+ * Reset the size and sopt_valsize members of a struct cc_sockopt declared
+ * with CC_SOCKOPT_DEFINE(x, ...) so that they again describe the whole
+ * backing buffer and the space available in sopt_val respectively.
+ * 'x' must be the identifier that was passed to CC_SOCKOPT_DEFINE in the
+ * current scope, as it is pasted into the backing buffer's name.
+ */
+#define CC_SOCKOPT_RESET(x) \
+do { \
+ (x)->size = sizeof(_ccsopt_ ## x); \
+ (x)->sopt_valsize = sizeof(_ccsopt_ ## x) - sizeof(struct cc_sockopt); \
+} while (0)
+
+/*
+ * Declare backing storage for a struct cc_sockopt with 'valsize' bytes of
+ * sopt_val space, declare 'x' as a pointer to it and initialise the size
+ * members via CC_SOCKOPT_RESET.
+ * The byte array is explicitly aligned for struct cc_sockopt; a plain
+ * uint8_t array only guarantees byte alignment, so accessing the size_t and
+ * int members through 'x' could otherwise be misaligned.
+ * 'valsize' is parenthesised so that arbitrary expressions may be passed.
+ */
+#define CC_SOCKOPT_DEFINE(x, valsize) \
+ uint8_t _ccsopt_ ## x [(valsize) + sizeof(struct cc_sockopt)] \
+ __aligned(__alignof(struct cc_sockopt)); \
+ struct cc_sockopt *x = (struct cc_sockopt *)_ccsopt_ ## x; \
+ CC_SOCKOPT_RESET(x)
+
+#ifdef _KERNEL
+
/* Global CC vars. */
extern STAILQ_HEAD(cc_head, cc_algo) cc_list;
extern const int tcprexmtthresh;
@@ -143,6 +168,9 @@
/* Called when data transfer resumes after an idle period. */
void (*after_idle)(struct cc_var *ccv);
+ /* Called for {get|set}sockopt() on a TCP socket with TCP_CCALGOOPT. */
+ int (*ctl_output)(struct cc_var *ccv, struct cc_sockopt *ccsopt);
+
STAILQ_ENTRY (cc_algo) entries;
};
@@ -164,4 +192,6 @@
#define CC_LIST_WUNLOCK() rw_wunlock(&cc_list_lock)
#define CC_LIST_LOCK_ASSERT() rw_assert(&cc_list_lock, RA_LOCKED)
+#endif /* _KERNEL */
+
#endif /* _NETINET_CC_H_ */
Index: sys/netinet/tcp.h
===================================================================
--- sys/netinet/tcp.h
+++ sys/netinet/tcp.h
@@ -161,6 +161,7 @@
#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */
#define TCP_INFO 32 /* retrieve tcp_info structure */
#define TCP_CONGESTION 64 /* get/set congestion control algorithm */
+#define TCP_CCALGOOPT 65 /* get/set cc algorithm specific options */
#define TCP_KEEPINIT 128 /* N, time to establish connection */
#define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */
#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -1303,6 +1303,7 @@
struct tcp_info ti;
char buf[TCP_CA_NAME_MAX];
struct cc_algo *algo;
+ CC_SOCKOPT_DEFINE(ccsopt, sizeof(uint32_t));
error = 0;
inp = sotoinpcb(so);
@@ -1469,6 +1470,30 @@
CC_LIST_RUNLOCK();
goto unlock_and_done;
+ case TCP_CCALGOOPT:
+ INP_WUNLOCK(inp);
+ socklen_t tmp_size = ccsopt->size;
+ error = sooptcopyin(sopt, ccsopt, ccsopt->size,
+ sizeof(struct cc_sockopt));
+ if (error)
+ break;
+ if (ccsopt->size > tmp_size) {
+ /* Kernel ccsopt->sopt_val is too small. */
+ error = EMSGSIZE;
+ break;
+ }
+ INP_WLOCK_RECHECK(inp);
+
+ if (strlen(ccsopt->cc_name) == strlen(CC_ALGO(tp)->name) &&
+ strcmp(ccsopt->cc_name, CC_ALGO(tp)->name) == 0 &&
+ CC_ALGO(tp)->ctl_output) {
+ ccsopt->sopt_dir = SOPT_SET;
+ error = CC_ALGO(tp)->ctl_output(tp->ccv,
+ ccsopt);
+ } else
+ error = ENOENT;
+ goto unlock_and_done;
+
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
case TCP_KEEPINIT:
@@ -1576,6 +1601,34 @@
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, buf, TCP_CA_NAME_MAX);
break;
+ case TCP_CCALGOOPT:
+ INP_WUNLOCK(inp);
+ socklen_t tmp_size = ccsopt->size;
+ error = sooptcopyin(sopt, ccsopt, ccsopt->size,
+ sizeof(struct cc_sockopt));
+ if (error)
+ break;
+ /*
+ * If userspace ccsopt->sopt_val is larger than the
+ * in-kernel ccsopt->sopt_val, use kernel's size.
+ */
+ if (ccsopt->size > tmp_size)
+ CC_SOCKOPT_RESET(ccsopt);
+ INP_WLOCK_RECHECK(inp);
+
+ if (strlen(ccsopt->cc_name) == strlen(CC_ALGO(tp)->name) &&
+ strcmp(ccsopt->cc_name, CC_ALGO(tp)->name) == 0 &&
+ CC_ALGO(tp)->ctl_output) {
+ ccsopt->sopt_dir = SOPT_GET;
+ error = CC_ALGO(tp)->ctl_output(tp->ccv,
+ ccsopt);
+ } else
+ error = ENOENT;
+ INP_WUNLOCK(inp);
+ if (!error)
+ error = sooptcopyout(sopt, ccsopt,
+ ccsopt->size);
+ break;
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
case TCP_KEEPINIT:

File Metadata

Mime Type
text/plain
Expires
Wed, Jan 22, 6:44 PM (13 h, 38 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16035213
Default Alt Text
D711.id1325.diff (8 KB)

Event Timeline