Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F153328420
D33249.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
13 KB
Referenced Files
None
Subscribers
None
D33249.diff
View Options
diff --git a/sys/netinet/cc/cc.h b/sys/netinet/cc/cc.h
--- a/sys/netinet/cc/cc.h
+++ b/sys/netinet/cc/cc.h
@@ -200,6 +200,7 @@
int (*ctl_output)(struct cc_var *, struct sockopt *, void *);
STAILQ_ENTRY (cc_algo) entries;
+ u_int cc_refcount;
uint8_t flags;
};
@@ -236,5 +237,15 @@
void newreno_cc_cong_signal(struct cc_var *, uint32_t );
void newreno_cc_ack_received(struct cc_var *, uint16_t);
+/* Called to temporarily keep an algo from going away during change */
+void cc_refer(struct cc_algo *algo);
+/* Called to release the temporary hold */
+void cc_release(struct cc_algo *algo);
+
+/* Called to attach a CC algorithm to a tcpcb */
+void cc_attach(struct tcpcb *, struct cc_algo *);
+/* Called to detach a CC algorithm from a tcpcb */
+void cc_detach(struct tcpcb *);
+
#endif /* _KERNEL */
#endif /* _NETINET_CC_CC_H_ */
diff --git a/sys/netinet/cc/cc.c b/sys/netinet/cc/cc.c
--- a/sys/netinet/cc/cc.c
+++ b/sys/netinet/cc/cc.c
@@ -107,6 +107,45 @@
VNET_DEFINE(uint32_t, newreno_beta) = 50;
#define V_newreno_beta VNET(newreno_beta)
+void
+cc_refer(struct cc_algo *algo)
+{
+ CC_LIST_LOCK_ASSERT();
+ refcount_acquire(&algo->cc_refcount);
+}
+
+void
+cc_release(struct cc_algo *algo)
+{
+ CC_LIST_LOCK_ASSERT();
+ refcount_release(&algo->cc_refcount);
+}
+
+
+void
+cc_attach(struct tcpcb *tp, struct cc_algo *algo)
+{
+ /*
+ * Attach the tcpcb to the algorithm.
+ */
+ CC_LIST_RLOCK();
+ CC_ALGO(tp) = algo;
+ cc_refer(algo);
+ CC_LIST_RUNLOCK();
+}
+
+void
+cc_detach(struct tcpcb *tp)
+{
+ struct cc_algo *algo;
+
+ CC_LIST_RLOCK();
+ algo = CC_ALGO(tp);
+ CC_ALGO(tp) = NULL;
+ cc_release(algo);
+ CC_LIST_RUNLOCK();
+}
+
/*
* Sysctl handler to show and change the default CC algorithm.
*/
@@ -137,6 +176,10 @@
STAILQ_FOREACH(funcs, &cc_list, entries) {
if (strncmp(default_cc, funcs->name, sizeof(default_cc)))
continue;
+ if (funcs->flags & CC_MODULE_BEING_REMOVED) {
+ /* Its being removed, its not eligible */
+ continue;
+ }
V_default_cc_ptr = funcs;
error = 0;
break;
@@ -153,12 +196,12 @@
cc_list_available(SYSCTL_HANDLER_ARGS)
{
struct cc_algo *algo;
- struct sbuf *s;
- int err, first, nalgos;
-
- err = nalgos = 0;
- first = 1;
+ int error, nalgos;
+ int linesz;
+ char *buffer, *cp;
+ size_t bufsz, outsz;
+ error = nalgos = 0;
CC_LIST_RLOCK();
STAILQ_FOREACH(algo, &cc_list, entries) {
nalgos++;
@@ -167,37 +210,34 @@
if (nalgos == 0) {
return (ENOENT);
}
- s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN);
-
- if (s == NULL)
- return (ENOMEM);
-
- /*
- * It is theoretically possible for the CC list to have grown in size
- * since the call to sbuf_new() and therefore for the sbuf to be too
- * small. If this were to happen (incredibly unlikely), the sbuf will
- * reach an overflow condition, sbuf_printf() will return an error and
- * the sysctl will fail gracefully.
- */
+ bufsz = (nalgos+2) * ((TCP_CA_NAME_MAX + 13) + 1);
+ buffer = malloc(bufsz, M_TEMP, M_WAITOK);
+ cp = buffer;
+
+ linesz = snprintf(cp, bufsz, "\n%-16s%c %s\n", "CCmod", 'D',
+ "PCB count");
+ cp += linesz;
+ bufsz -= linesz;
+ outsz = linesz;
CC_LIST_RLOCK();
STAILQ_FOREACH(algo, &cc_list, entries) {
- err = sbuf_printf(s, first ? "%s" : ", %s", algo->name);
- if (err) {
- /* Sbuf overflow condition. */
- err = EOVERFLOW;
+ linesz = snprintf(cp, bufsz, "%-16s%c %u\n",
+ algo->name,
+ (algo == CC_DEFAULT_ALGO()) ? '*' : ' ',
+ algo->cc_refcount);
+ if (linesz >= bufsz) {
+ error = EOVERFLOW;
break;
}
- first = 0;
+ cp += linesz;
+ bufsz -= linesz;
+ outsz += linesz;
}
CC_LIST_RUNLOCK();
-
- if (!err) {
- sbuf_finish(s);
- err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
- }
-
- sbuf_delete(s);
- return (err);
+ if (error == 0)
+ error = sysctl_handle_string(oidp, buffer, outsz + 1, req);
+ free(buffer, M_TEMP);
+ return (error);
}
/*
@@ -243,41 +283,36 @@
int
cc_deregister_algo(struct cc_algo *remove_cc)
{
- struct cc_algo *funcs, *tmpfuncs;
- int err;
-
- err = ENOENT;
+ struct cc_algo *funcs;
+ int found = 0;
/* Remove algo from cc_list so that new connections can't use it. */
CC_LIST_WLOCK();
- STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
- if (funcs == remove_cc) {
- if (cc_check_default(remove_cc)) {
- CC_LIST_WUNLOCK();
- return(EBUSY);
- }
- break;
- }
+
+ /* This is unlikely to fail */
+ STAILQ_FOREACH(funcs, &cc_list, entries) {
+ if (funcs == remove_cc)
+ found = 1;
}
- remove_cc->flags |= CC_MODULE_BEING_REMOVED;
- CC_LIST_WUNLOCK();
- err = tcp_ccalgounload(remove_cc);
- /*
- * Now back through and we either remove the temp flag
- * or pull the registration.
- */
- CC_LIST_WLOCK();
- STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
- if (funcs == remove_cc) {
- if (err == 0)
- STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries);
- else
- funcs->flags &= ~CC_MODULE_BEING_REMOVED;
- break;
- }
+ if (found == 0) {
+ /* Nothing to remove? */
+ CC_LIST_WUNLOCK();
+ return (ENOENT);
+ }
+ /* We assert it should have been MOD_QUIESCE'd */
+ KASSERT((remove_cc->flags & CC_MODULE_BEING_REMOVED),
+ ("remove_cc:%p does not have CC_MODULE_BEING_REMOVED flag", remove_cc));
+ if (cc_check_default(remove_cc)) {
+ CC_LIST_WUNLOCK();
+ return(EBUSY);
+ }
+ if (remove_cc->cc_refcount != 0) {
+ CC_LIST_WUNLOCK();
+ return (EBUSY);
}
+ STAILQ_REMOVE(&cc_list, remove_cc, cc_algo, entries);
CC_LIST_WUNLOCK();
- return (err);
+ return (0);
}
/*
@@ -304,6 +339,9 @@
break;
}
}
+ /* Init its reference count */
+ if (err == 0)
+ refcount_init(&add_cc->cc_refcount, 0);
/*
* The first loaded congestion control module will become
* the default until we find the "CC_DEFAULT" defined in
@@ -526,6 +564,20 @@
}
}
+static int
+cc_stop_new_assignments(struct cc_algo *algo)
+{
+ CC_LIST_WLOCK();
+ if (cc_check_default(algo)) {
+ /* A default cannot be removed */
+ CC_LIST_WUNLOCK();
+ return (EBUSY);
+ }
+ algo->flags |= CC_MODULE_BEING_REMOVED;
+ CC_LIST_WUNLOCK();
+ return (0);
+}
+
/*
* Handles kld related events. Returns 0 on success, non-zero on failure.
*/
@@ -555,16 +607,32 @@
err = cc_register_algo(algo);
break;
- case MOD_QUIESCE:
case MOD_SHUTDOWN:
+ break;
+ case MOD_QUIESCE:
+ /* Stop any new assigments */
+ err = cc_stop_new_assignments(algo);
+ break;
case MOD_UNLOAD:
+ /*
+ * Deregister and remove the module from the list
+ */
+ CC_LIST_WLOCK();
+ /* Even with -f we can't unload if its the default */
+ if (cc_check_default(algo)) {
+ /* A default cannot be removed */
+ CC_LIST_WUNLOCK();
+ return (EBUSY);
+ }
+ /*
+ * If -f was used and users are still attached to
+ * the algorithm things are going to go boom.
+ */
err = cc_deregister_algo(algo);
- if (!err && algo->mod_destroy != NULL)
+ if ((err == 0) && (algo->mod_destroy != NULL)) {
algo->mod_destroy();
- if (err == ENOENT)
- err = 0;
+ }
break;
-
default:
err = EINVAL;
break;
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -2171,10 +2171,7 @@
/*
* Use the current system default CC algorithm.
*/
- CC_LIST_RLOCK();
- KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!"));
- CC_ALGO(tp) = CC_DEFAULT_ALGO();
- CC_LIST_RUNLOCK();
+ cc_attach(tp, CC_DEFAULT_ALGO());
/*
* The tcpcb will hold a reference on its inpcb until tcp_discardcb()
@@ -2185,6 +2182,7 @@
if (CC_ALGO(tp)->cb_init != NULL)
if (CC_ALGO(tp)->cb_init(tp->ccv, NULL) > 0) {
+ cc_detach(tp);
if (tp->t_fb->tfb_tcp_fb_fini)
(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
in_pcbrele_wlocked(inp);
@@ -2276,93 +2274,6 @@
return (tp); /* XXX */
}
-/*
- * Switch the congestion control algorithm back to Vnet default for any active
- * control blocks using an algorithm which is about to go away. If the algorithm
- * has a cb_init function and it fails (no memory) then the operation fails and
- * the unload will not succeed.
- *
- */
-int
-tcp_ccalgounload(struct cc_algo *unload_algo)
-{
- struct cc_algo *oldalgo, *newalgo;
- struct inpcb *inp;
- struct tcpcb *tp;
- VNET_ITERATOR_DECL(vnet_iter);
-
- /*
- * Check all active control blocks across all network stacks and change
- * any that are using "unload_algo" back to its default. If "unload_algo"
- * requires cleanup code to be run, call it.
- */
- VNET_LIST_RLOCK();
- VNET_FOREACH(vnet_iter) {
- CURVNET_SET(vnet_iter);
- struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_tcbinfo,
- INPLOOKUP_WLOCKPCB);
- /*
- * XXXGL: would new accept(2)d connections use algo being
- * unloaded?
- */
- newalgo = CC_DEFAULT_ALGO();
- while ((inp = inp_next(&inpi)) != NULL) {
- /* Important to skip tcptw structs. */
- if (!(inp->inp_flags & INP_TIMEWAIT) &&
- (tp = intotcpcb(inp)) != NULL) {
- /*
- * By holding INP_WLOCK here, we are assured
- * that the connection is not currently
- * executing inside the CC module's functions.
- * We attempt to switch to the Vnets default,
- * if the init fails then we fail the whole
- * operation and the module unload will fail.
- */
- if (CC_ALGO(tp) == unload_algo) {
- struct cc_var cc_mem;
- int err;
-
- oldalgo = CC_ALGO(tp);
- memset(&cc_mem, 0, sizeof(cc_mem));
- cc_mem.ccvc.tcp = tp;
- if (newalgo->cb_init == NULL) {
- /*
- * No init we can skip the
- * dance around a possible failure.
- */
- CC_DATA(tp) = NULL;
- goto proceed;
- }
- err = (newalgo->cb_init)(&cc_mem, NULL);
- if (err) {
- /*
- * Presumably no memory the caller will
- * need to try again.
- */
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- VNET_LIST_RUNLOCK();
- return (err);
- }
-proceed:
- if (oldalgo->cb_destroy != NULL)
- oldalgo->cb_destroy(tp->ccv);
- CC_ALGO(tp) = newalgo;
- memcpy(tp->ccv, &cc_mem, sizeof(struct cc_var));
- if (TCPS_HAVEESTABLISHED(tp->t_state) &&
- (CC_ALGO(tp)->conn_init != NULL)) {
- /* Yep run the connection init for the new CC */
- CC_ALGO(tp)->conn_init(tp->ccv);
- }
- }
- }
- }
- CURVNET_RESTORE();
- }
- VNET_LIST_RUNLOCK();
- return (0);
-}
-
/*
* Drop a TCP connection, reporting
* the specified error. If connection is synchronized,
@@ -2443,6 +2354,8 @@
if (CC_ALGO(tp)->cb_destroy != NULL)
CC_ALGO(tp)->cb_destroy(tp->ccv);
CC_DATA(tp) = NULL;
+ /* Detach from the CC algorithm */
+ cc_detach(tp);
#ifdef TCP_HHOOK
khelp_destroy_osd(tp->osd);
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -2004,7 +2004,7 @@
extern struct cc_algo newreno_cc_algo;
static int
-tcp_congestion(struct inpcb *inp, struct sockopt *sopt)
+tcp_set_cc_mod(struct inpcb *inp, struct sockopt *sopt)
{
struct cc_algo *algo;
void *ptr = NULL;
@@ -2020,7 +2020,7 @@
return(error);
buf[sopt->sopt_valsize] = '\0';
CC_LIST_RLOCK();
- STAILQ_FOREACH(algo, &cc_list, entries)
+ STAILQ_FOREACH(algo, &cc_list, entries) {
if (strncmp(buf, algo->name,
TCP_CA_NAME_MAX) == 0) {
if (algo->flags & CC_MODULE_BEING_REMOVED) {
@@ -2029,30 +2029,24 @@
}
break;
}
+ }
if (algo == NULL) {
CC_LIST_RUNLOCK();
return(ESRCH);
}
-do_over:
+ /*
+ * With a reference the algorithm cannot be removed
+ * so we hold a reference through the change process.
+ */
+ cc_refer(algo);
+ CC_LIST_RUNLOCK();
if (algo->cb_init != NULL) {
/* We can now pre-get the memory for the CC */
mem_sz = (*algo->cc_data_sz)();
if (mem_sz == 0) {
goto no_mem_needed;
}
- CC_LIST_RUNLOCK();
ptr = malloc(mem_sz, M_CC_MEM, M_WAITOK);
- CC_LIST_RLOCK();
- STAILQ_FOREACH(algo, &cc_list, entries)
- if (strncmp(buf, algo->name,
- TCP_CA_NAME_MAX) == 0)
- break;
- if (algo == NULL) {
- if (ptr)
- free(ptr, M_CC_MEM);
- CC_LIST_RUNLOCK();
- return(ESRCH);
- }
} else {
no_mem_needed:
mem_sz = 0;
@@ -2063,22 +2057,20 @@
* back the inplock.
*/
memset(&cc_mem, 0, sizeof(cc_mem));
- if (mem_sz != (*algo->cc_data_sz)()) {
- if (ptr)
- free(ptr, M_CC_MEM);
- goto do_over;
- }
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
INP_WUNLOCK(inp);
+ if (ptr)
+ free(ptr, M_CC_MEM);
+ /* Release our temp reference */
+ CC_LIST_RLOCK();
+ cc_release(algo);
CC_LIST_RUNLOCK();
- free(ptr, M_CC_MEM);
return (ECONNRESET);
}
tp = intotcpcb(inp);
if (ptr != NULL)
memset(ptr, 0, mem_sz);
- CC_LIST_RUNLOCK();
cc_mem.ccvc.tcp = tp;
/*
* We once again hold a write lock over the tcb so it's
@@ -2103,8 +2095,12 @@
*/
if (CC_ALGO(tp)->cb_destroy != NULL)
CC_ALGO(tp)->cb_destroy(tp->ccv);
+ /* Detach the old CC from the tcpcb */
+ cc_detach(tp);
+ /* Copy in our temp memory that was inited */
memcpy(tp->ccv, &cc_mem, sizeof(struct cc_var));
- tp->cc_algo = algo;
+ /* Now attach the new, which takes a reference */
+ cc_attach(tp, algo);
/* Ok now are we where we have gotten past any conn_init? */
if (TCPS_HAVEESTABLISHED(tp->t_state) && (CC_ALGO(tp)->conn_init != NULL)) {
/* Yep run the connection init for the new CC */
@@ -2113,6 +2109,10 @@
} else if (ptr)
free(ptr, M_CC_MEM);
INP_WUNLOCK(inp);
+ /* Now lets release our temp reference */
+ CC_LIST_RLOCK();
+ cc_release(algo);
+ CC_LIST_RUNLOCK();
return (error);
}
@@ -2336,7 +2336,7 @@
break;
case TCP_CONGESTION:
- error = tcp_congestion(inp, sopt);
+ error = tcp_set_cc_mod(inp, sopt);
break;
case TCP_REUSPORT_LB_NUMA:
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -1076,7 +1076,6 @@
#endif
int tcp_addoptions(struct tcpopt *, u_char *);
-int tcp_ccalgounload(struct cc_algo *unload_algo);
struct tcpcb *
tcp_close(struct tcpcb *);
void tcp_discardcb(struct tcpcb *);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Apr 21, 12:30 PM (9 h, 54 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31910442
Default Alt Text
D33249.diff (13 KB)
Attached To
Mode
D33249: tcp: Congestion control move to using reference counting.
Attached
Detach File
Event Timeline
Log In to Comment