Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F145241187
D33249.id100082.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
10 KB
Referenced Files
None
Subscribers
None
D33249.id100082.diff
View Options
Index: sys/netinet/cc/cc.h
===================================================================
--- sys/netinet/cc/cc.h
+++ sys/netinet/cc/cc.h
@@ -188,6 +188,7 @@
int (*ctl_output)(struct cc_var *, struct sockopt *, void *);
STAILQ_ENTRY (cc_algo) entries;
+ u_int cc_refcount;
uint8_t flags;
};
@@ -224,5 +225,15 @@
void newreno_cc_cong_signal(struct cc_var *, uint32_t );
void newreno_cc_ack_received(struct cc_var *, uint16_t);
+/* Called to temporarily keep an algo from going away during change */
+void cc_refer(struct cc_algo *algo);
+/* Called to release the temporary hold */
+void cc_release(struct cc_algo *algo);
+
+/* Called to attach a CC algorithm to a tcpcb */
+void cc_attach(struct tcpcb *, struct cc_algo *);
+/* Called to detach a CC algorithm from a tcpcb */
+void cc_detach(struct tcpcb *);
+
#endif /* _KERNEL */
#endif /* _NETINET_CC_CC_H_ */
Index: sys/netinet/cc/cc.c
===================================================================
--- sys/netinet/cc/cc.c
+++ sys/netinet/cc/cc.c
@@ -100,6 +100,45 @@
VNET_DEFINE(uint32_t, newreno_beta) = 50;
#define V_newreno_beta VNET(newreno_beta)
+void
+cc_refer(struct cc_algo *algo)
+{
+ CC_LIST_LOCK_ASSERT();
+ refcount_acquire(&algo->cc_refcount);
+}
+
+void
+cc_release(struct cc_algo *algo)
+{
+ CC_LIST_LOCK_ASSERT();
+ refcount_release(&algo->cc_refcount);
+}
+
+
+void
+cc_attach(struct tcpcb *tp, struct cc_algo *algo)
+{
+ /*
+ * Attach the tcpcb to the algorithm.
+ */
+ CC_LIST_RLOCK();
+ CC_ALGO(tp) = algo;
+ cc_refer(algo);
+ CC_LIST_RUNLOCK();
+}
+
+void
+cc_detach(struct tcpcb *tp)
+{
+ struct cc_algo *algo;
+
+ CC_LIST_RLOCK();
+ algo = CC_ALGO(tp);
+ CC_ALGO(tp) = NULL;
+ cc_release(algo);
+ CC_LIST_RUNLOCK();
+}
+
/*
* Sysctl handler to show and change the default CC algorithm.
*/
@@ -130,6 +169,10 @@
STAILQ_FOREACH(funcs, &cc_list, entries) {
if (strncmp(default_cc, funcs->name, sizeof(default_cc)))
continue;
+ if (funcs->flags & CC_MODULE_BEING_REMOVED) {
+ /* It's being removed, so it's not eligible */
+ continue;
+ }
V_default_cc_ptr = funcs;
error = 0;
break;
@@ -146,12 +189,12 @@
cc_list_available(SYSCTL_HANDLER_ARGS)
{
struct cc_algo *algo;
- struct sbuf *s;
- int err, first, nalgos;
-
- err = nalgos = 0;
- first = 1;
+ int error, nalgos;
+ int linesz;
+ char *buffer, *cp;
+ size_t bufsz, outsz;
+ error = nalgos = 0;
CC_LIST_RLOCK();
STAILQ_FOREACH(algo, &cc_list, entries) {
nalgos++;
@@ -160,37 +203,34 @@
if (nalgos == 0) {
return (ENOENT);
}
- s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN);
-
- if (s == NULL)
- return (ENOMEM);
-
- /*
- * It is theoretically possible for the CC list to have grown in size
- * since the call to sbuf_new() and therefore for the sbuf to be too
- * small. If this were to happen (incredibly unlikely), the sbuf will
- * reach an overflow condition, sbuf_printf() will return an error and
- * the sysctl will fail gracefully.
- */
+ bufsz = (nalgos+2) * ((TCP_CA_NAME_MAX + 13) + 1);
+ buffer = malloc(bufsz, M_TEMP, M_WAITOK);
+ cp = buffer;
+
+ linesz = snprintf(cp, bufsz, "\n%-16s%c %s\n", "CCmod", 'D',
+ "PCB count");
+ cp += linesz;
+ bufsz -= linesz;
+ outsz = linesz;
CC_LIST_RLOCK();
STAILQ_FOREACH(algo, &cc_list, entries) {
- err = sbuf_printf(s, first ? "%s" : ", %s", algo->name);
- if (err) {
- /* Sbuf overflow condition. */
- err = EOVERFLOW;
+ linesz = snprintf(cp, bufsz, "%-16s%c %u\n",
+ algo->name,
+ (algo == CC_DEFAULT_ALGO()) ? '*' : ' ',
+ algo->cc_refcount);
+ if (linesz >= bufsz) {
+ error = EOVERFLOW;
break;
}
- first = 0;
+ cp += linesz;
+ bufsz -= linesz;
+ outsz += linesz;
}
CC_LIST_RUNLOCK();
-
- if (!err) {
- sbuf_finish(s);
- err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
- }
-
- sbuf_delete(s);
- return (err);
+ if (error == 0)
+ error = sysctl_handle_string(oidp, buffer, outsz + 1, req);
+ free(buffer, M_TEMP);
+ return (error);
}
/*
@@ -236,41 +276,36 @@
int
cc_deregister_algo(struct cc_algo *remove_cc)
{
- struct cc_algo *funcs, *tmpfuncs;
- int err;
-
- err = ENOENT;
+ struct cc_algo *funcs;
+ int found = 0;
/* Remove algo from cc_list so that new connections can't use it. */
CC_LIST_WLOCK();
- STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
- if (funcs == remove_cc) {
- if (cc_check_default(remove_cc)) {
- CC_LIST_WUNLOCK();
- return(EBUSY);
- }
- break;
- }
+
+ /* This is unlikely to fail */
+ STAILQ_FOREACH(funcs, &cc_list, entries) {
+ if (funcs == remove_cc)
+ found = 1;
}
- remove_cc->flags |= CC_MODULE_BEING_REMOVED;
- CC_LIST_WUNLOCK();
- err = tcp_ccalgounload(remove_cc);
- /*
- * Now back through and we either remove the temp flag
- * or pull the registration.
- */
- CC_LIST_WLOCK();
- STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
- if (funcs == remove_cc) {
- if (err == 0)
- STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries);
- else
- funcs->flags &= ~CC_MODULE_BEING_REMOVED;
- break;
- }
+ if (found == 0) {
+ /* Nothing to remove?? */
+ CC_LIST_WUNLOCK();
+ return (ENOENT);
+ }
+ /* We assert it should have been MOD_QUIESCE'd */
+ KASSERT((remove_cc->flags & CC_MODULE_BEING_REMOVED),
+ ("remove_cc:%p does not have CC_MODULE_BEING_REMOVED flag", remove_cc));
+ if (cc_check_default(remove_cc)) {
+ CC_LIST_WUNLOCK();
+ return(EBUSY);
+ }
+ if (remove_cc->cc_refcount != 0) {
+ CC_LIST_WUNLOCK();
+ return (EBUSY);
}
+ STAILQ_REMOVE(&cc_list, remove_cc, cc_algo, entries);
CC_LIST_WUNLOCK();
- return (err);
+ return (0);
}
/*
@@ -297,6 +332,9 @@
break;
}
}
+ /* Init its reference count */
+ if (err == 0)
+ refcount_init(&add_cc->cc_refcount, 0);
/*
* The first loaded congestion control module will become
* the default until we find the "CC_DEFAULT" defined in
@@ -519,6 +557,28 @@
}
}
+static int
+cc_stop_new_assignments(struct cc_algo *algo)
+{
+ int err;
+
+ CC_LIST_WLOCK();
+ if (cc_check_default(algo)) {
+ /* A default cannot be removed */
+ CC_LIST_WUNLOCK();
+ return (EBUSY);
+ }
+ algo->flags |= CC_MODULE_BEING_REMOVED;
+ CC_LIST_WUNLOCK();
+ err = tcp_ccalgounload(algo);
+ if (err) {
+ CC_LIST_WLOCK();
+ algo->flags &= ~CC_MODULE_BEING_REMOVED;
+ CC_LIST_WUNLOCK();
+ }
+ return (err);
+}
+
/*
* Handles kld related events. Returns 0 on success, non-zero on failure.
*/
@@ -550,14 +610,28 @@
case MOD_QUIESCE:
case MOD_SHUTDOWN:
+ /* Stop any new assignments */
+ err = cc_stop_new_assignments(algo);
+ break;
case MOD_UNLOAD:
+ /*
+ * Deregister and remove the module from the list
+ */
+ CC_LIST_WLOCK();
+ /* Even with -f we can't unload if it's the default */
+ if (cc_check_default(algo)) {
+ /* A default cannot be removed */
+ CC_LIST_WUNLOCK();
+ return (EBUSY);
+ }
+ /*
+ * If -f was used and users are still attached to
+ * the algorithm, things are going to go boom.
+ */
err = cc_deregister_algo(algo);
if (!err && algo->mod_destroy != NULL)
algo->mod_destroy();
- if (err == ENOENT)
- err = 0;
break;
-
default:
err = EINVAL;
break;
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -2186,7 +2186,7 @@
*/
CC_LIST_RLOCK();
KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!"));
- CC_ALGO(tp) = CC_DEFAULT_ALGO();
+ cc_attach(tp, CC_DEFAULT_ALGO());
CC_LIST_RUNLOCK();
/*
@@ -2198,6 +2198,7 @@
if (CC_ALGO(tp)->cb_init != NULL)
if (CC_ALGO(tp)->cb_init(tp->ccv, NULL) > 0) {
+ cc_detach(tp);
if (tp->t_fb->tfb_tcp_fb_fini)
(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
in_pcbrele_wlocked(inp);
@@ -2455,6 +2456,8 @@
if (CC_ALGO(tp)->cb_destroy != NULL)
CC_ALGO(tp)->cb_destroy(tp->ccv);
CC_DATA(tp) = NULL;
+ /* Detach from the CC algorithm */
+ cc_detach(tp);
#ifdef TCP_HHOOK
khelp_destroy_osd(tp->osd);
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -1993,7 +1993,7 @@
extern struct cc_algo newreno_cc_algo;
static int
-tcp_congestion(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp)
+tcp_set_cc_mod(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp)
{
struct cc_algo *algo;
void *ptr = NULL;
@@ -2021,26 +2021,16 @@
CC_LIST_RUNLOCK();
return(ESRCH);
}
-do_over:
+ /* With a reference the algorithm cannot be removed */
+ cc_refer(algo);
+ CC_LIST_RUNLOCK();
if (algo->cb_init != NULL) {
/* We can now pre-get the memory for the CC */
mem_sz = (*algo->cc_data_sz)();
if (mem_sz == 0) {
goto no_mem_needed;
}
- CC_LIST_RUNLOCK();
ptr = malloc(mem_sz, M_CC_MEM, M_WAITOK);
- CC_LIST_RLOCK();
- STAILQ_FOREACH(algo, &cc_list, entries)
- if (strncmp(buf, algo->name,
- TCP_CA_NAME_MAX) == 0)
- break;
- if (algo == NULL) {
- if (ptr)
- free(ptr, M_CC_MEM);
- CC_LIST_RUNLOCK();
- return(ESRCH);
- }
} else {
no_mem_needed:
mem_sz = 0;
@@ -2051,22 +2041,16 @@
* back the inplock.
*/
memset(&cc_mem, 0, sizeof(cc_mem));
- if (mem_sz != (*algo->cc_data_sz)()) {
- if (ptr)
- free(ptr, M_CC_MEM);
- goto do_over;
- }
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
INP_WUNLOCK(inp);
- CC_LIST_RUNLOCK();
- free(ptr, M_CC_MEM);
+ if (ptr)
+ free(ptr, M_CC_MEM);
return (ECONNRESET);
}
tp = intotcpcb(inp);
if (ptr != NULL)
memset(ptr, 0, mem_sz);
- CC_LIST_RUNLOCK();
cc_mem.ccvc.tcp = tp;
/*
* We once again hold a write lock over the tcb so it's
@@ -2091,8 +2075,9 @@
*/
if (CC_ALGO(tp)->cb_destroy != NULL)
CC_ALGO(tp)->cb_destroy(tp->ccv);
+ cc_detach(tp);
memcpy(tp->ccv, &cc_mem, sizeof(struct cc_var));
- tp->cc_algo = algo;
+ cc_attach(tp, algo);
/* Ok now are we where we have gotten past any conn_init? */
if (TCPS_HAVEESTABLISHED(tp->t_state) && (CC_ALGO(tp)->conn_init != NULL)) {
/* Yep run the connection init for the new CC */
@@ -2101,6 +2086,10 @@
} else if (ptr)
free(ptr, M_CC_MEM);
INP_WUNLOCK(inp);
+ /* Now let's release our reference */
+ CC_LIST_RLOCK();
+ cc_release(algo);
+ CC_LIST_RUNLOCK();
return (error);
}
@@ -2319,7 +2308,7 @@
break;
case TCP_CONGESTION:
- error = tcp_congestion(so, sopt, inp, tp);
+ error = tcp_set_cc_mod(so, sopt, inp, tp);
break;
case TCP_REUSPORT_LB_NUMA:
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Feb 18, 12:04 PM (4 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28846165
Default Alt Text
D33249.id100082.diff (10 KB)
Attached To
Mode
D33249: tcp: Congestion control move to using reference counting.
Attached
Detach File
Event Timeline
Log In to Comment