Page MenuHomeFreeBSD

D33249.id100082.diff
No OneTemporary

D33249.id100082.diff

Index: sys/netinet/cc/cc.h
===================================================================
--- sys/netinet/cc/cc.h
+++ sys/netinet/cc/cc.h
@@ -188,6 +188,7 @@
int (*ctl_output)(struct cc_var *, struct sockopt *, void *);
STAILQ_ENTRY (cc_algo) entries;
+ u_int cc_refcount;
uint8_t flags;
};
@@ -224,5 +225,15 @@
void newreno_cc_cong_signal(struct cc_var *, uint32_t );
void newreno_cc_ack_received(struct cc_var *, uint16_t);
+/* Called to temporarily keep an algo from going away during change */
+void cc_refer(struct cc_algo *algo);
+/* Called to release the temporary hold */
+void cc_release(struct cc_algo *algo);
+
+/* Called to attach a CC algorithm to a tcpcb */
+void cc_attach(struct tcpcb *, struct cc_algo *);
+/* Called to detach a CC algorithm from a tcpcb */
+void cc_detach(struct tcpcb *);
+
#endif /* _KERNEL */
#endif /* _NETINET_CC_CC_H_ */
Index: sys/netinet/cc/cc.c
===================================================================
--- sys/netinet/cc/cc.c
+++ sys/netinet/cc/cc.c
@@ -100,6 +100,45 @@
VNET_DEFINE(uint32_t, newreno_beta) = 50;
#define V_newreno_beta VNET(newreno_beta)
+/*
+ * Take a reference on a congestion control algorithm so that it is not
+ * removed while the caller is using it.  CC_LIST_LOCK_ASSERT() is invoked
+ * first, so the caller is expected to already hold the CC list lock.
+ */
+void
+cc_refer(struct cc_algo *algo)
+{
+ CC_LIST_LOCK_ASSERT();
+ refcount_acquire(&algo->cc_refcount);
+}
+
+/*
+ * Drop a reference previously taken with cc_refer().  The caller is expected
+ * to hold the CC list lock (see CC_LIST_LOCK_ASSERT()).  The result of
+ * refcount_release() is ignored: nothing is freed here when the count drops.
+ */
+void
+cc_release(struct cc_algo *algo)
+{
+ CC_LIST_LOCK_ASSERT();
+ refcount_release(&algo->cc_refcount);
+}
+
+
+/*
+ * Make algo the congestion control algorithm of tp and account for the new
+ * user in the algorithm's reference count.
+ */
+void
+cc_attach(struct tcpcb *tp, struct cc_algo *algo)
+{
+ /*
+ * Attach the tcpcb to the algorithm.  The CC list read lock is taken
+ * here because cc_refer() asserts that the list lock is held.
+ * NOTE(review): the tcp_subr.c hunk of this diff calls cc_attach()
+ * while it already holds CC_LIST_RLOCK(), so the read lock is
+ * acquired a second time on that path -- confirm this is intended.
+ */
+ CC_LIST_RLOCK();
+ CC_ALGO(tp) = algo;
+ cc_refer(algo);
+ CC_LIST_RUNLOCK();
+}
+
+/*
+ * Disconnect tp from its current congestion control algorithm: clear
+ * CC_ALGO(tp) and drop the reference on the algorithm it pointed to.
+ * NOTE(review): assumes CC_ALGO(tp) is non-NULL on entry, since the old
+ * pointer is handed straight to cc_release() -- confirm with callers.
+ */
+void
+cc_detach(struct tcpcb *tp)
+{
+ struct cc_algo *algo;
+
+ CC_LIST_RLOCK();
+ /* Remember the old algorithm before clearing the tcpcb's pointer. */
+ algo = CC_ALGO(tp);
+ CC_ALGO(tp) = NULL;
+ cc_release(algo);
+ CC_LIST_RUNLOCK();
+}
+
/*
* Sysctl handler to show and change the default CC algorithm.
*/
@@ -130,6 +169,10 @@
STAILQ_FOREACH(funcs, &cc_list, entries) {
if (strncmp(default_cc, funcs->name, sizeof(default_cc)))
continue;
+ if (funcs->flags & CC_MODULE_BEING_REMOVED) {
+ /* It's being removed, so it's not eligible */
+ continue;
+ }
V_default_cc_ptr = funcs;
error = 0;
break;
@@ -146,12 +189,12 @@
cc_list_available(SYSCTL_HANDLER_ARGS)
{
struct cc_algo *algo;
- struct sbuf *s;
- int err, first, nalgos;
-
- err = nalgos = 0;
- first = 1;
+ int error, nalgos;
+ int linesz;
+ char *buffer, *cp;
+ size_t bufsz, outsz;
+ error = nalgos = 0;
CC_LIST_RLOCK();
STAILQ_FOREACH(algo, &cc_list, entries) {
nalgos++;
@@ -160,37 +203,34 @@
if (nalgos == 0) {
return (ENOENT);
}
- s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN);
-
- if (s == NULL)
- return (ENOMEM);
-
- /*
- * It is theoretically possible for the CC list to have grown in size
- * since the call to sbuf_new() and therefore for the sbuf to be too
- * small. If this were to happen (incredibly unlikely), the sbuf will
- * reach an overflow condition, sbuf_printf() will return an error and
- * the sysctl will fail gracefully.
- */
+ bufsz = (nalgos+2) * ((TCP_CA_NAME_MAX + 13) + 1);
+ buffer = malloc(bufsz, M_TEMP, M_WAITOK);
+ cp = buffer;
+
+ linesz = snprintf(cp, bufsz, "\n%-16s%c %s\n", "CCmod", 'D',
+ "PCB count");
+ cp += linesz;
+ bufsz -= linesz;
+ outsz = linesz;
CC_LIST_RLOCK();
STAILQ_FOREACH(algo, &cc_list, entries) {
- err = sbuf_printf(s, first ? "%s" : ", %s", algo->name);
- if (err) {
- /* Sbuf overflow condition. */
- err = EOVERFLOW;
+ linesz = snprintf(cp, bufsz, "%-16s%c %u\n",
+ algo->name,
+ (algo == CC_DEFAULT_ALGO()) ? '*' : ' ',
+ algo->cc_refcount);
+ if (linesz >= bufsz) {
+ error = EOVERFLOW;
break;
}
- first = 0;
+ cp += linesz;
+ bufsz -= linesz;
+ outsz += linesz;
}
CC_LIST_RUNLOCK();
-
- if (!err) {
- sbuf_finish(s);
- err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
- }
-
- sbuf_delete(s);
- return (err);
+ if (error == 0)
+ error = sysctl_handle_string(oidp, buffer, outsz + 1, req);
+ free(buffer, M_TEMP);
+ return (error);
}
/*
@@ -236,41 +276,36 @@
int
cc_deregister_algo(struct cc_algo *remove_cc)
{
- struct cc_algo *funcs, *tmpfuncs;
- int err;
-
- err = ENOENT;
+ struct cc_algo *funcs;
+ int found = 0;
/* Remove algo from cc_list so that new connections can't use it. */
CC_LIST_WLOCK();
- STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
- if (funcs == remove_cc) {
- if (cc_check_default(remove_cc)) {
- CC_LIST_WUNLOCK();
- return(EBUSY);
- }
- break;
- }
+
+ /* This is unlikely to fail */
+ STAILQ_FOREACH(funcs, &cc_list, entries) {
+ if (funcs == remove_cc) {
+ /* Registered; no need to scan the rest of the list */
+ found = 1;
+ break;
+ }
}
- remove_cc->flags |= CC_MODULE_BEING_REMOVED;
- CC_LIST_WUNLOCK();
- err = tcp_ccalgounload(remove_cc);
- /*
- * Now back through and we either remove the temp flag
- * or pull the registration.
- */
- CC_LIST_WLOCK();
- STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
- if (funcs == remove_cc) {
- if (err == 0)
- STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries);
- else
- funcs->flags &= ~CC_MODULE_BEING_REMOVED;
- break;
- }
+ if (found == 0) {
+ /* Nothing to remove?? */
+ CC_LIST_WUNLOCK();
+ return (ENOENT);
+ }
+ /* We assert it should have been MOD_QUIESCE'd */
+ KASSERT((remove_cc->flags & CC_MODULE_BEING_REMOVED),
+ ("remove_cc:%p does not have CC_MODULE_BEING_REMOVED flag", remove_cc));
+ /* The current default algorithm is never removed */
+ if (cc_check_default(remove_cc)) {
+ CC_LIST_WUNLOCK();
+ return(EBUSY);
+ }
+ /*
+ * A non-zero count means cc_attach() or cc_refer() callers still
+ * hold the algorithm, so it cannot be pulled from the list yet.
+ */
+ if (remove_cc->cc_refcount != 0) {
+ CC_LIST_WUNLOCK();
+ return (EBUSY);
}
+ STAILQ_REMOVE(&cc_list, remove_cc, cc_algo, entries);
CC_LIST_WUNLOCK();
- return (err);
+ return (0);
}
/*
@@ -297,6 +332,9 @@
break;
}
}
+ /* Init its reference count */
+ if (err == 0)
+ refcount_init(&add_cc->cc_refcount, 0);
/*
* The first loaded congestion control module will become
* the default until we find the "CC_DEFAULT" defined in
@@ -519,6 +557,28 @@
}
}
+/*
+ * First stage of unloading a CC module: mark algo with
+ * CC_MODULE_BEING_REMOVED and call tcp_ccalgounload() for it.  Returns EBUSY
+ * without touching the flag when cc_check_default() reports algo as the
+ * default; otherwise returns the result of tcp_ccalgounload(), clearing the
+ * flag again if that call failed.
+ */
+static int
+cc_stop_new_assignments(struct cc_algo *algo)
+{
+ int err;
+
+ CC_LIST_WLOCK();
+ if (cc_check_default(algo)) {
+ /* A default cannot be removed */
+ CC_LIST_WUNLOCK();
+ return (EBUSY);
+ }
+ algo->flags |= CC_MODULE_BEING_REMOVED;
+ CC_LIST_WUNLOCK();
+ /* tcp_ccalgounload() is called with the CC list lock dropped */
+ err = tcp_ccalgounload(algo);
+ if (err) {
+ /* Unload was refused: make the algorithm selectable again */
+ CC_LIST_WLOCK();
+ algo->flags &= ~CC_MODULE_BEING_REMOVED;
+ CC_LIST_WUNLOCK();
+ }
+ return (err);
+}
+
/*
* Handles kld related events. Returns 0 on success, non-zero on failure.
*/
@@ -550,14 +610,28 @@
case MOD_QUIESCE:
case MOD_SHUTDOWN:
+ /* Stop any new assignments */
+ err = cc_stop_new_assignments(algo);
+ break;
case MOD_UNLOAD:
+ /*
+ * Deregister and remove the module from the list
+ */
+ CC_LIST_WLOCK();
+ /* Even with -f we can't unload if it's the default */
+ if (cc_check_default(algo)) {
+ /* A default cannot be removed */
+ CC_LIST_WUNLOCK();
+ return (EBUSY);
+ }
+ /*
+ * cc_deregister_algo() takes the CC list write lock itself, so
+ * release it here first; calling it with the lock held would
+ * acquire the lock twice but release it only once.  The default
+ * check is repeated under the lock inside cc_deregister_algo().
+ */
+ CC_LIST_WUNLOCK();
+ /*
+ * If -f was used and users are still attached to
+ * the algorithm things are going to go boom.
+ */
err = cc_deregister_algo(algo);
if (!err && algo->mod_destroy != NULL)
algo->mod_destroy();
- if (err == ENOENT)
- err = 0;
break;
-
default:
err = EINVAL;
break;
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -2186,7 +2186,7 @@
*/
CC_LIST_RLOCK();
KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!"));
- CC_ALGO(tp) = CC_DEFAULT_ALGO();
+ cc_attach(tp, CC_DEFAULT_ALGO());
CC_LIST_RUNLOCK();
/*
@@ -2198,6 +2198,7 @@
if (CC_ALGO(tp)->cb_init != NULL)
if (CC_ALGO(tp)->cb_init(tp->ccv, NULL) > 0) {
+ cc_detach(tp);
if (tp->t_fb->tfb_tcp_fb_fini)
(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
in_pcbrele_wlocked(inp);
@@ -2455,6 +2456,8 @@
if (CC_ALGO(tp)->cb_destroy != NULL)
CC_ALGO(tp)->cb_destroy(tp->ccv);
CC_DATA(tp) = NULL;
+ /* Detach from the CC algorithm */
+ cc_detach(tp);
#ifdef TCP_HHOOK
khelp_destroy_osd(tp->osd);
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -1993,7 +1993,7 @@
extern struct cc_algo newreno_cc_algo;
static int
-tcp_congestion(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp)
+tcp_set_cc_mod(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp)
{
struct cc_algo *algo;
void *ptr = NULL;
@@ -2021,26 +2021,16 @@
CC_LIST_RUNLOCK();
return(ESRCH);
}
-do_over:
+ /* With a reference the algorithm cannot be removed */
+ cc_refer(algo);
+ CC_LIST_RUNLOCK();
if (algo->cb_init != NULL) {
/* We can now pre-get the memory for the CC */
mem_sz = (*algo->cc_data_sz)();
if (mem_sz == 0) {
goto no_mem_needed;
}
- CC_LIST_RUNLOCK();
ptr = malloc(mem_sz, M_CC_MEM, M_WAITOK);
- CC_LIST_RLOCK();
- STAILQ_FOREACH(algo, &cc_list, entries)
- if (strncmp(buf, algo->name,
- TCP_CA_NAME_MAX) == 0)
- break;
- if (algo == NULL) {
- if (ptr)
- free(ptr, M_CC_MEM);
- CC_LIST_RUNLOCK();
- return(ESRCH);
- }
} else {
no_mem_needed:
mem_sz = 0;
@@ -2051,22 +2041,16 @@
* back the inplock.
*/
memset(&cc_mem, 0, sizeof(cc_mem));
- if (mem_sz != (*algo->cc_data_sz)()) {
- if (ptr)
- free(ptr, M_CC_MEM);
- goto do_over;
- }
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
INP_WUNLOCK(inp);
- CC_LIST_RUNLOCK();
- free(ptr, M_CC_MEM);
+ if (ptr)
+ free(ptr, M_CC_MEM);
+ /*
+ * Drop the temporary reference taken with cc_refer() above;
+ * without this the early return leaves cc_refcount elevated
+ * and cc_deregister_algo() would keep returning EBUSY.
+ */
+ CC_LIST_RLOCK();
+ cc_release(algo);
+ CC_LIST_RUNLOCK();
return (ECONNRESET);
}
tp = intotcpcb(inp);
if (ptr != NULL)
memset(ptr, 0, mem_sz);
- CC_LIST_RUNLOCK();
cc_mem.ccvc.tcp = tp;
/*
* We once again hold a write lock over the tcb so it's
@@ -2091,8 +2075,9 @@
*/
if (CC_ALGO(tp)->cb_destroy != NULL)
CC_ALGO(tp)->cb_destroy(tp->ccv);
+ cc_detach(tp);
memcpy(tp->ccv, &cc_mem, sizeof(struct cc_var));
- tp->cc_algo = algo;
+ cc_attach(tp, algo);
/* Ok now are we where we have gotten past any conn_init? */
if (TCPS_HAVEESTABLISHED(tp->t_state) && (CC_ALGO(tp)->conn_init != NULL)) {
/* Yep run the connection init for the new CC */
@@ -2101,6 +2086,10 @@
} else if (ptr)
free(ptr, M_CC_MEM);
INP_WUNLOCK(inp);
+ /* Now lets release our reference */
+ CC_LIST_RLOCK();
+ cc_release(algo);
+ CC_LIST_RUNLOCK();
return (error);
}
@@ -2319,7 +2308,7 @@
break;
case TCP_CONGESTION:
- error = tcp_congestion(so, sopt, inp, tp);
+ error = tcp_set_cc_mod(so, sopt, inp, tp);
break;
case TCP_REUSPORT_LB_NUMA:

File Metadata

Mime Type
text/plain
Expires
Wed, Feb 18, 12:04 PM (4 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28846165
Default Alt Text
D33249.id100082.diff (10 KB)

Event Timeline