Page MenuHomeFreeBSD

D4055.diff
No OneTemporary

D4055.diff

Index: head/sys/modules/Makefile
===================================================================
--- head/sys/modules/Makefile
+++ head/sys/modules/Makefile
@@ -346,6 +346,7 @@
${_syscons} \
sysvipc \
${_ti} \
+ tcp/fastpath \
tests/framework \
tests/callout_test \
tl \
Index: head/sys/netinet/tcp.h
===================================================================
--- head/sys/netinet/tcp.h
+++ head/sys/netinet/tcp.h
@@ -167,7 +167,7 @@
#define TCP_KEEPCNT 1024 /* L,N number of keepalives before close */
#define TCP_PCAP_OUT 2048 /* number of output packets to keep */
#define TCP_PCAP_IN 4096 /* number of input packets to keep */
-
+#define TCP_FUNCTION_BLK 8192 /* Set the tcp function pointers to the specified stack */
/* Start of reserved space for third-party user-settable options. */
#define TCP_VENDOR SO_VENDOR
@@ -245,5 +245,11 @@
u_int32_t __tcpi_pad[26]; /* Padding. */
};
#endif
+#define TCP_FUNCTION_NAME_LEN_MAX 32
+
+struct tcp_function_set {
+ char function_set_name[TCP_FUNCTION_NAME_LEN_MAX];
+ uint32_t pcbcnt;
+};
#endif /* !_NETINET_TCP_H_ */
Index: head/sys/netinet/tcp_input.c
===================================================================
--- head/sys/netinet/tcp_input.c
+++ head/sys/netinet/tcp_input.c
@@ -230,23 +230,6 @@
#define tcb6 tcb /* for KAME src sync over BSD*'s */
VNET_DEFINE(struct inpcbinfo, tcbinfo);
-static void tcp_dooptions(struct tcpopt *, u_char *, int, int);
-static void tcp_do_segment(struct mbuf *, struct tcphdr *,
- struct socket *, struct tcpcb *, int, int, uint8_t,
- int);
-static void tcp_dropwithreset(struct mbuf *, struct tcphdr *,
- struct tcpcb *, int, int);
-static void tcp_pulloutofband(struct socket *,
- struct tcphdr *, struct mbuf *, int);
-static void tcp_xmit_timer(struct tcpcb *, int);
-static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
-static void inline cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
- uint16_t type);
-static void inline cc_conn_init(struct tcpcb *tp);
-static void inline cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
-static void inline hhook_run_tcp_est_in(struct tcpcb *tp,
- struct tcphdr *th, struct tcpopt *to);
-
/*
* TCP statistics are stored in an "array" of counter(9)s.
*/
@@ -272,7 +255,7 @@
/*
* Wrapper for the TCP established input helper hook.
*/
-static void inline
+void
hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
{
struct tcp_hhook_data hhook_data;
@@ -290,7 +273,7 @@
/*
* CC wrapper hook functions
*/
-static void inline
+void
cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type)
{
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -322,7 +305,7 @@
}
}
-static void inline
+void
cc_conn_init(struct tcpcb *tp)
{
struct hc_metrics_lite metrics;
@@ -446,7 +429,7 @@
}
}
-static void inline
+void inline
cc_post_recovery(struct tcpcb *tp, struct tcphdr *th)
{
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -601,9 +584,6 @@
struct tcpopt to; /* options in this segment */
char *s = NULL; /* address and port logging */
int ti_locked;
-#define TI_UNLOCKED 1
-#define TI_RLOCKED 2
-
#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
@@ -1175,7 +1155,7 @@
* contains. tcp_do_segment() consumes
* the mbuf chain and unlocks the inpcb.
*/
- tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
+ tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
iptos, ti_locked);
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
return (IPPROTO_DONE);
@@ -1421,7 +1401,7 @@
* state. tcp_do_segment() always consumes the mbuf chain, unlocks
* the inpcb, and unlocks pcbinfo.
*/
- tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked);
+ tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked);
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
return (IPPROTO_DONE);
@@ -1476,7 +1456,7 @@
return (IPPROTO_DONE);
}
-static void
+void
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
int ti_locked)
@@ -1788,7 +1768,7 @@
tp->t_rxtcur);
sowwakeup(so);
if (sbavail(&so->so_snd))
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
goto check_delack;
}
} else if (th->th_ack == tp->snd_una &&
@@ -1907,7 +1887,7 @@
tp->t_flags |= TF_DELACK;
} else {
tp->t_flags |= TF_ACKNOW;
- tcp_output(tp);
+ tp->t_fb->tfb_tcp_output(tp);
}
goto check_delack;
}
@@ -2522,7 +2502,7 @@
}
} else
tp->snd_cwnd += tp->t_maxseg;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
goto drop;
} else if (tp->t_dupacks == tcprexmtthresh) {
tcp_seq onxt = tp->snd_nxt;
@@ -2556,12 +2536,12 @@
tcps_sack_recovery_episode);
tp->sack_newdata = tp->snd_nxt;
tp->snd_cwnd = tp->t_maxseg;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
goto drop;
}
tp->snd_nxt = th->th_ack;
tp->snd_cwnd = tp->t_maxseg;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
KASSERT(tp->snd_limited <= 2,
("%s: tp->snd_limited too big",
__func__));
@@ -2608,7 +2588,7 @@
(tp->snd_nxt - tp->snd_una);
SOCKBUF_UNLOCK(&so->so_snd);
if (avail > 0)
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
sent = tp->snd_max - oldsndmax;
if (sent > tp->t_maxseg) {
KASSERT((tp->t_dupacks == 2 &&
@@ -3074,7 +3054,7 @@
* Return any desired output.
*/
if (needoutput || (tp->t_flags & TF_ACKNOW))
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
check_delack:
KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
@@ -3122,7 +3102,7 @@
ti_locked = TI_UNLOCKED;
tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
INP_WUNLOCK(tp->t_inpcb);
m_freem(m);
return;
@@ -3168,7 +3148,7 @@
* The mbuf must still include the original packet header.
* tp may be NULL.
*/
-static void
+void
tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
int tlen, int rstreason)
{
@@ -3231,7 +3211,7 @@
/*
* Parse TCP options and place in tcpopt.
*/
-static void
+void
tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
{
int opt, optlen;
@@ -3325,7 +3305,7 @@
* It is still reflected in the segment length for
* sequencing purposes.
*/
-static void
+void
tcp_pulloutofband(struct socket *so, struct tcphdr *th, struct mbuf *m,
int off)
{
@@ -3358,7 +3338,7 @@
* Collect new round-trip time estimate
* and update averages and current timeout.
*/
-static void
+void
tcp_xmit_timer(struct tcpcb *tp, int rtt)
{
int delta;
@@ -3738,7 +3718,7 @@
* By setting snd_nxt to ti_ack, this forces retransmission timer to
* be started again.
*/
-static void
+void
tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
{
tcp_seq onxt = tp->snd_nxt;
@@ -3755,7 +3735,7 @@
*/
tp->snd_cwnd = tp->t_maxseg + BYTES_THIS_ACK(tp, th);
tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
tp->snd_cwnd = ocwnd;
if (SEQ_GT(onxt, tp->snd_nxt))
tp->snd_nxt = onxt;
Index: head/sys/netinet/tcp_sack.c
===================================================================
--- head/sys/netinet/tcp_sack.c
+++ head/sys/netinet/tcp_sack.c
@@ -599,7 +599,7 @@
if (tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
}
#if 0
Index: head/sys/netinet/tcp_subr.c
===================================================================
--- head/sys/netinet/tcp_subr.c
+++ head/sys/netinet/tcp_subr.c
@@ -47,6 +47,7 @@
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
+#include <sys/refcount.h>
#include <sys/mbuf.h>
#ifdef INET6
#include <sys/domain.h>
@@ -125,6 +126,8 @@
VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
#endif
+struct rwlock tcp_function_lock;
+
static int
sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS)
{
@@ -236,6 +239,179 @@
void *ip4hdr, const void *ip6hdr);
static void tcp_timer_discard(struct tcpcb *, uint32_t);
+
+static struct tcp_function_block tcp_def_funcblk = {
+ "default",
+ tcp_output,
+ tcp_do_segment,
+ tcp_default_ctloutput,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ 0,
+ 0
+};
+
+struct tcp_funchead t_functions;
+static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk;
+
+static struct tcp_function_block *
+find_tcp_functions_locked(struct tcp_function_set *fs)
+{
+ struct tcp_function *f;
+ struct tcp_function_block *blk=NULL;
+
+ TAILQ_FOREACH(f, &t_functions, tf_next) {
+ if (strcmp(f->tf_fb->tfb_tcp_block_name, fs->function_set_name) == 0) {
+ blk = f->tf_fb;
+ break;
+ }
+ }
+ return(blk);
+}
+
+static struct tcp_function_block *
+find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s)
+{
+ struct tcp_function_block *rblk=NULL;
+ struct tcp_function *f;
+
+ TAILQ_FOREACH(f, &t_functions, tf_next) {
+ if (f->tf_fb == blk) {
+ rblk = blk;
+ if (s) {
+ *s = f;
+ }
+ break;
+ }
+ }
+ return (rblk);
+}
+
+struct tcp_function_block *
+find_and_ref_tcp_functions(struct tcp_function_set *fs)
+{
+ struct tcp_function_block *blk;
+
+ rw_rlock(&tcp_function_lock);
+ blk = find_tcp_functions_locked(fs);
+ if (blk)
+ refcount_acquire(&blk->tfb_refcnt);
+ rw_runlock(&tcp_function_lock);
+ return(blk);
+}
+
+struct tcp_function_block *
+find_and_ref_tcp_fb(struct tcp_function_block *blk)
+{
+ struct tcp_function_block *rblk;
+
+ rw_rlock(&tcp_function_lock);
+ rblk = find_tcp_fb_locked(blk, NULL);
+ if (rblk)
+ refcount_acquire(&rblk->tfb_refcnt);
+ rw_runlock(&tcp_function_lock);
+ return(rblk);
+}
+
+
+static int
+sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
+{
+ int error=ENOENT;
+ struct tcp_function_set fs;
+ struct tcp_function_block *blk;
+
+ memset(&fs, 0, sizeof(fs));
+ rw_rlock(&tcp_function_lock);
+ blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL);
+ if (blk) {
+ /* Found him */
+ strcpy(fs.function_set_name, blk->tfb_tcp_block_name);
+ fs.pcbcnt = blk->tfb_refcnt;
+ }
+ rw_runlock(&tcp_function_lock);
+ error = sysctl_handle_string(oidp, fs.function_set_name,
+ sizeof(fs.function_set_name), req);
+
+ /* Check for error or no change */
+ if (error != 0 || req->newptr == NULL)
+ return(error);
+
+ rw_wlock(&tcp_function_lock);
+ blk = find_tcp_functions_locked(&fs);
+ if ((blk == NULL) ||
+ (blk->tfb_flags & TCP_FUNC_BEING_REMOVED)) {
+ error = ENOENT;
+ goto done;
+ }
+ tcp_func_set_ptr = blk;
+done:
+ rw_wunlock(&tcp_function_lock);
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default,
+ CTLTYPE_STRING | CTLFLAG_RW,
+ NULL, 0, sysctl_net_inet_default_tcp_functions, "A",
+ "Set/get the default TCP functions");
+
+static int
+sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS)
+{
+ int error, cnt, linesz;
+ struct tcp_function *f;
+ char *buffer, *cp;
+ size_t bufsz, outsz;
+
+ cnt = 0;
+ rw_rlock(&tcp_function_lock);
+ TAILQ_FOREACH(f, &t_functions, tf_next) {
+ cnt++;
+ }
+ rw_runlock(&tcp_function_lock);
+
+ bufsz = (cnt+2) * (TCP_FUNCTION_NAME_LEN_MAX + 12) + 1;
+ buffer = malloc(bufsz, M_TEMP, M_WAITOK);
+
+ error = 0;
+ cp = buffer;
+
+ linesz = snprintf(cp, bufsz, "\n%-32s%c %s\n", "Stack", 'D', "PCB count");
+ cp += linesz;
+ bufsz -= linesz;
+ outsz = linesz;
+
+ rw_rlock(&tcp_function_lock);
+ TAILQ_FOREACH(f, &t_functions, tf_next) {
+ linesz = snprintf(cp, bufsz, "%-32s%c %u\n",
+ f->tf_fb->tfb_tcp_block_name,
+ (f->tf_fb == tcp_func_set_ptr) ? '*' : ' ',
+ f->tf_fb->tfb_refcnt);
+ if (linesz >= bufsz) {
+ error = EOVERFLOW;
+ break;
+ }
+ cp += linesz;
+ bufsz -= linesz;
+ outsz += linesz;
+ }
+ rw_runlock(&tcp_function_lock);
+ if (error == 0)
+ error = sysctl_handle_string(oidp, buffer, outsz + 1, req);
+ free(buffer, M_TEMP);
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available,
+ CTLTYPE_STRING|CTLFLAG_RD,
+ NULL, 0, sysctl_net_inet_list_available, "A",
+ "list available TCP Function sets");
+
/*
* Target size of TCP PCB hash tables. Must be a power of two.
*
@@ -263,6 +439,8 @@
#define V_tcpcb_zone VNET(tcpcb_zone)
MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
+MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory");
+
static struct mtx isn_mtx;
#define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
@@ -311,6 +489,96 @@
return (hashsize);
}
+int
+register_tcp_functions(struct tcp_function_block *blk, int wait)
+{
+ struct tcp_function_block *lblk;
+ struct tcp_function *n;
+ struct tcp_function_set fs;
+
+ if ((blk->tfb_tcp_output == NULL) ||
+ (blk->tfb_tcp_do_segment == NULL) ||
+ (blk->tfb_tcp_ctloutput == NULL) ||
+ (strlen(blk->tfb_tcp_block_name) == 0)) {
+ /*
+ * These functions are required and you
+ * need a name.
+ */
+ return (EINVAL);
+ }
+ if (blk->tfb_tcp_timer_stop_all ||
+ blk->tfb_tcp_timers_left ||
+ blk->tfb_tcp_timer_activate ||
+ blk->tfb_tcp_timer_active ||
+ blk->tfb_tcp_timer_stop) {
+ /*
+ * If you define one timer function you
+ * must have them all.
+ */
+ if ((blk->tfb_tcp_timer_stop_all == NULL) ||
+ (blk->tfb_tcp_timers_left == NULL) ||
+ (blk->tfb_tcp_timer_activate == NULL) ||
+ (blk->tfb_tcp_timer_active == NULL) ||
+ (blk->tfb_tcp_timer_stop == NULL)) {
+ return (EINVAL);
+ }
+ }
+ n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait);
+ if (n == NULL) {
+ return (ENOMEM);
+ }
+ n->tf_fb = blk;
+ strcpy(fs.function_set_name, blk->tfb_tcp_block_name);
+ rw_wlock(&tcp_function_lock);
+ lblk = find_tcp_functions_locked(&fs);
+ if (lblk) {
+ /* Duplicate name space not allowed */
+ rw_wunlock(&tcp_function_lock);
+ free(n, M_TCPFUNCTIONS);
+ return (EALREADY);
+ }
+ refcount_init(&blk->tfb_refcnt, 0);
+ blk->tfb_flags = 0;
+ TAILQ_INSERT_TAIL(&t_functions, n, tf_next);
+ rw_wunlock(&tcp_function_lock);
+ return(0);
+}
+
+int
+deregister_tcp_functions(struct tcp_function_block *blk)
+{
+ struct tcp_function_block *lblk;
+ struct tcp_function *f;
+ int error=ENOENT;
+
+ if (strcmp(blk->tfb_tcp_block_name, "default") == 0) {
+ /* You can't un-register the default */
+ return (EPERM);
+ }
+ rw_wlock(&tcp_function_lock);
+ if (blk == tcp_func_set_ptr) {
+ /* You can't free the current default */
+ rw_wunlock(&tcp_function_lock);
+ return (EBUSY);
+ }
+ if (blk->tfb_refcnt) {
+ /* Still tcb attached, mark it. */
+ blk->tfb_flags |= TCP_FUNC_BEING_REMOVED;
+ rw_wunlock(&tcp_function_lock);
+ return (EBUSY);
+ }
+ lblk = find_tcp_fb_locked(blk, &f);
+ if (lblk) {
+ /* Found */
+ TAILQ_REMOVE(&t_functions, f, tf_next);
+ f->tf_fb = NULL;
+ free(f, M_TCPFUNCTIONS);
+ error = 0;
+ }
+ rw_wunlock(&tcp_function_lock);
+ return (error);
+}
+
void
tcp_init(void)
{
@@ -325,7 +593,10 @@
if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT,
&V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
printf("%s: WARNING: unable to register helper hook\n", __func__);
-
+ /* Setup the tcp function block list */
+ TAILQ_INIT(&t_functions);
+ rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0);
+ register_tcp_functions(&tcp_def_funcblk, M_WAITOK);
hashsize = TCBHASHSIZE;
TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
if (hashsize == 0) {
@@ -768,7 +1039,13 @@
tp->ccv = &tm->ccv;
tp->ccv->type = IPPROTO_TCP;
tp->ccv->ccvc.tcp = tp;
-
+ rw_rlock(&tcp_function_lock);
+ tp->t_fb = tcp_func_set_ptr;
+ refcount_acquire(&tp->t_fb->tfb_refcnt);
+ rw_runlock(&tcp_function_lock);
+ if (tp->t_fb->tfb_tcp_fb_init) {
+ (*tp->t_fb->tfb_tcp_fb_init)(tp);
+ }
/*
* Use the current system default CC algorithm.
*/
@@ -779,12 +1056,18 @@
if (CC_ALGO(tp)->cb_init != NULL)
if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) {
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
uma_zfree(V_tcpcb_zone, tm);
return (NULL);
}
tp->osd = &tm->osd;
if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) {
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
uma_zfree(V_tcpcb_zone, tm);
return (NULL);
}
@@ -925,7 +1208,7 @@
if (TCPS_HAVERCVDSYN(tp->t_state)) {
tcp_state_change(tp, TCPS_CLOSED);
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
TCPSTAT_INC(tcps_drops);
} else
TCPSTAT_INC(tcps_conndrops);
@@ -960,6 +1243,10 @@
tcp_timer_stop(tp, TT_KEEP);
tcp_timer_stop(tp, TT_2MSL);
tcp_timer_stop(tp, TT_DELACK);
+ if (tp->t_fb->tfb_tcp_timer_stop_all) {
+ /* Call the stop-all function of the methods */
+ tp->t_fb->tfb_tcp_timer_stop_all(tp);
+ }
/*
* If we got enough samples through the srtt filter,
@@ -1044,6 +1331,14 @@
inp->inp_ppcb = NULL;
if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
/* We own the last reference on tcpcb, let's free it. */
+ if ((tp->t_fb->tfb_tcp_timers_left) &&
+ (tp->t_fb->tfb_tcp_timers_left(tp))) {
+ /* Some fb timers left running! */
+ return;
+ }
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);
released = in_pcbrele_wlocked(inp);
@@ -1105,6 +1400,14 @@
tp->t_timers->tt_flags &= ~timer_type;
if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
/* We own the last reference on this tcpcb, let's free it. */
+ if ((tp->t_fb->tfb_tcp_timers_left) &&
+ (tp->t_fb->tfb_tcp_timers_left(tp))) {
+ /* Some fb timers left running! */
+ goto leave;
+ }
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);
if (in_pcbrele_wlocked(inp)) {
@@ -1113,6 +1416,7 @@
return;
}
}
+leave:
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
@@ -1865,7 +2169,7 @@
tp->snd_recover = tp->snd_max;
if (tp->t_flags & TF_SACK_PERMIT)
EXIT_FASTRECOVERY(tp->t_flags);
- tcp_output(tp);
+ tp->t_fb->tfb_tcp_output(tp);
}
#ifdef INET
Index: head/sys/netinet/tcp_syncache.c
===================================================================
--- head/sys/netinet/tcp_syncache.c
+++ head/sys/netinet/tcp_syncache.c
@@ -41,6 +41,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/hash.h>
+#include <sys/refcount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/limits.h>
@@ -626,6 +627,7 @@
static struct socket *
syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
{
+ struct tcp_function_block *blk;
struct inpcb *inp = NULL;
struct socket *so;
struct tcpcb *tp;
@@ -817,6 +819,26 @@
tp->irs = sc->sc_irs;
tcp_rcvseqinit(tp);
tcp_sendseqinit(tp);
+ blk = sototcpcb(lso)->t_fb;
+ if (blk != tp->t_fb) {
+ /*
+ * Our parents t_fb was not the default,
+ * we need to release our ref on tp->t_fb and
+ * pickup one on the new entry.
+ */
+ struct tcp_function_block *rblk;
+
+ rblk = find_and_ref_tcp_fb(blk);
+ KASSERT(rblk != NULL,
+ ("cannot find blk %p out of syncache?", blk));
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
+ tp->t_fb = rblk;
+ if (tp->t_fb->tfb_tcp_fb_init) {
+ (*tp->t_fb->tfb_tcp_fb_init)(tp);
+ }
+ }
tp->snd_wl1 = sc->sc_irs;
tp->snd_max = tp->iss + 1;
tp->snd_nxt = tp->iss + 1;
Index: head/sys/netinet/tcp_timer.c
===================================================================
--- head/sys/netinet/tcp_timer.c
+++ head/sys/netinet/tcp_timer.c
@@ -292,7 +292,7 @@
tp->t_flags |= TF_ACKNOW;
TCPSTAT_INC(tcps_delack);
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
INP_WUNLOCK(inp);
CURVNET_RESTORE();
}
@@ -543,7 +543,7 @@
}
tcp_setpersist(tp);
tp->t_flags |= TF_FORCEDATA;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
tp->t_flags &= ~TF_FORCEDATA;
out:
@@ -798,7 +798,7 @@
cc_cong_signal(tp, NULL, CC_RTO);
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
out:
#ifdef TCPDEBUG
@@ -858,6 +858,10 @@
f_reset = TT_2MSL_RST;
break;
default:
+ if (tp->t_fb->tfb_tcp_timer_activate) {
+ tp->t_fb->tfb_tcp_timer_activate(tp, timer_type, delta);
+ return;
+ }
panic("tp %p bad timer_type %#x", tp, timer_type);
}
if (delta == 0) {
@@ -904,6 +908,9 @@
t_callout = &tp->t_timers->tt_2msl;
break;
default:
+ if (tp->t_fb->tfb_tcp_timer_active) {
+ return(tp->t_fb->tfb_tcp_timer_active(tp, timer_type));
+ }
panic("tp %p bad timer_type %#x", tp, timer_type);
}
return callout_active(t_callout);
@@ -945,6 +952,14 @@
f_reset = TT_2MSL_RST;
break;
default:
+ if (tp->t_fb->tfb_tcp_timer_stop) {
+ /*
+ * XXXrrs we need to look at this with the
+ * stop case below (flags).
+ */
+ tp->t_fb->tfb_tcp_timer_stop(tp, timer_type);
+ return;
+ }
panic("tp %p bad timer_type %#x", tp, timer_type);
}
Index: head/sys/netinet/tcp_usrreq.c
===================================================================
--- head/sys/netinet/tcp_usrreq.c
+++ head/sys/netinet/tcp_usrreq.c
@@ -47,6 +47,7 @@
#include <sys/systm.h>
#include <sys/limits.h>
#include <sys/malloc.h>
+#include <sys/refcount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
@@ -509,7 +510,7 @@
goto out;
#endif
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
INP_WUNLOCK(inp);
@@ -579,7 +580,7 @@
(error = tcp_offload_connect(so, nam)) == 0)
goto out;
#endif
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
goto out;
}
#endif
@@ -597,7 +598,7 @@
goto out;
#endif
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
@@ -773,7 +774,7 @@
socantsendmore(so);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
out:
TCPDEBUG2(PRU_SHUTDOWN);
@@ -809,7 +810,7 @@
tcp_offload_rcvd(tp);
else
#endif
- tcp_output(tp);
+ tp->t_fb->tfb_tcp_output(tp);
out:
TCPDEBUG2(PRU_RCVD);
@@ -911,7 +912,7 @@
!(flags & PRUS_NOTREADY)) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
if (flags & PRUS_MORETOCOME)
tp->t_flags &= ~TF_MORETOCOME;
}
@@ -961,7 +962,7 @@
tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
if (!(flags & PRUS_NOTREADY)) {
tp->t_flags |= TF_FORCEDATA;
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
tp->t_flags &= ~TF_FORCEDATA;
}
}
@@ -997,7 +998,7 @@
error = sbready(&so->so_snd, m, count);
SOCKBUF_UNLOCK(&so->so_snd);
if (error == 0)
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
INP_WUNLOCK(inp);
return (error);
@@ -1349,13 +1350,11 @@
int
tcp_ctloutput(struct socket *so, struct sockopt *sopt)
{
- int error, opt, optval;
- u_int ui;
+ int error;
struct inpcb *inp;
struct tcpcb *tp;
- struct tcp_info ti;
- char buf[TCP_CA_NAME_MAX];
- struct cc_algo *algo;
+ struct tcp_function_block *blk;
+ struct tcp_function_set fsn;
error = 0;
inp = sotoinpcb(so);
@@ -1383,7 +1382,83 @@
INP_WUNLOCK(inp);
return (ECONNRESET);
}
+ tp = intotcpcb(inp);
+ /*
+ * Protect the TCP option TCP_FUNCTION_BLK so
+ * that a sub-function can *never* overwrite this.
+ */
+ if ((sopt->sopt_dir == SOPT_SET) &&
+ (sopt->sopt_name == TCP_FUNCTION_BLK)) {
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &fsn, sizeof fsn,
+ sizeof fsn);
+ if (error)
+ return (error);
+ INP_WLOCK_RECHECK(inp);
+ if (tp->t_state != TCPS_CLOSED) {
+ /*
+ * The user has advanced the state
+ * past the initial point, we can't
+ * switch since we are down the road
+ * and a new set of functions may
+ * not be compatibile.
+ */
+ INP_WUNLOCK(inp);
+ return(EINVAL);
+ }
+ blk = find_and_ref_tcp_functions(&fsn);
+ if (blk == NULL) {
+ INP_WUNLOCK(inp);
+ return (ENOENT);
+ }
+ if (tp->t_fb != blk) {
+ if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
+ refcount_release(&blk->tfb_refcnt);
+ INP_WUNLOCK(inp);
+ return (ENOENT);
+ }
+ /*
+ * Release the old refcnt, the
+ * lookup acquires a ref on the
+ * new one.
+ */
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
+ tp->t_fb = blk;
+ if (tp->t_fb->tfb_tcp_fb_init) {
+ (*tp->t_fb->tfb_tcp_fb_init)(tp);
+ }
+ }
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE) {
+ tcp_offload_ctloutput(tp, sopt->sopt_dir,
+ sopt->sopt_name);
+ }
+#endif
+ INP_WUNLOCK(inp);
+ return (error);
+ } else if ((sopt->sopt_dir == SOPT_GET) &&
+ (sopt->sopt_name == TCP_FUNCTION_BLK)) {
+ strcpy(fsn.function_set_name, tp->t_fb->tfb_tcp_block_name);
+ fsn.pcbcnt = tp->t_fb->tfb_refcnt;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &fsn, sizeof fsn);
+ return (error);
+ }
+ /* Pass in the INP locked, called must unlock it */
+ return (tp->t_fb->tfb_tcp_ctloutput(so, sopt, inp, tp));
+}
+int
+tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp)
+{
+ int error, opt, optval;
+ u_int ui;
+ struct tcp_info ti;
+ struct cc_algo *algo;
+ char buf[TCP_CA_NAME_MAX];
+
switch (sopt->sopt_dir) {
case SOPT_SET:
switch (sopt->sopt_name) {
@@ -1451,7 +1526,7 @@
else if (tp->t_flags & TF_NOPUSH) {
tp->t_flags &= ~TF_NOPUSH;
if (TCPS_HAVEESTABLISHED(tp->t_state))
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
}
goto unlock_and_done;
@@ -1770,7 +1845,7 @@
sbflush(&so->so_rcv);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- tcp_output(tp);
+ tp->t_fb->tfb_tcp_output(tp);
}
}
Index: head/sys/netinet/tcp_var.h
===================================================================
--- head/sys/netinet/tcp_var.h
+++ head/sys/netinet/tcp_var.h
@@ -89,6 +89,52 @@
#define tcp6cb tcpcb /* for KAME src sync over BSD*'s */
+/*
+ * TODO: We yet need to brave plowing in
+ * to tcp_input() and the pru_usrreq() block.
+ * Right now these go to the old standards which
+ * are somewhat ok, but in the long term may
+ * need to be changed. If we do tackle tcp_input()
+ * then we need to get rid of the tcp_do_segment()
+ * function below.
+ */
+/* Flags for tcp functions */
+#define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */
+struct tcpcb;
+struct inpcb;
+struct sockopt;
+struct socket;
+
+struct tcp_function_block {
+ char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX];
+ int (*tfb_tcp_output)(struct tcpcb *);
+ void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *,
+ struct socket *, struct tcpcb *,
+ int, int, uint8_t,
+ int);
+ int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt,
+ struct inpcb *inp, struct tcpcb *tp);
+ /* Optional memory allocation/free routine */
+ void (*tfb_tcp_fb_init)(struct tcpcb *);
+ void (*tfb_tcp_fb_fini)(struct tcpcb *);
+ /* Optional timers, must define all if you define one */
+ int (*tfb_tcp_timer_stop_all)(struct tcpcb *);
+ int (*tfb_tcp_timers_left)(struct tcpcb *);
+ void (*tfb_tcp_timer_activate)(struct tcpcb *,
+ uint32_t, u_int);
+ int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
+ void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
+ volatile uint32_t tfb_refcnt;
+ uint32_t tfb_flags;
+};
+
+struct tcp_function {
+ TAILQ_ENTRY(tcp_function) tf_next;
+ struct tcp_function_block *tf_fb;
+};
+
+TAILQ_HEAD(tcp_funchead, tcp_function);
+
/*
* Tcp control block, one per tcp; fields:
* Organized for 16 byte cacheline efficiency.
@@ -207,9 +253,10 @@
u_int t_tsomaxsegsize; /* TSO maximum segment size in bytes */
u_int t_pmtud_saved_maxopd; /* pre-blackhole MSS */
u_int t_flags2; /* More tcpcb flags storage */
-
uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */
- void *t_pspare2[4]; /* 1 TCP_SIGNATURE, 3 TBD */
+ struct tcp_function_block *t_fb;/* TCP function call block */
+ void *t_fb_ptr; /* Pointer to t_fb specific data */
+ void *t_pspare2[2]; /* 1 TCP_SIGNATURE, 1 TBD */
#if defined(_KERNEL) && defined(TCPPCAP)
struct mbufq t_inpkts; /* List of saved input packets. */
struct mbufq t_outpkts; /* List of saved output packets. */
@@ -534,6 +581,8 @@
#define tcps_rcvmemdrop tcps_rcvreassfull /* compat */
#ifdef _KERNEL
+#define TI_UNLOCKED 1
+#define TI_RLOCKED 2
#include <sys/counter.h>
VNET_PCPUSTAT_DECLARE(struct tcpstat, tcpstat); /* tcp statistics */
@@ -684,7 +733,32 @@
int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
void tcp_reass_global_init(void);
void tcp_reass_flush(struct tcpcb *);
+void tcp_dooptions(struct tcpopt *, u_char *, int, int);
+void tcp_dropwithreset(struct mbuf *, struct tcphdr *,
+ struct tcpcb *, int, int);
+void tcp_pulloutofband(struct socket *,
+ struct tcphdr *, struct mbuf *, int);
+void tcp_xmit_timer(struct tcpcb *, int);
+void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
+void cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
+ uint16_t type);
+void cc_conn_init(struct tcpcb *tp);
+void cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
+void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
+void hhook_run_tcp_est_in(struct tcpcb *tp,
+ struct tcphdr *th, struct tcpopt *to);
+
int tcp_input(struct mbuf **, int *, int);
+void tcp_do_segment(struct mbuf *, struct tcphdr *,
+ struct socket *, struct tcpcb *, int, int, uint8_t,
+ int);
+
+int register_tcp_functions(struct tcp_function_block *blk, int wait);
+int deregister_tcp_functions(struct tcp_function_block *blk);
+struct tcp_function_block *find_and_ref_tcp_functions(struct tcp_function_set *fs);
+struct tcp_function_block *find_and_ref_tcp_fb(struct tcp_function_block *blk);
+int tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp);
+
u_long tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
u_long tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
@@ -752,8 +826,6 @@
u_long tcp_seq_subtract(u_long, u_long );
int tcp_compute_pipe(struct tcpcb *);
-void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
-
static inline void
tcp_fields_to_host(struct tcphdr *th)
{
Index: head/sys/netinet/toecore.c
===================================================================
--- head/sys/netinet/toecore.c
+++ head/sys/netinet/toecore.c
@@ -509,7 +509,7 @@
KASSERT(!(tp->t_flags & TF_TOE),
("%s: tp %p still offloaded.", __func__, tp));
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
} else {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);

File Metadata

Mime Type
text/plain
Expires
Sun, Feb 23, 5:56 AM (5 h, 24 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16789191
Default Alt Text
D4055.diff (30 KB)

Event Timeline