Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F110714412
D4055.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
30 KB
Referenced Files
None
Subscribers
None
D4055.diff
View Options
Index: head/sys/modules/Makefile
===================================================================
--- head/sys/modules/Makefile
+++ head/sys/modules/Makefile
@@ -346,6 +346,7 @@
${_syscons} \
sysvipc \
${_ti} \
+ tcp/fastpath \
tests/framework \
tests/callout_test \
tl \
Index: head/sys/netinet/tcp.h
===================================================================
--- head/sys/netinet/tcp.h
+++ head/sys/netinet/tcp.h
@@ -167,7 +167,7 @@
#define TCP_KEEPCNT 1024 /* L,N number of keepalives before close */
#define TCP_PCAP_OUT 2048 /* number of output packets to keep */
#define TCP_PCAP_IN 4096 /* number of input packets to keep */
-
+#define TCP_FUNCTION_BLK 8192 /* Set the tcp function pointers to the specified stack */
/* Start of reserved space for third-party user-settable options. */
#define TCP_VENDOR SO_VENDOR
@@ -245,5 +245,11 @@
u_int32_t __tcpi_pad[26]; /* Padding. */
};
#endif
+#define TCP_FUNCTION_NAME_LEN_MAX 32
+
+struct tcp_function_set {
+ char function_set_name[TCP_FUNCTION_NAME_LEN_MAX];
+ uint32_t pcbcnt;
+};
#endif /* !_NETINET_TCP_H_ */
Index: head/sys/netinet/tcp_input.c
===================================================================
--- head/sys/netinet/tcp_input.c
+++ head/sys/netinet/tcp_input.c
@@ -230,23 +230,6 @@
#define tcb6 tcb /* for KAME src sync over BSD*'s */
VNET_DEFINE(struct inpcbinfo, tcbinfo);
-static void tcp_dooptions(struct tcpopt *, u_char *, int, int);
-static void tcp_do_segment(struct mbuf *, struct tcphdr *,
- struct socket *, struct tcpcb *, int, int, uint8_t,
- int);
-static void tcp_dropwithreset(struct mbuf *, struct tcphdr *,
- struct tcpcb *, int, int);
-static void tcp_pulloutofband(struct socket *,
- struct tcphdr *, struct mbuf *, int);
-static void tcp_xmit_timer(struct tcpcb *, int);
-static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
-static void inline cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
- uint16_t type);
-static void inline cc_conn_init(struct tcpcb *tp);
-static void inline cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
-static void inline hhook_run_tcp_est_in(struct tcpcb *tp,
- struct tcphdr *th, struct tcpopt *to);
-
/*
* TCP statistics are stored in an "array" of counter(9)s.
*/
@@ -272,7 +255,7 @@
/*
* Wrapper for the TCP established input helper hook.
*/
-static void inline
+void
hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
{
struct tcp_hhook_data hhook_data;
@@ -290,7 +273,7 @@
/*
* CC wrapper hook functions
*/
-static void inline
+void
cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type)
{
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -322,7 +305,7 @@
}
}
-static void inline
+void
cc_conn_init(struct tcpcb *tp)
{
struct hc_metrics_lite metrics;
@@ -446,7 +429,7 @@
}
}
-static void inline
+void inline
cc_post_recovery(struct tcpcb *tp, struct tcphdr *th)
{
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -601,9 +584,6 @@
struct tcpopt to; /* options in this segment */
char *s = NULL; /* address and port logging */
int ti_locked;
-#define TI_UNLOCKED 1
-#define TI_RLOCKED 2
-
#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
@@ -1175,7 +1155,7 @@
* contains. tcp_do_segment() consumes
* the mbuf chain and unlocks the inpcb.
*/
- tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
+ tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
iptos, ti_locked);
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
return (IPPROTO_DONE);
@@ -1421,7 +1401,7 @@
* state. tcp_do_segment() always consumes the mbuf chain, unlocks
* the inpcb, and unlocks pcbinfo.
*/
- tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked);
+ tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked);
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
return (IPPROTO_DONE);
@@ -1476,7 +1456,7 @@
return (IPPROTO_DONE);
}
-static void
+void
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
int ti_locked)
@@ -1788,7 +1768,7 @@
tp->t_rxtcur);
sowwakeup(so);
if (sbavail(&so->so_snd))
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
goto check_delack;
}
} else if (th->th_ack == tp->snd_una &&
@@ -1907,7 +1887,7 @@
tp->t_flags |= TF_DELACK;
} else {
tp->t_flags |= TF_ACKNOW;
- tcp_output(tp);
+ tp->t_fb->tfb_tcp_output(tp);
}
goto check_delack;
}
@@ -2522,7 +2502,7 @@
}
} else
tp->snd_cwnd += tp->t_maxseg;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
goto drop;
} else if (tp->t_dupacks == tcprexmtthresh) {
tcp_seq onxt = tp->snd_nxt;
@@ -2556,12 +2536,12 @@
tcps_sack_recovery_episode);
tp->sack_newdata = tp->snd_nxt;
tp->snd_cwnd = tp->t_maxseg;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
goto drop;
}
tp->snd_nxt = th->th_ack;
tp->snd_cwnd = tp->t_maxseg;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
KASSERT(tp->snd_limited <= 2,
("%s: tp->snd_limited too big",
__func__));
@@ -2608,7 +2588,7 @@
(tp->snd_nxt - tp->snd_una);
SOCKBUF_UNLOCK(&so->so_snd);
if (avail > 0)
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
sent = tp->snd_max - oldsndmax;
if (sent > tp->t_maxseg) {
KASSERT((tp->t_dupacks == 2 &&
@@ -3074,7 +3054,7 @@
* Return any desired output.
*/
if (needoutput || (tp->t_flags & TF_ACKNOW))
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
check_delack:
KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
@@ -3122,7 +3102,7 @@
ti_locked = TI_UNLOCKED;
tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
INP_WUNLOCK(tp->t_inpcb);
m_freem(m);
return;
@@ -3168,7 +3148,7 @@
* The mbuf must still include the original packet header.
* tp may be NULL.
*/
-static void
+void
tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
int tlen, int rstreason)
{
@@ -3231,7 +3211,7 @@
/*
* Parse TCP options and place in tcpopt.
*/
-static void
+void
tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
{
int opt, optlen;
@@ -3325,7 +3305,7 @@
* It is still reflected in the segment length for
* sequencing purposes.
*/
-static void
+void
tcp_pulloutofband(struct socket *so, struct tcphdr *th, struct mbuf *m,
int off)
{
@@ -3358,7 +3338,7 @@
* Collect new round-trip time estimate
* and update averages and current timeout.
*/
-static void
+void
tcp_xmit_timer(struct tcpcb *tp, int rtt)
{
int delta;
@@ -3738,7 +3718,7 @@
* By setting snd_nxt to ti_ack, this forces retransmission timer to
* be started again.
*/
-static void
+void
tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
{
tcp_seq onxt = tp->snd_nxt;
@@ -3755,7 +3735,7 @@
*/
tp->snd_cwnd = tp->t_maxseg + BYTES_THIS_ACK(tp, th);
tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
tp->snd_cwnd = ocwnd;
if (SEQ_GT(onxt, tp->snd_nxt))
tp->snd_nxt = onxt;
Index: head/sys/netinet/tcp_sack.c
===================================================================
--- head/sys/netinet/tcp_sack.c
+++ head/sys/netinet/tcp_sack.c
@@ -599,7 +599,7 @@
if (tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
}
#if 0
Index: head/sys/netinet/tcp_subr.c
===================================================================
--- head/sys/netinet/tcp_subr.c
+++ head/sys/netinet/tcp_subr.c
@@ -47,6 +47,7 @@
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
+#include <sys/refcount.h>
#include <sys/mbuf.h>
#ifdef INET6
#include <sys/domain.h>
@@ -125,6 +126,8 @@
VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
#endif
+struct rwlock tcp_function_lock;
+
static int
sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS)
{
@@ -236,6 +239,179 @@
void *ip4hdr, const void *ip6hdr);
static void tcp_timer_discard(struct tcpcb *, uint32_t);
+
+static struct tcp_function_block tcp_def_funcblk = {
+ "default",
+ tcp_output,
+ tcp_do_segment,
+ tcp_default_ctloutput,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ 0,
+ 0
+};
+
+struct tcp_funchead t_functions;
+static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk;
+
+static struct tcp_function_block *
+find_tcp_functions_locked(struct tcp_function_set *fs)
+{
+ struct tcp_function *f;
+ struct tcp_function_block *blk=NULL;
+
+ TAILQ_FOREACH(f, &t_functions, tf_next) {
+ if (strcmp(f->tf_fb->tfb_tcp_block_name, fs->function_set_name) == 0) {
+ blk = f->tf_fb;
+ break;
+ }
+ }
+ return(blk);
+}
+
+static struct tcp_function_block *
+find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s)
+{
+ struct tcp_function_block *rblk=NULL;
+ struct tcp_function *f;
+
+ TAILQ_FOREACH(f, &t_functions, tf_next) {
+ if (f->tf_fb == blk) {
+ rblk = blk;
+ if (s) {
+ *s = f;
+ }
+ break;
+ }
+ }
+ return (rblk);
+}
+
+struct tcp_function_block *
+find_and_ref_tcp_functions(struct tcp_function_set *fs)
+{
+ struct tcp_function_block *blk;
+
+ rw_rlock(&tcp_function_lock);
+ blk = find_tcp_functions_locked(fs);
+ if (blk)
+ refcount_acquire(&blk->tfb_refcnt);
+ rw_runlock(&tcp_function_lock);
+ return(blk);
+}
+
+struct tcp_function_block *
+find_and_ref_tcp_fb(struct tcp_function_block *blk)
+{
+ struct tcp_function_block *rblk;
+
+ rw_rlock(&tcp_function_lock);
+ rblk = find_tcp_fb_locked(blk, NULL);
+ if (rblk)
+ refcount_acquire(&rblk->tfb_refcnt);
+ rw_runlock(&tcp_function_lock);
+ return(rblk);
+}
+
+
+static int
+sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
+{
+ int error=ENOENT;
+ struct tcp_function_set fs;
+ struct tcp_function_block *blk;
+
+ memset(&fs, 0, sizeof(fs));
+ rw_rlock(&tcp_function_lock);
+ blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL);
+ if (blk) {
+ /* Found him */
+ strcpy(fs.function_set_name, blk->tfb_tcp_block_name);
+ fs.pcbcnt = blk->tfb_refcnt;
+ }
+ rw_runlock(&tcp_function_lock);
+ error = sysctl_handle_string(oidp, fs.function_set_name,
+ sizeof(fs.function_set_name), req);
+
+ /* Check for error or no change */
+ if (error != 0 || req->newptr == NULL)
+ return(error);
+
+ rw_wlock(&tcp_function_lock);
+ blk = find_tcp_functions_locked(&fs);
+ if ((blk == NULL) ||
+ (blk->tfb_flags & TCP_FUNC_BEING_REMOVED)) {
+ error = ENOENT;
+ goto done;
+ }
+ tcp_func_set_ptr = blk;
+done:
+ rw_wunlock(&tcp_function_lock);
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default,
+ CTLTYPE_STRING | CTLFLAG_RW,
+ NULL, 0, sysctl_net_inet_default_tcp_functions, "A",
+ "Set/get the default TCP functions");
+
+static int
+sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS)
+{
+ int error, cnt, linesz;
+ struct tcp_function *f;
+ char *buffer, *cp;
+ size_t bufsz, outsz;
+
+ cnt = 0;
+ rw_rlock(&tcp_function_lock);
+ TAILQ_FOREACH(f, &t_functions, tf_next) {
+ cnt++;
+ }
+ rw_runlock(&tcp_function_lock);
+
+ bufsz = (cnt+2) * (TCP_FUNCTION_NAME_LEN_MAX + 12) + 1;
+ buffer = malloc(bufsz, M_TEMP, M_WAITOK);
+
+ error = 0;
+ cp = buffer;
+
+ linesz = snprintf(cp, bufsz, "\n%-32s%c %s\n", "Stack", 'D', "PCB count");
+ cp += linesz;
+ bufsz -= linesz;
+ outsz = linesz;
+
+ rw_rlock(&tcp_function_lock);
+ TAILQ_FOREACH(f, &t_functions, tf_next) {
+ linesz = snprintf(cp, bufsz, "%-32s%c %u\n",
+ f->tf_fb->tfb_tcp_block_name,
+ (f->tf_fb == tcp_func_set_ptr) ? '*' : ' ',
+ f->tf_fb->tfb_refcnt);
+ if (linesz >= bufsz) {
+ error = EOVERFLOW;
+ break;
+ }
+ cp += linesz;
+ bufsz -= linesz;
+ outsz += linesz;
+ }
+ rw_runlock(&tcp_function_lock);
+ if (error == 0)
+ error = sysctl_handle_string(oidp, buffer, outsz + 1, req);
+ free(buffer, M_TEMP);
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available,
+ CTLTYPE_STRING|CTLFLAG_RD,
+ NULL, 0, sysctl_net_inet_list_available, "A",
+ "list available TCP Function sets");
+
/*
* Target size of TCP PCB hash tables. Must be a power of two.
*
@@ -263,6 +439,8 @@
#define V_tcpcb_zone VNET(tcpcb_zone)
MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
+MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory");
+
static struct mtx isn_mtx;
#define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
@@ -311,6 +489,96 @@
return (hashsize);
}
+int
+register_tcp_functions(struct tcp_function_block *blk, int wait)
+{
+ struct tcp_function_block *lblk;
+ struct tcp_function *n;
+ struct tcp_function_set fs;
+
+ if ((blk->tfb_tcp_output == NULL) ||
+ (blk->tfb_tcp_do_segment == NULL) ||
+ (blk->tfb_tcp_ctloutput == NULL) ||
+ (strlen(blk->tfb_tcp_block_name) == 0)) {
+ /*
+ * These functions are required and you
+ * need a name.
+ */
+ return (EINVAL);
+ }
+ if (blk->tfb_tcp_timer_stop_all ||
+ blk->tfb_tcp_timers_left ||
+ blk->tfb_tcp_timer_activate ||
+ blk->tfb_tcp_timer_active ||
+ blk->tfb_tcp_timer_stop) {
+ /*
+ * If you define one timer function you
+ * must have them all.
+ */
+ if ((blk->tfb_tcp_timer_stop_all == NULL) ||
+ (blk->tfb_tcp_timers_left == NULL) ||
+ (blk->tfb_tcp_timer_activate == NULL) ||
+ (blk->tfb_tcp_timer_active == NULL) ||
+ (blk->tfb_tcp_timer_stop == NULL)) {
+ return (EINVAL);
+ }
+ }
+ n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait);
+ if (n == NULL) {
+ return (ENOMEM);
+ }
+ n->tf_fb = blk;
+ strcpy(fs.function_set_name, blk->tfb_tcp_block_name);
+ rw_wlock(&tcp_function_lock);
+ lblk = find_tcp_functions_locked(&fs);
+ if (lblk) {
+ /* Duplicate name space not allowed */
+ rw_wunlock(&tcp_function_lock);
+ free(n, M_TCPFUNCTIONS);
+ return (EALREADY);
+ }
+ refcount_init(&blk->tfb_refcnt, 0);
+ blk->tfb_flags = 0;
+ TAILQ_INSERT_TAIL(&t_functions, n, tf_next);
+ rw_wunlock(&tcp_function_lock);
+ return(0);
+}
+
+int
+deregister_tcp_functions(struct tcp_function_block *blk)
+{
+ struct tcp_function_block *lblk;
+ struct tcp_function *f;
+ int error=ENOENT;
+
+ if (strcmp(blk->tfb_tcp_block_name, "default") == 0) {
+ /* You can't un-register the default */
+ return (EPERM);
+ }
+ rw_wlock(&tcp_function_lock);
+ if (blk == tcp_func_set_ptr) {
+ /* You can't free the current default */
+ rw_wunlock(&tcp_function_lock);
+ return (EBUSY);
+ }
+ if (blk->tfb_refcnt) {
+ /* Still tcb attached, mark it. */
+ blk->tfb_flags |= TCP_FUNC_BEING_REMOVED;
+ rw_wunlock(&tcp_function_lock);
+ return (EBUSY);
+ }
+ lblk = find_tcp_fb_locked(blk, &f);
+ if (lblk) {
+ /* Found */
+ TAILQ_REMOVE(&t_functions, f, tf_next);
+ f->tf_fb = NULL;
+ free(f, M_TCPFUNCTIONS);
+ error = 0;
+ }
+ rw_wunlock(&tcp_function_lock);
+ return (error);
+}
+
void
tcp_init(void)
{
@@ -325,7 +593,10 @@
if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT,
&V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
printf("%s: WARNING: unable to register helper hook\n", __func__);
-
+ /* Setup the tcp function block list */
+ TAILQ_INIT(&t_functions);
+ rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0);
+ register_tcp_functions(&tcp_def_funcblk, M_WAITOK);
hashsize = TCBHASHSIZE;
TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
if (hashsize == 0) {
@@ -768,7 +1039,13 @@
tp->ccv = &tm->ccv;
tp->ccv->type = IPPROTO_TCP;
tp->ccv->ccvc.tcp = tp;
-
+ rw_rlock(&tcp_function_lock);
+ tp->t_fb = tcp_func_set_ptr;
+ refcount_acquire(&tp->t_fb->tfb_refcnt);
+ rw_runlock(&tcp_function_lock);
+ if (tp->t_fb->tfb_tcp_fb_init) {
+ (*tp->t_fb->tfb_tcp_fb_init)(tp);
+ }
/*
* Use the current system default CC algorithm.
*/
@@ -779,12 +1056,18 @@
if (CC_ALGO(tp)->cb_init != NULL)
if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) {
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
uma_zfree(V_tcpcb_zone, tm);
return (NULL);
}
tp->osd = &tm->osd;
if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) {
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
uma_zfree(V_tcpcb_zone, tm);
return (NULL);
}
@@ -925,7 +1208,7 @@
if (TCPS_HAVERCVDSYN(tp->t_state)) {
tcp_state_change(tp, TCPS_CLOSED);
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
TCPSTAT_INC(tcps_drops);
} else
TCPSTAT_INC(tcps_conndrops);
@@ -960,6 +1243,10 @@
tcp_timer_stop(tp, TT_KEEP);
tcp_timer_stop(tp, TT_2MSL);
tcp_timer_stop(tp, TT_DELACK);
+ if (tp->t_fb->tfb_tcp_timer_stop_all) {
+ /* Call the stop-all function of the methods */
+ tp->t_fb->tfb_tcp_timer_stop_all(tp);
+ }
/*
* If we got enough samples through the srtt filter,
@@ -1044,6 +1331,14 @@
inp->inp_ppcb = NULL;
if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
/* We own the last reference on tcpcb, let's free it. */
+ if ((tp->t_fb->tfb_tcp_timers_left) &&
+ (tp->t_fb->tfb_tcp_timers_left(tp))) {
+ /* Some fb timers left running! */
+ return;
+ }
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);
released = in_pcbrele_wlocked(inp);
@@ -1105,6 +1400,14 @@
tp->t_timers->tt_flags &= ~timer_type;
if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
/* We own the last reference on this tcpcb, let's free it. */
+ if ((tp->t_fb->tfb_tcp_timers_left) &&
+ (tp->t_fb->tfb_tcp_timers_left(tp))) {
+ /* Some fb timers left running! */
+ goto leave;
+ }
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);
if (in_pcbrele_wlocked(inp)) {
@@ -1113,6 +1416,7 @@
return;
}
}
+leave:
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
@@ -1865,7 +2169,7 @@
tp->snd_recover = tp->snd_max;
if (tp->t_flags & TF_SACK_PERMIT)
EXIT_FASTRECOVERY(tp->t_flags);
- tcp_output(tp);
+ tp->t_fb->tfb_tcp_output(tp);
}
#ifdef INET
Index: head/sys/netinet/tcp_syncache.c
===================================================================
--- head/sys/netinet/tcp_syncache.c
+++ head/sys/netinet/tcp_syncache.c
@@ -41,6 +41,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/hash.h>
+#include <sys/refcount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/limits.h>
@@ -626,6 +627,7 @@
static struct socket *
syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
{
+ struct tcp_function_block *blk;
struct inpcb *inp = NULL;
struct socket *so;
struct tcpcb *tp;
@@ -817,6 +819,26 @@
tp->irs = sc->sc_irs;
tcp_rcvseqinit(tp);
tcp_sendseqinit(tp);
+ blk = sototcpcb(lso)->t_fb;
+ if (blk != tp->t_fb) {
+ /*
+ * Our parents t_fb was not the default,
+ * we need to release our ref on tp->t_fb and
+ * pickup one on the new entry.
+ */
+ struct tcp_function_block *rblk;
+
+ rblk = find_and_ref_tcp_fb(blk);
+ KASSERT(rblk != NULL,
+ ("cannot find blk %p out of syncache?", blk));
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
+ tp->t_fb = rblk;
+ if (tp->t_fb->tfb_tcp_fb_init) {
+ (*tp->t_fb->tfb_tcp_fb_init)(tp);
+ }
+ }
tp->snd_wl1 = sc->sc_irs;
tp->snd_max = tp->iss + 1;
tp->snd_nxt = tp->iss + 1;
Index: head/sys/netinet/tcp_timer.c
===================================================================
--- head/sys/netinet/tcp_timer.c
+++ head/sys/netinet/tcp_timer.c
@@ -292,7 +292,7 @@
tp->t_flags |= TF_ACKNOW;
TCPSTAT_INC(tcps_delack);
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
INP_WUNLOCK(inp);
CURVNET_RESTORE();
}
@@ -543,7 +543,7 @@
}
tcp_setpersist(tp);
tp->t_flags |= TF_FORCEDATA;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
tp->t_flags &= ~TF_FORCEDATA;
out:
@@ -798,7 +798,7 @@
cc_cong_signal(tp, NULL, CC_RTO);
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
out:
#ifdef TCPDEBUG
@@ -858,6 +858,10 @@
f_reset = TT_2MSL_RST;
break;
default:
+ if (tp->t_fb->tfb_tcp_timer_activate) {
+ tp->t_fb->tfb_tcp_timer_activate(tp, timer_type, delta);
+ return;
+ }
panic("tp %p bad timer_type %#x", tp, timer_type);
}
if (delta == 0) {
@@ -904,6 +908,9 @@
t_callout = &tp->t_timers->tt_2msl;
break;
default:
+ if (tp->t_fb->tfb_tcp_timer_active) {
+ return(tp->t_fb->tfb_tcp_timer_active(tp, timer_type));
+ }
panic("tp %p bad timer_type %#x", tp, timer_type);
}
return callout_active(t_callout);
@@ -945,6 +952,14 @@
f_reset = TT_2MSL_RST;
break;
default:
+ if (tp->t_fb->tfb_tcp_timer_stop) {
+ /*
+ * XXXrrs we need to look at this with the
+ * stop case below (flags).
+ */
+ tp->t_fb->tfb_tcp_timer_stop(tp, timer_type);
+ return;
+ }
panic("tp %p bad timer_type %#x", tp, timer_type);
}
Index: head/sys/netinet/tcp_usrreq.c
===================================================================
--- head/sys/netinet/tcp_usrreq.c
+++ head/sys/netinet/tcp_usrreq.c
@@ -47,6 +47,7 @@
#include <sys/systm.h>
#include <sys/limits.h>
#include <sys/malloc.h>
+#include <sys/refcount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
@@ -509,7 +510,7 @@
goto out;
#endif
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
INP_WUNLOCK(inp);
@@ -579,7 +580,7 @@
(error = tcp_offload_connect(so, nam)) == 0)
goto out;
#endif
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
goto out;
}
#endif
@@ -597,7 +598,7 @@
goto out;
#endif
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
@@ -773,7 +774,7 @@
socantsendmore(so);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
out:
TCPDEBUG2(PRU_SHUTDOWN);
@@ -809,7 +810,7 @@
tcp_offload_rcvd(tp);
else
#endif
- tcp_output(tp);
+ tp->t_fb->tfb_tcp_output(tp);
out:
TCPDEBUG2(PRU_RCVD);
@@ -911,7 +912,7 @@
!(flags & PRUS_NOTREADY)) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
if (flags & PRUS_MORETOCOME)
tp->t_flags &= ~TF_MORETOCOME;
}
@@ -961,7 +962,7 @@
tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
if (!(flags & PRUS_NOTREADY)) {
tp->t_flags |= TF_FORCEDATA;
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
tp->t_flags &= ~TF_FORCEDATA;
}
}
@@ -997,7 +998,7 @@
error = sbready(&so->so_snd, m, count);
SOCKBUF_UNLOCK(&so->so_snd);
if (error == 0)
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
INP_WUNLOCK(inp);
return (error);
@@ -1349,13 +1350,11 @@
int
tcp_ctloutput(struct socket *so, struct sockopt *sopt)
{
- int error, opt, optval;
- u_int ui;
+ int error;
struct inpcb *inp;
struct tcpcb *tp;
- struct tcp_info ti;
- char buf[TCP_CA_NAME_MAX];
- struct cc_algo *algo;
+ struct tcp_function_block *blk;
+ struct tcp_function_set fsn;
error = 0;
inp = sotoinpcb(so);
@@ -1383,7 +1382,83 @@
INP_WUNLOCK(inp);
return (ECONNRESET);
}
+ tp = intotcpcb(inp);
+ /*
+ * Protect the TCP option TCP_FUNCTION_BLK so
+ * that a sub-function can *never* overwrite this.
+ */
+ if ((sopt->sopt_dir == SOPT_SET) &&
+ (sopt->sopt_name == TCP_FUNCTION_BLK)) {
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &fsn, sizeof fsn,
+ sizeof fsn);
+ if (error)
+ return (error);
+ INP_WLOCK_RECHECK(inp);
+ if (tp->t_state != TCPS_CLOSED) {
+ /*
+ * The user has advanced the state
+ * past the initial point, we can't
+ * switch since we are down the road
+ * and a new set of functions may
+ * not be compatibile.
+ */
+ INP_WUNLOCK(inp);
+ return(EINVAL);
+ }
+ blk = find_and_ref_tcp_functions(&fsn);
+ if (blk == NULL) {
+ INP_WUNLOCK(inp);
+ return (ENOENT);
+ }
+ if (tp->t_fb != blk) {
+ if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
+ refcount_release(&blk->tfb_refcnt);
+ INP_WUNLOCK(inp);
+ return (ENOENT);
+ }
+ /*
+ * Release the old refcnt, the
+ * lookup acquires a ref on the
+ * new one.
+ */
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
+ tp->t_fb = blk;
+ if (tp->t_fb->tfb_tcp_fb_init) {
+ (*tp->t_fb->tfb_tcp_fb_init)(tp);
+ }
+ }
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE) {
+ tcp_offload_ctloutput(tp, sopt->sopt_dir,
+ sopt->sopt_name);
+ }
+#endif
+ INP_WUNLOCK(inp);
+ return (error);
+ } else if ((sopt->sopt_dir == SOPT_GET) &&
+ (sopt->sopt_name == TCP_FUNCTION_BLK)) {
+ strcpy(fsn.function_set_name, tp->t_fb->tfb_tcp_block_name);
+ fsn.pcbcnt = tp->t_fb->tfb_refcnt;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &fsn, sizeof fsn);
+ return (error);
+ }
+ /* Pass in the INP locked, called must unlock it */
+ return (tp->t_fb->tfb_tcp_ctloutput(so, sopt, inp, tp));
+}
+int
+tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp)
+{
+ int error, opt, optval;
+ u_int ui;
+ struct tcp_info ti;
+ struct cc_algo *algo;
+ char buf[TCP_CA_NAME_MAX];
+
switch (sopt->sopt_dir) {
case SOPT_SET:
switch (sopt->sopt_name) {
@@ -1451,7 +1526,7 @@
else if (tp->t_flags & TF_NOPUSH) {
tp->t_flags &= ~TF_NOPUSH;
if (TCPS_HAVEESTABLISHED(tp->t_state))
- error = tcp_output(tp);
+ error = tp->t_fb->tfb_tcp_output(tp);
}
goto unlock_and_done;
@@ -1770,7 +1845,7 @@
sbflush(&so->so_rcv);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- tcp_output(tp);
+ tp->t_fb->tfb_tcp_output(tp);
}
}
Index: head/sys/netinet/tcp_var.h
===================================================================
--- head/sys/netinet/tcp_var.h
+++ head/sys/netinet/tcp_var.h
@@ -89,6 +89,52 @@
#define tcp6cb tcpcb /* for KAME src sync over BSD*'s */
+/*
+ * TODO: We yet need to brave plowing in
+ * to tcp_input() and the pru_usrreq() block.
+ * Right now these go to the old standards which
+ * are somewhat ok, but in the long term may
+ * need to be changed. If we do tackle tcp_input()
+ * then we need to get rid of the tcp_do_segment()
+ * function below.
+ */
+/* Flags for tcp functions */
+#define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */
+struct tcpcb;
+struct inpcb;
+struct sockopt;
+struct socket;
+
+struct tcp_function_block {
+ char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX];
+ int (*tfb_tcp_output)(struct tcpcb *);
+ void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *,
+ struct socket *, struct tcpcb *,
+ int, int, uint8_t,
+ int);
+ int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt,
+ struct inpcb *inp, struct tcpcb *tp);
+ /* Optional memory allocation/free routine */
+ void (*tfb_tcp_fb_init)(struct tcpcb *);
+ void (*tfb_tcp_fb_fini)(struct tcpcb *);
+ /* Optional timers, must define all if you define one */
+ int (*tfb_tcp_timer_stop_all)(struct tcpcb *);
+ int (*tfb_tcp_timers_left)(struct tcpcb *);
+ void (*tfb_tcp_timer_activate)(struct tcpcb *,
+ uint32_t, u_int);
+ int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
+ void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
+ volatile uint32_t tfb_refcnt;
+ uint32_t tfb_flags;
+};
+
+struct tcp_function {
+ TAILQ_ENTRY(tcp_function) tf_next;
+ struct tcp_function_block *tf_fb;
+};
+
+TAILQ_HEAD(tcp_funchead, tcp_function);
+
/*
* Tcp control block, one per tcp; fields:
* Organized for 16 byte cacheline efficiency.
@@ -207,9 +253,10 @@
u_int t_tsomaxsegsize; /* TSO maximum segment size in bytes */
u_int t_pmtud_saved_maxopd; /* pre-blackhole MSS */
u_int t_flags2; /* More tcpcb flags storage */
-
uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */
- void *t_pspare2[4]; /* 1 TCP_SIGNATURE, 3 TBD */
+ struct tcp_function_block *t_fb;/* TCP function call block */
+ void *t_fb_ptr; /* Pointer to t_fb specific data */
+ void *t_pspare2[2]; /* 1 TCP_SIGNATURE, 1 TBD */
#if defined(_KERNEL) && defined(TCPPCAP)
struct mbufq t_inpkts; /* List of saved input packets. */
struct mbufq t_outpkts; /* List of saved output packets. */
@@ -534,6 +581,8 @@
#define tcps_rcvmemdrop tcps_rcvreassfull /* compat */
#ifdef _KERNEL
+#define TI_UNLOCKED 1
+#define TI_RLOCKED 2
#include <sys/counter.h>
VNET_PCPUSTAT_DECLARE(struct tcpstat, tcpstat); /* tcp statistics */
@@ -684,7 +733,32 @@
int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
void tcp_reass_global_init(void);
void tcp_reass_flush(struct tcpcb *);
+void tcp_dooptions(struct tcpopt *, u_char *, int, int);
+void tcp_dropwithreset(struct mbuf *, struct tcphdr *,
+ struct tcpcb *, int, int);
+void tcp_pulloutofband(struct socket *,
+ struct tcphdr *, struct mbuf *, int);
+void tcp_xmit_timer(struct tcpcb *, int);
+void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
+void cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
+ uint16_t type);
+void cc_conn_init(struct tcpcb *tp);
+void cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
+void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
+void hhook_run_tcp_est_in(struct tcpcb *tp,
+ struct tcphdr *th, struct tcpopt *to);
+
int tcp_input(struct mbuf **, int *, int);
+void tcp_do_segment(struct mbuf *, struct tcphdr *,
+ struct socket *, struct tcpcb *, int, int, uint8_t,
+ int);
+
+int register_tcp_functions(struct tcp_function_block *blk, int wait);
+int deregister_tcp_functions(struct tcp_function_block *blk);
+struct tcp_function_block *find_and_ref_tcp_functions(struct tcp_function_set *fs);
+struct tcp_function_block *find_and_ref_tcp_fb(struct tcp_function_block *blk);
+int tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp);
+
u_long tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
u_long tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
@@ -752,8 +826,6 @@
u_long tcp_seq_subtract(u_long, u_long );
int tcp_compute_pipe(struct tcpcb *);
-void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
-
static inline void
tcp_fields_to_host(struct tcphdr *th)
{
Index: head/sys/netinet/toecore.c
===================================================================
--- head/sys/netinet/toecore.c
+++ head/sys/netinet/toecore.c
@@ -509,7 +509,7 @@
KASSERT(!(tp->t_flags & TF_TOE),
("%s: tp %p still offloaded.", __func__, tp));
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
} else {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Feb 23, 5:56 AM (5 h, 24 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16789191
Default Alt Text
D4055.diff (30 KB)
Attached To
Mode
D4055: First step to modularize TCP including two added tcp modules that can be played with.
Attached
Detach File
Event Timeline
Log In to Comment