D4055.id10089.diff
Index: modules/Makefile
===================================================================
--- modules/Makefile
+++ modules/Makefile
@@ -343,6 +343,7 @@
${_syscons} \
sysvipc \
${_ti} \
+ tcp/fastpath \
tl \
tmpfs \
${_toecore} \
Index: modules/tcp/fastpath/Makefile
===================================================================
--- modules/tcp/fastpath/Makefile
+++ modules/tcp/fastpath/Makefile
@@ -0,0 +1,15 @@
+#
+# $FreeBSD$
+#
+
+.PATH: ${.CURDIR}
+
+KMOD= fastpath
+SRCS= fastpath.c
+
+#
+# Enable full debugging
+#
+#CFLAGS += -g
+
+.include <bsd.kmod.mk>
Index: modules/tcp/fastpath/fastpath.c
===================================================================
--- modules/tcp/fastpath/fastpath.c
+++ modules/tcp/fastpath/fastpath.c
@@ -0,0 +1,2469 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 2007-2008,2010
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
+ * Copyright (c) 2010 The FreeBSD Foundation
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * Copyright (c) 2015 Netflix Inc.
+ * All rights reserved.
+ *
+ * Portions of this software were developed at the Centre for Advanced Internet
+ * Architectures, Swinburne University of Technology, by Lawrence Stewart,
+ * James Healy and David Hayes, made possible in part by a grant from the Cisco
+ * University Research Program Fund at Community Foundation Silicon Valley.
+ *
+ * Portions of this software were developed at the Centre for Advanced
+ * Internet Architectures, Swinburne University of Technology, Melbourne,
+ * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
+ *
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
+ * Portions of this software were developed by Randall R. Stewart while
+ * working for Netflix Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_ipfw.h" /* for ipfw_fwd */
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_ipsec.h"
+#include "opt_kdtrace.h"
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#include <sys/hhook.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/proc.h> /* for proc0 declaration */
+#include <sys/protosw.h>
+#include <sys/sdt.h>
+#include <sys/signalvar.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+
+#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
+
+#include <vm/uma.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/route.h>
+#include <net/vnet.h>
+
+#define TCPSTATES /* for logging */
+
+#include <netinet/cc.h>
+#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h> /* required for icmp_var.h */
+#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
+#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet6/in6_pcb.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/nd6.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet6/tcp6_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_syncache.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif /* TCPDEBUG */
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
+
+#ifdef IPSEC
+#include <netipsec/ipsec.h>
+#include <netipsec/ipsec6.h>
+#endif /*IPSEC*/
+
+#include <machine/in_cksum.h>
+
+#include <security/mac/mac_framework.h>
+
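+/* Duplicate-ACK threshold that triggers fast retransmit (see RFC 5681). */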
+static const int tcprexmtthresh = 3;
+
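+/* Per-VNET tunables defined by the base TCP stack; referenced here via VNET_DECLARE(). */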
+VNET_DECLARE(int, tcp_autorcvbuf_inc);
+#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
+VNET_DECLARE(int, tcp_autorcvbuf_max);
+#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
+VNET_DECLARE(int, tcp_do_rfc3042);
+#define V_tcp_do_rfc3042 VNET(tcp_do_rfc3042)
+VNET_DECLARE(int, tcp_do_autorcvbuf);
+#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
+VNET_DECLARE(int, tcp_insecure_rst);
+#define V_tcp_insecure_rst VNET(tcp_insecure_rst)
+VNET_DECLARE(int, tcp_insecure_syn);
+#define V_tcp_insecure_syn VNET(tcp_insecure_syn)
+
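+/* Helper routines provided by the base TCP stack. */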
+extern void tcp_dooptions(struct tcpopt *, u_char *, int, int);
+extern void tcp_dropwithreset(struct mbuf *, struct tcphdr *,
+ struct tcpcb *, int, int);
+extern void tcp_pulloutofband(struct socket *,
+ struct tcphdr *, struct mbuf *, int);
+extern void tcp_xmit_timer(struct tcpcb *, int);
+extern void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
+extern void tcp_mss(struct tcpcb *tp, int offer);
+extern void cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
+ uint16_t type);
+extern void cc_conn_init(struct tcpcb *tp);
+extern void cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
+extern void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
+extern void hhook_run_tcp_est_in(struct tcpcb *tp,
+ struct tcphdr *th, struct tcpopt *to);
+
+extern void kmod_tcpstat_inc(int statnum);
+#ifdef TCP_SIGNATURE
+extern int tcp_signature_verify_input(struct mbuf *m, int off0, int tlen, int optlen,
+ struct tcpopt *to, struct tcphdr *th, u_int tcpbflag);
+#endif
+
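+/* The two tcp_do_segment() variants implemented by this module. */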
+static void tcp_do_segment_fastslow(struct mbuf *, struct tcphdr *,
+ struct socket *, struct tcpcb *, int, int, uint8_t,
+ int);
+
+static void tcp_do_segment_fastack(struct mbuf *, struct tcphdr *,
+ struct socket *, struct tcpcb *, int, int, uint8_t,
+ int);
+
+/*
+ * Indicate whether this ack should be delayed. We can delay the ack if
+ * following conditions are met:
+ * - There is no delayed ack timer in progress.
+ * - Our last ack wasn't a 0-sized window. We never want to delay
+ * the ack that opens up a 0-sized window.
+ * - LRO wasn't used for this segment. We make sure by checking that the
+ * segment size is not larger than the MSS.
+ * - Delayed acks are enabled or this is a half-synchronized T/TCP
+ * connection.
+ */
+#define DELAY_ACK(tp, tlen) \
+ ((!tcp_timer_active(tp, TT_DELACK) && \
+ (tp->t_flags & TF_RXWIN0SENT) == 0) && \
+ (tlen <= tp->t_maxopd) && \
+ (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
+
+/*
+ * So how is this faster than the normal fast ack?
+ * It basically allows us to also stay in the fastpath
+ * when a window-update ack also arrives. In testing
+ * we saw only 25-30% of connections staying in the
+ * fastpath, because the window was usually updated
+ * along with the sequence moving forward.
+ */
+static void
+tcp_do_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
+ struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen,
+ int ti_locked, u_long tiwin)
+{
+ int acked;
+ int winup_only=0;
+ /*
+	 * The following if statement will be true if
+	 * we are doing window updates in the fastpath <and>
+	 * - We have more new data (SEQ_LT(tp->snd_wl1, th->th_seq)) <or>
+	 * - No more new data, but we have an ack for new data
+	 *   (tp->snd_wl1 == th->th_seq && SEQ_LT(tp->snd_wl2, th->th_ack))
+	 * - No more new data, the same ack point but the window grew
+	 *   (tp->snd_wl1 == th->th_seq && tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
+	 */
+ if ((SEQ_LT(tp->snd_wl1, th->th_seq) ||
+ (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
+ (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
+ /* keep track of pure window updates */
+ if (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) {
+ winup_only = 1;
+ TCPSTAT_INC(tcps_rcvwinupd);
+ }
+ tp->snd_wnd = tiwin;
+ tp->snd_wl1 = th->th_seq;
+ tp->snd_wl2 = th->th_ack;
+ if (tp->snd_wnd > tp->max_sndwnd)
+ tp->max_sndwnd = tp->snd_wnd;
+ }
+ /*
+ * If last ACK falls within this segment's sequence numbers,
+ * record the timestamp.
+ * NOTE that the test is modified according to the latest
+ * proposal of the tcplw@cray.com list (Braden 1993/04/26).
+ */
+ if ((to->to_flags & TOF_TS) != 0 &&
+ SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
+ tp->ts_recent_age = tcp_ts_getticks();
+ tp->ts_recent = to->to_tsval;
+ }
+ /*
+ * This is a pure ack for outstanding data.
+ */
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ }
+ ti_locked = TI_UNLOCKED;
+
+ TCPSTAT_INC(tcps_predack);
+
+ /*
+ * "bad retransmit" recovery.
+ */
+ if (tp->t_rxtshift == 1 &&
+ tp->t_flags & TF_PREVVALID &&
+ (int)(ticks - tp->t_badrxtwin) < 0) {
+ cc_cong_signal(tp, th, CC_RTO_ERR);
+ }
+
+ /*
+ * Recalculate the transmit timer / rtt.
+ *
+ * Some boxes send broken timestamp replies
+ * during the SYN+ACK phase, ignore
+ * timestamps of 0 or we could calculate a
+ * huge RTT and blow up the retransmit timer.
+ */
+ if ((to->to_flags & TOF_TS) != 0 &&
+ to->to_tsecr) {
+ u_int t;
+
+ t = tcp_ts_getticks() - to->to_tsecr;
+ if (!tp->t_rttlow || tp->t_rttlow > t)
+ tp->t_rttlow = t;
+ tcp_xmit_timer(tp,
+ TCP_TS_TO_TICKS(t) + 1);
+ } else if (tp->t_rtttime &&
+ SEQ_GT(th->th_ack, tp->t_rtseq)) {
+ if (!tp->t_rttlow ||
+ tp->t_rttlow > ticks - tp->t_rtttime)
+ tp->t_rttlow = ticks - tp->t_rtttime;
+ tcp_xmit_timer(tp,
+ ticks - tp->t_rtttime);
+ }
+ if (winup_only == 0) {
+ acked = BYTES_THIS_ACK(tp, th);
+
+ /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
+ hhook_run_tcp_est_in(tp, th, to);
+
+ TCPSTAT_ADD(tcps_rcvackbyte, acked);
+ sbdrop(&so->so_snd, acked);
+ if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
+ SEQ_LEQ(th->th_ack, tp->snd_recover))
+ tp->snd_recover = th->th_ack - 1;
+
+ /*
+ * Let the congestion control algorithm update
+ * congestion control related information. This
+ * typically means increasing the congestion
+ * window.
+ */
+ cc_ack_received(tp, th, CC_ACK);
+
+ tp->snd_una = th->th_ack;
+ /*
+ * Pull snd_wl2 up to prevent seq wrap relative
+ * to th_ack.
+ */
+ tp->snd_wl2 = th->th_ack;
+ tp->t_dupacks = 0;
+ m_freem(m);
+
+ /*
+ * If all outstanding data are acked, stop
+ * retransmit timer, otherwise restart timer
+ * using current (possibly backed-off) value.
+ * If process is waiting for space,
+ * wakeup/selwakeup/signal. If data
+ * are ready to send, let tcp_output
+ * decide between more output or persist.
+ */
+#ifdef TCPDEBUG
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_INPUT, ostate, tp,
+ (void *)tcp_saveipgen,
+ &tcp_savetcp, 0);
+#endif
+ if (tp->snd_una == tp->snd_max)
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ else if (!tcp_timer_active(tp, TT_PERSIST))
+ tcp_timer_activate(tp, TT_REXMT,
+ tp->t_rxtcur);
+ } else {
+ /*
+ * Window update only, just free the mbufs and
+ * send out whatever we can.
+ */
+ m_freem(m);
+ }
+ sowwakeup(so);
+ if (sbavail(&so->so_snd))
+ (void) tcp_output(tp);
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
+ __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ if (tp->t_flags & TF_DELACK) {
+ tp->t_flags &= ~TF_DELACK;
+ tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
+ }
+ INP_WUNLOCK(tp->t_inpcb);
+}
+
+/*
+ * Here nothing is really faster; it's just that we
+ * have broken out the fast-data path as well, just
+ * like the fast-ack path.
+ */
+static void
+tcp_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
+ struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen,
+ int ti_locked, u_long tiwin)
+{
+ int newsize = 0; /* automatic sockbuf scaling */
+
+ /*
+ * If last ACK falls within this segment's sequence numbers,
+ * record the timestamp.
+ * NOTE that the test is modified according to the latest
+ * proposal of the tcplw@cray.com list (Braden 1993/04/26).
+ */
+ if ((to->to_flags & TOF_TS) != 0 &&
+ SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
+ tp->ts_recent_age = tcp_ts_getticks();
+ tp->ts_recent = to->to_tsval;
+ }
+
+ /*
+ * This is a pure, in-sequence data packet with
+ * nothing on the reassembly queue and we have enough
+ * buffer space to take it.
+ */
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ }
+ ti_locked = TI_UNLOCKED;
+
+ /* Clean receiver SACK report if present */
+ if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
+ tcp_clean_sackreport(tp);
+ TCPSTAT_INC(tcps_preddat);
+ tp->rcv_nxt += tlen;
+ /*
+ * Pull snd_wl1 up to prevent seq wrap relative to
+ * th_seq.
+ */
+ tp->snd_wl1 = th->th_seq;
+ /*
+ * Pull rcv_up up to prevent seq wrap relative to
+ * rcv_nxt.
+ */
+ tp->rcv_up = tp->rcv_nxt;
+ TCPSTAT_ADD(tcps_rcvbyte, tlen);
+#ifdef TCPDEBUG
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_INPUT, ostate, tp,
+ (void *)tcp_saveipgen, &tcp_savetcp, 0);
+#endif
+ /*
+ * Automatic sizing of receive socket buffer. Often the send
+ * buffer size is not optimally adjusted to the actual network
+ * conditions at hand (delay bandwidth product). Setting the
+ * buffer size too small limits throughput on links with high
+ * bandwidth and high delay (eg. trans-continental/oceanic links).
+ *
+ * On the receive side the socket buffer memory is only rarely
+ * used to any significant extent. This allows us to be much
+ * more aggressive in scaling the receive socket buffer. For
+ * the case that the buffer space is actually used to a large
+ * extent and we run out of kernel memory we can simply drop
+ * the new segments; TCP on the sender will just retransmit it
+ * later. Setting the buffer size too big may only consume too
+ * much kernel memory if the application doesn't read() from
+ * the socket or packet loss or reordering makes use of the
+ * reassembly queue.
+ *
+ * The criteria to step up the receive buffer one notch are:
+ * 1. Application has not set receive buffer size with
+ * SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
+ * 2. the number of bytes received during the time it takes
+ * one timestamp to be reflected back to us (the RTT);
+	 * 3. received bytes per RTT is within seven eighths of the
+ * current socket buffer size;
+ * 4. receive buffer size has not hit maximal automatic size;
+ *
+ * This algorithm does one step per RTT at most and only if
+ * we receive a bulk stream w/o packet losses or reorderings.
+ * Shrinking the buffer during idle times is not necessary as
+ * it doesn't consume any memory when idle.
+ *
+ * TODO: Only step up if the application is actually serving
+ * the buffer to better manage the socket buffer resources.
+ */
+ if (V_tcp_do_autorcvbuf &&
+ (to->to_flags & TOF_TS) &&
+ to->to_tsecr &&
+ (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
+ if (TSTMP_GT(to->to_tsecr, tp->rfbuf_ts) &&
+ to->to_tsecr - tp->rfbuf_ts < hz) {
+ if (tp->rfbuf_cnt >
+ (so->so_rcv.sb_hiwat / 8 * 7) &&
+ so->so_rcv.sb_hiwat <
+ V_tcp_autorcvbuf_max) {
+ newsize =
+ min(so->so_rcv.sb_hiwat +
+ V_tcp_autorcvbuf_inc,
+ V_tcp_autorcvbuf_max);
+ }
+ /* Start over with next RTT. */
+ tp->rfbuf_ts = 0;
+ tp->rfbuf_cnt = 0;
+ } else
+ tp->rfbuf_cnt += tlen; /* add up */
+ }
+
+ /* Add data to socket buffer. */
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ m_freem(m);
+ } else {
+ /*
+ * Set new socket buffer size.
+ * Give up when limit is reached.
+ */
+ if (newsize)
+ if (!sbreserve_locked(&so->so_rcv,
+ newsize, so, NULL))
+ so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
+ m_adj(m, drop_hdrlen); /* delayed header drop */
+ sbappendstream_locked(&so->so_rcv, m, 0);
+ }
+ /* NB: sorwakeup_locked() does an implicit unlock. */
+ sorwakeup_locked(so);
+ if (DELAY_ACK(tp, tlen)) {
+ tp->t_flags |= TF_DELACK;
+ } else {
+ tp->t_flags |= TF_ACKNOW;
+ tcp_output(tp);
+ }
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
+ __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ if (tp->t_flags & TF_DELACK) {
+ tp->t_flags &= ~TF_DELACK;
+ tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
+ }
+ INP_WUNLOCK(tp->t_inpcb);
+}
+
+/*
+ * The slow-path is a clone of the long part of
+ * tcp_do_segment past all the fast-path handling. It
+ * is used by two different callers, the fast/slow path
+ * and the fastack-only path.
+ */
+static void
+tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
+ struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen,
+ int ti_locked, u_long tiwin, int thflags)
+{
+ int acked, ourfinisacked, needoutput = 0;
+ int rstreason, todrop, win;
+ char *s;
+ struct in_conninfo *inc;
+ struct mbuf *mfree = NULL;
+
+ /*
+ * Calculate amount of space in receive window,
+ * and then do TCP input processing.
+ * Receive window is amount of space in rcv queue,
+ * but not less than advertised window.
+ */
+ inc = &tp->t_inpcb->inp_inc;
+ win = sbspace(&so->so_rcv);
+ if (win < 0)
+ win = 0;
+ tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
+
+ /* Reset receive buffer auto scaling when not in bulk receive mode. */
+ tp->rfbuf_ts = 0;
+ tp->rfbuf_cnt = 0;
+
+ switch (tp->t_state) {
+
+ /*
+ * If the state is SYN_RECEIVED:
+ * if seg contains an ACK, but not for our SYN/ACK, send a RST.
+ */
+ case TCPS_SYN_RECEIVED:
+ if ((thflags & TH_ACK) &&
+ (SEQ_LEQ(th->th_ack, tp->snd_una) ||
+ SEQ_GT(th->th_ack, tp->snd_max))) {
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ }
+ break;
+
+ /*
+ * If the state is SYN_SENT:
+ * if seg contains an ACK, but not for our SYN, drop the input.
+ * if seg contains a RST, then drop the connection.
+ * if seg does not contain SYN, then drop it.
+ * Otherwise this is an acceptable SYN segment
+ * initialize tp->rcv_nxt and tp->irs
+ * if seg contains ack then advance tp->snd_una
+ * if seg contains an ECE and ECN support is enabled, the stream
+ * is ECN capable.
+ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state
+ * arrange for segment to be acked (eventually)
+ * continue processing rest of data/controls, beginning with URG
+ */
+ case TCPS_SYN_SENT:
+ if ((thflags & TH_ACK) &&
+ (SEQ_LEQ(th->th_ack, tp->iss) ||
+ SEQ_GT(th->th_ack, tp->snd_max))) {
+ rstreason = BANDLIM_UNLIMITED;
+ goto dropwithreset;
+ }
+ if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) {
+ TCP_PROBE5(connect__refused, NULL, tp,
+ mtod(m, const char *), tp, th);
+ tp = tcp_drop(tp, ECONNREFUSED);
+ }
+ if (thflags & TH_RST)
+ goto drop;
+ if (!(thflags & TH_SYN))
+ goto drop;
+
+ tp->irs = th->th_seq;
+ tcp_rcvseqinit(tp);
+ if (thflags & TH_ACK) {
+ TCPSTAT_INC(tcps_connects);
+ soisconnected(so);
+#ifdef MAC
+ mac_socketpeer_set_from_mbuf(m, so);
+#endif
+ /* Do window scaling on this connection? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->rcv_scale = tp->request_r_scale;
+ }
+ tp->rcv_adv += imin(tp->rcv_wnd,
+ TCP_MAXWIN << tp->rcv_scale);
+ tp->snd_una++; /* SYN is acked */
+ /*
+ * If there's data, delay ACK; if there's also a FIN
+ * ACKNOW will be turned on later.
+ */
+ if (DELAY_ACK(tp, tlen) && tlen != 0)
+ tcp_timer_activate(tp, TT_DELACK,
+ tcp_delacktime);
+ else
+ tp->t_flags |= TF_ACKNOW;
+
+ if ((thflags & TH_ECE) && V_tcp_do_ecn) {
+ tp->t_flags |= TF_ECN_PERMIT;
+ TCPSTAT_INC(tcps_ecn_shs);
+ }
+
+ /*
+ * Received <SYN,ACK> in SYN_SENT[*] state.
+ * Transitions:
+ * SYN_SENT --> ESTABLISHED
+ * SYN_SENT* --> FIN_WAIT_1
+ */
+ tp->t_starttime = ticks;
+ if (tp->t_flags & TF_NEEDFIN) {
+ tcp_state_change(tp, TCPS_FIN_WAIT_1);
+ tp->t_flags &= ~TF_NEEDFIN;
+ thflags &= ~TH_SYN;
+ } else {
+ tcp_state_change(tp, TCPS_ESTABLISHED);
+ TCP_PROBE5(connect__established, NULL, tp,
+ mtod(m, const char *), tp, th);
+ cc_conn_init(tp);
+ tcp_timer_activate(tp, TT_KEEP,
+ TP_KEEPIDLE(tp));
+ }
+ } else {
+ /*
+ * Received initial SYN in SYN-SENT[*] state =>
+ * simultaneous open.
+			 * If it succeeds, connection is half-synchronized.
+ * Otherwise, do 3-way handshake:
+ * SYN-SENT -> SYN-RECEIVED
+ * SYN-SENT* -> SYN-RECEIVED*
+ */
+ tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ tcp_state_change(tp, TCPS_SYN_RECEIVED);
+ }
+
+ KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
+ "ti_locked %d", __func__, ti_locked));
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ /*
+ * Advance th->th_seq to correspond to first data byte.
+ * If data, trim to stay within window,
+ * dropping FIN if necessary.
+ */
+ th->th_seq++;
+ if (tlen > tp->rcv_wnd) {
+ todrop = tlen - tp->rcv_wnd;
+ m_adj(m, -todrop);
+ tlen = tp->rcv_wnd;
+ thflags &= ~TH_FIN;
+ TCPSTAT_INC(tcps_rcvpackafterwin);
+ TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
+ }
+ tp->snd_wl1 = th->th_seq - 1;
+ tp->rcv_up = th->th_seq;
+ /*
+ * Client side of transaction: already sent SYN and data.
+ * If the remote host used T/TCP to validate the SYN,
+ * our data will be ACK'd; if so, enter normal data segment
+ * processing in the middle of step 5, ack processing.
+ * Otherwise, goto step 6.
+ */
+ if (thflags & TH_ACK)
+ goto process_ACK;
+
+ goto step6;
+
+ /*
+ * If the state is LAST_ACK or CLOSING or TIME_WAIT:
+ * do normal processing.
+ *
+ * NB: Leftover from RFC1644 T/TCP. Cases to be reused later.
+ */
+ case TCPS_LAST_ACK:
+ case TCPS_CLOSING:
+ break; /* continue normal processing */
+ }
+
+ /*
+ * States other than LISTEN or SYN_SENT.
+ * First check the RST flag and sequence number since reset segments
+ * are exempt from the timestamp and connection count tests. This
+ * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix
+ * below which allowed reset segments in half the sequence space
+ * to fall though and be processed (which gives forged reset
+ * segments with a random sequence number a 50 percent chance of
+ * killing a connection).
+ * Then check timestamp, if present.
+ * Then check the connection count, if present.
+ * Then check that at least some bytes of segment are within
+ * receive window. If segment begins before rcv_nxt,
+ * drop leading data (and SYN); if nothing left, just ack.
+ */
+ if (thflags & TH_RST) {
+ /*
+ * RFC5961 Section 3.2
+ *
+ * - RST drops connection only if SEG.SEQ == RCV.NXT.
+ * - If RST is in window, we send challenge ACK.
+ *
+ * Note: to take into account delayed ACKs, we should
+ * test against last_ack_sent instead of rcv_nxt.
+ * Note 2: we handle special case of closed window, not
+ * covered by the RFC.
+ */
+ if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
+ SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
+ (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ KASSERT(ti_locked == TI_RLOCKED,
+ ("%s: TH_RST ti_locked %d, th %p tp %p",
+ __func__, ti_locked, th, tp));
+ KASSERT(tp->t_state != TCPS_SYN_SENT,
+ ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
+ __func__, th, tp));
+
+ if (V_tcp_insecure_rst ||
+ tp->last_ack_sent == th->th_seq) {
+ TCPSTAT_INC(tcps_drops);
+ /* Drop the connection. */
+ switch (tp->t_state) {
+ case TCPS_SYN_RECEIVED:
+ so->so_error = ECONNREFUSED;
+ goto close;
+ case TCPS_ESTABLISHED:
+ case TCPS_FIN_WAIT_1:
+ case TCPS_FIN_WAIT_2:
+ case TCPS_CLOSE_WAIT:
+ so->so_error = ECONNRESET;
+ close:
+ tcp_state_change(tp, TCPS_CLOSED);
+ /* FALLTHROUGH */
+ default:
+ tp = tcp_close(tp);
+ }
+ } else {
+ TCPSTAT_INC(tcps_badrst);
+ /* Send challenge ACK. */
+ tcp_respond(tp, mtod(m, void *), th, m,
+ tp->rcv_nxt, tp->snd_nxt, TH_ACK);
+ tp->last_ack_sent = tp->rcv_nxt;
+ m = NULL;
+ }
+ }
+ goto drop;
+ }
+
+ /*
+ * RFC5961 Section 4.2
+ * Send challenge ACK for any SYN in synchronized state.
+ */
+ if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) {
+ KASSERT(ti_locked == TI_RLOCKED,
+ ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+ TCPSTAT_INC(tcps_badsyn);
+ if (V_tcp_insecure_syn &&
+ SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
+ SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
+ tp = tcp_drop(tp, ECONNRESET);
+ rstreason = BANDLIM_UNLIMITED;
+ } else {
+ /* Send challenge ACK. */
+ tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt,
+ tp->snd_nxt, TH_ACK);
+ tp->last_ack_sent = tp->rcv_nxt;
+ m = NULL;
+ }
+ goto drop;
+ }
+
+ /*
+ * RFC 1323 PAWS: If we have a timestamp reply on this segment
+ * and it's less than ts_recent, drop it.
+ */
+ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
+ TSTMP_LT(to->to_tsval, tp->ts_recent)) {
+
+ /* Check to see if ts_recent is over 24 days old. */
+ if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) {
+ /*
+ * Invalidate ts_recent. If this segment updates
+ * ts_recent, the age will be reset later and ts_recent
+ * will get a valid value. If it does not, setting
+ * ts_recent to zero will at least satisfy the
+ * requirement that zero be placed in the timestamp
+ * echo reply when ts_recent isn't valid. The
+ * age isn't reset until we get a valid ts_recent
+ * because we don't want out-of-order segments to be
+ * dropped when ts_recent is old.
+ */
+ tp->ts_recent = 0;
+ } else {
+ TCPSTAT_INC(tcps_rcvduppack);
+ TCPSTAT_ADD(tcps_rcvdupbyte, tlen);
+ TCPSTAT_INC(tcps_pawsdrop);
+ if (tlen)
+ goto dropafterack;
+ goto drop;
+ }
+ }
+
+ /*
+ * In the SYN-RECEIVED state, validate that the packet belongs to
+ * this connection before trimming the data to fit the receive
+ * window. Check the sequence number versus IRS since we know
+ * the sequence numbers haven't wrapped. This is a partial fix
+ * for the "LAND" DoS attack.
+ */
+ if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) {
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ }
+
+ todrop = tp->rcv_nxt - th->th_seq;
+ if (todrop > 0) {
+ if (thflags & TH_SYN) {
+ thflags &= ~TH_SYN;
+ th->th_seq++;
+ if (th->th_urp > 1)
+ th->th_urp--;
+ else
+ thflags &= ~TH_URG;
+ todrop--;
+ }
+ /*
+ * Following if statement from Stevens, vol. 2, p. 960.
+ */
+ if (todrop > tlen
+ || (todrop == tlen && (thflags & TH_FIN) == 0)) {
+ /*
+ * Any valid FIN must be to the left of the window.
+ * At this point the FIN must be a duplicate or out
+ * of sequence; drop it.
+ */
+ thflags &= ~TH_FIN;
+
+ /*
+ * Send an ACK to resynchronize and drop any data.
+ * But keep on processing for RST or ACK.
+ */
+ tp->t_flags |= TF_ACKNOW;
+ todrop = tlen;
+ TCPSTAT_INC(tcps_rcvduppack);
+ TCPSTAT_ADD(tcps_rcvdupbyte, todrop);
+ } else {
+ TCPSTAT_INC(tcps_rcvpartduppack);
+ TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
+ }
+ drop_hdrlen += todrop; /* drop from the top afterwards */
+ th->th_seq += todrop;
+ tlen -= todrop;
+ if (th->th_urp > todrop)
+ th->th_urp -= todrop;
+ else {
+ thflags &= ~TH_URG;
+ th->th_urp = 0;
+ }
+ }
+
+ /*
+ * If new data are received on a connection after the
+ * user processes are gone, then RST the other end.
+ */
+ if ((so->so_state & SS_NOFDREF) &&
+ tp->t_state > TCPS_CLOSE_WAIT && tlen) {
+		KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDREF && "
+ "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
+ "after socket was closed, "
+ "sending RST and removing tcpcb\n",
+ s, __func__, tcpstates[tp->t_state], tlen);
+ free(s, M_TCPLOG);
+ }
+ tp = tcp_close(tp);
+ TCPSTAT_INC(tcps_rcvafterclose);
+ rstreason = BANDLIM_UNLIMITED;
+ goto dropwithreset;
+ }
+
+ /*
+ * If segment ends after window, drop trailing data
+ * (and PUSH and FIN); if nothing left, just ACK.
+ */
+ todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd);
+ if (todrop > 0) {
+ TCPSTAT_INC(tcps_rcvpackafterwin);
+ if (todrop >= tlen) {
+ TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen);
+ /*
+ * If window is closed can only take segments at
+ * window edge, and have to drop data and PUSH from
+ * incoming segments. Continue processing, but
+ * remember to ack. Otherwise, drop segment
+ * and ack.
+ */
+ if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
+ tp->t_flags |= TF_ACKNOW;
+ TCPSTAT_INC(tcps_rcvwinprobe);
+ } else
+ goto dropafterack;
+ } else
+ TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
+ m_adj(m, -todrop);
+ tlen -= todrop;
+ thflags &= ~(TH_PUSH|TH_FIN);
+ }
+
+ /*
+ * If last ACK falls within this segment's sequence numbers,
+ * record its timestamp.
+ * NOTE:
+ * 1) That the test incorporates suggestions from the latest
+ * proposal of the tcplw@cray.com list (Braden 1993/04/26).
+ * 2) That updating only on newer timestamps interferes with
+ * our earlier PAWS tests, so this check should be solely
+ * predicated on the sequence space of this segment.
+ * 3) That we modify the segment boundary check to be
+ * Last.ACK.Sent <= SEG.SEQ + SEG.Len
+ * instead of RFC1323's
+ * Last.ACK.Sent < SEG.SEQ + SEG.Len,
+ * This modified check allows us to overcome RFC1323's
+ * limitations as described in Stevens TCP/IP Illustrated
+ * Vol. 2 p.869. In such cases, we can still calculate the
+ * RTT correctly when RCV.NXT == Last.ACK.Sent.
+ */
+ if ((to->to_flags & TOF_TS) != 0 &&
+ SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
+ SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
+ ((thflags & (TH_SYN|TH_FIN)) != 0))) {
+ tp->ts_recent_age = tcp_ts_getticks();
+ tp->ts_recent = to->to_tsval;
+ }
+
+ /*
+ * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN
+ * flag is on (half-synchronized state), then queue data for
+ * later processing; else drop segment and return.
+ */
+ if ((thflags & TH_ACK) == 0) {
+ if (tp->t_state == TCPS_SYN_RECEIVED ||
+ (tp->t_flags & TF_NEEDSYN))
+ goto step6;
+ else if (tp->t_flags & TF_ACKNOW)
+ goto dropafterack;
+ else
+ goto drop;
+ }
+
+ /*
+ * Ack processing.
+ */
+ switch (tp->t_state) {
+
+ /*
+ * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
+ * ESTABLISHED state and continue processing.
+ * The ACK was checked above.
+ */
+ case TCPS_SYN_RECEIVED:
+
+ TCPSTAT_INC(tcps_connects);
+ soisconnected(so);
+ /* Do window scaling? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->rcv_scale = tp->request_r_scale;
+ tp->snd_wnd = tiwin;
+ }
+ /*
+ * Make transitions:
+ * SYN-RECEIVED -> ESTABLISHED
+ * SYN-RECEIVED* -> FIN-WAIT-1
+ */
+ tp->t_starttime = ticks;
+ if (tp->t_flags & TF_NEEDFIN) {
+ tcp_state_change(tp, TCPS_FIN_WAIT_1);
+ tp->t_flags &= ~TF_NEEDFIN;
+ } else {
+ tcp_state_change(tp, TCPS_ESTABLISHED);
+ TCP_PROBE5(accept__established, NULL, tp,
+ mtod(m, const char *), tp, th);
+ cc_conn_init(tp);
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
+ }
+ /*
+ * If segment contains data or ACK, will call tcp_reass()
+ * later; if not, do so now to pass queued data to user.
+ */
+ if (tlen == 0 && (thflags & TH_FIN) == 0)
+ (void) tcp_reass(tp, (struct tcphdr *)0, 0,
+ (struct mbuf *)0);
+ tp->snd_wl1 = th->th_seq - 1;
+ /* FALLTHROUGH */
+
+ /*
+ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
+ * ACKs. If the ack is in the range
+ * tp->snd_una < th->th_ack <= tp->snd_max
+ * then advance tp->snd_una to th->th_ack and drop
+ * data from the retransmission queue. If this ACK reflects
+ * more up to date window information we update our window information.
+ */
+ case TCPS_ESTABLISHED:
+ case TCPS_FIN_WAIT_1:
+ case TCPS_FIN_WAIT_2:
+ case TCPS_CLOSE_WAIT:
+ case TCPS_CLOSING:
+ case TCPS_LAST_ACK:
+ if (SEQ_GT(th->th_ack, tp->snd_max)) {
+ TCPSTAT_INC(tcps_rcvacktoomuch);
+ goto dropafterack;
+ }
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ ((to->to_flags & TOF_SACK) ||
+ !TAILQ_EMPTY(&tp->snd_holes)))
+ tcp_sack_doack(tp, to, th->th_ack);
+ else
+ /*
+ * Reset the value so that previous (valid) value
+ * from the last ack with SACK doesn't get used.
+ */
+ tp->sackhint.sacked_bytes = 0;
+
+ /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
+ hhook_run_tcp_est_in(tp, th, to);
+
+ if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
+ if (tlen == 0 && tiwin == tp->snd_wnd) {
+ /*
+ * If this is the first time we've seen a
+ * FIN from the remote, this is not a
+ * duplicate and it needs to be processed
+ * normally. This happens during a
+ * simultaneous close.
+ */
+ if ((thflags & TH_FIN) &&
+ (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
+ tp->t_dupacks = 0;
+ break;
+ }
+ TCPSTAT_INC(tcps_rcvdupack);
+ /*
+ * If we have outstanding data (other than
+ * a window probe), this is a completely
+ * duplicate ack (ie, window info didn't
+ * change and FIN isn't set),
+ * the ack is the biggest we've
+ * seen and we've seen exactly our rexmt
+				 * threshold of them, assume a packet
+ * has been dropped and retransmit it.
+ * Kludge snd_nxt & the congestion
+ * window so we send only this one
+ * packet.
+ *
+ * We know we're losing at the current
+ * window size so do congestion avoidance
+ * (set ssthresh to half the current window
+ * and pull our congestion window back to
+ * the new ssthresh).
+ *
+ * Dup acks mean that packets have left the
+ * network (they're now cached at the receiver)
+ * so bump cwnd by the amount in the receiver
+ * to keep a constant cwnd packets in the
+ * network.
+ *
+ * When using TCP ECN, notify the peer that
+ * we reduced the cwnd.
+ */
+ if (!tcp_timer_active(tp, TT_REXMT) ||
+ th->th_ack != tp->snd_una)
+ tp->t_dupacks = 0;
+ else if (++tp->t_dupacks > tcprexmtthresh ||
+ IN_FASTRECOVERY(tp->t_flags)) {
+ cc_ack_received(tp, th, CC_DUPACK);
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ IN_FASTRECOVERY(tp->t_flags)) {
+ int awnd;
+
+ /*
+ * Compute the amount of data in flight first.
+ * We can inject new data into the pipe iff
+ * we have less than 1/2 the original window's
+ * worth of data in flight.
+ */
+ if (V_tcp_do_rfc6675_pipe)
+ awnd = tcp_compute_pipe(tp);
+ else
+ awnd = (tp->snd_nxt - tp->snd_fack) +
+ tp->sackhint.sack_bytes_rexmit;
+
+ if (awnd < tp->snd_ssthresh) {
+ tp->snd_cwnd += tp->t_maxseg;
+ if (tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd = tp->snd_ssthresh;
+ }
+ } else
+ tp->snd_cwnd += tp->t_maxseg;
+ (void) tp->t_fb->tcp_output(tp);
+ goto drop;
+ } else if (tp->t_dupacks == tcprexmtthresh) {
+ tcp_seq onxt = tp->snd_nxt;
+
+ /*
+ * If we're doing sack, check to
+ * see if we're already in sack
+ * recovery. If we're not doing sack,
+ * check to see if we're in newreno
+ * recovery.
+ */
+ if (tp->t_flags & TF_SACK_PERMIT) {
+ if (IN_FASTRECOVERY(tp->t_flags)) {
+ tp->t_dupacks = 0;
+ break;
+ }
+ } else {
+ if (SEQ_LEQ(th->th_ack,
+ tp->snd_recover)) {
+ tp->t_dupacks = 0;
+ break;
+ }
+ }
+ /* Congestion signal before ack. */
+ cc_cong_signal(tp, th, CC_NDUPACK);
+ cc_ack_received(tp, th, CC_DUPACK);
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ tp->t_rtttime = 0;
+ if (tp->t_flags & TF_SACK_PERMIT) {
+ TCPSTAT_INC(
+ tcps_sack_recovery_episode);
+ tp->sack_newdata = tp->snd_nxt;
+ tp->snd_cwnd = tp->t_maxseg;
+ (void) tp->t_fb->tcp_output(tp);
+ goto drop;
+ }
+ tp->snd_nxt = th->th_ack;
+ tp->snd_cwnd = tp->t_maxseg;
+ (void) tp->t_fb->tcp_output(tp);
+ KASSERT(tp->snd_limited <= 2,
+ ("%s: tp->snd_limited too big",
+ __func__));
+ tp->snd_cwnd = tp->snd_ssthresh +
+ tp->t_maxseg *
+ (tp->t_dupacks - tp->snd_limited);
+ if (SEQ_GT(onxt, tp->snd_nxt))
+ tp->snd_nxt = onxt;
+ goto drop;
+ } else if (V_tcp_do_rfc3042) {
+ /*
+ * Process first and second duplicate
+ * ACKs. Each indicates a segment
+ * leaving the network, creating room
+ * for more. Make sure we can send a
+ * packet on reception of each duplicate
+ * ACK by increasing snd_cwnd by one
+ * segment. Restore the original
+ * snd_cwnd after packet transmission.
+ */
+ cc_ack_received(tp, th, CC_DUPACK);
+ u_long oldcwnd = tp->snd_cwnd;
+ tcp_seq oldsndmax = tp->snd_max;
+ u_int sent;
+ int avail;
+
+ KASSERT(tp->t_dupacks == 1 ||
+ tp->t_dupacks == 2,
+ ("%s: dupacks not 1 or 2",
+ __func__));
+ if (tp->t_dupacks == 1)
+ tp->snd_limited = 0;
+ tp->snd_cwnd =
+ (tp->snd_nxt - tp->snd_una) +
+ (tp->t_dupacks - tp->snd_limited) *
+ tp->t_maxseg;
+ /*
+ * Only call tcp_output when there
+ * is new data available to be sent.
+ * Otherwise we would send pure ACKs.
+ */
+ SOCKBUF_LOCK(&so->so_snd);
+ avail = sbavail(&so->so_snd) -
+ (tp->snd_nxt - tp->snd_una);
+ SOCKBUF_UNLOCK(&so->so_snd);
+ if (avail > 0)
+ (void) tp->t_fb->tcp_output(tp);
+ sent = tp->snd_max - oldsndmax;
+ if (sent > tp->t_maxseg) {
+ KASSERT((tp->t_dupacks == 2 &&
+ tp->snd_limited == 0) ||
+ (sent == tp->t_maxseg + 1 &&
+ tp->t_flags & TF_SENTFIN),
+ ("%s: sent too much",
+ __func__));
+ tp->snd_limited = 2;
+ } else if (sent > 0)
+ ++tp->snd_limited;
+ tp->snd_cwnd = oldcwnd;
+ goto drop;
+ }
+ } else
+ tp->t_dupacks = 0;
+ break;
+ }
+
+ KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
+ ("%s: th_ack <= snd_una", __func__));
+
+ /*
+ * If the congestion window was inflated to account
+ * for the other side's cached packets, retract it.
+ */
+ if (IN_FASTRECOVERY(tp->t_flags)) {
+ if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+ if (tp->t_flags & TF_SACK_PERMIT)
+ tcp_sack_partialack(tp, th);
+ else
+ tcp_newreno_partial_ack(tp, th);
+ } else
+ cc_post_recovery(tp, th);
+ }
+ tp->t_dupacks = 0;
+ /*
+ * If we reach this point, ACK is not a duplicate,
+ * i.e., it ACKs something we sent.
+ */
+ if (tp->t_flags & TF_NEEDSYN) {
+ /*
+ * T/TCP: Connection was half-synchronized, and our
+ * SYN has been ACK'd (so connection is now fully
+ * synchronized). Go to non-starred state,
+ * increment snd_una for ACK of SYN, and check if
+ * we can do window scaling.
+ */
+ tp->t_flags &= ~TF_NEEDSYN;
+ tp->snd_una++;
+ /* Do window scaling? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->rcv_scale = tp->request_r_scale;
+ /* Send window already scaled. */
+ }
+ }
+
+process_ACK:
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ acked = BYTES_THIS_ACK(tp, th);
+ TCPSTAT_INC(tcps_rcvackpack);
+ TCPSTAT_ADD(tcps_rcvackbyte, acked);
+
+ /*
+ * If we just performed our first retransmit, and the ACK
+ * arrives within our recovery window, then it was a mistake
+ * to do the retransmit in the first place. Recover our
+ * original cwnd and ssthresh, and proceed to transmit where
+ * we left off.
+ */
+ if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID &&
+ (int)(ticks - tp->t_badrxtwin) < 0)
+ cc_cong_signal(tp, th, CC_RTO_ERR);
+
+ /*
+ * If we have a timestamp reply, update smoothed
+ * round trip time. If no timestamp is present but
+ * transmit timer is running and timed sequence
+ * number was acked, update smoothed round trip time.
+ * Since we now have an rtt measurement, cancel the
+ * timer backoff (cf., Phil Karn's retransmit alg.).
+ * Recompute the initial retransmit timer.
+ *
+ * Some boxes send broken timestamp replies
+ * during the SYN+ACK phase, ignore
+ * timestamps of 0 or we could calculate a
+ * huge RTT and blow up the retransmit timer.
+ */
+ if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) {
+ u_int t;
+
+ t = tcp_ts_getticks() - to->to_tsecr;
+ if (!tp->t_rttlow || tp->t_rttlow > t)
+ tp->t_rttlow = t;
+ tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1);
+ } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
+ if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime)
+ tp->t_rttlow = ticks - tp->t_rtttime;
+ tcp_xmit_timer(tp, ticks - tp->t_rtttime);
+ }
+
+ /*
+ * If all outstanding data is acked, stop retransmit
+ * timer and remember to restart (more output or persist).
+ * If there is more data to be acked, restart retransmit
+ * timer, using current (possibly backed-off) value.
+ */
+ if (th->th_ack == tp->snd_max) {
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ needoutput = 1;
+ } else if (!tcp_timer_active(tp, TT_PERSIST))
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+
+ /*
+ * If no data (only SYN) was ACK'd,
+ * skip rest of ACK processing.
+ */
+ if (acked == 0)
+ goto step6;
+
+ /*
+ * Let the congestion control algorithm update congestion
+ * control related information. This typically means increasing
+ * the congestion window.
+ */
+ cc_ack_received(tp, th, CC_ACK);
+
+ SOCKBUF_LOCK(&so->so_snd);
+ if (acked > sbavail(&so->so_snd)) {
+ tp->snd_wnd -= sbavail(&so->so_snd);
+ mfree = sbcut_locked(&so->so_snd,
+ (int)sbavail(&so->so_snd));
+ ourfinisacked = 1;
+ } else {
+ mfree = sbcut_locked(&so->so_snd, acked);
+ tp->snd_wnd -= acked;
+ ourfinisacked = 0;
+ }
+ /* NB: sowwakeup_locked() does an implicit unlock. */
+ sowwakeup_locked(so);
+ m_freem(mfree);
+ /* Detect una wraparound. */
+ if (!IN_RECOVERY(tp->t_flags) &&
+ SEQ_GT(tp->snd_una, tp->snd_recover) &&
+ SEQ_LEQ(th->th_ack, tp->snd_recover))
+ tp->snd_recover = th->th_ack - 1;
+ /* XXXLAS: Can this be moved up into cc_post_recovery? */
+ if (IN_RECOVERY(tp->t_flags) &&
+ SEQ_GEQ(th->th_ack, tp->snd_recover)) {
+ EXIT_RECOVERY(tp->t_flags);
+ }
+ tp->snd_una = th->th_ack;
+ if (tp->t_flags & TF_SACK_PERMIT) {
+ if (SEQ_GT(tp->snd_una, tp->snd_recover))
+ tp->snd_recover = tp->snd_una;
+ }
+ if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+ tp->snd_nxt = tp->snd_una;
+
+ switch (tp->t_state) {
+
+ /*
+ * In FIN_WAIT_1 STATE in addition to the processing
+ * for the ESTABLISHED state if our FIN is now acknowledged
+ * then enter FIN_WAIT_2.
+ */
+ case TCPS_FIN_WAIT_1:
+ if (ourfinisacked) {
+ /*
+ * If we can't receive any more
+ * data, then closing user can proceed.
+ * Starting the timer is contrary to the
+ * specification, but if we don't get a FIN
+ * we'll hang forever.
+ *
+ * XXXjl:
+ * we should release the tp also, and use a
+ * compressed state.
+ */
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ soisdisconnected(so);
+ tcp_timer_activate(tp, TT_2MSL,
+ (tcp_fast_finwait2_recycle ?
+ tcp_finwait2_timeout :
+ TP_MAXIDLE(tp)));
+ }
+ tcp_state_change(tp, TCPS_FIN_WAIT_2);
+ }
+ break;
+
+ /*
+ * In CLOSING STATE in addition to the processing for
+ * the ESTABLISHED state if the ACK acknowledges our FIN
+ * then enter the TIME-WAIT state, otherwise ignore
+ * the segment.
+ */
+ case TCPS_CLOSING:
+ if (ourfinisacked) {
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ tcp_twstart(tp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ m_freem(m);
+ return;
+ }
+ break;
+
+ /*
+ * In LAST_ACK, we may still be waiting for data to drain
+ * and/or to be acked, as well as for the ack of our FIN.
+ * If our FIN is now acknowledged, delete the TCB,
+ * enter the closed state and return.
+ */
+ case TCPS_LAST_ACK:
+ if (ourfinisacked) {
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ tp = tcp_close(tp);
+ goto drop;
+ }
+ break;
+ }
+ }
+
+step6:
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ /*
+ * Update window information.
+ * Don't look at window if no ACK: TAC's send garbage on first SYN.
+ */
+ if ((thflags & TH_ACK) &&
+ (SEQ_LT(tp->snd_wl1, th->th_seq) ||
+ (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
+ (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
+ /* keep track of pure window updates */
+ if (tlen == 0 &&
+ tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
+ TCPSTAT_INC(tcps_rcvwinupd);
+ tp->snd_wnd = tiwin;
+ tp->snd_wl1 = th->th_seq;
+ tp->snd_wl2 = th->th_ack;
+ if (tp->snd_wnd > tp->max_sndwnd)
+ tp->max_sndwnd = tp->snd_wnd;
+ needoutput = 1;
+ }
+
+ /*
+ * Process segments with URG.
+ */
+ if ((thflags & TH_URG) && th->th_urp &&
+ TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ /*
+ * This is a kludge, but if we receive and accept
+ * random urgent pointers, we'll crash in
+ * soreceive. It's hard to imagine someone
+ * actually wanting to send this much urgent data.
+ */
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (th->th_urp + sbavail(&so->so_rcv) > sb_max) {
+ th->th_urp = 0; /* XXX */
+ thflags &= ~TH_URG; /* XXX */
+ SOCKBUF_UNLOCK(&so->so_rcv); /* XXX */
+ goto dodata; /* XXX */
+ }
+ /*
+ * If this segment advances the known urgent pointer,
+ * then mark the data stream. This should not happen
+ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
+ * a FIN has been received from the remote side.
+ * In these states we ignore the URG.
+ *
+ * According to RFC961 (Assigned Protocols),
+ * the urgent pointer points to the last octet
+ * of urgent data. We continue, however,
+ * to consider it to indicate the first octet
+ * of data past the urgent section as the original
+ * spec states (in one of two places).
+ */
+ if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
+ tp->rcv_up = th->th_seq + th->th_urp;
+ so->so_oobmark = sbavail(&so->so_rcv) +
+ (tp->rcv_up - tp->rcv_nxt) - 1;
+ if (so->so_oobmark == 0)
+ so->so_rcv.sb_state |= SBS_RCVATMARK;
+ sohasoutofband(so);
+ tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+ }
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ /*
+		 * Remove out of band data so it doesn't get presented to the user.
+ * This can happen independent of advancing the URG pointer,
+ * but if two URG's are pending at once, some out-of-band
+ * data may creep in... ick.
+ */
+ if (th->th_urp <= (u_long)tlen &&
+ !(so->so_options & SO_OOBINLINE)) {
+ /* hdr drop is delayed */
+ tcp_pulloutofband(so, th, m, drop_hdrlen);
+ }
+ } else {
+ /*
+ * If no out of band data is expected,
+ * pull receive urgent pointer along
+ * with the receive window.
+ */
+ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
+ tp->rcv_up = tp->rcv_nxt;
+ }
+dodata: /* XXX */
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ /*
+ * Process the segment text, merging it into the TCP sequencing queue,
+ * and arranging for acknowledgment of receipt if necessary.
+ * This process logically involves adjusting tp->rcv_wnd as data
+ * is presented to the user (this happens in tcp_usrreq.c,
+ * case PRU_RCVD). If a FIN has already been received on this
+ * connection then we just ignore the text.
+ */
+ if ((tlen || (thflags & TH_FIN)) &&
+ TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ tcp_seq save_start = th->th_seq;
+ m_adj(m, drop_hdrlen); /* delayed header drop */
+ /*
+ * Insert segment which includes th into TCP reassembly queue
+ * with control block tp. Set thflags to whether reassembly now
+ * includes a segment with FIN. This handles the common case
+ * inline (segment is the next to be received on an established
+ * connection, and the queue is empty), avoiding linkage into
+ * and removal from the queue and repetition of various
+ * conversions.
+ * Set DELACK for segments received in order, but ack
+ * immediately when segments are out of order (so
+ * fast retransmit can work).
+ */
+ if (th->th_seq == tp->rcv_nxt &&
+ LIST_EMPTY(&tp->t_segq) &&
+ TCPS_HAVEESTABLISHED(tp->t_state)) {
+ if (DELAY_ACK(tp, tlen))
+ tp->t_flags |= TF_DELACK;
+ else
+ tp->t_flags |= TF_ACKNOW;
+ tp->rcv_nxt += tlen;
+ thflags = th->th_flags & TH_FIN;
+ TCPSTAT_INC(tcps_rcvpack);
+ TCPSTAT_ADD(tcps_rcvbyte, tlen);
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
+ m_freem(m);
+ else
+ sbappendstream_locked(&so->so_rcv, m, 0);
+ /* NB: sorwakeup_locked() does an implicit unlock. */
+ sorwakeup_locked(so);
+ } else {
+ /*
+ * XXX: Due to the header drop above "th" is
+ * theoretically invalid by now. Fortunately
+			 * m_adj() doesn't actually free any mbufs
+ * when trimming from the head.
+ */
+ thflags = tcp_reass(tp, th, &tlen, m);
+ tp->t_flags |= TF_ACKNOW;
+ }
+ if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
+ tcp_update_sack_list(tp, save_start, save_start + tlen);
+#if 0
+ /*
+ * Note the amount of data that peer has sent into
+ * our window, in order to estimate the sender's
+ * buffer size.
+ * XXX: Unused.
+ */
+ if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
+ len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
+ else
+ len = so->so_rcv.sb_hiwat;
+#endif
+ } else {
+ m_freem(m);
+ thflags &= ~TH_FIN;
+ }
+
+ /*
+ * If FIN is received ACK the FIN and let the user know
+ * that the connection is closing.
+ */
+ if (thflags & TH_FIN) {
+ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ socantrcvmore(so);
+ /*
+ * If connection is half-synchronized
+ * (ie NEEDSYN flag on) then delay ACK,
+ * so it may be piggybacked when SYN is sent.
+ * Otherwise, since we received a FIN then no
+ * more input can be expected, send ACK now.
+ */
+ if (tp->t_flags & TF_NEEDSYN)
+ tp->t_flags |= TF_DELACK;
+ else
+ tp->t_flags |= TF_ACKNOW;
+ tp->rcv_nxt++;
+ }
+ switch (tp->t_state) {
+
+ /*
+ * In SYN_RECEIVED and ESTABLISHED STATES
+ * enter the CLOSE_WAIT state.
+ */
+ case TCPS_SYN_RECEIVED:
+ tp->t_starttime = ticks;
+ /* FALLTHROUGH */
+ case TCPS_ESTABLISHED:
+ tcp_state_change(tp, TCPS_CLOSE_WAIT);
+ break;
+
+ /*
+ * If still in FIN_WAIT_1 STATE FIN has not been acked so
+ * enter the CLOSING state.
+ */
+ case TCPS_FIN_WAIT_1:
+ tcp_state_change(tp, TCPS_CLOSING);
+ break;
+
+ /*
+ * In FIN_WAIT_2 state enter the TIME_WAIT state,
+ * starting the time-wait timer, turning off the other
+ * standard timers.
+ */
+ case TCPS_FIN_WAIT_2:
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata "
+ "TCP_FIN_WAIT_2 ti_locked: %d", __func__,
+ ti_locked));
+
+ tcp_twstart(tp);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ return;
+ }
+ }
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ }
+ ti_locked = TI_UNLOCKED;
+
+#ifdef TCPDEBUG
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
+ &tcp_savetcp, 0);
+#endif
+ TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
+
+ /*
+ * Return any desired output.
+ */
+ if (needoutput || (tp->t_flags & TF_ACKNOW))
+ (void) tp->t_fb->tcp_output(tp);
+
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
+ __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ if (tp->t_flags & TF_DELACK) {
+ tp->t_flags &= ~TF_DELACK;
+ tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
+ }
+ INP_WUNLOCK(tp->t_inpcb);
+ return;
+
+dropafterack:
+ /*
+ * Generate an ACK dropping incoming segment if it occupies
+ * sequence space, where the ACK reflects our state.
+ *
+ * We can now skip the test for the RST flag since all
+ * paths to this code happen after packets containing
+ * RST have been dropped.
+ *
+ * In the SYN-RECEIVED state, don't send an ACK unless the
+ * segment we received passes the SYN-RECEIVED ACK test.
+ * If it fails send a RST. This breaks the loop in the
+ * "LAND" DoS attack, and also prevents an ACK storm
+ * between two listening ports that have been sent forged
+ * SYN segments, each with the source address of the other.
+ */
+ if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) &&
+ (SEQ_GT(tp->snd_una, th->th_ack) ||
+ SEQ_GT(th->th_ack, tp->snd_max)) ) {
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ }
+#ifdef TCPDEBUG
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
+ &tcp_savetcp, 0);
+#endif
+ TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ }
+ ti_locked = TI_UNLOCKED;
+
+ tp->t_flags |= TF_ACKNOW;
+ (void) tp->t_fb->tcp_output(tp);
+ INP_WUNLOCK(tp->t_inpcb);
+ m_freem(m);
+ return;
+
+dropwithreset:
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ }
+ ti_locked = TI_UNLOCKED;
+
+ if (tp != NULL) {
+ tcp_dropwithreset(m, th, tp, tlen, rstreason);
+ INP_WUNLOCK(tp->t_inpcb);
+ } else
+ tcp_dropwithreset(m, th, NULL, tlen, rstreason);
+ return;
+
+drop:
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ ti_locked = TI_UNLOCKED;
+ }
+#ifdef INVARIANTS
+ else
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+#endif
+
+ /*
+ * Drop space held by incoming segment and return.
+ */
+#ifdef TCPDEBUG
+ if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
+ &tcp_savetcp, 0);
+#endif
+ TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
+ if (tp != NULL)
+ INP_WUNLOCK(tp->t_inpcb);
+ m_freem(m);
+}
+
+
+/*
+ * tcp_do_segment_fastslow is a combination of the original
+ * tcp_do_segment and a split fastpath: one function for the
+ * fast-ack case, which also allows the fastpath for acks that
+ * advance the window in sequence, and a sub-function that
+ * handles the in-sequence data.
+ */
+void
+tcp_do_segment_fastslow(struct mbuf *m, struct tcphdr *th, struct socket *so,
+ struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
+ int ti_locked)
+{
+ int thflags;
+ u_long tiwin;
+ char *s;
+ int can_enter;
+ struct in_conninfo *inc;
+ struct tcpopt to;
+
+#ifdef TCPDEBUG
+ /*
+ * The size of tcp_saveipgen must be the size of the max ip header,
+ * now IPv6.
+ */
+ u_char tcp_saveipgen[IP6_HDR_LEN];
+ struct tcphdr tcp_savetcp;
+ short ostate = 0;
+#endif
+ thflags = th->th_flags;
+ tp->sackhint.last_sack_ack = 0;
+ inc = &tp->t_inpcb->inp_inc;
+ /*
+ * If this is either a state-changing packet or current state isn't
+ * established, we require a write lock on tcbinfo. Otherwise, we
+	 * allow the tcbinfo to be either locked or unlocked, as the
+ * caller may have unnecessarily acquired a write lock due to a race.
+ */
+ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
+ tp->t_state != TCPS_ESTABLISHED) {
+ KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
+ "SYN/FIN/RST/!EST", __func__, ti_locked));
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ } else {
+#ifdef INVARIANTS
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ } else {
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
+ "ti_locked: %d", __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
+#endif
+ }
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
+ __func__));
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
+ __func__));
+
+ /*
+ * Segment received on connection.
+ * Reset idle time and keep-alive timer.
+ * XXX: This should be done after segment
+ * validation to ignore broken/spoofed segs.
+ */
+ tp->t_rcvtime = ticks;
+ if (TCPS_HAVEESTABLISHED(tp->t_state))
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
+
+ /*
+ * Unscale the window into a 32-bit value.
+ * For the SYN_SENT state the scale is zero.
+ */
+ tiwin = th->th_win << tp->snd_scale;
+
+ /*
+ * TCP ECN processing.
+ */
+ if (tp->t_flags & TF_ECN_PERMIT) {
+ if (thflags & TH_CWR)
+ tp->t_flags &= ~TF_ECN_SND_ECE;
+ switch (iptos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ tp->t_flags |= TF_ECN_SND_ECE;
+ TCPSTAT_INC(tcps_ecn_ce);
+ break;
+ case IPTOS_ECN_ECT0:
+ TCPSTAT_INC(tcps_ecn_ect0);
+ break;
+ case IPTOS_ECN_ECT1:
+ TCPSTAT_INC(tcps_ecn_ect1);
+ break;
+ }
+ /* Congestion experienced. */
+ if (thflags & TH_ECE) {
+ cc_cong_signal(tp, th, CC_ECN);
+ }
+ }
+
+ /*
+ * Parse options on any incoming segment.
+ */
+ tcp_dooptions(&to, (u_char *)(th + 1),
+ (th->th_off << 2) - sizeof(struct tcphdr),
+ (thflags & TH_SYN) ? TO_SYN : 0);
+
+ /*
+ * If echoed timestamp is later than the current time,
+ * fall back to non RFC1323 RTT calculation. Normalize
+ * timestamp if syncookies were used when this connection
+ * was established.
+ */
+ if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
+ to.to_tsecr -= tp->ts_offset;
+ if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
+ to.to_tsecr = 0;
+ }
+ /*
+ * If timestamps were negotiated during SYN/ACK they should
+ * appear on every segment during this session and vice versa.
+ */
+ if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Timestamp missing, "
+ "no action\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ }
+ if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
+ "no action\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ }
+
+ /*
+ * Process options only when we get SYN/ACK back. The SYN case
+ * for incoming connections is handled in tcp_syncache.
+ * According to RFC1323 the window field in a SYN (i.e., a <SYN>
+ * or <SYN,ACK>) segment itself is never scaled.
+ * XXX this is traditional behavior, may need to be cleaned up.
+ */
+ if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
+ if ((to.to_flags & TOF_SCALE) &&
+ (tp->t_flags & TF_REQ_SCALE)) {
+ tp->t_flags |= TF_RCVD_SCALE;
+ tp->snd_scale = to.to_wscale;
+ }
+ /*
+ * Initial send window. It will be updated with
+ * the next incoming segment to the scaled value.
+ */
+ tp->snd_wnd = th->th_win;
+ if (to.to_flags & TOF_TS) {
+ tp->t_flags |= TF_RCVD_TSTMP;
+ tp->ts_recent = to.to_tsval;
+ tp->ts_recent_age = tcp_ts_getticks();
+ }
+ if (to.to_flags & TOF_MSS)
+ tcp_mss(tp, to.to_mss);
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ (to.to_flags & TOF_SACKPERM) == 0)
+ tp->t_flags &= ~TF_SACK_PERMIT;
+ }
+ can_enter = 0;
+ if (__predict_true((tlen == 0))) {
+ /*
+ * The ack moved forward and we have a window (non-zero)
+ * <or>
+ * The ack did not move forward, but the window increased.
+ */
+ if (__predict_true((SEQ_GT(th->th_ack, tp->snd_una) && tiwin) ||
+ ((th->th_ack == tp->snd_una) && tiwin && (tiwin > tp->snd_wnd)))) {
+ can_enter = 1;
+ }
+ } else {
+ /*
+ * Data incoming, use the old entry criteria
+ * for fast-path with data.
+ */
+ if ((tiwin && tiwin == tp->snd_wnd)) {
+ can_enter = 1;
+ }
+ }
+ /*
+ * Header prediction: check for the two common cases
+ * of a uni-directional data xfer. If the packet has
+ * no control flags, is in-sequence, the window didn't
+ * change and we're not retransmitting, it's a
+ * candidate. If the length is zero and the ack moved
+ * forward, we're the sender side of the xfer. Just
+ * free the data acked & wake any higher level process
+ * that was blocked waiting for space. If the length
+ * is non-zero and the ack didn't move, we're the
+ * receiver side. If we're getting packets in-order
+ * (the reassembly queue is empty), add the data to
+ * the socket buffer and note that we need a delayed ack.
+ * Make sure that the hidden state-flags are also off.
+ * Since we check for TCPS_ESTABLISHED first, it can only
+ * be TH_NEEDSYN.
+ */
+ if (__predict_true(tp->t_state == TCPS_ESTABLISHED &&
+ th->th_seq == tp->rcv_nxt &&
+ (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
+ tp->snd_nxt == tp->snd_max &&
+ can_enter &&
+ ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
+ LIST_EMPTY(&tp->t_segq) &&
+ ((to.to_flags & TOF_TS) == 0 ||
+ TSTMP_GEQ(to.to_tsval, tp->ts_recent)))) {
+ if (__predict_true((tlen == 0) &&
+ (SEQ_LEQ(th->th_ack, tp->snd_max) &&
+ !IN_RECOVERY(tp->t_flags) &&
+ (to.to_flags & TOF_SACK) == 0 &&
+ TAILQ_EMPTY(&tp->snd_holes)))) {
+ /* We are done */
+ tcp_do_fastack(m, th, so, tp, &to, drop_hdrlen, tlen,
+ ti_locked, tiwin);
+ return;
+ } else if ((tlen) &&
+ (th->th_ack == tp->snd_una &&
+ tlen <= sbspace(&so->so_rcv))) {
+ tcp_do_fastnewdata(m, th, so, tp, &to, drop_hdrlen, tlen,
+ ti_locked, tiwin);
+ /* We are done */
+ return;
+ }
+ }
+ tcp_do_slowpath(m, th, so, tp, &to, drop_hdrlen, tlen,
+ ti_locked, tiwin, thflags);
+}
+
+
+/*
+ * This subfunction tries to highly optimize the fast path.
+ * We again allow in-sequence window updates to remain in the
+ * fast path, and we add __predict hints to help the compiler.
+ * Note that a return value of 0 means we could not process
+ * the segment and the caller should push the packet into the
+ * slow path.
+ */
+static int
+tcp_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
+ struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen,
+ int ti_locked, u_long tiwin)
+{
+ int acked;
+ int winup_only=0;
+
+ if (__predict_false(SEQ_LEQ(th->th_ack, tp->snd_una))) {
+ /* Old ack, behind (or duplicate to) the last one rcv'd */
+ return (0);
+ }
+ if (__predict_false(th->th_ack == tp->snd_una) &&
+ __predict_false(tiwin <= tp->snd_wnd)) {
+ /* Duplicate ack with a window that did not grow */
+ return (0);
+ }
+ if (__predict_false(tiwin == 0)) {
+ /* zero window */
+ return (0);
+ }
+ if (__predict_false(SEQ_GT(th->th_ack, tp->snd_max))) {
+ /* Above what we have sent? */
+ return (0);
+ }
+ if (__predict_false(tp->snd_nxt != tp->snd_max)) {
+ /* We are retransmitting */
+ return (0);
+ }
+ if (__predict_false(tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN))) {
+ /* We need a SYN or a FIN, unlikely.. */
+ return (0);
+ }
+ if ((to->to_flags & TOF_TS) &&
+     __predict_false(TSTMP_LT(to->to_tsval, tp->ts_recent))) {
+ /* Timestamp is behind .. old ack with seq wrap? */
+ return (0);
+ }
+ if (__predict_false(IN_RECOVERY(tp->t_flags))) {
+ /* Still recovering */
+ return (0);
+ }
+ if (__predict_false(to->to_flags & TOF_SACK)) {
+ /* SACK included in the ack. */
+ return (0);
+ }
+ if (!TAILQ_EMPTY(&tp->snd_holes)) {
+ /* We have sack holes on our scoreboard */
+ return (0);
+ }
+ /* Ok if we reach here, we can process a fast-ack */
+
+ /* Did the window get updated? */
+ if (tiwin != tp->snd_wnd) {
+ /* keep track of pure window updates */
+ if (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) {
+ winup_only = 1;
+ TCPSTAT_INC(tcps_rcvwinupd);
+ }
+ tp->snd_wnd = tiwin;
+ tp->snd_wl1 = th->th_seq;
+ if (tp->snd_wnd > tp->max_sndwnd)
+ tp->max_sndwnd = tp->snd_wnd;
+ }
+ /*
+ * Pull snd_wl2 up to prevent seq wrap relative
+ * to th_ack.
+ */
+ tp->snd_wl2 = th->th_ack;
+ /*
+ * If last ACK falls within this segment's sequence numbers,
+ * record the timestamp.
+ * NOTE that the test is modified according to the latest
+ * proposal of the tcplw@cray.com list (Braden 1993/04/26).
+ */
+ if ((to->to_flags & TOF_TS) != 0 &&
+ SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
+ tp->ts_recent_age = tcp_ts_getticks();
+ tp->ts_recent = to->to_tsval;
+ }
+ /*
+ * This is a pure ack for outstanding data.
+ */
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
+ }
+ ti_locked = TI_UNLOCKED;
+
+ TCPSTAT_INC(tcps_predack);
+
+ /*
+ * "bad retransmit" recovery.
+ */
+ if (tp->t_rxtshift == 1 &&
+ tp->t_flags & TF_PREVVALID &&
+ (int)(ticks - tp->t_badrxtwin) < 0) {
+ cc_cong_signal(tp, th, CC_RTO_ERR);
+ }
+
+ /*
+ * Recalculate the transmit timer / rtt.
+ *
+ * Some boxes send broken timestamp replies
+ * during the SYN+ACK phase, ignore
+ * timestamps of 0 or we could calculate a
+ * huge RTT and blow up the retransmit timer.
+ */
+ if ((to->to_flags & TOF_TS) != 0 &&
+ to->to_tsecr) {
+ u_int t;
+
+ t = tcp_ts_getticks() - to->to_tsecr;
+ if (!tp->t_rttlow || tp->t_rttlow > t)
+ tp->t_rttlow = t;
+ tcp_xmit_timer(tp,
+ TCP_TS_TO_TICKS(t) + 1);
+ } else if (tp->t_rtttime &&
+ SEQ_GT(th->th_ack, tp->t_rtseq)) {
+ if (!tp->t_rttlow ||
+ tp->t_rttlow > ticks - tp->t_rtttime)
+ tp->t_rttlow = ticks - tp->t_rtttime;
+ tcp_xmit_timer(tp,
+ ticks - tp->t_rtttime);
+ }
+ if (winup_only == 0) {
+ acked = BYTES_THIS_ACK(tp, th);
+
+ /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
+ hhook_run_tcp_est_in(tp, th, to);
+
+ TCPSTAT_ADD(tcps_rcvackbyte, acked);
+ sbdrop(&so->so_snd, acked);
+ if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
+ SEQ_LEQ(th->th_ack, tp->snd_recover))
+ tp->snd_recover = th->th_ack - 1;
+
+ /*
+ * Let the congestion control algorithm update
+ * congestion control related information. This
+ * typically means increasing the congestion
+ * window.
+ */
+ cc_ack_received(tp, th, CC_ACK);
+
+ tp->snd_una = th->th_ack;
+ tp->t_dupacks = 0;
+ m_freem(m);
+
+ /*
+ * If all outstanding data are acked, stop
+ * retransmit timer, otherwise restart timer
+ * using current (possibly backed-off) value.
+ * If process is waiting for space,
+ * wakeup/selwakeup/signal. If data
+ * are ready to send, let tcp_output
+ * decide between more output or persist.
+ */
+#ifdef TCPDEBUG
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_INPUT, ostate, tp,
+ (void *)tcp_saveipgen,
+ &tcp_savetcp, 0);
+#endif
+ if (tp->snd_una == tp->snd_max)
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ else if (!tcp_timer_active(tp, TT_PERSIST))
+ tcp_timer_activate(tp, TT_REXMT,
+ tp->t_rxtcur);
+ /* Wake up the socket if we have room to write more */
+ sowwakeup(so);
+ } else {
+ /*
+ * Window update only, just free the mbufs and
+ * send out whatever we can.
+ */
+ m_freem(m);
+ }
+ if (sbavail(&so->so_snd))
+ (void) tcp_output(tp);
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
+ __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ if (tp->t_flags & TF_DELACK) {
+ tp->t_flags &= ~TF_DELACK;
+ tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
+ }
+ INP_WUNLOCK(tp->t_inpcb);
+ return (1);
+}
+
+/*
+ * This tcp_do_segment concentrates on making the fastest
+ * possible ack processing path. It does not have a fast path
+ * for data (it could, which would eliminate the need for the
+ * fast-slow variant above). For a content distributor with
+ * large outgoing elephant flows and very little inbound data,
+ * having no fast path for data does not really help (since
+ * little data comes in). The most important thing is processing
+ * acks quickly and getting the rest of the data out to the
+ * peer as quickly as possible. This routine seems to be about
+ * 3% faster overall than the old tcp_do_segment and keeps us
+ * in the fast path for many more packets (by allowing window
+ * updates to also stay in the fast path).
+ */
+void
+tcp_do_segment_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
+ struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
+ int ti_locked)
+{
+ int thflags;
+ u_long tiwin;
+ char *s;
+ struct in_conninfo *inc;
+ struct tcpopt to;
+
+#ifdef TCPDEBUG
+ /*
+ * The size of tcp_saveipgen must be the size of the max ip header,
+ * now IPv6.
+ */
+ u_char tcp_saveipgen[IP6_HDR_LEN];
+ struct tcphdr tcp_savetcp;
+ short ostate = 0;
+#endif
+ thflags = th->th_flags;
+ tp->sackhint.last_sack_ack = 0;
+ inc = &tp->t_inpcb->inp_inc;
+ /*
+ * If this is either a state-changing packet or current state isn't
+ * established, we require a write lock on tcbinfo. Otherwise, we
+ * allow the tcbinfo to be either locked or unlocked, as the
+ * caller may have unnecessarily acquired a write lock due to a race.
+ */
+ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
+ tp->t_state != TCPS_ESTABLISHED) {
+ KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
+ "SYN/FIN/RST/!EST", __func__, ti_locked));
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ } else {
+#ifdef INVARIANTS
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ } else {
+ KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
+ "ti_locked: %d", __func__, ti_locked));
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+ }
+#endif
+ }
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
+ __func__));
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
+ __func__));
+
+ /*
+ * Segment received on connection.
+ * Reset idle time and keep-alive timer.
+ * XXX: This should be done after segment
+ * validation to ignore broken/spoofed segs.
+ */
+ tp->t_rcvtime = ticks;
+ if (TCPS_HAVEESTABLISHED(tp->t_state))
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
+
+ /*
+ * Unscale the window into a 32-bit value.
+ * For the SYN_SENT state the scale is zero.
+ */
+ tiwin = th->th_win << tp->snd_scale;
+
+ /*
+ * TCP ECN processing.
+ */
+ if (tp->t_flags & TF_ECN_PERMIT) {
+ if (thflags & TH_CWR)
+ tp->t_flags &= ~TF_ECN_SND_ECE;
+ switch (iptos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ tp->t_flags |= TF_ECN_SND_ECE;
+ TCPSTAT_INC(tcps_ecn_ce);
+ break;
+ case IPTOS_ECN_ECT0:
+ TCPSTAT_INC(tcps_ecn_ect0);
+ break;
+ case IPTOS_ECN_ECT1:
+ TCPSTAT_INC(tcps_ecn_ect1);
+ break;
+ }
+ /* Congestion experienced. */
+ if (thflags & TH_ECE) {
+ cc_cong_signal(tp, th, CC_ECN);
+ }
+ }
+
+ /*
+ * Parse options on any incoming segment.
+ */
+ tcp_dooptions(&to, (u_char *)(th + 1),
+ (th->th_off << 2) - sizeof(struct tcphdr),
+ (thflags & TH_SYN) ? TO_SYN : 0);
+
+ /*
+ * If echoed timestamp is later than the current time,
+ * fall back to non RFC1323 RTT calculation. Normalize
+ * timestamp if syncookies were used when this connection
+ * was established.
+ */
+ if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
+ to.to_tsecr -= tp->ts_offset;
+ if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
+ to.to_tsecr = 0;
+ }
+ /*
+ * If timestamps were negotiated during SYN/ACK they should
+ * appear on every segment during this session and vice versa.
+ */
+ if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Timestamp missing, "
+ "no action\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ }
+ if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
+ "no action\n", s, __func__);
+ free(s, M_TCPLOG);
+ }
+ }
+
+ /*
+ * Process options only when we get SYN/ACK back. The SYN case
+ * for incoming connections is handled in tcp_syncache.
+ * According to RFC1323 the window field in a SYN (i.e., a <SYN>
+ * or <SYN,ACK>) segment itself is never scaled.
+ * XXX this is traditional behavior, may need to be cleaned up.
+ */
+ if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
+ if ((to.to_flags & TOF_SCALE) &&
+ (tp->t_flags & TF_REQ_SCALE)) {
+ tp->t_flags |= TF_RCVD_SCALE;
+ tp->snd_scale = to.to_wscale;
+ }
+ /*
+ * Initial send window. It will be updated with
+ * the next incoming segment to the scaled value.
+ */
+ tp->snd_wnd = th->th_win;
+ if (to.to_flags & TOF_TS) {
+ tp->t_flags |= TF_RCVD_TSTMP;
+ tp->ts_recent = to.to_tsval;
+ tp->ts_recent_age = tcp_ts_getticks();
+ }
+ if (to.to_flags & TOF_MSS)
+ tcp_mss(tp, to.to_mss);
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ (to.to_flags & TOF_SACKPERM) == 0)
+ tp->t_flags &= ~TF_SACK_PERMIT;
+ }
+ /*
+ * Header prediction: check for the two common cases
+ * of a uni-directional data xfer. If the packet has
+ * no control flags, is in-sequence, the window didn't
+ * change and we're not retransmitting, it's a
+ * candidate. If the length is zero and the ack moved
+ * forward, we're the sender side of the xfer. Just
+ * free the data acked & wake any higher level process
+ * that was blocked waiting for space. If the length
+ * is non-zero and the ack didn't move, we're the
+ * receiver side. If we're getting packets in-order
+ * (the reassembly queue is empty), add the data to
+ * the socket buffer and note that we need a delayed ack.
+ * Make sure that the hidden state-flags are also off.
+ * Since we check for TCPS_ESTABLISHED first, it can only
+ * be TH_NEEDSYN.
+ */
+ if (__predict_true(tp->t_state == TCPS_ESTABLISHED) &&
+ __predict_true(((to.to_flags & TOF_SACK) == 0)) &&
+ __predict_true(tlen == 0) &&
+ __predict_true((thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK) &&
+ __predict_true(LIST_EMPTY(&tp->t_segq)) &&
+ __predict_true(th->th_seq == tp->rcv_nxt)) {
+ if (tcp_fastack(m, th, so, tp, &to, drop_hdrlen, tlen,
+ ti_locked, tiwin)) {
+ return;
+ }
+ }
+ tcp_do_slowpath(m, th, so, tp, &to, drop_hdrlen, tlen,
+ ti_locked, tiwin, thflags);
+}
+
+struct tcp_function_block __tcp_fastslow = {
+ "fastslow",
+ tcp_output,
+ tcp_do_segment_fastslow,
+ tcp_default_ctloutput,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ 0,
+ 0
+
+};
+
+struct tcp_function_block __tcp_fastack = {
+ "fastack",
+ tcp_output,
+ tcp_do_segment_fastack,
+ tcp_default_ctloutput,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ 0,
+ 0
+};
+
+static int
+tcp_addfastpaths(module_t mod, int type, void *data)
+{
+ int err=0;
+
+ switch (type) {
+ case MOD_LOAD:
+ err = register_tcp_functions(&__tcp_fastack, M_WAITOK);
+ if (err) {
+ printf("Failed to register fastack module -- err:%d\n", err);
+ return(err);
+ }
+ err = register_tcp_functions(&__tcp_fastslow, M_WAITOK);
+ if (err) {
+ printf("Failed to register fastslow module -- err:%d\n", err);
+ deregister_tcp_functions(&__tcp_fastack);
+ return(err);
+ }
+ break;
+ case MOD_QUIESCE:
+ if ((__tcp_fastslow.refcnt) || (__tcp_fastack.refcnt)) {
+ return(EBUSY);
+ }
+ break;
+ case MOD_UNLOAD:
+ err = deregister_tcp_functions(&__tcp_fastack);
+ if (err == EBUSY)
+ break;
+ err = deregister_tcp_functions(&__tcp_fastslow);
+ if (err == EBUSY)
+ break;
+ err = 0;
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (err);
+}
+
+static moduledata_t new_tcp_fastpaths = {
+ .name = "tcp_fastpaths",
+ .evhand = tcp_addfastpaths,
+ .priv = 0
+};
+
+MODULE_VERSION(kern_tcpfastpaths, 1);
+DECLARE_MODULE(kern_tcpfastpaths, new_tcp_fastpaths, SI_SUB_PSEUDO, SI_ORDER_ANY);
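The two function blocks above are initialized positionally, which makes it hard to see which of the trailing NULL slots maps to which optional hook of struct tcp_function_block (defined in the netinet/tcp_var.h hunk further down). Purely as a reading aid, and not as part of the patch, the "fastack" block spelled out with C99 designated initializers would look roughly like this:

static struct tcp_function_block __tcp_fastack = {
	.tcp_block_name = "fastack",
	.tcp_output = tcp_output,
	.tcp_do_segment = tcp_do_segment_fastack,
	.tcp_ctloutput = tcp_default_ctloutput,
	/* Optional per-stack constructor/destructor: unused here. */
	.tcp_fb_init = NULL,
	.tcp_fb_fini = NULL,
	/* Optional timer hooks are all-or-none; none are supplied here. */
	.tcp_timer_stop_all = NULL,
	.tcp_timers_left = NULL,
	.tcp_timer_activate = NULL,
	.tcp_timer_active = NULL,
	.tcp_timer_stop = NULL,
	.refcnt = 0,
	.flags = 0,
};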
Index: netinet/tcp.h
===================================================================
--- netinet/tcp.h
+++ netinet/tcp.h
@@ -167,7 +167,7 @@
#define TCP_KEEPCNT 1024 /* L,N number of keepalives before close */
#define TCP_PCAP_OUT 2048 /* number of output packets to keep */
#define TCP_PCAP_IN 4096 /* number of input packets to keep */
-
+#define TCP_FUNCTION_BLK 8192 /* Set the tcp function pointers to the specified stack */
/* Start of reserved space for third-party user-settable options. */
#define TCP_VENDOR SO_VENDOR
@@ -245,5 +245,11 @@
u_int32_t __tcpi_pad[26]; /* Padding. */
};
#endif
+#define TCP_FUNCTION_NAME_LEN_MAX 32
+struct tcp_function_set {
+ char function_set_name[TCP_FUNCTION_NAME_LEN_MAX];
+ uint32_t pcbcnt;
+};
+
#endif /* !_NETINET_TCP_H_ */
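With TCP_FUNCTION_BLK and struct tcp_function_set exported here, an application can pin a not-yet-connected socket to a registered stack and read back which stack a socket is attached to. A minimal userland sketch, assuming the fastpath module above is loaded so that the name "fastack" is registered (error handling abbreviated):

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct tcp_function_set fs;
	socklen_t len = sizeof(fs);
	int s = socket(AF_INET, SOCK_STREAM, 0);

	/* Must happen while the socket is still CLOSED, otherwise EINVAL. */
	memset(&fs, 0, sizeof(fs));
	strlcpy(fs.function_set_name, "fastack", sizeof(fs.function_set_name));
	if (setsockopt(s, IPPROTO_TCP, TCP_FUNCTION_BLK, &fs, sizeof(fs)) == -1)
		perror("TCP_FUNCTION_BLK set");

	/* Query which function block this socket is using. */
	if (getsockopt(s, IPPROTO_TCP, TCP_FUNCTION_BLK, &fs, &len) == 0)
		printf("stack: %s (refcnt %u)\n", fs.function_set_name,
		    fs.pcbcnt);
	return (0);
}

Unloading the fastpath module later will fail with EBUSY while any connection still holds a reference to one of its blocks.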
Index: netinet/tcp_input.c
===================================================================
--- netinet/tcp_input.c
+++ netinet/tcp_input.c
@@ -230,23 +230,6 @@
#define tcb6 tcb /* for KAME src sync over BSD*'s */
VNET_DEFINE(struct inpcbinfo, tcbinfo);
-static void tcp_dooptions(struct tcpopt *, u_char *, int, int);
-static void tcp_do_segment(struct mbuf *, struct tcphdr *,
- struct socket *, struct tcpcb *, int, int, uint8_t,
- int);
-static void tcp_dropwithreset(struct mbuf *, struct tcphdr *,
- struct tcpcb *, int, int);
-static void tcp_pulloutofband(struct socket *,
- struct tcphdr *, struct mbuf *, int);
-static void tcp_xmit_timer(struct tcpcb *, int);
-static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
-static void inline cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
- uint16_t type);
-static void inline cc_conn_init(struct tcpcb *tp);
-static void inline cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
-static void inline hhook_run_tcp_est_in(struct tcpcb *tp,
- struct tcphdr *th, struct tcpopt *to);
-
/*
* TCP statistics are stored in an "array" of counter(9)s.
*/
@@ -272,7 +255,7 @@
/*
* Wrapper for the TCP established input helper hook.
*/
-static void inline
+void
hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
{
struct tcp_hhook_data hhook_data;
@@ -290,7 +273,7 @@
/*
* CC wrapper hook functions
*/
-static void inline
+void
cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type)
{
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -322,7 +305,7 @@
}
}
-static void inline
+void
cc_conn_init(struct tcpcb *tp)
{
struct hc_metrics_lite metrics;
@@ -446,7 +429,7 @@
}
}
-static void inline
+void inline
cc_post_recovery(struct tcpcb *tp, struct tcphdr *th)
{
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -601,9 +584,6 @@
struct tcpopt to; /* options in this segment */
char *s = NULL; /* address and port logging */
int ti_locked;
-#define TI_UNLOCKED 1
-#define TI_RLOCKED 2
-
#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
@@ -1175,7 +1155,7 @@
* contains. tcp_do_segment() consumes
* the mbuf chain and unlocks the inpcb.
*/
- tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
+ tp->t_fb->tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
iptos, ti_locked);
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
return (IPPROTO_DONE);
@@ -1421,7 +1401,7 @@
* state. tcp_do_segment() always consumes the mbuf chain, unlocks
* the inpcb, and unlocks pcbinfo.
*/
- tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked);
+ tp->t_fb->tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked);
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
return (IPPROTO_DONE);
@@ -1476,7 +1456,7 @@
return (IPPROTO_DONE);
}
-static void
+void
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
int ti_locked)
@@ -1787,7 +1767,7 @@
tp->t_rxtcur);
sowwakeup(so);
if (sbavail(&so->so_snd))
- (void) tcp_output(tp);
+ (void) tp->t_fb->tcp_output(tp);
goto check_delack;
}
} else if (th->th_ack == tp->snd_una &&
@@ -1906,7 +1886,7 @@
tp->t_flags |= TF_DELACK;
} else {
tp->t_flags |= TF_ACKNOW;
- tcp_output(tp);
+ tp->t_fb->tcp_output(tp);
}
goto check_delack;
}
@@ -2507,7 +2487,7 @@
}
} else
tp->snd_cwnd += tp->t_maxseg;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tcp_output(tp);
goto drop;
} else if (tp->t_dupacks == tcprexmtthresh) {
tcp_seq onxt = tp->snd_nxt;
@@ -2541,12 +2521,12 @@
tcps_sack_recovery_episode);
tp->sack_newdata = tp->snd_nxt;
tp->snd_cwnd = tp->t_maxseg;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tcp_output(tp);
goto drop;
}
tp->snd_nxt = th->th_ack;
tp->snd_cwnd = tp->t_maxseg;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tcp_output(tp);
KASSERT(tp->snd_limited <= 2,
("%s: tp->snd_limited too big",
__func__));
@@ -2593,7 +2573,7 @@
(tp->snd_nxt - tp->snd_una);
SOCKBUF_UNLOCK(&so->so_snd);
if (avail > 0)
- (void) tcp_output(tp);
+ (void) tp->t_fb->tcp_output(tp);
sent = tp->snd_max - oldsndmax;
if (sent > tp->t_maxseg) {
KASSERT((tp->t_dupacks == 2 &&
@@ -3049,7 +3029,7 @@
* Return any desired output.
*/
if (needoutput || (tp->t_flags & TF_ACKNOW))
- (void) tcp_output(tp);
+ (void) tp->t_fb->tcp_output(tp);
check_delack:
KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
@@ -3097,7 +3077,7 @@
ti_locked = TI_UNLOCKED;
tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tcp_output(tp);
INP_WUNLOCK(tp->t_inpcb);
m_freem(m);
return;
@@ -3143,7 +3123,7 @@
* The mbuf must still include the original packet header.
* tp may be NULL.
*/
-static void
+void
tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
int tlen, int rstreason)
{
@@ -3206,7 +3186,7 @@
/*
* Parse TCP options and place in tcpopt.
*/
-static void
+void
tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
{
int opt, optlen;
@@ -3300,7 +3280,7 @@
* It is still reflected in the segment length for
* sequencing purposes.
*/
-static void
+void
tcp_pulloutofband(struct socket *so, struct tcphdr *th, struct mbuf *m,
int off)
{
@@ -3333,7 +3313,7 @@
* Collect new round-trip time estimate
* and update averages and current timeout.
*/
-static void
+void
tcp_xmit_timer(struct tcpcb *tp, int rtt)
{
int delta;
@@ -3713,7 +3693,7 @@
* By setting snd_nxt to ti_ack, this forces retransmission timer to
* be started again.
*/
-static void
+void
tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
{
tcp_seq onxt = tp->snd_nxt;
@@ -3730,7 +3710,7 @@
*/
tp->snd_cwnd = tp->t_maxseg + BYTES_THIS_ACK(tp, th);
tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tcp_output(tp);
tp->snd_cwnd = ocwnd;
if (SEQ_GT(onxt, tp->snd_nxt))
tp->snd_nxt = onxt;
Index: netinet/tcp_sack.c
===================================================================
--- netinet/tcp_sack.c
+++ netinet/tcp_sack.c
@@ -589,7 +589,7 @@
if (tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tcp_output(tp);
}
#if 0
Index: netinet/tcp_subr.c
===================================================================
--- netinet/tcp_subr.c
+++ netinet/tcp_subr.c
@@ -47,6 +47,7 @@
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
+#include <sys/refcount.h>
#include <sys/mbuf.h>
#ifdef INET6
#include <sys/domain.h>
@@ -125,6 +126,8 @@
VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
#endif
+struct rwlock tcp_function_lock;
+
static int
sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS)
{
@@ -236,6 +239,222 @@
void *ip4hdr, const void *ip6hdr);
static void tcp_timer_discard(struct tcpcb *, uint32_t);
+
+static struct tcp_function_block tcp_def_funcblk = {
+ "default",
+ tcp_output,
+ tcp_do_segment,
+ tcp_default_ctloutput,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ 0,
+ 0
+};
+
+struct tcp_funchead t_functions;
+static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk;
+
+static struct tcp_function_block *
+find_tcp_functions_locked(struct tcp_function_set *fs)
+{
+ struct tcp_function *f;
+ struct tcp_function_block *blk=NULL;
+
+ TAILQ_FOREACH(f, &t_functions, next) {
+ if (strcmp(f->fb->tcp_block_name, fs->function_set_name) == 0) {
+ blk = f->fb;
+ break;
+ }
+ }
+ return(blk);
+}
+
+static struct tcp_function_block *
+find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s)
+{
+ struct tcp_function_block *rblk=NULL;
+ struct tcp_function *f;
+
+ TAILQ_FOREACH(f, &t_functions, next) {
+ if (f->fb == blk) {
+ rblk = blk;
+ if (s) {
+ *s = f;
+ }
+ break;
+ }
+ }
+ return (rblk);
+}
+
+struct tcp_function_block *
+find_and_ref_tcp_functions(struct tcp_function_set *fs)
+{
+ struct tcp_function_block *blk;
+
+ rw_rlock(&tcp_function_lock);
+ blk = find_tcp_functions_locked(fs);
+ if (blk)
+ refcount_acquire(&blk->refcnt);
+ rw_runlock(&tcp_function_lock);
+ return(blk);
+}
+
+struct tcp_function_block *
+find_and_ref_tcp_fb(struct tcp_function_block *blk)
+{
+ struct tcp_function_block *rblk;
+
+ rw_rlock(&tcp_function_lock);
+ rblk = find_tcp_fb_locked(blk, NULL);
+ if (rblk)
+ refcount_acquire(&rblk->refcnt);
+ rw_runlock(&tcp_function_lock);
+ return(rblk);
+}
+
+
+static int
+sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
+{
+ int error=ENOENT;
+ struct tcp_function_set fs;
+ struct tcp_function_block *blk;
+
+ memset(&fs, 0, sizeof(fs));
+ rw_rlock(&tcp_function_lock);
+ blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL);
+ if (blk) {
+ /* Found him */
+ strcpy(fs.function_set_name, blk->tcp_block_name);
+ fs.pcbcnt = blk->refcnt;
+ }
+ rw_runlock(&tcp_function_lock);
+ error = sysctl_handle_string(oidp, fs.function_set_name,
+ sizeof(fs.function_set_name), req);
+
+ /* Check for error or no change */
+ if (error != 0 || req->newptr == NULL)
+ return(error);
+
+ rw_rlock(&tcp_function_lock);
+ blk = find_tcp_functions_locked(&fs);
+ if (blk == NULL) {
+ error = ENOENT;
+ goto done;
+ }
+ tcp_func_set_ptr = blk;
+done:
+ rw_runlock(&tcp_function_lock);
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default,
+ CTLTYPE_STRING | CTLFLAG_RW,
+ NULL, 0, sysctl_net_inet_default_tcp_functions, "A",
+ "Set/get the default TCP functions");
+
+static int
+sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS)
+{
+ int error=0, cnt, at;
+ struct tcp_function *f, *n;
+ char *buffer=NULL;
+ size_t sz;
+
+ rw_rlock(&tcp_function_lock);
+ cnt = 0;
+ TAILQ_FOREACH(f, &t_functions, next) {
+ cnt++;
+ }
+ rw_runlock(&tcp_function_lock);
+
+ sz = ((cnt+1) * TCP_FUNCTION_NAME_LEN_MAX) + cnt;
+ buffer = malloc(sz, M_TEMP, M_WAITOK);
+ if (buffer == NULL)
+ return (ENOMEM);
+
+ memset(buffer, 0, sz);
+ error = at = 0;
+
+ n = NULL;
+ rw_rlock(&tcp_function_lock);
+ TAILQ_FOREACH_SAFE(f, &t_functions, next, n) {
+ if (at+TCP_FUNCTION_NAME_LEN_MAX > sz) {
+ error = EOVERFLOW;
+ break;
+ }
+ strcpy(&buffer[at], f->fb->tcp_block_name);
+ at += strlen(f->fb->tcp_block_name);
+ if (n) {
+ buffer[at] = ',';
+ at++;
+ }
+ }
+ rw_runlock(&tcp_function_lock);
+ if (error == 0) {
+ sz = strlen(buffer) + 1;
+ error = sysctl_handle_string(oidp, buffer, sz, req);
+ }
+ free(buffer, M_TEMP);
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available,
+ CTLTYPE_STRING|CTLFLAG_RD,
+ NULL, 0, sysctl_net_inet_list_available, "A",
+ "list available TCP Function sets");
+
+static int
+sysctl_net_inet_tcp_functions_list(SYSCTL_HANDLER_ARGS)
+{
+ int error=0, n, at;
+ struct tcp_function *f;
+ struct tcp_function_set *fs=NULL;
+ size_t sz;
+
+ rw_rlock(&tcp_function_lock);
+ n = 0;
+ TAILQ_FOREACH(f, &t_functions, next) {
+ n++;
+ }
+ rw_runlock(&tcp_function_lock);
+ if (req->oldptr == NULL) {
+ req->oldidx = ((n+2) * sizeof(struct tcp_function_set));
+ return(0);
+ }
+ sz = n * sizeof(struct tcp_function_set);
+ fs = malloc(sz, M_TEMP, M_WAITOK);
+ if (fs == NULL) {
+ return(ENOMEM);
+ }
+ at = 0;
+ memset(fs, 0, sz);
+ rw_rlock(&tcp_function_lock);
+ TAILQ_FOREACH(f, &t_functions, next) {
+ strcpy(fs[at].function_set_name, f->fb->tcp_block_name);
+ fs[at].pcbcnt = f->fb->refcnt;
+ at++;
+ if (at >= n)
+ break;
+ }
+ rw_runlock(&tcp_function_lock);
+ error = SYSCTL_OUT(req, fs, (at * sizeof(struct tcp_function_set)));
+ free(fs, M_TEMP);
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_list_detail,
+ CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RD, 0, 0,
+ &sysctl_net_inet_tcp_functions_list, "I",
+ "List the TCP function stacks and there reference counts");
+
+
/*
* Target size of TCP PCB hash tables. Must be a power of two.
*
@@ -263,6 +482,8 @@
#define V_tcpcb_zone VNET(tcpcb_zone)
MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
+MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory");
+
static struct mtx isn_mtx;
#define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
@@ -311,6 +532,96 @@
return (hashsize);
}
+int
+register_tcp_functions(struct tcp_function_block *blk, int wait)
+{
+ struct tcp_function_block *lblk;
+ struct tcp_function *n;
+ struct tcp_function_set fs;
+
+ if ((blk->tcp_output == NULL) ||
+ (blk->tcp_do_segment == NULL) ||
+ (blk->tcp_ctloutput == NULL) ||
+ (strlen(blk->tcp_block_name) == 0)) {
+ /*
+ * These functions are required and you
+ * need a name.
+ */
+ return (EINVAL);
+ }
+ if (blk->tcp_timer_stop_all ||
+ blk->tcp_timers_left ||
+ blk->tcp_timer_activate ||
+ blk->tcp_timer_active ||
+ blk->tcp_timer_stop) {
+ /*
+ * If you define one timer function you
+ * must have them all.
+ */
+ if ((blk->tcp_timer_stop_all == NULL) ||
+ (blk->tcp_timers_left == NULL) ||
+ (blk->tcp_timer_activate == NULL) ||
+ (blk->tcp_timer_active == NULL) ||
+ (blk->tcp_timer_stop == NULL)) {
+ return (EINVAL);
+ }
+ }
+ n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait);
+ if (n == NULL) {
+ return (ENOMEM);
+ }
+ n->fb = blk;
+ strcpy(fs.function_set_name, blk->tcp_block_name);
+ rw_wlock(&tcp_function_lock);
+ lblk = find_tcp_functions_locked(&fs);
+ if (lblk) {
+ /* Duplicate name space not allowed */
+ rw_wunlock(&tcp_function_lock);
+ free(n, M_TCPFUNCTIONS);
+ return (EALREADY);
+ }
+ refcount_init(&blk->refcnt, 0);
+ blk->flags = 0;
+ TAILQ_INSERT_TAIL(&t_functions, n, next);
+ rw_wunlock(&tcp_function_lock);
+ return(0);
+}
+
+int
+deregister_tcp_functions(struct tcp_function_block *blk)
+{
+ struct tcp_function_block *lblk;
+ struct tcp_function *f;
+ int error=ENOENT;
+
+ if (strcmp(blk->tcp_block_name, "default") == 0) {
+ /* You can't un-register the default */
+ return (EPERM);
+ }
+ rw_wlock(&tcp_function_lock);
+ if (blk == tcp_func_set_ptr) {
+ /* You can't free the current default */
+ rw_wunlock(&tcp_function_lock);
+ return (EBUSY);
+ }
+ if (blk->refcnt) {
+ /* Still tcb attached, mark it. */
+ blk->flags |= TCP_FUNC_BEING_REMOVED;
+ rw_wunlock(&tcp_function_lock);
+ return (EBUSY);
+ }
+ lblk = find_tcp_fb_locked(blk, &f);
+ if (lblk) {
+ /* Found */
+ TAILQ_REMOVE(&t_functions, f, next);
+ f->fb = NULL;
+ free(f, M_TCPFUNCTIONS);
+ error = 0;
+ }
+ rw_wunlock(&tcp_function_lock);
+ return (error);
+}
+
void
tcp_init(void)
{
@@ -325,7 +636,10 @@
if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT,
&V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
printf("%s: WARNING: unable to register helper hook\n", __func__);
-
+ /* Setup the tcp function block list */
+ TAILQ_INIT(&t_functions);
+ rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0);
+ register_tcp_functions(&tcp_def_funcblk, M_WAITOK);
hashsize = TCBHASHSIZE;
TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
if (hashsize == 0) {
@@ -768,7 +1082,13 @@
tp->ccv = &tm->ccv;
tp->ccv->type = IPPROTO_TCP;
tp->ccv->ccvc.tcp = tp;
-
+ rw_rlock(&tcp_function_lock);
+ tp->t_fb = tcp_func_set_ptr;
+ refcount_acquire(&tp->t_fb->refcnt);
+ rw_runlock(&tcp_function_lock);
+ if (tp->t_fb->tcp_fb_init) {
+ (*tp->t_fb->tcp_fb_init)(tp);
+ }
/*
* Use the current system default CC algorithm.
*/
@@ -779,6 +1099,9 @@
if (CC_ALGO(tp)->cb_init != NULL)
if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) {
+ if (tp->t_fb->tcp_fb_fini)
+ (*tp->t_fb->tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->refcnt);
uma_zfree(V_tcpcb_zone, tm);
return (NULL);
}
@@ -785,6 +1108,9 @@
tp->osd = &tm->osd;
if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) {
+ if (tp->t_fb->tcp_fb_fini)
+ (*tp->t_fb->tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->refcnt);
uma_zfree(V_tcpcb_zone, tm);
return (NULL);
}
@@ -925,7 +1251,7 @@
if (TCPS_HAVERCVDSYN(tp->t_state)) {
tcp_state_change(tp, TCPS_CLOSED);
- (void) tcp_output(tp);
+ (void) tp->t_fb->tcp_output(tp);
TCPSTAT_INC(tcps_drops);
} else
TCPSTAT_INC(tcps_conndrops);
@@ -960,6 +1286,10 @@
tcp_timer_stop(tp, TT_KEEP);
tcp_timer_stop(tp, TT_2MSL);
tcp_timer_stop(tp, TT_DELACK);
+ if (tp->t_fb->tcp_timer_stop_all) {
+ /* Call the stop-all function of the methods */
+ tp->t_fb->tcp_timer_stop_all(tp);
+ }
/*
* If we got enough samples through the srtt filter,
@@ -1044,6 +1374,14 @@
inp->inp_ppcb = NULL;
if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
/* We own the last reference on tcpcb, let's free it. */
+ if ((tp->t_fb->tcp_timers_left) &&
+ (tp->t_fb->tcp_timers_left(tp))) {
+ /* Some fb timers left running! */
+ return;
+ }
+ if (tp->t_fb->tcp_fb_fini)
+ (*tp->t_fb->tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->refcnt);
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);
released = in_pcbrele_wlocked(inp);
@@ -1105,6 +1443,14 @@
tp->t_timers->tt_flags &= ~timer_type;
if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
/* We own the last reference on this tcpcb, let's free it. */
+ if ((tp->t_fb->tcp_timers_left) &&
+ (tp->t_fb->tcp_timers_left(tp))) {
+ /* Some fb timers left running! */
+ goto leave;
+ }
+ if (tp->t_fb->tcp_fb_fini)
+ (*tp->t_fb->tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->refcnt);
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);
if (in_pcbrele_wlocked(inp)) {
@@ -1113,6 +1459,7 @@
return;
}
}
+leave:
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
@@ -1865,7 +2212,7 @@
tp->snd_recover = tp->snd_max;
if (tp->t_flags & TF_SACK_PERMIT)
EXIT_FASTRECOVERY(tp->t_flags);
- tcp_output(tp);
+ tp->t_fb->tcp_output(tp);
}
#ifdef INET
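The three sysctl handlers added in tcp_subr.c surface the registered stacks to userland as net.inet.tcp.functions_available, net.inet.tcp.functions_default and net.inet.tcp.functions_list_detail. A small sketch (not part of the patch) of reading the first two with sysctlbyname(3); the same OIDs can be read and, for functions_default, written with sysctl(8):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	char buf[256];
	size_t len;

	/* Comma-separated names of every registered TCP function set. */
	len = sizeof(buf);
	if (sysctlbyname("net.inet.tcp.functions_available", buf, &len,
	    NULL, 0) == 0)
		printf("available: %s\n", buf);

	/* The stack that newly created tcpcbs will be attached to. */
	len = sizeof(buf);
	if (sysctlbyname("net.inet.tcp.functions_default", buf, &len,
	    NULL, 0) == 0)
		printf("default: %s\n", buf);
	return (0);
}

Setting the default from the shell, e.g. sysctl net.inet.tcp.functions_default=fastslow, only affects connections created after the change; existing tcpcbs keep the block they took a reference on in tcp_newtcpcb().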
Index: netinet/tcp_syncache.c
===================================================================
--- netinet/tcp_syncache.c
+++ netinet/tcp_syncache.c
@@ -41,6 +41,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/hash.h>
+#include <sys/refcount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/limits.h>
@@ -626,6 +627,7 @@
static struct socket *
syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
{
+ struct tcp_function_block *blk;
struct inpcb *inp = NULL;
struct socket *so;
struct tcpcb *tp;
@@ -817,6 +819,27 @@
tp->irs = sc->sc_irs;
tcp_rcvseqinit(tp);
tcp_sendseqinit(tp);
+ blk = sototcpcb(lso)->t_fb;
+ if (blk &&
+ (blk != tp->t_fb)) {
+ /*
+ * Our parent's t_fb was not the default;
+ * we need to release our ref on tp->t_fb and
+ * pick up one on the new entry.
+ */
+ struct tcp_function_block *rblk;
+
+ rblk = find_and_ref_tcp_fb(blk);
+ KASSERT(rblk != NULL,
+ ("cannot find blk %p out of syncache?", blk));
+ if (tp->t_fb->tcp_fb_fini)
+ (*tp->t_fb->tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->refcnt);
+ tp->t_fb = rblk;
+ if (tp->t_fb->tcp_fb_init) {
+ (*tp->t_fb->tcp_fb_init)(tp);
+ }
+ }
tp->snd_wl1 = sc->sc_irs;
tp->snd_max = tp->iss + 1;
tp->snd_nxt = tp->iss + 1;
Index: netinet/tcp_timer.c
===================================================================
--- netinet/tcp_timer.c
+++ netinet/tcp_timer.c
@@ -292,7 +292,7 @@
tp->t_flags |= TF_ACKNOW;
TCPSTAT_INC(tcps_delack);
- (void) tcp_output(tp);
+ (void) tp->t_fb->tcp_output(tp);
INP_WUNLOCK(inp);
CURVNET_RESTORE();
}
@@ -543,7 +543,7 @@
}
tcp_setpersist(tp);
tp->t_flags |= TF_FORCEDATA;
- (void) tcp_output(tp);
+ (void) tp->t_fb->tcp_output(tp);
tp->t_flags &= ~TF_FORCEDATA;
out:
@@ -798,7 +798,7 @@
cc_cong_signal(tp, NULL, CC_RTO);
- (void) tcp_output(tp);
+ (void) tp->t_fb->tcp_output(tp);
out:
#ifdef TCPDEBUG
@@ -858,6 +858,10 @@
f_reset = TT_2MSL_RST;
break;
default:
+ if (tp->t_fb->tcp_timer_activate) {
+ tp->t_fb->tcp_timer_activate(tp, timer_type, delta);
+ return;
+ }
panic("tp %p bad timer_type %#x", tp, timer_type);
}
if (delta == 0) {
@@ -904,6 +908,9 @@
t_callout = &tp->t_timers->tt_2msl;
break;
default:
+ if (tp->t_fb->tcp_timer_active) {
+ return(tp->t_fb->tcp_timer_active(tp, timer_type));
+ }
panic("tp %p bad timer_type %#x", tp, timer_type);
}
return callout_active(t_callout);
@@ -945,6 +952,14 @@
f_reset = TT_2MSL_RST;
break;
default:
+ if (tp->t_fb->tcp_timer_stop) {
+ /*
+ * XXXrrs we need to look at this with the
+ * stop case below (flags).
+ */
+ tp->t_fb->tcp_timer_stop(tp, timer_type);
+ return;
+ }
panic("tp %p bad timer_type %#x", tp, timer_type);
}
Index: netinet/tcp_usrreq.c
===================================================================
--- netinet/tcp_usrreq.c
+++ netinet/tcp_usrreq.c
@@ -47,6 +47,7 @@
#include <sys/systm.h>
#include <sys/limits.h>
#include <sys/malloc.h>
+#include <sys/refcount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
@@ -509,7 +510,7 @@
goto out;
#endif
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- error = tcp_output(tp);
+ error = tp->t_fb->tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
INP_WUNLOCK(inp);
@@ -579,7 +580,7 @@
(error = tcp_offload_connect(so, nam)) == 0)
goto out;
#endif
- error = tcp_output(tp);
+ error = tp->t_fb->tcp_output(tp);
goto out;
}
#endif
@@ -597,7 +598,7 @@
goto out;
#endif
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- error = tcp_output(tp);
+ error = tp->t_fb->tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
@@ -773,7 +774,7 @@
socantsendmore(so);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- error = tcp_output(tp);
+ error = tp->t_fb->tcp_output(tp);
out:
TCPDEBUG2(PRU_SHUTDOWN);
@@ -809,7 +810,7 @@
tcp_offload_rcvd(tp);
else
#endif
- tcp_output(tp);
+ tp->t_fb->tcp_output(tp);
out:
TCPDEBUG2(PRU_RCVD);
@@ -911,7 +912,7 @@
!(flags & PRUS_NOTREADY)) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
- error = tcp_output(tp);
+ error = tp->t_fb->tcp_output(tp);
if (flags & PRUS_MORETOCOME)
tp->t_flags &= ~TF_MORETOCOME;
}
@@ -961,7 +962,7 @@
tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
if (!(flags & PRUS_NOTREADY)) {
tp->t_flags |= TF_FORCEDATA;
- error = tcp_output(tp);
+ error = tp->t_fb->tcp_output(tp);
tp->t_flags &= ~TF_FORCEDATA;
}
}
@@ -997,7 +998,7 @@
error = sbready(&so->so_snd, m, count);
SOCKBUF_UNLOCK(&so->so_snd);
if (error == 0)
- error = tcp_output(tp);
+ error = tp->t_fb->tcp_output(tp);
INP_WUNLOCK(inp);
return (error);
@@ -1349,13 +1350,11 @@
int
tcp_ctloutput(struct socket *so, struct sockopt *sopt)
{
- int error, opt, optval;
- u_int ui;
+ int error;
struct inpcb *inp;
struct tcpcb *tp;
- struct tcp_info ti;
- char buf[TCP_CA_NAME_MAX];
- struct cc_algo *algo;
+ struct tcp_function_block *blk;
+ struct tcp_function_set fsn;
error = 0;
inp = sotoinpcb(so);
@@ -1383,7 +1382,83 @@
INP_WUNLOCK(inp);
return (ECONNRESET);
}
+ tp = intotcpcb(inp);
+ /*
+ * Protect the TCP option TCP_FUNCTION_BLK so
+ * that a sub-function can *never* overwrite this.
+ */
+ if ((sopt->sopt_dir == SOPT_SET) &&
+ (sopt->sopt_name == TCP_FUNCTION_BLK)) {
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &fsn, sizeof fsn,
+ sizeof fsn);
+ if (error)
+ return (error);
+ INP_WLOCK_RECHECK(inp);
+ if (tp->t_state != TCPS_CLOSED) {
+ /*
+ * The user has advanced the state
+ * past the initial point; we can't
+ * switch since we are already down
+ * the road and a new set of functions
+ * may not be compatible.
+ */
+ INP_WUNLOCK(inp);
+ return(EINVAL);
+ }
+ blk = find_and_ref_tcp_functions(&fsn);
+ if (blk == NULL) {
+ INP_WUNLOCK(inp);
+ return (ENOENT);
+ }
+ if (tp->t_fb != blk) {
+ if (blk->flags & TCP_FUNC_BEING_REMOVED) {
+ refcount_release(&blk->refcnt);
+ INP_WUNLOCK(inp);
+ return (ENOENT);
+ }
+ /*
+ * Release the old refcnt, the
+ * lookup acquires a ref on the
+ * new one.
+ */
+ if (tp->t_fb->tcp_fb_fini)
+ (*tp->t_fb->tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->refcnt);
+ tp->t_fb = blk;
+ if (tp->t_fb->tcp_fb_init) {
+ (*tp->t_fb->tcp_fb_init)(tp);
+ }
+ }
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE) {
+ tcp_offload_ctloutput(tp, sopt->sopt_dir,
+ sopt->sopt_name);
+ }
+#endif
+ INP_WUNLOCK(inp);
+ return (error);
+ } else if ((sopt->sopt_dir == SOPT_GET) &&
+ (sopt->sopt_name == TCP_FUNCTION_BLK)) {
+ strcpy(fsn.function_set_name, tp->t_fb->tcp_block_name);
+ fsn.pcbcnt = tp->t_fb->refcnt;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &fsn, sizeof fsn);
+ return (error);
+ }
+ /* Pass in the INP locked; the callee must unlock it. */
+ return (tp->t_fb->tcp_ctloutput(so, sopt, inp, tp));
+}
+int
+tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp)
+{
+ int error, opt, optval;
+ u_int ui;
+ struct tcp_info ti;
+ struct cc_algo *algo;
+ char buf[TCP_CA_NAME_MAX];
+
switch (sopt->sopt_dir) {
case SOPT_SET:
switch (sopt->sopt_name) {
@@ -1451,7 +1526,7 @@
else if (tp->t_flags & TF_NOPUSH) {
tp->t_flags &= ~TF_NOPUSH;
if (TCPS_HAVEESTABLISHED(tp->t_state))
- error = tcp_output(tp);
+ error = tp->t_fb->tcp_output(tp);
}
goto unlock_and_done;
@@ -1770,7 +1845,7 @@
sbflush(&so->so_rcv);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- tcp_output(tp);
+ tp->t_fb->tcp_output(tp);
}
}
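Since tcp_ctloutput() now forwards every option it does not intercept to tp->t_fb->tcp_ctloutput with the inpcb still locked, a replacement stack can claim only the options it understands and defer the rest to tcp_default_ctloutput(), which is exported above for exactly this reason. A hypothetical sketch, not part of this patch: the option TCP_MYSTACK_KNOB and the handler mystack_ctloutput() are invented for illustration, and the snippet assumes the usual kernel includes already present in netinet/tcp_usrreq.c.

/* Hypothetical private option number, for illustration only. */
#define TCP_MYSTACK_KNOB	0x4000

static int
mystack_ctloutput(struct socket *so, struct sockopt *sopt,
    struct inpcb *inp, struct tcpcb *tp)
{
	int error, optval;

	if (sopt->sopt_dir == SOPT_SET &&
	    sopt->sopt_name == TCP_MYSTACK_KNOB) {
		/* We own this option: drop the lock, copy in, act on it. */
		INP_WUNLOCK(inp);
		error = sooptcopyin(sopt, &optval, sizeof(optval),
		    sizeof(optval));
		/* ... re-lock and apply optval to stack-private state ... */
		return (error);
	}
	/* Everything else goes to the stock handler, which unlocks inp. */
	return (tcp_default_ctloutput(so, sopt, inp, tp));
}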
Index: netinet/tcp_var.h
===================================================================
--- netinet/tcp_var.h
+++ netinet/tcp_var.h
@@ -88,6 +88,48 @@
#define tcp6cb tcpcb /* for KAME src sync over BSD*'s */
+/*
+ * TODO: We still need to brave plowing into
+ * tcp_input() and the pru_usrreq() block.
+ * Right now these go to the old standard routines,
+ * which is somewhat OK, but in the long term they
+ * may need to be changed. If we do tackle tcp_input()
+ * then we need to get rid of the tcp_do_segment()
+ * function below.
+ */
+/* Flags for tcp functions */
+#define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */
+
+struct tcp_function_block {
+ char tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX];
+ int (*tcp_output)(struct tcpcb *);
+ void (*tcp_do_segment)(struct mbuf *, struct tcphdr *,
+ struct socket *, struct tcpcb *,
+ int, int, uint8_t,
+ int);
+ int (*tcp_ctloutput)(struct socket *so, struct sockopt *sopt,
+ struct inpcb *inp, struct tcpcb *tp);
+ /* Optional memory allocation/free routine */
+ void (*tcp_fb_init)(struct tcpcb *);
+ void (*tcp_fb_fini)(struct tcpcb *);
+ /* Optional timers, must define all if you define one */
+ void (*tcp_timer_stop_all)(struct tcpcb *);
+ int (*tcp_timers_left)(struct tcpcb *);
+ void (*tcp_timer_activate)(struct tcpcb *,
+ uint32_t, u_int);
+ int (*tcp_timer_active)(struct tcpcb *, uint32_t);
+ void (*tcp_timer_stop)(struct tcpcb *, uint32_t);
+ volatile uint32_t refcnt;
+ uint32_t flags;
+};
+
+struct tcp_function {
+ TAILQ_ENTRY(tcp_function) next;
+ struct tcp_function_block *fb;
+};
+
+TAILQ_HEAD(tcp_funchead, tcp_function);
+
/*
* Tcp control block, one per tcp; fields:
* Organized for 16 byte cacheline efficiency.
@@ -206,9 +248,10 @@
u_int t_tsomaxsegsize; /* TSO maximum segment size in bytes */
u_int t_pmtud_saved_maxopd; /* pre-blackhole MSS */
u_int t_flags2; /* More tcpcb flags storage */
-
uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */
- void *t_pspare2[4]; /* 1 TCP_SIGNATURE, 3 TBD */
+ struct tcp_function_block *t_fb;/* TCP function call block */
+ void *t_fb_ptr; /* Pointer to t_fb specific data */
+ void *t_pspare2[2]; /* 1 TCP_SIGNATURE, 1 TBD */
#if defined(_KERNEL) && defined(TCPPCAP)
struct mbufq t_inpkts; /* List of saved input packets. */
struct mbufq t_outpkts; /* List of saved output packets. */
@@ -534,6 +577,8 @@
#define tcps_rcvmemdrop tcps_rcvreassfull /* compat */
#ifdef _KERNEL
+#define TI_UNLOCKED 1
+#define TI_RLOCKED 2
#include <sys/counter.h>
VNET_PCPUSTAT_DECLARE(struct tcpstat, tcpstat); /* tcp statistics */
@@ -684,7 +729,32 @@
int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
void tcp_reass_global_init(void);
void tcp_reass_flush(struct tcpcb *);
+void tcp_dooptions(struct tcpopt *, u_char *, int, int);
+void tcp_dropwithreset(struct mbuf *, struct tcphdr *,
+ struct tcpcb *, int, int);
+void tcp_pulloutofband(struct socket *,
+ struct tcphdr *, struct mbuf *, int);
+void tcp_xmit_timer(struct tcpcb *, int);
+void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
+void cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
+ uint16_t type);
+void cc_conn_init(struct tcpcb *tp);
+void cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
+void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
+void hhook_run_tcp_est_in(struct tcpcb *tp,
+ struct tcphdr *th, struct tcpopt *to);
+
int tcp_input(struct mbuf **, int *, int);
+void tcp_do_segment(struct mbuf *, struct tcphdr *,
+ struct socket *, struct tcpcb *, int, int, uint8_t,
+ int);
+
+int register_tcp_functions(struct tcp_function_block *blk, int wait);
+int deregister_tcp_functions(struct tcp_function_block *blk);
+struct tcp_function_block *find_and_ref_tcp_functions(struct tcp_function_set *fs);
+struct tcp_function_block *find_and_ref_tcp_fb(struct tcp_function_block *blk);
+int tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp);
+
u_long tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
u_long tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
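The pairing of tcp_fb_init/tcp_fb_fini with the new t_fb_ptr member is how an alternate stack would hang per-connection private state off the tcpcb: tcp_newtcpcb() and the TCP_FUNCTION_BLK switch call tcp_fb_init after attaching a block, and the teardown paths call tcp_fb_fini before releasing the reference. A hypothetical sketch, not part of the patch; struct mystack_state, M_MYSTACK and both hooks are invented for illustration and gloss over whether a sleeping allocation is safe at every call site. The snippet assumes the usual kernel includes of a netinet source file.

static MALLOC_DEFINE(M_MYSTACK, "mystack", "example per-connection state");

struct mystack_state {
	uint32_t msk_acks_seen;	/* whatever the stack wants to remember */
};

static void
mystack_fb_init(struct tcpcb *tp)
{
	/*
	 * Park stack-private state where the block can find it later.
	 * A real stack must cope with this allocation failing, since
	 * tcp_fb_init returns void and t_fb_ptr may be left NULL.
	 */
	tp->t_fb_ptr = malloc(sizeof(struct mystack_state), M_MYSTACK,
	    M_NOWAIT | M_ZERO);
}

static void
mystack_fb_fini(struct tcpcb *tp)
{
	/* Runs before the tcpcb is freed or handed to another block. */
	if (tp->t_fb_ptr != NULL) {
		free(tp->t_fb_ptr, M_MYSTACK);
		tp->t_fb_ptr = NULL;
	}
}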
Index: netinet/toecore.c
===================================================================
--- netinet/toecore.c
+++ netinet/toecore.c
@@ -509,7 +509,7 @@
KASSERT(!(tp->t_flags & TF_TOE),
("%s: tp %p still offloaded.", __func__, tp));
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- (void) tcp_output(tp);
+ (void) tp->t_fb->tcp_output(tp);
} else {
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);