Index: sys/conf/options =================================================================== --- sys/conf/options +++ sys/conf/options @@ -227,6 +227,7 @@ SYSVSHM opt_sysvipc.h SW_WATCHDOG opt_watchdog.h TCPHPTS opt_inet.h +TCP_REQUEST_TRK opt_global.h TCP_ACCOUNTING opt_inet.h TURNSTILE_PROFILING UMTX_PROFILING Index: sys/kern/kern_sendfile.c =================================================================== --- sys/kern/kern_sendfile.c +++ sys/kern/kern_sendfile.c @@ -57,6 +57,9 @@ #include #include #include +#include +#include +#include #include #include @@ -1188,6 +1191,12 @@ NULL, NULL, td); sendfile_iodone(sfio, NULL, 0, error); } +#ifdef TCP_REQUEST_TRK + if (so->so_proto->pr_protocol == IPPROTO_TCP) { + /* log the sendfile call to the TCP log, if enabled */ + tcp_log_sendfile(so, offset, nbytes, flags); + } +#endif CURVNET_RESTORE(); m = NULL; Index: sys/modules/tcp/rack/Makefile =================================================================== --- sys/modules/tcp/rack/Makefile +++ sys/modules/tcp/rack/Makefile @@ -6,7 +6,7 @@ STACKNAME= rack KMOD= tcp_${STACKNAME} -SRCS= rack.c sack_filter.c rack_bbr_common.c +SRCS= rack.c sack_filter.c rack_bbr_common.c #tailq_hash.c SRCS+= opt_inet.h opt_inet6.h opt_ipsec.h SRCS+= opt_kern_tls.h Index: sys/netinet/tcp.h =================================================================== --- sys/netinet/tcp.h +++ sys/netinet/tcp.h @@ -217,15 +217,15 @@ /* Options for Rack and BBR */ #define TCP_REUSPORT_LB_NUMA 1026 /* set listen socket numa domain */ #define TCP_RACK_MBUF_QUEUE 1050 /* Do we allow mbuf queuing if supported */ -#define TCP_RACK_PROP 1051 /* RACK proportional rate reduction (bool) */ +#define TCP_RACK_PROP 1051 /* Not used */ #define TCP_RACK_TLP_REDUCE 1052 /* RACK TLP cwnd reduction (bool) */ #define TCP_RACK_PACE_REDUCE 1053 /* RACK Pacingv reduction factor (divisor) */ #define TCP_RACK_PACE_MAX_SEG 1054 /* Max TSO size we will send */ #define TCP_RACK_PACE_ALWAYS 1055 /* Use the always pace method */ -#define TCP_RACK_PROP_RATE 1056 /* The proportional reduction rate */ +#define TCP_RACK_PROP_RATE 1056 /* Not used */ #define TCP_RACK_PRR_SENDALOT 1057 /* Allow PRR to send more than one seg */ #define TCP_RACK_MIN_TO 1058 /* Minimum time between rack t-o's in ms */ -#define TCP_RACK_EARLY_RECOV 1059 /* Should recovery happen early (bool) */ +#define TCP_RACK_EARLY_RECOV 1059 /* Not used */ #define TCP_RACK_EARLY_SEG 1060 /* If early recovery max segments */ #define TCP_RACK_REORD_THRESH 1061 /* RACK reorder threshold (shift amount) */ #define TCP_RACK_REORD_FADE 1062 /* Does reordering fade after ms time */ @@ -309,12 +309,22 @@ #define TCP_REC_ABC_VAL 1134 /* Do we use the ABC value for recovery or the override one from sysctl */ #define TCP_RACK_MEASURE_CNT 1135 /* How many measurements are required in GP pacing */ #define TCP_DEFER_OPTIONS 1136 /* Defer options until the proper number of measurements occur, does not defer TCP_RACK_MEASURE_CNT */ -#define TCP_FAST_RSM_HACK 1137 /* Do we do the broken thing where we don't twiddle the TLP bits properly in fast_rsm_output? 
*/ +#define TCP_FAST_RSM_HACK 1137 /* Not used in modern stacks */ #define TCP_RACK_PACING_BETA 1138 /* Changing the beta for pacing */ #define TCP_RACK_PACING_BETA_ECN 1139 /* Changing the beta for ecn with pacing */ #define TCP_RACK_TIMER_SLOP 1140 /* Set or get the timer slop used */ #define TCP_RACK_DSACK_OPT 1141 /* How do we setup rack timer DSACK options bit 1/2 */ #define TCP_RACK_ENABLE_HYSTART 1142 /* Do we allow hystart in the CC modules */ +#define TCP_RACK_SET_RXT_OPTIONS 1143 /* Set the bits in the retransmit options */ +#define TCP_RACK_HI_BETA 1144 /* Turn on/off high beta */ +#define TCP_RACK_SPLIT_LIMIT 1145 /* Set a split limit for split allocations */ +#define TCP_RACK_PACING_DIVISOR 1146 /* Pacing divisor given to rate-limit code for burst sizing */ +#define TCP_RACK_PACE_MIN_SEG 1147 /* Pacing min seg size rack will use */ +#define TCP_RACK_DGP_IN_REC 1148 /* Do we use full DGP in recovery? */ +#define TCP_RXT_CLAMP 1149 /* Do we apply a threshold to rack so if excess rxt clamp cwnd? */ +#define TCP_HYBRID_PACING 1150 /* Hybrid pacing enablement */ +#define TCP_PACING_DND 1151 /* When pacing with rr_config=3 can sacks disturb us */ + /* Start of reserved space for third-party user-settable options. */ #define TCP_VENDOR SO_VENDOR @@ -447,6 +457,53 @@ #define TLS_SET_RECORD_TYPE 1 #define TLS_GET_RECORD 2 +/* + * TCP log user opaque + */ +struct http_req { + uint64_t timestamp; + uint64_t start; + uint64_t end; + uint32_t flags; +}; + +union tcp_log_userdata { + struct http_req http_req; +}; + +struct tcp_log_user { + uint32_t type; + uint32_t subtype; + union tcp_log_userdata data; +}; + +/* user types, i.e. apps */ +#define TCP_LOG_USER_HTTPD 1 + +/* user subtypes */ +#define TCP_LOG_HTTPD_TS 1 /* client timestamp */ +#define TCP_LOG_HTTPD_TS_REQ 2 /* client timestamp and request info */ + +/* HTTPD REQ flags */ +#define TCP_LOG_HTTPD_RANGE_START 0x0001 +#define TCP_LOG_HTTPD_RANGE_END 0x0002 + +/* Flags for hybrid pacing */ +#define TCP_HYBRID_PACING_CU 0x0001 /* Enable catch-up mode */ +#define TCP_HYBRID_PACING_DTL 0x0002 /* Enable Detailed logging */ +#define TCP_HYBRID_PACING_CSPR 0x0004 /* A client suggested rate is present */ +#define TCP_HYBRID_PACING_H_MS 0x0008 /* A client hint for maxseg is present */ +#define TCP_HYBRID_PACING_ENABLE 0x0010 /* We are enabling hybrid pacing else disable */ +#define TCP_HYBRID_PACING_S_MSS 0x0020 /* Clent wants us to set the mss overriding gp est in CU */ +#define TCP_HYBRID_PACING_SETMSS 0x1000 /* Internal flag that tellsus we set the mss on this entry */ + +struct tcp_hybrid_req { + struct http_req req; + uint64_t cspr; + uint32_t hint_maxseg; + uint32_t hybrid_flags; +}; + /* * TCP specific variables of interest for tp->t_stats stats(9) accounting. 
*/ @@ -460,6 +517,7 @@ #define VOI_TCP_CALCFRWINDIFF 7 /* Congestion avoidance LCWIN - FRWIN */ #define VOI_TCP_GPUT_ND 8 /* Goodput normalised delta */ #define VOI_TCP_ACKLEN 9 /* Average ACKed bytes per ACK */ +#define VOI_TCP_PATHRTT 10 /* The path RTT based on ACK arrival */ #define TCP_REUSPORT_LB_NUMA_NODOM (-2) /* remove numa binding */ #define TCP_REUSPORT_LB_NUMA_CURDOM (-1) /* bind to current domain */ Index: sys/netinet/tcp_hpts.h =================================================================== --- sys/netinet/tcp_hpts.h +++ sys/netinet/tcp_hpts.h @@ -187,6 +187,15 @@ } #ifdef _KERNEL + +extern int32_t tcp_min_hptsi_time; + +__inline int32_t +get_hpts_min_sleep_time() +{ + return (tcp_min_hptsi_time + HPTS_TICKS_PER_SLOT); +} + static __inline uint32_t tcp_gethptstick(struct timeval *sv) { Index: sys/netinet/tcp_log_buf.c =================================================================== --- sys/netinet/tcp_log_buf.c +++ sys/netinet/tcp_log_buf.c @@ -58,6 +58,7 @@ #include #include #include +#include #include /* Default expiry time */ @@ -2844,6 +2845,10 @@ { struct inpcb *inp; struct tcpcb *tp; +#ifdef TCP_REQUEST_TRK + struct http_sendfile_track *ent; + int i, fnd; +#endif inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_log_sendfile: inp == NULL")); @@ -2873,6 +2878,90 @@ &tptosocket(tp)->so_snd, TCP_LOG_SENDFILE, 0, 0, &log, false, &tv); } +#ifdef TCP_REQUEST_TRK + if (tp->t_http_req == 0) { + /* No http requests to track */ + goto done; + } + fnd = 0; + if (tp->t_http_closed == 0) { + /* No closed end req to track */ + goto skip_closed_req; + } + for(i = 0; i < MAX_TCP_HTTP_REQ; i++) { + /* Lets see if this one can be found */ + ent = &tp->t_http_info[i]; + if (ent->flags == TCP_HTTP_TRACK_FLG_EMPTY) { + /* Not used */ + continue; + } + if (ent->flags & TCP_HTTP_TRACK_FLG_OPEN) { + /* This pass does not consider open requests */ + continue; + } + if (ent->flags & TCP_HTTP_TRACK_FLG_COMP) { + /* Don't look at what we have completed */ + continue; + } + /* If we reach here its a allocated closed end request */ + if ((ent->start == offset) || + ((offset > ent->start) && (offset < ent->end))){ + /* Its within this request?? */ + fnd = 1; + } + if (fnd) { + /* + * It is at or past the end, its complete. + */ + ent->flags |= TCP_HTTP_TRACK_FLG_SEQV; + /* + * When an entry completes we can take (snd_una + sb_cc) and know where + * the end of the range really is. Note that this works since two + * requests must be sequential and sendfile now is complete for *this* request. + * we must use sb_ccc since the data may still be in-flight in TLS. + * + * We always cautiously move the end_seq only if our calculations + * show it happened (just in case sf has the call to here at the wrong + * place). When we go COMP we will stop coming here and hopefully be + * left with the correct end_seq. 
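+ * + * For example (illustrative numbers, not taken from this change): if + * snd_una is 1000 and sb_ccc is 500 when this sendfile call lands in the + * range, the data queued for the request currently ends at sequence 1500, + * so end_seq is raised to 1500 whenever that exceeds the previous estimate.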
+ */ + if (SEQ_GT((tp->snd_una + so->so_snd.sb_ccc), ent->end_seq)) + ent->end_seq = tp->snd_una + so->so_snd.sb_ccc; + if ((offset + nbytes) >= ent->end) { + ent->flags |= TCP_HTTP_TRACK_FLG_COMP; + tcp_http_log_req_info(tp, ent, i, TCP_HTTP_REQ_LOG_COMPLETE, offset, nbytes); + } else { + tcp_http_log_req_info(tp, ent, i, TCP_HTTP_REQ_LOG_MOREYET, offset, nbytes); + } + /* We assume that sendfile never sends overlapping requests */ + goto done; + } + } +skip_closed_req: + if (!fnd) { + /* Ok now lets look for open requests */ + for(i = 0; i < MAX_TCP_HTTP_REQ; i++) { + ent = &tp->t_http_info[i]; + if (ent->flags == TCP_HTTP_TRACK_FLG_EMPTY) { + /* Not used */ + continue; + } + if ((ent->flags & TCP_HTTP_TRACK_FLG_OPEN) == 0) + continue; + /* If we reach here its an allocated open request */ + if (ent->start == offset) { + /* It begins this request */ + ent->start_seq = tp->snd_una + + tptosocket(tp)->so_snd.sb_ccc; + ent->flags |= TCP_HTTP_TRACK_FLG_SEQV; + break; + } else if (offset > ent->start) { + ent->flags |= TCP_HTTP_TRACK_FLG_SEQV; + break; + } + } + } +#endif done: INP_WUNLOCK(inp); } Index: sys/netinet/tcp_stacks/bbr.c =================================================================== --- sys/netinet/tcp_stacks/bbr.c +++ sys/netinet/tcp_stacks/bbr.c @@ -500,7 +500,7 @@ bbr_enter_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts, int32_t line); static void -bbr_stop_all_timers(struct tcpcb *tp); +bbr_stop_all_timers(struct tcpcb *tp, struct tcp_bbr *bbr); static void bbr_exit_probe_rtt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts); static void @@ -1970,7 +1970,7 @@ static void bbr_log_msgsize_fail(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t len, uint32_t maxseg, uint32_t mtu, int32_t csum_flags, int32_t tso, uint32_t cts) { - if (tcp_bblogging_on(bbr->rc_tp)) { + if (tcp_bblogging_on(tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2669,7 +2669,7 @@ uint32_t newbw, uint32_t obw, uint32_t diff, uint32_t tim) { - if (tcp_bblogging_on(bbr->rc_tp)) { + if (/*bbr_verbose_logging && */tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2697,7 +2697,7 @@ static inline void bbr_log_progress_event(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t tick, int event, int line) { - if (tcp_bblogging_on(bbr->rc_tp)) { + if (bbr_verbose_logging && tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, bbr->r_ctl.rc_rcvtime); @@ -6281,6 +6281,9 @@ else apply_filter_min_small(&bbr->r_ctl.rc_rttprop, rtt, cts); } +#ifdef STATS + stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_PATHRTT, imax(0, rtt)); +#endif if (bbr->rc_ack_was_delayed) rtt += bbr->r_ctl.rc_ack_hdwr_delay; @@ -9850,16 +9853,13 @@ } static void -bbr_stop_all_timers(struct tcpcb *tp) +bbr_stop_all_timers(struct tcpcb *tp, struct tcp_bbr *bbr) { - struct tcp_bbr *bbr; - /* * Assure no timers are running. */ if (tcp_timer_active(tp, TT_PERSIST)) { /* We enter in persists, set the flag appropriately */ - bbr = (struct tcp_bbr *)tp->t_fb_ptr; bbr->rc_in_persist = 1; } } @@ -9927,14 +9927,14 @@ * which indicates the error (usually no memory). 
*/ static int -bbr_init(struct tcpcb *tp) +bbr_init(struct tcpcb *tp, void **ptr) { struct inpcb *inp = tptoinpcb(tp); struct tcp_bbr *bbr = NULL; uint32_t cts; - tp->t_fb_ptr = uma_zalloc(bbr_pcb_zone, (M_NOWAIT | M_ZERO)); - if (tp->t_fb_ptr == NULL) { + *ptr = uma_zalloc(bbr_pcb_zone, (M_NOWAIT | M_ZERO)); + if (*ptr == NULL) { /* * We need to allocate memory but cant. The INP and INP_INFO * locks and they are recursive (happens during setup. So a @@ -9943,10 +9943,16 @@ */ return (ENOMEM); } - bbr = (struct tcp_bbr *)tp->t_fb_ptr; + bbr = (struct tcp_bbr *)*ptr; bbr->rtt_valid = 0; inp->inp_flags2 |= INP_CANNOT_DO_ECN; inp->inp_flags2 |= INP_SUPPORTS_MBUFQ; + /* Take off any undesired flags */ + inp->inp_flags2 &= ~INP_MBUF_QUEUE_READY; + inp->inp_flags2 &= ~INP_DONT_SACK_QUEUE; + inp->inp_flags2 &= ~INP_MBUF_ACKCMP; + inp->inp_flags2 &= ~INP_MBUF_L_ACKS; + TAILQ_INIT(&bbr->r_ctl.rc_map); TAILQ_INIT(&bbr->r_ctl.rc_free); TAILQ_INIT(&bbr->r_ctl.rc_tmap); @@ -10074,8 +10080,8 @@ rsm = bbr_alloc(bbr); if (rsm == NULL) { - uma_zfree(bbr_pcb_zone, tp->t_fb_ptr); - tp->t_fb_ptr = NULL; + uma_zfree(bbr_pcb_zone, *ptr); + *ptr = NULL; return (ENOMEM); } rsm->r_rtt_not_allowed = 1; @@ -10128,7 +10134,17 @@ * the TCB on the hptsi wheel if a timer is needed with appropriate * flags. */ - bbr_stop_all_timers(tp); + bbr_stop_all_timers(tp, bbr); + /* + * Validate the timers are not in usec, if they are convert. + * BBR should in theory move to USEC and get rid of a + * lot of the TICKS_2 calls.. but for now we stay + * with tick timers. + */ + tcp_change_time_units(tp, TCP_TMR_GRANULARITY_TICKS); + TCPT_RANGESET(tp->t_rxtcur, + ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, + tp->t_rttmin, TCPTV_REXMTMAX); bbr_start_hpts_timer(bbr, tp, cts, 5, 0, 0); return (0); } @@ -10172,7 +10188,6 @@ bbr_fini(struct tcpcb *tp, int32_t tcb_is_purged) { if (tp->t_fb_ptr) { - struct inpcb *inp = tptoinpcb(tp); uint32_t calc; struct tcp_bbr *bbr; struct bbr_sendmap *rsm; @@ -10182,10 +10197,6 @@ tcp_rel_pacing_rate(bbr->r_ctl.crte, bbr->rc_tp); bbr_log_flowend(bbr); bbr->rc_tp = NULL; - /* Backout any flags2 we applied */ - inp->inp_flags2 &= ~INP_CANNOT_DO_ECN; - inp->inp_flags2 &= ~INP_SUPPORTS_MBUFQ; - inp->inp_flags2 &= ~INP_MBUF_QUEUE_READY; if (bbr->bbr_hdrw_pacing) counter_u64_add(bbr_flows_whdwr_pacing, -1); else @@ -11853,7 +11864,6 @@ int32_t isipv6; #endif uint8_t app_limited = BBR_JR_SENT_DATA; - uint8_t filled_all = 0; bbr = (struct tcp_bbr *)tp->t_fb_ptr; /* We take a cache hit here */ memcpy(&bbr->rc_tv, tv, sizeof(struct timeval)); @@ -13162,7 +13172,7 @@ if_hw_tsomaxsegsize, msb, ((rsm == NULL) ? 
hw_tls : 0) #ifdef NETFLIX_COPY_ARGS - , &filled_all + , NULL, NULL #endif ); if (len <= maxseg) { @@ -13474,7 +13484,7 @@ #endif /* Log to the black box */ - if (tcp_bblogging_on(bbr->rc_tp)) { + if (tcp_bblogging_on(tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -13483,13 +13493,10 @@ log.u_bbr.flex2 = (bbr->r_recovery_bw << 3); log.u_bbr.flex3 = maxseg; log.u_bbr.flex4 = delay_calc; - /* Encode filled_all into the upper flex5 bit */ log.u_bbr.flex5 = bbr->rc_past_init_win; log.u_bbr.flex5 <<= 1; log.u_bbr.flex5 |= bbr->rc_no_pacing; log.u_bbr.flex5 <<= 29; - if (filled_all) - log.u_bbr.flex5 |= 0x80000000; log.u_bbr.flex5 |= tp->t_maxseg; log.u_bbr.flex6 = bbr->r_ctl.rc_pace_max_segs; log.u_bbr.flex7 = (bbr->rc_bbr_state << 8) | bbr_state_val(bbr); @@ -14073,6 +14080,56 @@ return (0); } +static void +bbr_switch_failed(struct tcpcb *tp) +{ + /* + * If a switch fails we only need to + * make sure mbuf_queuing is still in place. + * We also need to make sure we are still in + * ticks granularity (though we should probably + * change bbr to go to USECs). + * + * For timers we need to see if we are still in the + * pacer (if our flags are up) if so we are good, if + * not we need to get back into the pacer. + */ + struct inpcb *inp = tptoinpcb(tp); + struct timeval tv; + uint32_t cts; + uint32_t toval; + struct tcp_bbr *bbr; + struct hpts_diag diag; + + inp->inp_flags2 |= INP_CANNOT_DO_ECN; + inp->inp_flags2 |= INP_SUPPORTS_MBUFQ; + tcp_change_time_units(tp, TCP_TMR_GRANULARITY_TICKS); + if (inp->inp_in_hpts) { + return; + } + bbr = (struct tcp_bbr *)tp->t_fb_ptr; + cts = tcp_get_usecs(&tv); + if (bbr->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) { + if (TSTMP_GT(bbr->rc_pacer_started, cts)) { + toval = bbr->rc_pacer_started - cts; + } else { + /* one slot please */ + toval = HPTS_TICKS_PER_SLOT; + } + } else if (bbr->r_ctl.rc_hpts_flags & PACE_TMR_MASK) { + if (TSTMP_GT(bbr->r_ctl.rc_timer_exp, cts)) { + toval = bbr->r_ctl.rc_timer_exp - cts; + } else { + /* one slot please */ + toval = HPTS_TICKS_PER_SLOT; + } + } else + toval = HPTS_TICKS_PER_SLOT; + (void)tcp_hpts_insert_diag(inp, HPTS_USEC_TO_SLOTS(toval), + __LINE__, &diag); + bbr_log_hpts_diag(bbr, cts, &diag); +} + struct tcp_function_block __tcp_bbr = { .tfb_tcp_block_name = __XSTRING(STACKNAME), .tfb_tcp_output = bbr_output, @@ -14087,6 +14144,7 @@ .tfb_tcp_handoff_ok = bbr_handoff_ok, .tfb_tcp_mtu_chg = bbr_mtu_chg, .tfb_pru_options = bbr_pru_options, + .tfb_switch_failed = bbr_switch_failed, .tfb_flags = TCP_FUNC_OUTPUT_CANDROP, }; Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -458,7 +458,7 @@ static uint32_t rack_get_pacing_len(struct tcp_rack *rack, uint64_t bw, uint32_t mss); static int32_t rack_handoff_ok(struct tcpcb *tp); -static int32_t rack_init(struct tcpcb *tp); +static int32_t rack_init(struct tcpcb *tp, void **ptr); static void rack_init_sysctls(void); static void rack_log_ack(struct tcpcb *tp, struct tcpopt *to, @@ -12344,7 +12344,7 @@ } static int -rack_init(struct tcpcb *tp) +rack_init(struct tcpcb *tp, void **ptr) { struct inpcb *inp = tptoinpcb(tp); struct tcp_rack *rack = NULL; @@ -12354,8 +12354,8 @@ uint32_t iwin, snt, us_cts; int err; - tp->t_fb_ptr = uma_zalloc(rack_pcb_zone, M_NOWAIT); - if (tp->t_fb_ptr == NULL) { + *ptr = uma_zalloc(rack_pcb_zone, M_NOWAIT); + if (*ptr == NULL) { /* * We need to allocate memory but cant. 
The INP and INP_INFO * locks and they are recursive (happens during setup. So a @@ -12364,9 +12364,9 @@ */ return (ENOMEM); } - memset(tp->t_fb_ptr, 0, sizeof(struct tcp_rack)); + memset(ptr, 0, sizeof(struct tcp_rack)); - rack = (struct tcp_rack *)tp->t_fb_ptr; + rack = (struct tcp_rack *)ptr; RB_INIT(&rack->r_ctl.rc_mtree); TAILQ_INIT(&rack->r_ctl.rc_free); TAILQ_INIT(&rack->r_ctl.rc_tmap); Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -109,6 +109,7 @@ #include #include #include +#include #include #include #include @@ -152,6 +153,11 @@ CTLFLAG_RW, &tcp_force_detection, 0, "Do we force detection even if the INP has it off?"); +int32_t tcp_sad_limit = 10000; +SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, limit, + CTLFLAG_RW, + &tcp_sad_limit, 10000, + "If SaD is enabled, what is the limit to sendmap entries (0 = unlimited)?"); int32_t tcp_sack_to_ack_thresh = 700; /* 70 % */ SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sack_to_ack_thresh, CTLFLAG_RW, @@ -363,7 +369,7 @@ VNET_DEFINE_STATIC(u_char, ts_offset_secret[TS_OFFSET_SECRET_LENGTH]); #define V_ts_offset_secret VNET(ts_offset_secret) -static int tcp_default_fb_init(struct tcpcb *tp); +static int tcp_default_fb_init(struct tcpcb *tp, void **ptr); static void tcp_default_fb_fini(struct tcpcb *tp, int tcb_is_purged); static int tcp_default_handoff_ok(struct tcpcb *tp); static struct inpcb *tcp_notify(struct inpcb *, int); @@ -519,18 +525,11 @@ tcp_switch_back_to_default(struct tcpcb *tp) { struct tcp_function_block *tfb; + void *ptr = NULL; KASSERT(tp->t_fb != &tcp_def_funcblk, ("%s: called by the built-in default stack", __func__)); - /* - * Release the old stack. This function will either find a new one - * or panic. - */ - if (tp->t_fb->tfb_tcp_fb_fini != NULL) - (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0); - refcount_release(&tp->t_fb->tfb_refcnt); - /* * Now, we'll find a new function block to use. * Start by trying the current user-selected @@ -551,14 +550,20 @@ /* Try to use that stack. */ if (tfb != NULL) { /* Initialize the new stack. If it succeeds, we are done. */ - tp->t_fb = tfb; - if (tp->t_fb->tfb_tcp_fb_init == NULL || - (*tp->t_fb->tfb_tcp_fb_init)(tp) == 0) + if (tfb->tfb_tcp_fb_init == NULL || + (*tfb->tfb_tcp_fb_init)(tp, &ptr) == 0) { + /* Release the old stack */ + if (tp->t_fb->tfb_tcp_fb_fini != NULL) + (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0); + refcount_release(&tp->t_fb->tfb_refcnt); + /* Now set in all the pointers */ + tp->t_fb = tfb; + tp->t_fb_ptr = ptr; return; - + } /* * Initialization failed. Release the reference count on - * the stack. + * the looked up default stack. */ refcount_release(&tfb->tfb_refcnt); } @@ -578,12 +583,18 @@ panic("Default stack rejects a new session?"); } } - tp->t_fb = tfb; - if (tp->t_fb->tfb_tcp_fb_init != NULL && - (*tp->t_fb->tfb_tcp_fb_init)(tp)) { + if (tfb->tfb_tcp_fb_init != NULL && + (*tfb->tfb_tcp_fb_init)(tp, &ptr)) { /* The default stack cannot fail */ panic("Default stack initialization failed"); } + /* Now release the old stack */ + if (tp->t_fb->tfb_tcp_fb_fini != NULL) + (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0); + refcount_release(&tp->t_fb->tfb_refcnt); + /* And set in the pointers to the new */ + tp->t_fb = tfb; + tp->t_fb_ptr = ptr; } static bool @@ -1040,16 +1051,37 @@ * it is required to always succeed since it is the stack of last resort! 
*/ static int -tcp_default_fb_init(struct tcpcb *tp) +tcp_default_fb_init(struct tcpcb *tp, void **ptr) { struct socket *so = tptosocket(tp); + int rexmt; INP_WLOCK_ASSERT(tptoinpcb(tp)); + /* We don't use the pointer */ + *ptr = NULL; KASSERT(tp->t_state >= 0 && tp->t_state < TCPS_TIME_WAIT, ("%s: connection %p in unexpected state %d", __func__, tp, tp->t_state)); + /* Make sure we get no interesting mbuf queuing behavior */ + /* All mbuf queue/ack compress flags should be off */ + tcp_lro_features_off(tptoinpcb(tp)); + + /* Cancel the GP measurement in progress */ + tp->t_flags &= ~TF_GPUTINPROG; + /* Validate the timers are not in usec, if they are convert */ + tcp_change_time_units(tp, TCP_TMR_GRANULARITY_TICKS); + if ((tp->t_state == TCPS_SYN_SENT) || + (tp->t_state == TCPS_SYN_RECEIVED)) + rexmt = tcp_rexmit_initial * tcp_backoff[tp->t_rxtshift]; + else + rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; + if (tp->t_rxtshift == 0) + tp->t_rxtcur = rexmt; + else + TCPT_RANGESET(tp->t_rxtcur, rexmt, tp->t_rttmin, TCPTV_REXMTMAX); + /* * Nothing to do for ESTABLISHED or LISTEN states. And, we don't * know what to do for unexpected states (which includes TIME_WAIT). @@ -2240,6 +2272,8 @@ tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->t_rcvtime = ticks; + /* We always start with ticks granularity */ + tp->t_tmr_granularity = TCP_TMR_GRANULARITY_TICKS; /* * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, @@ -2265,7 +2299,7 @@ #endif tp->t_pacing_rate = -1; if (tp->t_fb->tfb_tcp_fb_init) { - if ((*tp->t_fb->tfb_tcp_fb_init)(tp)) { + if ((*tp->t_fb->tfb_tcp_fb_init)(tp, &tp->t_fb_ptr)) { refcount_release(&tp->t_fb->tfb_refcnt); return (NULL); } @@ -4019,3 +4053,524 @@ } } #endif + +void +tcp_change_time_units(struct tcpcb *tp, int granularity) +{ + if (tp->t_tmr_granularity == granularity) { + /* We are there */ + return; + } + if (granularity == TCP_TMR_GRANULARITY_USEC) { + KASSERT((tp->t_tmr_granularity == TCP_TMR_GRANULARITY_TICKS), + ("Granularity is not TICKS its %u in tp:%p", + tp->t_tmr_granularity, tp)); + tp->t_rttlow = TICKS_2_USEC(tp->t_rttlow); + if (tp->t_srtt > 1) { + uint32_t val, frac; + + val = tp->t_srtt >> TCP_RTT_SHIFT; + frac = tp->t_srtt & 0x1f; + tp->t_srtt = TICKS_2_USEC(val); + /* + * frac is the fractional part of the srtt (if any) + * but its in ticks and every bit represents + * 1/32nd of a hz. + */ + if (frac) { + if (hz == 1000) { + frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_MSEC) / (uint64_t)TCP_RTT_SCALE); + } else { + frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_SEC) / ((uint64_t)(hz) * (uint64_t)TCP_RTT_SCALE)); + } + tp->t_srtt += frac; + } + } + if (tp->t_rttvar) { + uint32_t val, frac; + + val = tp->t_rttvar >> TCP_RTTVAR_SHIFT; + frac = tp->t_rttvar & 0x1f; + tp->t_rttvar = TICKS_2_USEC(val); + /* + * frac is the fractional part of the srtt (if any) + * but its in ticks and every bit represents + * 1/32nd of a hz. 
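+ * For example, with hz=1000 each whole tick converts to 1000 usec and + * each unit of frac adds 1000/32 usec: illustrative values val = 20 and + * frac = 16 become 20 * 1000 + 16 * 1000 / 32 = 20500 usec.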
+ */ + if (frac) { + if (hz == 1000) { + frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_MSEC) / (uint64_t)TCP_RTT_SCALE); + } else { + frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_SEC) / ((uint64_t)(hz) * (uint64_t)TCP_RTT_SCALE)); + } + tp->t_rttvar += frac; + } + } + tp->t_tmr_granularity = TCP_TMR_GRANULARITY_USEC; + } else if (granularity == TCP_TMR_GRANULARITY_TICKS) { + /* Convert back to ticks, with */ + KASSERT((tp->t_tmr_granularity == TCP_TMR_GRANULARITY_USEC), + ("Granularity is not USEC its %u in tp:%p", + tp->t_tmr_granularity, tp)); + if (tp->t_srtt > 1) { + uint32_t val, frac; + + val = USEC_2_TICKS(tp->t_srtt); + frac = tp->t_srtt % (HPTS_USEC_IN_SEC / hz); + tp->t_srtt = val << TCP_RTT_SHIFT; + /* + * frac is the fractional part here is left + * over from converting to hz and shifting. + * We need to convert this to the 5 bit + * remainder. + */ + if (frac) { + if (hz == 1000) { + frac = (((uint64_t)frac * (uint64_t)TCP_RTT_SCALE) / (uint64_t)HPTS_USEC_IN_MSEC); + } else { + frac = (((uint64_t)frac * (uint64_t)(hz) * (uint64_t)TCP_RTT_SCALE) /(uint64_t)HPTS_USEC_IN_SEC); + } + tp->t_srtt += frac; + } + } + if (tp->t_rttvar) { + uint32_t val, frac; + + val = USEC_2_TICKS(tp->t_rttvar); + frac = tp->t_srtt % (HPTS_USEC_IN_SEC / hz); + tp->t_rttvar = val << TCP_RTTVAR_SHIFT; + /* + * frac is the fractional part here is left + * over from converting to hz and shifting. + * We need to convert this to the 5 bit + * remainder. + */ + if (frac) { + if (hz == 1000) { + frac = (((uint64_t)frac * (uint64_t)TCP_RTT_SCALE) / (uint64_t)HPTS_USEC_IN_MSEC); + } else { + frac = (((uint64_t)frac * (uint64_t)(hz) * (uint64_t)TCP_RTT_SCALE) /(uint64_t)HPTS_USEC_IN_SEC); + } + tp->t_rttvar += frac; + } + } + tp->t_rttlow = USEC_2_TICKS(tp->t_rttlow); + tp->t_tmr_granularity = TCP_TMR_GRANULARITY_TICKS; + } +#ifdef INVARIANTS + else { + panic("Unknown granularity:%d tp:%p", + granularity, tp); + } +#endif +} + +void +tcp_handle_orphaned_packets(struct tcpcb *tp) +{ + struct mbuf *save, *m, *prev; + /* + * Called when a stack switch is occuring from the fini() + * of the old stack. We assue the init() as already been + * run of the new stack and it has set the inp_flags2 to + * what it supports. This function will then deal with any + * differences i.e. cleanup packets that maybe queued that + * the newstack does not support. + */ + + if (tptoinpcb(tp)->inp_flags2 & INP_MBUF_L_ACKS) + return; + if ((tptoinpcb(tp)->inp_flags2 & INP_SUPPORTS_MBUFQ) == 0) { + /* + * It is unsafe to process the packets since a + * reset may be lurking in them (its rare but it + * can occur). If we were to find a RST, then we + * would end up dropping the connection and the + * INP lock, so when we return the caller (tcp_usrreq) + * will blow up when it trys to unlock the inp. + * This new stack does not do any fancy LRO features + * so all we can do is toss the packets. + */ + m = tp->t_in_pkt; + tp->t_in_pkt = NULL; + tp->t_tail_pkt = NULL; + while (m) { + save = m->m_nextpkt; + m->m_nextpkt = NULL; + m_freem(m); + m = save; + } + } else { + /* + * Here we have a stack that does mbuf queuing but + * does not support compressed ack's. We must + * walk all the mbufs and discard any compressed acks. 
+ */ + m = tp->t_in_pkt; + prev = NULL; + while (m) { + if (m->m_flags & M_ACKCMP) { + /* We must toss this packet */ + if (tp->t_tail_pkt == m) + tp->t_tail_pkt = prev; + if (prev) + prev->m_nextpkt = m->m_nextpkt; + else + tp->t_in_pkt = m->m_nextpkt; + m->m_nextpkt = NULL; + m_freem(m); + /* move forward */ + if (prev) + m = prev->m_nextpkt; + else + m = tp->t_in_pkt; + } else { + /* this one is ok */ + prev = m; + m = m->m_nextpkt; + } + } + } +} + +#ifdef TCP_REQUEST_TRK +uint32_t +tcp_estimate_tls_overhead(struct socket *so, uint64_t tls_usr_bytes) +{ +#ifdef KERN_TLS + struct ktls_session *tls; + uint32_t rec_oh, records; + + tls = so->so_snd.sb_tls_info; + if (tls == NULL) + return (0); + + rec_oh = tls->params.tls_hlen + tls->params.tls_tlen; + records = ((tls_usr_bytes + tls->params.max_frame_len - 1)/tls->params.max_frame_len); + return (records * rec_oh); +#else + return (0); +#endif +} + +extern uint32_t tcp_stale_entry_time; +uint32_t tcp_stale_entry_time = 250000; +SYSCTL_UINT(_net_inet_tcp, OID_AUTO, usrlog_stale, CTLFLAG_RW, + &tcp_stale_entry_time, 250000, "Time that a http entry without a sendfile ages out"); + +void +tcp_http_log_req_info(struct tcpcb *tp, struct http_sendfile_track *http, + uint16_t slot, uint8_t val, uint64_t offset, uint64_t nbytes) +{ + if (tcp_bblogging_on(tp)) { + union tcp_log_stackspecific log; + struct timeval tv; + + memset(&log.u_bbr, 0, sizeof(log.u_bbr)); +#ifdef TCPHPTS + log.u_bbr.inhpts = tcp_in_hpts(tptoinpcb(tp)); +#endif + log.u_bbr.flex8 = val; + log.u_bbr.rttProp = http->timestamp; + log.u_bbr.delRate = http->start; + log.u_bbr.cur_del_rate = http->end; + log.u_bbr.flex1 = http->start_seq; + log.u_bbr.flex2 = http->end_seq; + log.u_bbr.flex3 = http->flags; + log.u_bbr.flex4 = ((http->localtime >> 32) & 0x00000000ffffffff); + log.u_bbr.flex5 = (http->localtime & 0x00000000ffffffff); + log.u_bbr.flex7 = slot; + log.u_bbr.bw_inuse = offset; + /* nbytes = flex6 | epoch */ + log.u_bbr.flex6 = ((nbytes >> 32) & 0x00000000ffffffff); + log.u_bbr.epoch = (nbytes & 0x00000000ffffffff); + /* cspr = lt_epoch | pkts_out */ + log.u_bbr.lt_epoch = ((http->cspr >> 32) & 0x00000000ffffffff); + log.u_bbr.pkts_out |= (http->cspr & 0x00000000ffffffff); + log.u_bbr.applimited = tp->t_http_closed; + log.u_bbr.applimited <<= 8; + log.u_bbr.applimited |= tp->t_http_open; + log.u_bbr.applimited <<= 8; + log.u_bbr.applimited |= tp->t_http_req; + log.u_bbr.timeStamp = tcp_get_usecs(&tv); + TCP_LOG_EVENTP(tp, NULL, + &tptosocket(tp)->so_rcv, + &tptosocket(tp)->so_snd, + TCP_LOG_HTTP_T, 0, + 0, &log, false, &tv); + } +} + +void +tcp_http_free_a_slot(struct tcpcb *tp, struct http_sendfile_track *ent) +{ + if (tp->t_http_req > 0) + tp->t_http_req--; + if (ent->flags & TCP_HTTP_TRACK_FLG_OPEN) { + if (tp->t_http_open > 0) + tp->t_http_open--; + } else { + if (tp->t_http_closed > 0) + tp->t_http_closed--; + } + ent->flags = TCP_HTTP_TRACK_FLG_EMPTY; +} + +static void +tcp_http_check_for_stale_entries(struct tcpcb *tp, uint64_t ts, int rm_oldest) +{ + struct http_sendfile_track *ent; + uint64_t time_delta, oldest_delta; + int i, oldest, oldest_set = 0, cnt_rm = 0; + + for(i = 0; i < MAX_TCP_HTTP_REQ; i++) { + ent = &tp->t_http_info[i]; + if (ent->flags != TCP_HTTP_TRACK_FLG_USED) { + /* + * We only care about closed end ranges + * that are allocated and have no sendfile + * ever touching them. They would be in + * state USED. 
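+ * For example, a closed-range request that drew an error response (e.g. + * a 4xx) from the application never reaches sendfile, so it stays in + * USED until it is purged here.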
+ */ + continue; + } + if (ts >= ent->localtime) + time_delta = ts - ent->localtime; + else + time_delta = 0; + if (time_delta && + ((oldest_delta < time_delta) || (oldest_set == 0))) { + oldest_set = 1; + oldest = i; + oldest_delta = time_delta; + } + if (tcp_stale_entry_time && (time_delta >= tcp_stale_entry_time)) { + /* + * No sendfile in a our time-limit + * time to purge it. + */ + cnt_rm++; + tcp_http_log_req_info(tp, &tp->t_http_info[i], i, TCP_HTTP_REQ_LOG_STALE, + time_delta, 0); + tcp_http_free_a_slot(tp, ent); + } + } + if ((cnt_rm == 0) && rm_oldest && oldest_set) { + ent = &tp->t_http_info[oldest]; + tcp_http_log_req_info(tp, &tp->t_http_info[i], i, TCP_HTTP_REQ_LOG_STALE, + oldest_delta, 1); + tcp_http_free_a_slot(tp, ent); + } +} + +int +tcp_http_check_for_comp(struct tcpcb *tp, tcp_seq ack_point) +{ + int i, ret=0; + struct http_sendfile_track *ent; + + /* Clean up any old closed end requests that are now completed */ + if (tp->t_http_req == 0) + return(0); + if (tp->t_http_closed == 0) + return(0); + for(i = 0; i < MAX_TCP_HTTP_REQ; i++) { + ent = &tp->t_http_info[i]; + /* Skip empty ones */ + if (ent->flags == TCP_HTTP_TRACK_FLG_EMPTY) + continue; + /* Skip open ones */ + if (ent->flags & TCP_HTTP_TRACK_FLG_OPEN) + continue; + if (SEQ_GEQ(ack_point, ent->end_seq)) { + /* We are past it -- free it */ + tcp_http_log_req_info(tp, ent, + i, TCP_HTTP_REQ_LOG_FREED, 0, 0); + tcp_http_free_a_slot(tp, ent); + ret++; + } + } + return (ret); +} + +int +tcp_http_is_entry_comp(struct tcpcb *tp, struct http_sendfile_track *ent, tcp_seq ack_point) +{ + if (tp->t_http_req == 0) + return(-1); + if (tp->t_http_closed == 0) + return(-1); + if (ent->flags == TCP_HTTP_TRACK_FLG_EMPTY) + return(-1); + if (SEQ_GEQ(ack_point, ent->end_seq)) { + return (1); + } + return (0); +} + +struct http_sendfile_track * +tcp_http_find_a_req_that_is_completed_by(struct tcpcb *tp, tcp_seq th_ack, int *ip) +{ + /* + * Given an ack point (th_ack) walk through our entries and + * return the first one found that th_ack goes past the + * end_seq. + */ + struct http_sendfile_track *ent; + int i; + + if (tp->t_http_req == 0) { + /* none open */ + return (NULL); + } + for(i = 0; i < MAX_TCP_HTTP_REQ; i++) { + ent = &tp->t_http_info[i]; + if (ent->flags == TCP_HTTP_TRACK_FLG_EMPTY) + continue; + if ((ent->flags & TCP_HTTP_TRACK_FLG_OPEN) == 0) { + if (SEQ_GEQ(th_ack, ent->end_seq)) { + *ip = i; + return (ent); + } + } + } + return (NULL); +} + +struct http_sendfile_track * +tcp_http_find_req_for_seq(struct tcpcb *tp, tcp_seq seq) +{ + struct http_sendfile_track *ent; + int i; + + if (tp->t_http_req == 0) { + /* none open */ + return (NULL); + } + for(i = 0; i < MAX_TCP_HTTP_REQ; i++) { + ent = &tp->t_http_info[i]; + tcp_http_log_req_info(tp, ent, i, TCP_HTTP_REQ_LOG_SEARCH, + (uint64_t)seq, 0); + if (ent->flags == TCP_HTTP_TRACK_FLG_EMPTY) { + continue; + } + if (ent->flags & TCP_HTTP_TRACK_FLG_OPEN) { + /* + * An open end request only needs to + * match the beginning seq or be + * all we have (once we keep going on + * a open end request we may have a seq + * wrap). + */ + if ((SEQ_GEQ(seq, ent->start_seq)) || + (tp->t_http_closed == 0)) + return (ent); + } else { + /* + * For this one we need to + * be a bit more careful if its + * completed at least. + */ + if ((SEQ_GEQ(seq, ent->start_seq)) && + (SEQ_LT(seq, ent->end_seq))) { + return (ent); + } + } + } + return (NULL); +} + +/* Should this be in its own file tcp_http.c ? 
*/ +struct http_sendfile_track * +tcp_http_alloc_req_full(struct tcpcb *tp, struct http_req *req, uint64_t ts, int rec_dups) +{ + struct http_sendfile_track *fil; + int i, allocated; + + /* In case the stack does not check for completions do so now */ + tcp_http_check_for_comp(tp, tp->snd_una); + /* Check for stale entries */ + if (tp->t_http_req) + tcp_http_check_for_stale_entries(tp, ts, + (tp->t_http_req >= MAX_TCP_HTTP_REQ)); + /* Check to see if this is a duplicate of one not started */ + if (tp->t_http_req) { + for(i = 0, allocated = 0; i < MAX_TCP_HTTP_REQ; i++) { + fil = &tp->t_http_info[i]; + if (fil->flags != TCP_HTTP_TRACK_FLG_USED) + continue; + if ((fil->timestamp == req->timestamp) && + (fil->start == req->start) && + ((fil->flags & TCP_HTTP_TRACK_FLG_OPEN) || + (fil->end == req->end))) { + /* + * We already have this request + * and it has not been started with sendfile. + * This probably means the user was returned + * a 4xx of some sort and its going to age + * out, lets not duplicate it. + */ + return(fil); + } + } + } + /* Ok if there is no room at the inn we are in trouble */ + if (tp->t_http_req >= MAX_TCP_HTTP_REQ) { + tcp_trace_point(tp, TCP_TP_HTTP_LOG_FAIL); + for(i = 0; i < MAX_TCP_HTTP_REQ; i++) { + tcp_http_log_req_info(tp, &tp->t_http_info[i], + i, TCP_HTTP_REQ_LOG_ALLOCFAIL, 0, 0); + } + return (NULL); + } + for(i = 0, allocated = 0; i < MAX_TCP_HTTP_REQ; i++) { + fil = &tp->t_http_info[i]; + if (fil->flags == TCP_HTTP_TRACK_FLG_EMPTY) { + allocated = 1; + fil->flags = TCP_HTTP_TRACK_FLG_USED; + fil->timestamp = req->timestamp; + fil->localtime = ts; + fil->start = req->start; + if (req->flags & TCP_LOG_HTTPD_RANGE_END) { + fil->end = req->end; + } else { + fil->end = 0; + fil->flags |= TCP_HTTP_TRACK_FLG_OPEN; + } + /* + * We can set the min boundaries to the TCP Sequence space, + * but it might be found to be further up when sendfile + * actually runs on this range (if it ever does). + */ + fil->sbcc_at_s = tptosocket(tp)->so_snd.sb_ccc; + fil->start_seq = tp->snd_una + + tptosocket(tp)->so_snd.sb_ccc; + fil->end_seq = (fil->start_seq + ((uint32_t)(fil->end - fil->start))); + if (tptosocket(tp)->so_snd.sb_tls_info) { + /* + * This session is doing TLS. Take a swag guess + * at the overhead. + */ + fil->end_seq += tcp_estimate_tls_overhead( + tptosocket(tp), (fil->end - fil->start)); + } + tp->t_http_req++; + if (fil->flags & TCP_HTTP_TRACK_FLG_OPEN) + tp->t_http_open++; + else + tp->t_http_closed++; + tcp_http_log_req_info(tp, fil, i, + TCP_HTTP_REQ_LOG_NEW, 0, 0); + break; + } else + fil = NULL; + } + return (fil); +} + +void +tcp_http_alloc_req(struct tcpcb *tp, union tcp_log_userdata *user, uint64_t ts) +{ + (void)tcp_http_alloc_req_full(tp, &user->http_req, ts, 1); +} +#endif Index: sys/netinet/tcp_syncache.c =================================================================== --- sys/netinet/tcp_syncache.c +++ sys/netinet/tcp_syncache.c @@ -932,22 +932,27 @@ * pickup one on the new entry. */ struct tcp_function_block *rblk; + void *ptr = NULL; rblk = find_and_ref_tcp_fb(blk); KASSERT(rblk != NULL, ("cannot find blk %p out of syncache?", blk)); - if (tp->t_fb->tfb_tcp_fb_fini) - (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0); - refcount_release(&tp->t_fb->tfb_refcnt); - tp->t_fb = rblk; - /* - * XXXrrs this is quite dangerous, it is possible - * for the new function to fail to init. We also - * are not asking if the handoff_is_ok though at - * the very start thats probalbly ok. 
- */ - if (tp->t_fb->tfb_tcp_fb_init) { - (*tp->t_fb->tfb_tcp_fb_init)(tp); + + if (rblk->tfb_tcp_fb_init == NULL || + (*rblk->tfb_tcp_fb_init)(tp, &ptr) == 0) { + /* Release the old stack */ + if (tp->t_fb->tfb_tcp_fb_fini != NULL) + (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0); + refcount_release(&tp->t_fb->tfb_refcnt); + /* Now set in all the pointers */ + tp->t_fb = rblk; + tp->t_fb_ptr = ptr; + } else { + /* + * Initialization failed. Release the reference count on + * the looked up default stack. + */ + refcount_release(&rblk->tfb_refcnt); } } tp->snd_wl1 = sc->sc_irs; Index: sys/netinet/tcp_usrreq.c =================================================================== --- sys/netinet/tcp_usrreq.c +++ sys/netinet/tcp_usrreq.c @@ -1659,6 +1659,7 @@ */ struct tcp_function_set fsn; struct tcp_function_block *blk; + void *ptr = NULL; INP_WUNLOCK(inp); error = sooptcopyin(sopt, &fsn, sizeof fsn, sizeof fsn); @@ -1666,10 +1667,6 @@ return (error); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { - INP_WUNLOCK(inp); - return (ECONNRESET); - } tp = intotcpcb(inp); blk = find_and_ref_tcp_functions(&fsn); @@ -1710,41 +1707,57 @@ return (ENOENT); } /* - * Release the old refcnt, the - * lookup acquired a ref on the - * new one already. + * Ensure the new stack takes ownership with a + * clean slate on peak rate threshold. */ - if (tp->t_fb->tfb_tcp_fb_fini) { - struct epoch_tracker et; - /* - * Tell the stack to cleanup with 0 i.e. - * the tcb is not going away. - */ - NET_EPOCH_ENTER(et); - (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0); - NET_EPOCH_EXIT(et); - } + tp->t_peakrate_thr = 0; #ifdef TCPHPTS /* Assure that we are not on any hpts */ tcp_hpts_remove(tptoinpcb(tp)); #endif if (blk->tfb_tcp_fb_init) { - error = (*blk->tfb_tcp_fb_init)(tp); + error = (*blk->tfb_tcp_fb_init)(tp, &ptr); if (error) { + /* + * Release the ref count the lookup + * acquired. + */ refcount_release(&blk->tfb_refcnt); - if (tp->t_fb->tfb_tcp_fb_init) { - if((*tp->t_fb->tfb_tcp_fb_init)(tp) != 0) { - /* Fall back failed, drop the connection */ - INP_WUNLOCK(inp); - soabort(so); - return (error); - } + /* + * Now there is a chance that the + * init() function mucked with some + * things before it failed, such as + * hpts or inp_flags2 or timer granularity. + * It should not of, but lets give the old + * stack a chance to reset to a known good state. + */ + if (tp->t_fb->tfb_switch_failed) { + (*tp->t_fb->tfb_switch_failed)(tp); } - goto err_out; + goto err_out; } } + if (tp->t_fb->tfb_tcp_fb_fini) { + struct epoch_tracker et; + /* + * Tell the stack to cleanup with 0 i.e. + * the tcb is not going away. + */ + NET_EPOCH_ENTER(et); + (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0); + NET_EPOCH_EXIT(et); + } + /* + * Release the old refcnt, the + * lookup acquired a ref on the + * new one already. + */ refcount_release(&tp->t_fb->tfb_refcnt); + /* + * Set in the new stack. + */ tp->t_fb = blk; + tp->t_fb_ptr = ptr; #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) { tcp_offload_ctloutput(tp, sopt->sopt_dir, @@ -1754,6 +1767,7 @@ err_out: INP_WUNLOCK(inp); return (error); + } /* Pass in the INP locked, callee must unlock it. 
*/ Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -61,6 +61,15 @@ #define TCP_EI_STATUS_2MSL 0xb #define TCP_EI_STATUS_MAX_VALUE 0xb +#define TCP_HTTP_REQ_LOG_NEW 0x01 +#define TCP_HTTP_REQ_LOG_COMPLETE 0x02 +#define TCP_HTTP_REQ_LOG_FREED 0x03 +#define TCP_HTTP_REQ_LOG_ALLOCFAIL 0x04 +#define TCP_HTTP_REQ_LOG_MOREYET 0x05 +#define TCP_HTTP_REQ_LOG_FORCEFREE 0x06 +#define TCP_HTTP_REQ_LOG_STALE 0x07 +#define TCP_HTTP_REQ_LOG_SEARCH 0x08 + /************************************************/ /* Status bits we track to assure no duplicates, * the bits here are not used by the code but @@ -126,6 +135,154 @@ STAILQ_HEAD(tcp_log_stailq, tcp_log_mem); +#define TCP_HTTP_TRACK_FLG_EMPTY 0x00 /* Available */ +#define TCP_HTTP_TRACK_FLG_USED 0x01 /* In use */ +#define TCP_HTTP_TRACK_FLG_OPEN 0x02 /* End is not valid (open range request) */ +#define TCP_HTTP_TRACK_FLG_SEQV 0x04 /* We had a sendfile that touched it */ +#define TCP_HTTP_TRACK_FLG_COMP 0x08 /* Sendfile as placed the last bits (range req only) */ +#define TCP_HTTP_TRACK_FLG_FSND 0x10 /* First send has been done into the seq space */ +#define MAX_TCP_HTTP_REQ 5 /* Max we will have at once */ + +#ifdef TCP_REQUEST_TRK +struct http_sendfile_track { + uint64_t timestamp; /* User sent timestamp */ + uint64_t start; /* Start of sendfile offset */ + uint64_t end; /* End if not open-range req */ + uint64_t localtime; /* Time we actually got the req */ + uint64_t deadline; /* If in CU mode, deadline to delivery */ + uint64_t first_send; /* Time of first send in the range */ + uint64_t cspr; /* Client suggested pace rate */ + uint64_t sent_at_fs; /* What was t_sndbytes as we begun sending */ + uint64_t rxt_at_fs; /* What was t_snd_rxt_bytes as we begun sending */ + tcp_seq start_seq; /* First TCP Seq assigned */ + tcp_seq end_seq; /* If range req last seq */ + uint32_t flags; /* Type of request open etc */ + uint32_t sbcc_at_s; /* When we allocate what is the sb_cc */ + uint32_t hint_maxseg; /* Client hinted maxseg */ + uint32_t hybrid_flags; /* Hybrid flags on this request */ +}; + +#endif + +/* + * Change Query responses for a stack switch we create a structure + * that allows query response from the new stack to the old, if + * supported. + * + * There are three queries currently defined. + * - sendmap + * - timers + * - rack_times + * + * For the sendmap query the caller fills in the + * req and the req_param as the first seq (usually + * snd_una). When the response comes back indicating + * that there was data (return value 1), then the caller + * can build a sendmap entry based on the range and the + * times. The next query would then be done at the + * newly created sendmap_end. Repeated until sendmap_end == snd_max. + * + * Flags in sendmap_flags are defined below as well. + * + * For timers the standard PACE_TMR_XXXX flags are returned indicating + * a pacing timer (possibly) and one other timer. If pacing timer then + * the expiration timeout time in microseconds is in timer_pacing_to. + * And the value used with whatever timer (if a flag is set) is in + * timer_rxt. If no timers are running a 0 is returned and of + * course no flags are set in timer_hpts_flags. + * + * The rack_times are a misc collection of information that + * the old stack might possibly fill in. Of course its possible + * that an old stack may not have a piece of information. If so + * then setting that value to zero is advised. 
Setting any + * timestamp passed should only place a zero in it when it + * is unfilled. This may mean that a time is off by a micro-second + * but this is ok in the grand scheme of things. + * + * When switching stacks it is desireable to get as much information + * from the old stack to the new stack as possible. Though not always + * will the stack be compatible in the types of information. The + * init() function needs to take care when it begins changing + * things such as inp_flags2 and the timer units to position these + * changes at a point where it is unlikely they will fail after + * making such changes. A stack optionally can have an "undo" + * function + * + * To transfer information to the old stack from the new in + * respect to LRO and the inp_flags2, the new stack should set + * the inp_flags2 to what it supports. The old stack in its + * fini() function should call the tcp_handle_orphaned_packets() + * to clean up any packets. Note that a new stack should attempt + */ + +/* Query types */ +#define TCP_QUERY_SENDMAP 1 +#define TCP_QUERY_TIMERS_UP 2 +#define TCP_QUERY_RACK_TIMES 3 + +/* Flags returned in sendmap_flags */ +#define SNDMAP_ACKED 0x000001/* The remote endpoint acked this */ +#define SNDMAP_OVERMAX 0x000008/* We have more retran's then we can fit */ +#define SNDMAP_SACK_PASSED 0x000010/* A sack was done above this block */ +#define SNDMAP_HAS_FIN 0x000040/* segment is sent with fin */ +#define SNDMAP_TLP 0x000080/* segment sent as tail-loss-probe */ +#define SNDMAP_HAS_SYN 0x000800/* SYN is on this guy */ +#define SNDMAP_HAD_PUSH 0x008000/* Push was sent on original send */ +#define SNDMAP_MASK (SNDMAP_ACKED|SNDMAP_OVERMAX|SNDMAP_SACK_PASSED|SNDMAP_HAS_FIN\ + |SNDMAP_TLP|SNDMAP_HAS_SYN|SNDMAP_HAD_PUSH) +#define SNDMAP_NRTX 3 + +struct tcp_query_resp { + int req; + uint32_t req_param; + union { + struct { + tcp_seq sendmap_start; + tcp_seq sendmap_end; + int sendmap_send_cnt; + uint64_t sendmap_time[SNDMAP_NRTX]; + uint64_t sendmap_ack_arrival; + int sendmap_flags; + uint32_t sendmap_r_rtr_bytes; + /* If FAS is available if not 0 */ + uint32_t sendmap_fas; + uint8_t sendmap_dupacks; + }; + struct { + uint32_t timer_hpts_flags; + uint32_t timer_pacing_to; + uint32_t timer_timer_exp; + }; + struct { + /* Timestamps and rtt's */ + uint32_t rack_reorder_ts; /* Last uscts that reordering was seen */ + uint32_t rack_num_dsacks; /* Num of dsacks seen */ + uint32_t rack_rxt_last_time; /* Last time a RXT/TLP or rack tmr went off */ + uint32_t rack_min_rtt; /* never 0 smallest rtt seen */ + uint32_t rack_rtt; /* Last rtt used by rack */ + uint32_t rack_tmit_time; /* The time the rtt seg was tmited */ + uint32_t rack_time_went_idle; /* If in persist the time we went idle */ + /* Prr data */ + uint32_t rack_sacked; + uint32_t rack_holes_rxt; + uint32_t rack_prr_delivered; + uint32_t rack_prr_recovery_fs; + uint32_t rack_prr_out; + uint32_t rack_prr_sndcnt; + /* TLP data */ + uint16_t rack_tlp_cnt_out; /* How many tlp's have been sent */ + /* Various bits */ + uint8_t rack_tlp_out; /* Is a TLP outstanding */ + uint8_t rack_srtt_measured; /* The previous stack has measured srtt */ + uint8_t rack_in_persist; /* Is the old stack in persists? 
*/ + uint8_t rack_wanted_output; /* Did the prevous stack have a want output set */ + }; + }; +}; + +#define TCP_TMR_GRANULARITY_TICKS 1 /* TCP timers are in ticks (msec if hz=1000) */ +#define TCP_TMR_GRANULARITY_USEC 2 /* TCP timers are in microseconds */ + typedef enum { TT_REXMT = 0, TT_PERSIST, @@ -276,6 +433,11 @@ #ifdef TCP_ACCOUNTING uint64_t tcp_cnt_counters[TCP_NUM_CNT_COUNTERS]; uint64_t tcp_proc_time[TCP_NUM_CNT_COUNTERS]; +#endif +#ifdef TCP_REQUEST_TRK + uint32_t tcp_hybrid_start; /* Num of times we started hybrid pacing */ + uint32_t tcp_hybrid_stop; /* Num of times we stopped hybrid pacing */ + uint32_t tcp_hybrid_error; /* Num of times we failed to start hybrid pacing */ #endif uint32_t t_logsn; /* Log "serial number" */ uint32_t gput_ts; /* Time goodput measurement started */ @@ -290,6 +452,7 @@ uint32_t t_dsack_bytes; /* dsack bytes received */ uint32_t t_dsack_tlp_bytes; /* dsack bytes received for TLPs sent */ uint32_t t_dsack_pack; /* dsack packets we have eceived */ + uint8_t t_tmr_granularity; /* Granularity of all timers srtt etc */ uint8_t t_rttupdated; /* number of times rtt sampled */ /* TCP Fast Open */ uint8_t t_tfo_client_cookie_len; /* TFO client cookie length */ @@ -311,6 +474,13 @@ struct osd t_osd; /* storage for Khelp module data */ #endif uint8_t _t_logpoint; /* Used when a BB log points is enabled */ +#ifdef TCP_REQUEST_TRK + /* Response tracking addons. */ + uint8_t t_http_req; /* Request count */ + uint8_t t_http_open; /* Number of open range requests */ + uint8_t t_http_closed; /* Number of closed range requests */ + struct http_sendfile_track t_http_info[MAX_TCP_HTTP_REQ]; +#endif }; #endif /* _KERNEL || _WANT_TCPCB */ @@ -346,7 +516,7 @@ #define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */ #define TCP_FUNC_OUTPUT_CANDROP 0x02 /* tfb_tcp_output may ask tcp_drop */ -/* +/** * If defining the optional tcp_timers, in the * tfb_tcp_timer_stop call you must use the * callout_async_drain() function with the @@ -356,6 +526,7 @@ * does not know your callbacks you must provide a * stop_all function that loops through and calls * tcp_timer_stop() with each of your defined timers. + * * Adding a tfb_tcp_handoff_ok function allows the socket * option to change stacks to query you even if the * connection is in a later stage. You return 0 to @@ -363,16 +534,67 @@ * non-zero (an error number) to say no you can't. * If the function is undefined you can only change * in the early states (before connect or listen). + * + * tfb_tcp_fb_init is used to allow the new stack to + * setup its control block. Among the things it must + * do is: + * a) Make sure that the inp_flags2 is setup correctly + * for LRO. There are two flags that the previous + * stack may have set INP_MBUF_ACKCMP and + * INP_SUPPORTS_MBUFQ. If the new stack does not + * support these it *should* clear the flags. + * b) Make sure that the timers are in the proper + * granularity that the stack wants. The stack + * should check the t_tmr_granularity field. Currently + * there are two values that it may hold + * TCP_TMR_GRANULARITY_TICKS and TCP_TMR_GRANULARITY_USEC. + * Use the functions tcp_timer_convert(tp, granularity); + * to move the timers to the correct format for your stack. + * + * The new stack may also optionally query the tfb_chg_query + * function if the old stack has one. The new stack may ask + * for one of three entries and can also state to the old + * stack its support for the INP_MBUF_ACKCMP and + * INP_SUPPORTS_MBUFQ. 
This is important since if there are + * queued ack's without that statement the old stack will + * be forced to discard the queued acks. The requests that + * can be made for information by the new stacks are: + * + * Note also that the tfb_tcp_fb_init() when called can + * determine if a query is needed by looking at the + * value passed in the ptr. The ptr is designed to be + * set in with any allocated memory, but the address + * of the condtion (ptr == &tp->t_fb_ptr) will be + * true if this is not a stack switch but the initial + * setup of a tcb (which means no query would be needed). + * If, however, the value is not t_fb_ptr, then the caller + * is in the middle of a stack switch and is the new stack. + * A query would be appropriate (if the new stack support + * the query mechanism). + * + * TCP_QUERY_SENDMAP - Query of outstanding data. + * TCP_QUERY_TIMERS_UP - Query about running timers. + * TCP_SUPPORTED_LRO - Declaration in req_param of + * the inp_flags2 supported by + * the new stack. + * TCP_QUERY_RACK_TIMES - Enquire about various timestamps + * and states the old stack may be in. + * * tfb_tcp_fb_fini is changed to add a flag to tell * the old stack if the tcb is being destroyed or * not. A one in the flag means the TCB is being * destroyed, a zero indicates its transitioning to - * another stack (via socket option). + * another stack (via socket option). The + * tfb_tcp_fb_fini() function itself should not change timers + * or inp_flags2 (the tfb_tcp_fb_init() must do that). However + * if the old stack supports the LRO mbuf queuing, and the new + * stack does not communicate via chg messages that it too does, + * it must assume it does not and free any queued mbufs. + * */ struct tcp_function_block { char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX]; int (*tfb_tcp_output)(struct tcpcb *); - int (*tfb_tcp_output_wtime)(struct tcpcb *, const struct timeval *); void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t); @@ -387,15 +609,18 @@ int, struct timeval *); int (*tfb_tcp_ctloutput)(struct inpcb *inp, struct sockopt *sopt); /* Optional memory allocation/free routine */ - int (*tfb_tcp_fb_init)(struct tcpcb *); + int (*tfb_tcp_fb_init)(struct tcpcb *, void **); void (*tfb_tcp_fb_fini)(struct tcpcb *, int); /* Optional timers, must define all if you define one */ int (*tfb_tcp_timer_stop_all)(struct tcpcb *); void (*tfb_tcp_rexmit_tmr)(struct tcpcb *); int (*tfb_tcp_handoff_ok)(struct tcpcb *); - void (*tfb_tcp_mtu_chg)(struct tcpcb *); + void (*tfb_tcp_mtu_chg)(struct tcpcb *tp); int (*tfb_pru_options)(struct tcpcb *, int); void (*tfb_hwtls_change)(struct tcpcb *, int); + int (*tfb_chg_query)(struct tcpcb *, struct tcp_query_resp *); + void (*tfb_switch_failed)(struct tcpcb *); + bool (*tfb_early_wake_check)(struct tcpcb *); int (*tfb_compute_pipe)(struct tcpcb *tp); volatile uint32_t tfb_refcnt; uint32_t tfb_flags; @@ -445,6 +670,16 @@ return (rv); } +static inline void +tcp_lro_features_off(struct inpcb *inp) +{ + inp->inp_flags2 &= ~(INP_SUPPORTS_MBUFQ| + INP_MBUF_QUEUE_READY| + INP_DONT_SACK_QUEUE| + INP_MBUF_ACKCMP| + INP_MBUF_L_ACKS); +} + /* * tcp_output_unlock() * Always returns unlocked, handles drop request from advanced stacks. 
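As a sketch of the query mechanism documented above (not part of the diff hunks; example_pcb_zone and example_restart_timer are hypothetical names, while struct tcp_query_resp, TCP_QUERY_TIMERS_UP, the init signature, and the ptr == &tp->t_fb_ptr convention come from this change), a new stack's init could detect a mid-connection switch and ask the old stack about running timers roughly like this:

static int
example_stack_init(struct tcpcb *tp, void **ptr)
{
	struct tcp_query_resp qr;

	/* Allocate the per-connection state for the (hypothetical) stack. */
	*ptr = uma_zalloc(example_pcb_zone, M_NOWAIT | M_ZERO);
	if (*ptr == NULL)
		return (ENOMEM);
	/*
	 * ptr only differs from &tp->t_fb_ptr during a stack switch, in
	 * which case the outgoing stack may still be able to answer queries.
	 */
	if (ptr != &tp->t_fb_ptr && tp->t_fb->tfb_chg_query != NULL) {
		memset(&qr, 0, sizeof(qr));
		qr.req = TCP_QUERY_TIMERS_UP;
		(void)(*tp->t_fb->tfb_chg_query)(tp, &qr);
		if (qr.timer_hpts_flags != 0) {
			/*
			 * The old stack had a pacing and/or protocol timer
			 * outstanding; re-arm the equivalent timer here.
			 */
			example_restart_timer(tp, qr.timer_hpts_flags,
			    qr.timer_pacing_to, qr.timer_timer_exp);
		}
	}
	return (0);
}

On the teardown side, the outgoing stack's fini() would call the tcp_handle_orphaned_packets() added by this patch so that any LRO-queued mbufs the new stack cannot consume are freed.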
@@ -1169,6 +1404,7 @@ #ifdef NETFLIX_EXP_DETECTION /* Various SACK attack thresholds */ extern int32_t tcp_force_detection; +extern int32_t tcp_sad_limit; extern int32_t tcp_sack_to_ack_thresh; extern int32_t tcp_sack_to_move_thresh; extern int32_t tcp_restoral_thresh; @@ -1176,6 +1412,7 @@ extern int32_t tcp_sad_pacing_interval; extern int32_t tcp_sad_low_pps; extern int32_t tcp_map_minimum; +extern int32_t tcp_attack_on_turns_on_logging; #endif extern uint32_t tcp_ack_war_time_window; extern uint32_t tcp_ack_war_cnt; @@ -1246,6 +1483,8 @@ size_t seed_len); int tcp_can_enable_pacing(void); void tcp_decrement_paced_conn(void); +void tcp_change_time_units(struct tcpcb *, int); +void tcp_handle_orphaned_packets(struct tcpcb *); struct mbuf * tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen, @@ -1253,6 +1492,31 @@ int tcp_stats_init(void); void tcp_log_end_status(struct tcpcb *tp, uint8_t status); +#ifdef TCP_REQUEST_TRK +void tcp_http_free_a_slot(struct tcpcb *tp, struct http_sendfile_track *ent); +struct http_sendfile_track * +tcp_http_find_a_req_that_is_completed_by(struct tcpcb *tp, tcp_seq th_ack, int *ip); +int tcp_http_check_for_comp(struct tcpcb *tp, tcp_seq ack_point); +int +tcp_http_is_entry_comp(struct tcpcb *tp, struct http_sendfile_track *ent, tcp_seq ack_point); +struct http_sendfile_track * +tcp_http_find_req_for_seq(struct tcpcb *tp, tcp_seq seq); +void +tcp_http_log_req_info(struct tcpcb *tp, + struct http_sendfile_track *http, uint16_t slot, + uint8_t val, uint64_t offset, uint64_t nbytes); + +uint32_t +tcp_estimate_tls_overhead(struct socket *so, uint64_t tls_usr_bytes); +void +tcp_http_alloc_req(struct tcpcb *tp, union tcp_log_userdata *user, + uint64_t ts); + +struct http_sendfile_track * +tcp_http_alloc_req_full(struct tcpcb *tp, struct http_req *req, uint64_t ts, int rec_dups); + + +#endif #ifdef TCP_ACCOUNTING int tcp_do_ack_accounting(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, uint32_t tiwin, int mss); #endif Index: sys/sys/mbuf.h =================================================================== --- sys/sys/mbuf.h +++ sys/sys/mbuf.h @@ -1235,6 +1235,16 @@ #define M_LEADINGSPACE(m) \ (M_WRITABLE(m) ? ((m)->m_data - M_START(m)) : 0) +/* + * So M_TRAILINGROOM() is for when you want to know how much space + * would be there if it was writable. This can be used to + * detect changes in mbufs by knowing the value at one point + * and then being able to compare it later to the current M_TRAILINGROOM(). + * The TRAILINGSPACE() macro is not suitable for this since an mbuf + * at one point might not be writable and then later it becomes writable + * even though the space at the back of it has not changed. + */ +#define M_TRAILINGROOM(m) ((M_START(m) + M_SIZE(m)) - ((m)->m_data + (m)->m_len)) /* * Compute the amount of space available after the end of data in an mbuf. * @@ -1245,9 +1255,7 @@ * for mbufs with external storage. We now allow mbuf-embedded data to be * read-only as well. */ -#define M_TRAILINGSPACE(m) \ - (M_WRITABLE(m) ? \ - ((M_START(m) + M_SIZE(m)) - ((m)->m_data + (m)->m_len)) : 0) +#define M_TRAILINGSPACE(m) (M_WRITABLE(m) ? M_TRAILINGROOM(m) : 0) /* * Arrange to prepend space of size plen to mbuf m. If a new mbuf must be