Index: head/sys/modules/tcp/rack/Makefile
===================================================================
--- head/sys/modules/tcp/rack/Makefile
+++ head/sys/modules/tcp/rack/Makefile
@@ -6,7 +6,7 @@
 
 STACKNAME=	rack
 KMOD=	tcp_${STACKNAME}
-SRCS=	rack.c sack_filter.c
+SRCS=	rack.c sack_filter.c rack_bbr_common.c
 
 SRCS+=	opt_inet.h opt_inet6.h opt_ipsec.h
 SRCS+=	opt_tcpdebug.h
Index: head/sys/netinet/in_pcb.h
===================================================================
--- head/sys/netinet/in_pcb.h
+++ head/sys/netinet/in_pcb.h
@@ -759,7 +759,9 @@
 #define	INP_ORIGDSTADDR		0x00000800 /* receive IP dst address/port */
 #define INP_CANNOT_DO_ECN	0x00001000 /* The stack does not do ECN */
 #define	INP_REUSEPORT_LB	0x00002000 /* SO_REUSEPORT_LB option is set */
-
+#define INP_SUPPORTS_MBUFQ	0x00004000 /* Supports the mbuf queue method of LRO */
+#define INP_MBUF_QUEUE_READY	0x00008000 /* The transport is pacing, inputs can be queued */
+#define INP_DONT_SACK_QUEUE	0x00010000 /* If a sack arrives do not wake me */
 /*
  * Flags passed to in_pcblookup*() functions.
  */
Index: head/sys/netinet/tcp.h
===================================================================
--- head/sys/netinet/tcp.h
+++ head/sys/netinet/tcp.h
@@ -201,9 +201,8 @@
 #define TCP_RACK_TLP_THRESH   1063 /* RACK TLP theshold i.e. srtt+(srtt/N) */
 #define TCP_RACK_PKT_DELAY    1064 /* RACK added ms i.e. rack-rtt + reord + N */
 #define TCP_RACK_TLP_INC_VAR  1065 /* Does TLP include rtt variance in t-o */
-#define TCP_RACK_SESS_CWV     1066 /* Enable RFC7611 cwnd validation on sess */
 #define TCP_BBR_IWINTSO	      1067 /* Initial TSO window for BBRs first sends */
-#define TCP_BBR_RECFORCE      1068 /* Enter recovery force out a segment disregard pacer */
+#define TCP_BBR_RECFORCE      1068 /* Enter recovery, force out a segment disregarding the pacer (no longer valid) */
 #define TCP_BBR_STARTUP_PG    1069 /* Startup pacing gain */
 #define TCP_BBR_DRAIN_PG      1070 /* Drain pacing gain */
 #define TCP_BBR_RWND_IS_APP   1071 /* Rwnd limited is considered app limited */
@@ -211,14 +210,18 @@
 #define TCP_BBR_ONE_RETRAN    1073 /* Is only one segment allowed out during retran */
 #define TCP_BBR_STARTUP_LOSS_EXIT 1074	/* Do we exit a loss during startup if not 20% incr */
 #define TCP_BBR_USE_LOWGAIN   1075 /* lower the gain in PROBE_BW enable */
-#define TCP_BBR_LOWGAIN_THRESH 1076 /* How many cycles do we stay in lowgain */
-#define TCP_BBR_LOWGAIN_HALF  1077 /* Do we halfstep lowgain down */
-#define TCP_BBR_LOWGAIN_FD    1078 /* Do we force a drain when lowgain in place */
+#define TCP_BBR_LOWGAIN_THRESH 1076 /* Unused after 2.3 morphs to TSLIMITS >= 2.3 */
+#define TCP_BBR_TSLIMITS 1076	   /* Do we use experimental Timestamp limiting for our algo */
+#define TCP_BBR_LOWGAIN_HALF  1077 /* Unused after 2.3 */
+#define TCP_BBR_PACE_OH        1077 /* Reused in 4.2 for pacing overhead setting */
+#define TCP_BBR_LOWGAIN_FD    1078 /* Unused after 2.3 */
+#define TCP_BBR_HOLD_TARGET 1078	/* For 4.3 on */
 #define TCP_BBR_USEDEL_RATE   1079 /* Enable use of delivery rate for loss recovery */
 #define TCP_BBR_MIN_RTO       1080 /* Min RTO in milliseconds */
 #define TCP_BBR_MAX_RTO	      1081 /* Max RTO in milliseconds */
 #define TCP_BBR_REC_OVER_HPTS 1082 /* Recovery override htps settings 0/1/3 */
-#define TCP_BBR_UNLIMITED     1083 /* Does BBR, in non-recovery not use cwnd */
+#define TCP_BBR_UNLIMITED     1083 /* Not used before 2.3 and morphs to algorithm >= 2.3 */
+#define TCP_BBR_ALGORITHM     1083 /* What measurement algo does BBR use netflix=0, google=1 */
 #define TCP_BBR_DRAIN_INC_EXTRA 1084 /* Does the 3/4 drain target include the extra gain */
 #define TCP_BBR_STARTUP_EXIT_EPOCH 1085 /* what epoch gets us out of startup */
 #define TCP_BBR_PACE_PER_SEC   1086
@@ -227,17 +230,27 @@
 #define TCP_BBR_PACE_SEG_MIN   1089
 #define TCP_BBR_PACE_CROSS     1090
 #define TCP_RACK_IDLE_REDUCE_HIGH 1092  /* Reduce the highest cwnd seen to IW on idle */
-#define TCP_RACK_IDLE_REDUCE_HIGH 1092  /* Reduce the highest cwnd seen to IW on idle */
 #define TCP_RACK_MIN_PACE      1093 	/* Do we enforce rack min pace time */
 #define TCP_RACK_MIN_PACE_SEG  1094	/* If so what is the seg threshould */
+#define TCP_RACK_GP_INCREASE   1094	/* After 4.1 its the GP increase */
 #define TCP_RACK_TLP_USE       1095
 #define TCP_BBR_ACK_COMP_ALG   1096 	/* Not used */
+#define TCP_BBR_TMR_PACE_OH    1096	/* Recycled in 4.2 */
 #define TCP_BBR_EXTRA_GAIN     1097
 #define TCP_BBR_RACK_RTT_USE   1098	/* what RTT should we use 0, 1, or 2? */
 #define TCP_BBR_RETRAN_WTSO    1099
 #define TCP_DATA_AFTER_CLOSE   1100
 #define TCP_BBR_PROBE_RTT_GAIN 1101
 #define TCP_BBR_PROBE_RTT_LEN  1102
+#define TCP_BBR_SEND_IWND_IN_TSO 1103	/* Do we burst out whole iwin size chunks at start? */
+#define TCP_BBR_USE_RACK_CHEAT 1104	/* Do we use the rack cheat for pacing rxt's */
+#define TCP_BBR_HDWR_PACE      1105	/* Enable/disable hardware pacing */
+#define TCP_BBR_UTTER_MAX_TSO  1106	/* Do we enforce an utter max TSO size */
+#define TCP_BBR_EXTRA_STATE    1107	/* Special exit-persist catch up */
+#define TCP_BBR_FLOOR_MIN_TSO  1108     /* The min tso size */
+#define TCP_BBR_MIN_TOPACEOUT  1109	/* Do we suspend pacing until */
+#define TCP_BBR_TSTMP_RAISES   1110	/* Can a timestamp measurement raise the b/w */
+#define TCP_BBR_POLICER_DETECT 1111	/* Turn on/off google mode policer detection */
 
 
 /* Start of reserved space for third-party user-settable options. */
Index: head/sys/netinet/tcp_hpts.h
===================================================================
--- head/sys/netinet/tcp_hpts.h
+++ head/sys/netinet/tcp_hpts.h
@@ -45,112 +45,80 @@
 
 /* Number of useconds in a hpts tick */
 #define HPTS_TICKS_PER_USEC 10
-#define HPTS_MS_TO_SLOTS(x) (x * 100)
+#define HPTS_MS_TO_SLOTS(x) (((x) * 100) + 1)	/* 100 slots per ms, plus one extra slot; (x) is parenthesized so expression arguments scale correctly */
 #define HPTS_USEC_TO_SLOTS(x) ((x+9) /10)
 #define HPTS_USEC_IN_SEC 1000000
 #define HPTS_MSEC_IN_SEC 1000
 #define HPTS_USEC_IN_MSEC 1000
 
-#define DEFAULT_HPTS_LOG 3072
 
-/*
- * Log flags consist of
- *  7f      7f         1            1 bits
- * p_cpu | p_num | INPUT_ACTIVE | HPTS_ACTIVE
- *
- * So for example cpu 10, number 10 would with
- * input active would show up as:
- * p_flags = 0001010 0001010 1 0
- *  <or>
- * p_flags = 0x142a
- */
-#define HPTS_HPTS_ACTIVE 0x01
-#define HPTS_INPUT_ACTIVE 0x02
-
-#define HPTSLOG_IMMEDIATE	1
-#define HPTSLOG_INSERT_NORMAL	2
-#define HPTSLOG_INSERT_SLEEPER	3
-#define HPTSLOG_SLEEP_AFTER	4
-#define HPTSLOG_SLEEP_BEFORE	5
-#define HPTSLOG_INSERTED	6
-#define HPTSLOG_WAKEUP_HPTS	7
-#define HPTSLOG_SETTORUN	8
-#define HPTSLOG_HPTSI		9
-#define HPTSLOG_TOLONG		10
-#define HPTSLOG_AWAKENS	11
-#define HPTSLOG_TIMESOUT	12
-#define HPTSLOG_SLEEPSET	13
-#define HPTSLOG_WAKEUP_INPUT	14
-#define HPTSLOG_RESCHEDULE     15
-#define HPTSLOG_AWAKE		16
-#define HPTSLOG_INP_DONE	17
-
-struct hpts_log {
-	struct inpcb *inp;
-	int32_t event;
-	uint32_t cts;
-	int32_t line;
-	uint32_t ticknow;
-	uint32_t t_paceslot;
-	uint32_t t_hptsreq;
-	uint32_t p_curtick;
-	uint32_t p_prevtick;
-	uint32_t slot_req;
-	uint32_t p_on_queue_cnt;
-	uint32_t p_nxt_slot;
-	uint32_t p_cur_slot;
-	uint32_t p_hpts_sleep_time;
-	uint16_t p_flags;
-	uint8_t p_onhpts;
-	uint8_t p_oninput;
-	uint8_t is_notempty;
-};
-
 struct hpts_diag {
-	uint32_t p_hpts_active;
-	uint32_t p_nxt_slot;
-	uint32_t p_cur_slot;
-	uint32_t slot_req;
-	uint32_t inp_hptsslot;
-	uint32_t slot_now;
-	uint32_t have_slept;
-	uint32_t hpts_sleep_time;
-	uint32_t yet_to_sleep;
-	uint32_t need_new_to;
-	int32_t co_ret;
-	uint8_t p_on_min_sleep;
+	uint32_t p_hpts_active; 	/* bbr->flex7 x */
+	uint32_t p_nxt_slot;		/* bbr->flex1 x */
+	uint32_t p_cur_slot;		/* bbr->flex2 x */
+	uint32_t p_prev_slot;		/* bbr->delivered */
+	uint32_t p_runningtick;		/* bbr->inflight */
+	uint32_t slot_req;		/* bbr->flex3 x */
+	uint32_t inp_hptsslot;		/* bbr->flex4 x */
+	uint32_t slot_remaining;	/* bbr->flex5 x */
+	uint32_t have_slept;		/* bbr->epoch x */
+	uint32_t hpts_sleep_time;	/* bbr->applimited x */
+	uint32_t yet_to_sleep;		/* bbr->lt_epoch x */
+	uint32_t need_new_to;		/* bbr->flex6 x  */
+	uint32_t wheel_tick;		/* bbr->bw_inuse x */
+	uint32_t maxticks;		/* bbr->delRate x */
+	uint32_t wheel_cts;		/* bbr->rttProp x */
+	int32_t co_ret; 		/* bbr->pkts_out x */
+	uint32_t p_curtick;		/* upper bbr->cur_del_rate */
+	uint32_t p_lasttick;		/* lower bbr->cur_del_rate */
+	uint8_t p_on_min_sleep; 	/* bbr->flex8 x */
 };
 
+/* Magic flags to tell what's cooking on the pacing wheel */
+#define PACE_TMR_DELACK 0x01	/* Delayed ack timer running */
+#define PACE_TMR_RACK   0x02	/* RACK timer running */
+#define PACE_TMR_TLP    0x04	/* TLP timer running */
+#define PACE_TMR_RXT    0x08	/* Retransmit timer running */
+#define PACE_TMR_PERSIT 0x10	/* Persist timer running */
+#define PACE_TMR_KEEP   0x20	/* Keep alive timer running */
+#define PACE_PKT_OUTPUT 0x40	/* Output Packets being paced */
+#define PACE_TMR_MASK   (PACE_TMR_KEEP|PACE_TMR_PERSIT|PACE_TMR_RXT|PACE_TMR_TLP|PACE_TMR_RACK|PACE_TMR_DELACK)	/* all timer bits; excludes PACE_PKT_OUTPUT */
+
 #ifdef _KERNEL
 /* Each hpts has its own p_mtx which is used for locking */
 struct tcp_hpts_entry {
 	/* Cache line 0x00 */
 	struct mtx p_mtx;	/* Mutex for hpts */
-	uint32_t p_hpts_active; /* Flag that says hpts is awake  */
-	uint32_t p_curtick;	/* Current tick in 10 us the hpts is at */
-	uint32_t p_prevtick;	/* Previous tick in 10 us the hpts ran */
+	uint16_t p_hpts_active; /* Flag that says hpts is awake  */
+	uint8_t p_hpts_wake_scheduled;	/* Have we scheduled a wakeup? */
+	uint8_t p_wheel_complete; /* have we completed the wheel arc walk? */
+	uint32_t p_curtick;	/* Tick in 10 us the hpts is going to */
+	uint32_t p_runningtick; /* Current tick we are at if we are running */
+	uint32_t p_prev_slot;	/* Previous slot we were on */
 	uint32_t p_cur_slot;	/* Current slot in wheel hpts is draining */
 	uint32_t p_nxt_slot;	/* The next slot outside the current range of
 				 * slots that the hpts is running on. */
 	int32_t p_on_queue_cnt;	/* Count on queue in this hpts */
-	uint32_t enobuf_cnt;
-	uint16_t p_log_at;
+	uint32_t p_lasttick;	/* Last tick before the current one */
 	uint8_t p_direct_wake :1, /* boolean */
-		p_log_wrapped :1, /* boolean */
-		p_on_min_sleep:1; /* boolean */
-	uint8_t p_fill;
+		p_on_min_sleep:1, /* boolean */
+		p_avail:6; 
+	uint8_t p_fill[3];	  /* Fill to 32 bits */
 	/* Cache line 0x40 */
 	void *p_inp;
 	struct hptsh p_input;	/* For the tcp-input runner */
 	/* Hptsi wheel */
 	struct hptsh *p_hptss;
-	struct hpts_log *p_log;
-	uint32_t p_logsize;
 	int32_t p_on_inqueue_cnt; /* Count on input queue in this hpts */
 	uint32_t hit_no_enobuf;
 	uint32_t p_dyn_adjust;
 	uint32_t p_hpts_sleep_time;	/* Current sleep interval having a max
 					 * of 255ms */
+	uint32_t overidden_sleep;	/* what was overrided by min-sleep for logging */
+	uint32_t saved_lasttick;	/* for logging */
+	uint32_t saved_curtick;		/* for logging */
+	uint32_t saved_curslot;		/* for logging */
+	uint32_t saved_prev_slot;       /* for logging */
 	uint32_t p_delayed_by;	/* How much were we delayed by */
 	/* Cache line 0x80 */
 	struct sysctl_ctx_list hpts_ctx;
@@ -236,13 +204,9 @@
 int
     __tcp_queue_to_input_locked(struct inpcb *inp, struct tcp_hpts_entry *hpts, int32_t line);
 #define tcp_queue_to_input_locked(a, b) __tcp_queue_to_input_locked(a, b, __LINE__);
-void
-tcp_queue_pkt_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
-    int32_t tlen, int32_t drop_hdrlen, uint8_t iptos);
 int
-__tcp_queue_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
-    int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, int32_t line);
-#define tcp_queue_to_input(a, b, c, d, e, f, g) __tcp_queue_to_input(a, b, c, d, e, f, g, __LINE__)
+__tcp_queue_to_input(struct inpcb *inp, int32_t line);
+#define tcp_queue_to_input(a) __tcp_queue_to_input(a, __LINE__)
 
 uint16_t tcp_hpts_delayedby(struct inpcb *inp);
 
Index: head/sys/netinet/tcp_hpts.c
===================================================================
--- head/sys/netinet/tcp_hpts.c
+++ head/sys/netinet/tcp_hpts.c
@@ -37,7 +37,7 @@
  * pacing packets out onto the wire. It can be used in two ways
  * by a given TCP stack (and those two methods can be used simultaneously).
  *
- * First, and probably the main thing its used by Rack and BBR for, it can
+ * First, and probably the main thing its used by Rack and BBR, it can
  * be used to call tcp_output() of a transport stack at some time in the future.
  * The normal way this is done is that tcp_output() of the stack schedules
  * itself to be called again by calling tcp_hpts_insert(tcpcb, slot). The
@@ -59,42 +59,57 @@
  * to prevent output processing until the time alotted has gone by.
  * Of course this is a bare bones example and the stack will probably
  * have more consideration then just the above.
- *
- * Now the tcp_hpts system will call tcp_output in one of two forms, 
- * it will first check to see if the stack as defined a 
- * tfb_tcp_output_wtime() function, if so that is the routine it
- * will call, if that function is not defined then it will call the
- * tfb_tcp_output() function. The only difference between these
- * two calls is that the former passes the time in to the function
- * so the function does not have to access the time (which tcp_hpts
- * already has). What these functions do is of course totally up
- * to the individual tcp stack.
- *
+ * 
  * Now the second function (actually two functions I guess :D)
  * the tcp_hpts system provides is the  ability to either abort 
- * a connection (later) or process  input on a connection. 
- * Why would you want to do this? To keep processor locality.
+ * a connection (later) or process input on a connection. 
+ * Why would you want to do this? To keep processor locality
+ * and/or to avoid having to untangle any recursive
+ * locks. The input function is now hooked to the new LRO
+ * system as well.
  *
- * So in order to use the input redirection function the
- * stack changes its tcp_do_segment() routine to instead
- * of process the data call the function:
+ * In order to use the input redirection function the
+ * tcp stack must define an input function for 
+ * tfb_do_queued_segments(). This function understands
+ * how to dequeue an array of packets that were input and
+ * knows how to call the correct processing routine. 
  *
- * tcp_queue_pkt_to_input()
- *
- * You will note that the arguments to this function look
- * a lot like tcp_do_segments's arguments. This function
- * will assure that the tcp_hpts system will
- * call the functions tfb_tcp_hpts_do_segment() from the
- * correct CPU. Note that multiple calls can get pushed
- * into the tcp_hpts system this will be indicated by
- * the next to last argument to tfb_tcp_hpts_do_segment()
- * (nxt_pkt). If nxt_pkt is a 1 then another packet is
- * coming. If nxt_pkt is a 0 then this is the last call
- * that the tcp_hpts system has available for the tcp stack.
+ * Locking in this is important as well so most likely the 
+ * stack will need to define the tfb_do_segment_nounlock()
+ * splitting tfb_do_segment() into two parts. The main processing
+ * part that does not unlock the INP and returns a value of 1 or 0.
+ * It returns 0 if all is well and the lock was not released. It
+ * returns 1 if we had to destroy the TCB (a reset received etc).
+ * The remains of tfb_do_segment() then become just a simple call
+ * to the tfb_do_segment_nounlock() function and check the return
+ * code and possibly unlock.
  * 
- * The other point of the input system is to be able to safely
- * drop a tcp connection without worrying about the recursive 
- * locking that may be occuring on the INP_WLOCK. So if
+ * The stack must also set the flag on the INP that it supports this
+ * feature, i.e. INP_SUPPORTS_MBUFQ. The LRO code recognizes
+ * this flag as well and will queue packets when it is set.
+ * There are two other flags as well: INP_MBUF_QUEUE_READY and
+ * INP_DONT_SACK_QUEUE. The first flag tells the LRO code
+ * that we are in the pacer for output so there is no
+ * need to wake up the hpts system to get immediate
+ * input. The second tells the LRO code that it is okay,
+ * if a SACK arrives, to still defer input and let
+ * the current hpts timer run (this is usually set when
+ * a rack timer is up so we know SACKs are happening
+ * on the connection already and don't want to wake up yet).
+ *
+ * There is a common function within the rack_bbr_common
+ * code, i.e. ctf_do_queued_segments(). This function
+ * knows how to take the input queue of packets from
+ * tp->t_in_pkts and process them, digging out
+ * all the arguments, calling any bpf tap and
+ * calling into tfb_do_segment_nounlock(). The common
+ * function (ctf_do_queued_segments()) requires that
+ * you have defined the tfb_do_segment_nounlock() as
+ * described above.
+ *
+ * The second feature of the input side of hpts is the
+ * dropping of a connection. This is needed because of the way
+ * locking may have occurred on the INP_WLOCK. So if
  * a stack wants to drop a connection it calls:
  *
  *     tcp_set_inp_to_drop(tp, ETIMEDOUT)
@@ -156,6 +171,7 @@
 #include <netinet/tcpip.h>
 #include <netinet/cc/cc.h>
 #include <netinet/tcp_hpts.h>
+#include <netinet/tcp_log_buf.h>
 
 #ifdef tcpdebug
 #include <netinet/tcp_debug.h>
@@ -168,24 +184,19 @@
 
 MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts");
 #ifdef RSS
-#include <net/netisr.h>
-#include <net/rss_config.h>
 static int tcp_bind_threads = 1;
 #else
 static int tcp_bind_threads = 2;
 #endif
 TUNABLE_INT("net.inet.tcp.bind_hptss", &tcp_bind_threads);
 
-static uint32_t tcp_hpts_logging_size = DEFAULT_HPTS_LOG;
-
-TUNABLE_INT("net.inet.tcp.hpts_logging_sz", &tcp_hpts_logging_size);
-
 static struct tcp_hptsi tcp_pace;
+static int hpts_does_tp_logging = 0;
 
 static void tcp_wakehpts(struct tcp_hpts_entry *p);
 static void tcp_wakeinput(struct tcp_hpts_entry *p);
 static void tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv);
-static void tcp_hptsi(struct tcp_hpts_entry *hpts, struct timeval *ctick);
+static void tcp_hptsi(struct tcp_hpts_entry *hpts);
 static void tcp_hpts_thread(void *ctx);
 static void tcp_init_hptsi(void *st);
 
@@ -204,8 +215,6 @@
 		}							\
 	} while (0)
 
-static int32_t logging_on = 0;
-static int32_t hpts_sleep_max = (NUM_OF_HPTSI_SLOTS - 2);
 static int32_t tcp_hpts_precision = 120;
 
 struct hpts_domain_info {
@@ -219,44 +228,75 @@
     &tcp_hpts_precision, 120,
     "Value for PRE() precision of callout");
 
-SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, logging, CTLFLAG_RW,
-    &logging_on, 0,
-    "Turn on logging if compiled in");
+counter_u64_t hpts_hopelessly_behind;
 
+SYSCTL_COUNTER_U64(_net_inet_tcp_hpts, OID_AUTO, hopeless, CTLFLAG_RD,
+    &hpts_hopelessly_behind,
+    "Number of times hpts could not catch up and was behind hopelessly");
+
 counter_u64_t hpts_loops;
 
 SYSCTL_COUNTER_U64(_net_inet_tcp_hpts, OID_AUTO, loops, CTLFLAG_RD,
     &hpts_loops, "Number of times hpts had to loop to catch up");
 
+
 counter_u64_t back_tosleep;
 
 SYSCTL_COUNTER_U64(_net_inet_tcp_hpts, OID_AUTO, no_tcbsfound, CTLFLAG_RD,
     &back_tosleep, "Number of times hpts found no tcbs");
 
-static int32_t in_newts_every_tcb = 0;
+counter_u64_t combined_wheel_wrap;
 
-SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, in_tsperpcb, CTLFLAG_RW,
-    &in_newts_every_tcb, 0,
-    "Do we have a new cts every tcb we process for input");
-static int32_t in_ts_percision = 0;
+SYSCTL_COUNTER_U64(_net_inet_tcp_hpts, OID_AUTO, comb_wheel_wrap, CTLFLAG_RD,
+    &combined_wheel_wrap, "Number of times the wheel lagged enough to have an insert see wrap");
 
-SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, in_tspercision, CTLFLAG_RW,
-    &in_ts_percision, 0,
-    "Do we use percise timestamp for clients on input");
-static int32_t out_newts_every_tcb = 0;
+counter_u64_t wheel_wrap;
 
-SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, out_tsperpcb, CTLFLAG_RW,
-    &out_newts_every_tcb, 0,
-    "Do we have a new cts every tcb we process for output");
+SYSCTL_COUNTER_U64(_net_inet_tcp_hpts, OID_AUTO, wheel_wrap, CTLFLAG_RD,
+    &wheel_wrap, "Number of times the wheel lagged enough to have an insert see wrap");
+
 static int32_t out_ts_percision = 0;
 
 SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, out_tspercision, CTLFLAG_RW,
     &out_ts_percision, 0,
     "Do we use a percise timestamp for every output cts");
+SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, logging, CTLFLAG_RW,
+    &hpts_does_tp_logging, 0,
+    "Do we add to any tp that has logging on pacer logs");
 
-SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, maxsleep, CTLFLAG_RW,
+static int32_t max_pacer_loops = 10;
+SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, loopmax, CTLFLAG_RW,
+    &max_pacer_loops, 10,
+    "What is the maximum number of times the pacer will loop trying to catch up");
+
+#define HPTS_MAX_SLEEP_ALLOWED (NUM_OF_HPTSI_SLOTS/2)
+
+static uint32_t hpts_sleep_max = HPTS_MAX_SLEEP_ALLOWED;
+
+
+static int
+sysctl_net_inet_tcp_hpts_max_sleep(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	uint32_t new;
+
+	new = hpts_sleep_max;	/* seed the scratch copy with the current limit */
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error == 0 && req->newptr) {	/* only validate when a new value was supplied */
+		if ((new < (NUM_OF_HPTSI_SLOTS / 4)) ||
+		    (new > HPTS_MAX_SLEEP_ALLOWED)) 
+			error = EINVAL;	/* outside [NUM_OF_HPTSI_SLOTS / 4, HPTS_MAX_SLEEP_ALLOWED] */
+		else
+			hpts_sleep_max = new;	/* commit only a value that passed the range check */
+	}
+	return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp_hpts, OID_AUTO, maxsleep,
+    CTLTYPE_UINT | CTLFLAG_RW,
     &hpts_sleep_max, 0,
-    "The maximum time the hpts will sleep <1 - 254>");
+    &sysctl_net_inet_tcp_hpts_max_sleep, "IU",
+    "Maximum time hpts will sleep");
 
 SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, minsleep, CTLFLAG_RW,
     &tcp_min_hptsi_time, 0,
@@ -267,55 +307,35 @@
     "Do we have the callout call directly to the hpts?");
 
 static void
-__tcp_hpts_log_it(struct tcp_hpts_entry *hpts, struct inpcb *inp, int event, uint32_t slot,
-    uint32_t ticknow, int32_t line)
+tcp_hpts_log(struct tcp_hpts_entry *hpts, struct tcpcb *tp, struct timeval *tv,
+	     int ticks_to_run, int idx)
 {
-	struct hpts_log *pl;
-
-	HPTS_MTX_ASSERT(hpts);
-	if (hpts->p_log == NULL)
-		return;
-	pl = &hpts->p_log[hpts->p_log_at];
-	hpts->p_log_at++;
-	if (hpts->p_log_at >= hpts->p_logsize) {
-		hpts->p_log_at = 0;
-		hpts->p_log_wrapped = 1;
-	}
-	pl->inp = inp;
-	if (inp) {
-		pl->t_paceslot = inp->inp_hptsslot;
-		pl->t_hptsreq = inp->inp_hpts_request;
-		pl->p_onhpts = inp->inp_in_hpts;
-		pl->p_oninput = inp->inp_in_input;
-	} else {
-		pl->t_paceslot = 0;
-		pl->t_hptsreq = 0;
-		pl->p_onhpts = 0;
-		pl->p_oninput = 0;
-	}
-	pl->is_notempty = 1;
-	pl->event = event;
-	pl->line = line;
-	pl->cts = tcp_get_usecs(NULL);
-	pl->p_curtick = hpts->p_curtick;
-	pl->p_prevtick = hpts->p_prevtick;
-	pl->p_on_queue_cnt = hpts->p_on_queue_cnt;
-	pl->ticknow = ticknow;
-	pl->slot_req = slot;
-	pl->p_nxt_slot = hpts->p_nxt_slot;
-	pl->p_cur_slot = hpts->p_cur_slot;
-	pl->p_hpts_sleep_time = hpts->p_hpts_sleep_time;
-	pl->p_flags = (hpts->p_cpu & 0x7f);
-	pl->p_flags <<= 7;
-	pl->p_flags |= (hpts->p_num & 0x7f);
-	pl->p_flags <<= 2;
-	if (hpts->p_hpts_active) {
-		pl->p_flags |= HPTS_HPTS_ACTIVE;
-	}
+	union tcp_log_stackspecific log;
+	
+	memset(&log.u_bbr, 0, sizeof(log.u_bbr));
+	log.u_bbr.flex1 = hpts->p_nxt_slot;
+	log.u_bbr.flex2 = hpts->p_cur_slot;
+	log.u_bbr.flex3 = hpts->p_prev_slot;
+	log.u_bbr.flex4 = idx;
+	log.u_bbr.flex5 = hpts->p_curtick;
+	log.u_bbr.flex6 = hpts->p_on_queue_cnt;
+	log.u_bbr.use_lt_bw = 1;
+	log.u_bbr.inflight = ticks_to_run;
+	log.u_bbr.applimited = hpts->overidden_sleep;
+	log.u_bbr.delivered = hpts->saved_curtick;
+	log.u_bbr.timeStamp = tcp_tv_to_usectick(tv);
+	log.u_bbr.epoch = hpts->saved_curslot;
+	log.u_bbr.lt_epoch = hpts->saved_prev_slot;
+	log.u_bbr.pkts_out = hpts->p_delayed_by;
+	log.u_bbr.lost = hpts->p_hpts_sleep_time;
+	log.u_bbr.cur_del_rate = hpts->p_runningtick;
+	TCP_LOG_EVENTP(tp, NULL,
+		       &tp->t_inpcb->inp_socket->so_rcv,
+		       &tp->t_inpcb->inp_socket->so_snd,
+		       BBR_LOG_HPTSDIAG, 0,
+		       0, &log, false, tv);
 }
 
-#define tcp_hpts_log_it(a, b, c, d, e) __tcp_hpts_log_it(a, b, c, d, e, __LINE__)
-
 static void
 hpts_timeout_swi(void *arg)
 {
@@ -347,12 +367,6 @@
 		/* We are not on the hpts? */
 		panic("%s: hpts:%p inp:%p not on the hpts?", __FUNCTION__, hpts, inp);
 	}
-	if (TAILQ_EMPTY(head) &&
-	    (hpts->p_on_queue_cnt != 0)) {
-		/* We should not be empty with a queue count */
-		panic("%s hpts:%p hpts bucket empty but cnt:%d",
-		    __FUNCTION__, hpts, hpts->p_on_queue_cnt);
-	}
 #endif
 	TAILQ_REMOVE(head, inp, inp_hpts);
 	hpts->p_on_queue_cnt--;
@@ -456,58 +470,13 @@
 	in_pcbref(inp);
 }
 
-static int
-sysctl_tcp_hpts_log(SYSCTL_HANDLER_ARGS)
-{
-	struct tcp_hpts_entry *hpts;
-	size_t sz;
-	int32_t logging_was, i;
-	int32_t error = 0;
-
-	/*
-	 * HACK: Turn off logging so no locks are required this really needs
-	 * a memory barrier :)
-	 */
-	logging_was = logging_on;
-	logging_on = 0;
-	if (!req->oldptr) {
-		/* How much? */
-		sz = 0;
-		for (i = 0; i < tcp_pace.rp_num_hptss; i++) {
-			hpts = tcp_pace.rp_ent[i];
-			if (hpts->p_log == NULL)
-				continue;
-			sz += (sizeof(struct hpts_log) * hpts->p_logsize);
-		}
-		error = SYSCTL_OUT(req, 0, sz);
-	} else {
-		for (i = 0; i < tcp_pace.rp_num_hptss; i++) {
-			hpts = tcp_pace.rp_ent[i];
-			if (hpts->p_log == NULL)
-				continue;
-			if (hpts->p_log_wrapped)
-				sz = (sizeof(struct hpts_log) * hpts->p_logsize);
-			else
-				sz = (sizeof(struct hpts_log) * hpts->p_log_at);
-			error = SYSCTL_OUT(req, hpts->p_log, sz);
-		}
-	}
-	logging_on = logging_was;
-	return error;
-}
-
-SYSCTL_PROC(_net_inet_tcp_hpts, OID_AUTO, log, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
-    0, 0, sysctl_tcp_hpts_log, "A", "tcp hptsi log");
-
-
 static void
 tcp_wakehpts(struct tcp_hpts_entry *hpts)
 {
 	HPTS_MTX_ASSERT(hpts);
-	swi_sched(hpts->ie_cookie, 0);
-	if (hpts->p_hpts_active == 2) {
-		/* Rare sleeping on a ENOBUF */
-		wakeup_one(hpts);
+	if (hpts->p_hpts_wake_scheduled == 0) {
+		hpts->p_hpts_wake_scheduled = 1;
+		swi_sched(hpts->ie_cookie, 0);
 	}
 }
 
@@ -515,10 +484,9 @@
 tcp_wakeinput(struct tcp_hpts_entry *hpts)
 {
 	HPTS_MTX_ASSERT(hpts);
-	swi_sched(hpts->ie_cookie, 0);
-	if (hpts->p_hpts_active == 2) {
-		/* Rare sleeping on a ENOBUF */
-		wakeup_one(hpts);
+	if (hpts->p_hpts_wake_scheduled == 0) {
+		hpts->p_hpts_wake_scheduled = 1;
+		swi_sched(hpts->ie_cookie, 0);
 	}
 }
 
@@ -648,8 +616,8 @@
  * Valid values in the flags are
  * HPTS_REMOVE_OUTPUT - remove from the output of the hpts.
  * HPTS_REMOVE_INPUT - remove from the input of the hpts.
- * Note that you can or both values together and get two
- * actions.
+ * Note that you can use one or both values together 
+ * and get two actions.
  */
 void
 __tcp_hpts_remove(struct inpcb *inp, int32_t flags, int32_t line)
@@ -670,53 +638,198 @@
 }
 
 static inline int
-hpts_tick(struct tcp_hpts_entry *hpts, int32_t plus)
+hpts_tick(uint32_t wheel_tick, uint32_t plus)
 {
-	return ((hpts->p_prevtick + plus) % NUM_OF_HPTSI_SLOTS);
+	/*
+	 * Given a slot on the wheel, what slot
+	 * is that plus ticks out?
+	 */
+	KASSERT(wheel_tick < NUM_OF_HPTSI_SLOTS, ("Invalid tick %u not on wheel", wheel_tick));
+	return ((wheel_tick + plus) % NUM_OF_HPTSI_SLOTS);
 }
 
+static inline int
+tick_to_wheel(uint32_t cts_in_wticks)
+{
+	/*
+	 * Map a timestamp expressed in wheel ticks (10usec inc's)
+	 * onto a slot index in the range [0, NUM_OF_HPTSI_SLOTS).
+	 */
+	return (cts_in_wticks % NUM_OF_HPTSI_SLOTS);
+}
+
+static inline int
+hpts_ticks_diff(int prev_tick, int tick_now)
+{
+	/*
+	 * Forward distance, in slots, from prev_tick to tick_now
+	 * on the circular wheel (wrapping past the last slot).
+	 */
+	if (tick_now > prev_tick)
+		return (tick_now - prev_tick);
+	else if (tick_now == prev_tick)
+		/*
+		 * Special case: equal ticks are treated as the whole
+		 * wheel less one slot, not as a distance of zero.
+		 */
+		return (NUM_OF_HPTSI_SLOTS - 1);
+	else
+		return ((NUM_OF_HPTSI_SLOTS - prev_tick) + tick_now);	/* tick_now wrapped around the end */
+}
+
+/*
+ * Given a tick on the wheel that is the current time
+ * mapped to the wheel (wheel_tick), return the maximum
+ * distance forward that can be obtained without
+ * wrapping past either p_prev_slot or p_runningtick,
+ * depending on the hpts state (0 means no room is left).
+ * If target_tick is non-NULL it is filled with the tick location.
+ *
+ * Note if you do not give this function the current
+ * time (that you think it is) mapped to the wheel
+ * then the results will not be what you expect and
+ * could lead to invalid inserts.
+ */
+static inline int32_t
+max_ticks_available(struct tcp_hpts_entry *hpts, uint32_t wheel_tick, uint32_t *target_tick)
+{
+	uint32_t dis_to_travel, end_tick, pacer_to_now, avail_on_wheel;
+
+	if ((hpts->p_hpts_active == 1) &&
+	    (hpts->p_wheel_complete == 0)) {
+		end_tick = hpts->p_runningtick;
+		/* Back up one tick */
+		if (end_tick == 0)
+			end_tick = NUM_OF_HPTSI_SLOTS - 1;
+		else
+			end_tick--;
+		if (target_tick)
+			*target_tick = end_tick;
+	} else {
+		/*
+		 * For the case where we are
+		 * not active, or we have
+		 * completed the pass over
+		 * the wheel, we can use the
+		 * prev tick and subtract one from it. This puts us
+		 * as far out as possible on the wheel.
+		 */
+		end_tick = hpts->p_prev_slot;
+		if (end_tick == 0)
+			end_tick = NUM_OF_HPTSI_SLOTS - 1;
+		else
+			end_tick--;
+		if (target_tick)
+			*target_tick = end_tick;
+		/*
+		 * Now we have close to the full wheel left minus the
+		 * time it has been since the pacer went to sleep. Note
+		 * that wheel_tick, passed in, should be the current time
+		 * from the perspective of the caller, mapped to the wheel.
+		 */
+		if (hpts->p_prev_slot != wheel_tick)
+			dis_to_travel = hpts_ticks_diff(hpts->p_prev_slot, wheel_tick);
+		else
+			dis_to_travel = 1;
+		/*
+		 * dis_to_travel in this case is the space from when the
+		 * pacer stopped (p_prev_slot) and where our wheel_tick
+		 * is now. To know how many slots we can put it in we
+		 * subtract from the wheel size. We would not want
+		 * to place something after p_prev_slot or it will
+		 * be run too soon.
+		 */
+		return (NUM_OF_HPTSI_SLOTS - dis_to_travel);
+	}
+	/*
+	 * So how many slots are open between p_runningtick -> p_cur_slot
+	 * that is what is currently un-available for insertion. Special
+	 * case when we are at the last slot, this gets 1, so that
+	 * the answer to how many slots are available is all but 1.
+	 */
+	if (hpts->p_runningtick == hpts->p_cur_slot)
+		dis_to_travel = 1;
+	else
+		dis_to_travel = hpts_ticks_diff(hpts->p_runningtick, hpts->p_cur_slot);
+	/*
+	 * How long has the pacer been running?
+	 */
+	if (hpts->p_cur_slot != wheel_tick) {
+		/* The pacer is a bit late */
+		pacer_to_now = hpts_ticks_diff(hpts->p_cur_slot, wheel_tick);
+	} else {
+		/* The pacer is right on time, now == pacers start time */
+		pacer_to_now = 0;
+	}
+	/*
+	 * To get the number left we can insert into we simply
+	 * subtract the distance the pacer has to run from how
+	 * many slots there are.
+	 */
+	avail_on_wheel = NUM_OF_HPTSI_SLOTS - dis_to_travel;
+	/*
+	 * Now how many of those we will eat due to the pacer's
+	 * time (p_cur_slot) of start being behind the
+	 * real time (wheel_tick)?
+	 */
+	if (avail_on_wheel <= pacer_to_now) {
+		/*
+		 * Wheel wrap, we can't fit on the wheel; that is
+		 * unusual, the system must be way overloaded! Insert
+		 * into the assured tick, and return special "0".
+		 */
+		counter_u64_add(combined_wheel_wrap, 1);
+		if (target_tick)
+			*target_tick = hpts->p_nxt_slot;
+		return (0);
+	} else {
+		/*
+		 * We know how many slots are open
+		 * on the wheel (the reverse of what
+		 * is left to run). Take away the time
+		 * the pacer started to now (wheel_tick)
+		 * and that tells you how many slots are
+		 * open that can be inserted into that won't
+		 * be touched by the pacer until later.
+		 */
+		return (avail_on_wheel - pacer_to_now);
+	}
+}
+
 static int
 tcp_queue_to_hpts_immediate_locked(struct inpcb *inp, struct tcp_hpts_entry *hpts, int32_t line, int32_t noref)
 {
-	int32_t need_wake = 0;
-	uint32_t ticknow = 0;
-
+	uint32_t need_wake = 0;
+	
 	HPTS_MTX_ASSERT(hpts);
 	if (inp->inp_in_hpts == 0) {
 		/* Ok we need to set it on the hpts in the current slot */
-		if (hpts->p_hpts_active == 0) {
-			/* A sleeping hpts we want in next slot to run */
-			if (logging_on) {
-				tcp_hpts_log_it(hpts, inp, HPTSLOG_INSERT_SLEEPER, 0,
-				    hpts_tick(hpts, 1));
-			}
-			inp->inp_hptsslot = hpts_tick(hpts, 1);
-			inp->inp_hpts_request = 0;
-			if (logging_on) {
-				tcp_hpts_log_it(hpts, inp, HPTSLOG_SLEEP_BEFORE, 1, ticknow);
-			}
-			need_wake = 1;
+		inp->inp_hpts_request = 0;
+		if ((hpts->p_hpts_active == 0) ||
+		    (hpts->p_wheel_complete)) {
+			/*
+			 * A sleeping hpts we want in next slot to run 
+			 * note that in this state p_prev_slot == p_cur_slot
+			 */
+			inp->inp_hptsslot = hpts_tick(hpts->p_prev_slot, 1);
+			if ((hpts->p_on_min_sleep == 0) && (hpts->p_hpts_active == 0))
+				need_wake = 1;
 		} else if ((void *)inp == hpts->p_inp) {
 			/*
+			 * The hpts system is running and the caller
+			 * was awoken by the hpts system. 
 			 * We can't allow you to go into the same slot we
-			 * are in. We must put you out.
+			 * are in (we don't want a loop :-D).
 			 */
 			inp->inp_hptsslot = hpts->p_nxt_slot;
 		} else
-			inp->inp_hptsslot = hpts->p_cur_slot;
+			inp->inp_hptsslot = hpts->p_runningtick;
 		hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], line, noref);
-		inp->inp_hpts_request = 0;
-		if (logging_on) {
-			tcp_hpts_log_it(hpts, inp, HPTSLOG_IMMEDIATE, 0, 0);
-		}
 		if (need_wake) {
 			/*
 			 * Activate the hpts if it is sleeping and its
 			 * timeout is not 1.
 			 */
-			if (logging_on) {
-				tcp_hpts_log_it(hpts, inp, HPTSLOG_WAKEUP_HPTS, 0, ticknow);
-			}
 			hpts->p_direct_wake = 1;
 			tcp_wakehpts(hpts);
 		}
@@ -737,141 +850,129 @@
 	return (ret);
 }
 
+#ifdef INVARIANTS
 static void
-tcp_hpts_insert_locked(struct tcp_hpts_entry *hpts, struct inpcb *inp, uint32_t slot, uint32_t cts, int32_t line,
-    struct hpts_diag *diag, int32_t noref)
+check_if_slot_would_be_wrong(struct tcp_hpts_entry *hpts, struct inpcb *inp, uint32_t inp_hptsslot, int line)
 {
-	int32_t need_new_to = 0;
-	int32_t need_wakeup = 0;
-	uint32_t largest_slot;
-	uint32_t ticknow = 0;
-	uint32_t slot_calc;
+	/*
+	 * Sanity checks for the pacer with invariants 
+	 * on insert.
+	 */
+	if (inp_hptsslot >= NUM_OF_HPTSI_SLOTS)
+		panic("hpts:%p inp:%p slot:%d > max",
+		      hpts, inp, inp_hptsslot);
+	if ((hpts->p_hpts_active) &&
+	    (hpts->p_wheel_complete == 0)) {
+		/* 
+		 * If the pacer is processing an arc
+		 * of the wheel, we need to make
+		 * sure we are not inserting within
+		 * that arc.
+		 */
+		int distance, yet_to_run;
 
+		distance = hpts_ticks_diff(hpts->p_runningtick, inp_hptsslot);
+		if (hpts->p_runningtick != hpts->p_cur_slot)
+			yet_to_run = hpts_ticks_diff(hpts->p_runningtick, hpts->p_cur_slot);
+		else
+			yet_to_run = 0;	/* processing last slot */
+		if (yet_to_run > distance) {
+			panic("hpts:%p inp:%p slot:%d distance:%d yet_to_run:%d rs:%d cs:%d",
+			      hpts, inp, inp_hptsslot,
+			      distance, yet_to_run,
+			      hpts->p_runningtick, hpts->p_cur_slot);
+		}
+	}
+}
+#endif
+
+static void
+tcp_hpts_insert_locked(struct tcp_hpts_entry *hpts, struct inpcb *inp, uint32_t slot, int32_t line,
+		       struct hpts_diag *diag, struct timeval *tv)
+{
+	uint32_t need_new_to = 0;
+	uint32_t wheel_cts, last_tick;
+	int32_t wheel_tick, maxticks;
+	int8_t need_wakeup = 0;
+
 	HPTS_MTX_ASSERT(hpts);
 	if (diag) {
 		memset(diag, 0, sizeof(struct hpts_diag));
 		diag->p_hpts_active = hpts->p_hpts_active;
+		diag->p_prev_slot = hpts->p_prev_slot;
+		diag->p_runningtick = hpts->p_runningtick;
 		diag->p_nxt_slot = hpts->p_nxt_slot;
 		diag->p_cur_slot = hpts->p_cur_slot;
+		diag->p_curtick = hpts->p_curtick;
+		diag->p_lasttick = hpts->p_lasttick;
 		diag->slot_req = slot;
+		diag->p_on_min_sleep = hpts->p_on_min_sleep;
+		diag->hpts_sleep_time = hpts->p_hpts_sleep_time;
 	}
-	if ((inp->inp_in_hpts == 0) || noref) {
-		inp->inp_hpts_request = slot;
+	if (inp->inp_in_hpts == 0) {
 		if (slot == 0) {
 			/* Immediate */
-			tcp_queue_to_hpts_immediate_locked(inp, hpts, line, noref);
+			tcp_queue_to_hpts_immediate_locked(inp, hpts, line, 0);
 			return;
 		}
-		if (hpts->p_hpts_active) {
-			/*
-			 * Its slot - 1 since nxt_slot is the next tick that
-			 * will go off since the hpts is awake
-			 */
-			if (logging_on) {
-				tcp_hpts_log_it(hpts, inp, HPTSLOG_INSERT_NORMAL, slot, 0);
-			}
-			/*
-			 * We want to make sure that we don't place a inp in
-			 * the range of p_cur_slot <-> p_nxt_slot. If we
-			 * take from p_nxt_slot to the end, plus p_cur_slot
-			 * and then take away 2, we will know how many is
-			 * the max slots we can use.
-			 */
-			if (hpts->p_nxt_slot > hpts->p_cur_slot) {
-				/*
-				 * Non-wrap case nxt_slot <-> cur_slot we
-				 * don't want to land in. So the diff gives
-				 * us what is taken away from the number of
-				 * slots.
+		/* Get the current time relative to the wheel */
+		wheel_cts = tcp_tv_to_hptstick(tv);
+		/* Map it onto the wheel */
+		wheel_tick = tick_to_wheel(wheel_cts);
+		/* Now what's the max we can place it at? */
+		maxticks = max_ticks_available(hpts, wheel_tick, &last_tick);
+		if (diag) {
+			diag->wheel_tick = wheel_tick;
+			diag->maxticks = maxticks;
+			diag->wheel_cts = wheel_cts;
+		}
+		if (maxticks == 0) {
+			/* The pacer is in a wheel wrap behind, yikes! */
+			if (slot > 1) {
+				/* 
+				 * Reduce by 1 to prevent a forever loop in
+				 * case something else is wrong. Note this
+				 * probably does not hurt because the pacer,
+				 * if this is true, is so far behind we will
+				 * be > 1 second late calling anyway.
 				 */
-				largest_slot = NUM_OF_HPTSI_SLOTS - (hpts->p_nxt_slot - hpts->p_cur_slot);
-			} else if (hpts->p_nxt_slot == hpts->p_cur_slot) {
-				largest_slot = NUM_OF_HPTSI_SLOTS - 2;
-			} else {
-				/*
-				 * Wrap case so the diff gives us the number
-				 * of slots that we can land in.
-				 */
-				largest_slot = hpts->p_cur_slot - hpts->p_nxt_slot;
+				slot--;
 			}
-			/*
-			 * We take away two so we never have a problem (20
-			 * usec's) out of 1024000 usecs
-			 */
-			largest_slot -= 2;
-			if (inp->inp_hpts_request > largest_slot) {
-				/*
-				 * Restrict max jump of slots and remember
-				 * leftover
-				 */
-				slot = largest_slot;
-				inp->inp_hpts_request -= largest_slot;
-			} else {
-				/* This one will run when we hit it */
-				inp->inp_hpts_request = 0;
-			}
-			if (hpts->p_nxt_slot == hpts->p_cur_slot)
-				slot_calc = (hpts->p_nxt_slot + slot) % NUM_OF_HPTSI_SLOTS;
-			else
-				slot_calc = (hpts->p_nxt_slot + slot - 1) % NUM_OF_HPTSI_SLOTS;
-			if (slot_calc == hpts->p_cur_slot) {
+			inp->inp_hptsslot = last_tick;
+			inp->inp_hpts_request = slot;
+		} else 	if (maxticks >= slot) {
+			/* It all fits on the wheel */
+			inp->inp_hpts_request = 0;
+			inp->inp_hptsslot = hpts_tick(wheel_tick, slot);
+		} else {
+			/* It does not fit */
+			inp->inp_hpts_request = slot - maxticks;
+			inp->inp_hptsslot = last_tick;
+		}
+		if (diag) {
+			diag->slot_remaining = inp->inp_hpts_request;
+			diag->inp_hptsslot = inp->inp_hptsslot;
+		}
 #ifdef INVARIANTS
-				/* TSNH */
-				panic("Hpts:%p impossible slot calculation slot_calc:%u slot:%u largest:%u\n",
-				    hpts, slot_calc, slot, largest_slot);
+		check_if_slot_would_be_wrong(hpts, inp, inp->inp_hptsslot, line);
 #endif
-				if (slot_calc)
-					slot_calc--;
-				else
-					slot_calc = NUM_OF_HPTSI_SLOTS - 1;
-			}
-			inp->inp_hptsslot = slot_calc;
-			if (diag) {
-				diag->inp_hptsslot = inp->inp_hptsslot;
-			}
-		} else {
+		hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], line, 0);
+		if ((hpts->p_hpts_active == 0) &&
+		    (inp->inp_hpts_request == 0) &&
+		    (hpts->p_on_min_sleep == 0)) {
 			/*
-			 * The hpts is sleeping, we need to figure out where
+			 * The hpts is sleeping and not on a minimum
+			 * sleep time, we need to figure out where
 			 * it will wake up at and if we need to reschedule
 			 * its time-out.
 			 */
 			uint32_t have_slept, yet_to_sleep;
-			uint32_t slot_now;
-			struct timeval tv;
 
-			ticknow = tcp_gethptstick(&tv);
-			slot_now = ticknow % NUM_OF_HPTSI_SLOTS;
-			/*
-			 * The user wants to be inserted at (slot_now +
-			 * slot) % NUM_OF_HPTSI_SLOTS, so lets set that up.
-			 */
-			largest_slot = NUM_OF_HPTSI_SLOTS - 2;
-			if (inp->inp_hpts_request > largest_slot) {
-				/* Adjust the residual in inp_hpts_request */
-				slot = largest_slot;
-				inp->inp_hpts_request -= largest_slot;
-			} else {
-				/* No residual it all fits */
-				inp->inp_hpts_request = 0;
-			}
-			inp->inp_hptsslot = (slot_now + slot) % NUM_OF_HPTSI_SLOTS;
-			if (diag) {
-				diag->slot_now = slot_now;
-				diag->inp_hptsslot = inp->inp_hptsslot;
-				diag->p_on_min_sleep = hpts->p_on_min_sleep;
-			}
-			if (logging_on) {
-				tcp_hpts_log_it(hpts, inp, HPTSLOG_INSERT_SLEEPER, slot, ticknow);
-			}
 			/* Now do we need to restart the hpts's timer? */
-			if (TSTMP_GT(ticknow, hpts->p_curtick))
-				have_slept = ticknow - hpts->p_curtick;
-			else
-				have_slept = 0;
-			if (have_slept < hpts->p_hpts_sleep_time) {
-				/* This should be what happens */
+			have_slept = hpts_ticks_diff(hpts->p_prev_slot, wheel_tick);
+			if (have_slept < hpts->p_hpts_sleep_time)
 				yet_to_sleep = hpts->p_hpts_sleep_time - have_slept;
-			} else {
+			else {
 				/* We are over-due */
 				yet_to_sleep = 0;
 				need_wakeup = 1;
@@ -879,29 +980,22 @@
 			if (diag) {
 				diag->have_slept = have_slept;
 				diag->yet_to_sleep = yet_to_sleep;
-				diag->hpts_sleep_time = hpts->p_hpts_sleep_time;
 			}
-			if ((hpts->p_on_min_sleep == 0) && (yet_to_sleep > slot)) {
+			if (yet_to_sleep &&
+			    (yet_to_sleep > slot)) {
 				/*
-				 * We need to reschedule the hptss time-out.
+				 * We need to reschedule the hpts's time-out.
 				 */
 				hpts->p_hpts_sleep_time = slot;
 				need_new_to = slot * HPTS_TICKS_PER_USEC;
 			}
 		}
-		hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], line, noref);
-		if (logging_on) {
-			tcp_hpts_log_it(hpts, inp, HPTSLOG_INSERTED, slot, ticknow);
-		}
 		/*
 		 * Now how far is the hpts sleeping to? if active is 1, its
 		 * up and ticking we do nothing, otherwise we may need to
 		 * reschedule its callout if need_new_to is set from above.
 		 */
 		if (need_wakeup) {
-			if (logging_on) {
-				tcp_hpts_log_it(hpts, inp, HPTSLOG_RESCHEDULE, 1, 0);
-			}
 			hpts->p_direct_wake = 1;
 			tcp_wakehpts(hpts);
 			if (diag) {
@@ -944,9 +1038,10 @@
 }
 
 uint32_t
-tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line, struct hpts_diag *diag){
+tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line, struct hpts_diag *diag)
+{
 	struct tcp_hpts_entry *hpts;
-	uint32_t slot_on, cts;
+	uint32_t slot_on;
 	struct timeval tv;
 
 	/*
@@ -956,12 +1051,8 @@
 	 */
 	INP_WLOCK_ASSERT(inp);
 	hpts = tcp_hpts_lock(inp);
-	if (in_ts_percision)
-		microuptime(&tv);
-	else
-		getmicrouptime(&tv);
-	cts = tcp_tv_to_usectick(&tv);
-	tcp_hpts_insert_locked(hpts, inp, slot, cts, line, diag, 0);
+	microuptime(&tv);
+	tcp_hpts_insert_locked(hpts, inp, slot, line, diag, &tv);
 	slot_on = hpts->p_nxt_slot;
 	mtx_unlock(&hpts->p_mtx);
 	return (slot_on);
@@ -971,7 +1062,6 @@
 __tcp_hpts_insert(struct inpcb *inp, uint32_t slot, int32_t line){
 	return (tcp_hpts_insert_diag(inp, slot, line, NULL));
 }
-
 int
 __tcp_queue_to_input_locked(struct inpcb *inp, struct tcp_hpts_entry *hpts, int32_t line)
 {
@@ -986,9 +1076,6 @@
 			/*
 			 * Activate the hpts if it is sleeping.
 			 */
-			if (logging_on) {
-				tcp_hpts_log_it(hpts, inp, HPTSLOG_WAKEUP_INPUT, 0, 0);
-			}
 			retval = 2;
 			hpts->p_direct_wake = 1;
 			tcp_wakeinput(hpts);
@@ -1001,36 +1088,14 @@
 	return (retval);
 }
 
-void
-tcp_queue_pkt_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
-    int32_t tlen, int32_t drop_hdrlen, uint8_t iptos)
-{
-	/* Setup packet for input first */
-	INP_WLOCK_ASSERT(tp->t_inpcb);
-	m->m_pkthdr.pace_thoff = (uint16_t) ((caddr_t)th - mtod(m, caddr_t));
-	m->m_pkthdr.pace_tlen = (uint16_t) tlen;
-	m->m_pkthdr.pace_drphdrlen = drop_hdrlen;
-	m->m_pkthdr.pace_tos = iptos;
-	m->m_pkthdr.pace_lock = (curthread->td_epochnest != 0);
-	if (tp->t_in_pkt == NULL) {
-		tp->t_in_pkt = m;
-		tp->t_tail_pkt = m;
-	} else {
-		tp->t_tail_pkt->m_nextpkt = m;
-		tp->t_tail_pkt = m;
-	}
-}
-
-
 int32_t
-__tcp_queue_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
-    int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, int32_t line){
+__tcp_queue_to_input(struct inpcb *inp, int line)
+{
 	struct tcp_hpts_entry *hpts;
 	int32_t ret;
 
-	tcp_queue_pkt_to_input(tp, m, th, tlen, drop_hdrlen, iptos);
-	hpts = tcp_input_lock(tp->t_inpcb);
-	ret = __tcp_queue_to_input_locked(tp->t_inpcb, hpts, line);
+	hpts = tcp_input_lock(inp);
+	ret = __tcp_queue_to_input_locked(inp, hpts, line);
 	mtx_unlock(&hpts->p_mtx);
 	return (ret);
 }
@@ -1132,6 +1197,25 @@
 #endif
 }
 
+static void
+tcp_drop_in_pkts(struct tcpcb *tp)
+{
+	struct mbuf *m, *n;
+	
+	m = tp->t_in_pkt;
+	if (m)
+		n = m->m_nextpkt;
+	else
+		n = NULL;
+	tp->t_in_pkt = NULL;
+	while (m) {
+		m_freem(m);
+		m = n;
+		if (m)
+			n = m->m_nextpkt;
+	}
+}
+
 /*
  * Do NOT try to optimize the processing of inp's
  * by first pulling off all the inp's into a temporary
@@ -1142,7 +1226,7 @@
  * but then while you were processing one of the inp's
  * some other one that you switch will get a new
  * packet on the different CPU. It will insert it
- * on the new hptss input list. Creating a temporary
+ * on the new hpts's input list. Creating a temporary
  * link in the inp will not fix it either, since
  * the other hpts will be doing the same thing and
  * you will both end up using the temporary link.
@@ -1155,16 +1239,18 @@
 static void
 tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv)
 {
-	struct mbuf *m, *n;
 	struct tcpcb *tp;
 	struct inpcb *inp;
 	uint16_t drop_reason;
 	int16_t set_cpu;
 	uint32_t did_prefetch = 0;
-	int32_t ti_locked = TI_UNLOCKED;
+	int dropped;
 	struct epoch_tracker et;
 
 	HPTS_MTX_ASSERT(hpts);
+#ifndef VIMAGE
+	INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+#endif
 	while ((inp = TAILQ_FIRST(&hpts->p_input)) != NULL) {
 		HPTS_MTX_ASSERT(hpts);
 		hpts_sane_input_remove(hpts, inp, 0);
@@ -1177,26 +1263,22 @@
 		drop_reason = inp->inp_hpts_drop_reas;
 		inp->inp_in_input = 0;
 		mtx_unlock(&hpts->p_mtx);
-		CURVNET_SET(inp->inp_vnet);
-		if (drop_reason) {
-			INP_INFO_RLOCK_ET(&V_tcbinfo, et);
-			ti_locked = TI_RLOCKED;
-		} else {
-			ti_locked = TI_UNLOCKED;
-		}
 		INP_WLOCK(inp);
+#ifdef VIMAGE
+		CURVNET_SET(inp->inp_vnet);
+		INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+#endif
 		if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) ||
 		    (inp->inp_flags2 & INP_FREED)) {
 out:
 			hpts->p_inp = NULL;
-			if (ti_locked == TI_RLOCKED) {
-				INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
-			}
 			if (in_pcbrele_wlocked(inp) == 0) {
 				INP_WUNLOCK(inp);
 			}
-			ti_locked = TI_UNLOCKED;
+#ifdef VIMAGE
+			INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
 			CURVNET_RESTORE();
+#endif
 			mtx_lock(&hpts->p_mtx);
 			continue;
 		}
@@ -1206,26 +1288,17 @@
 		}
 		if (drop_reason) {
 			/* This tcb is being destroyed for drop_reason */
-			m = tp->t_in_pkt;
-			if (m)
-				n = m->m_nextpkt;
-			else
-				n = NULL;
-			tp->t_in_pkt = NULL;
-			while (m) {
-				m_freem(m);
-				m = n;
-				if (m)
-					n = m->m_nextpkt;
-			}
+			tcp_drop_in_pkts(tp);
 			tp = tcp_drop(tp, drop_reason);
-			INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
 			if (tp == NULL) {
 				INP_WLOCK(inp);
 			}
 			if (in_pcbrele_wlocked(inp) == 0)
 				INP_WUNLOCK(inp);
+#ifdef VIMAGE
+			INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
 			CURVNET_RESTORE();
+#endif
 			mtx_lock(&hpts->p_mtx);
 			continue;
 		}
@@ -1246,220 +1319,184 @@
 			 */
 			tcp_set_hpts(inp);
 		}
-		m = tp->t_in_pkt;
-		n = NULL;
-		if (m != NULL &&
-		    (m->m_pkthdr.pace_lock == TI_RLOCKED ||
-		    tp->t_state != TCPS_ESTABLISHED)) {
-			ti_locked = TI_RLOCKED;
-			INP_INFO_RLOCK_ET(&V_tcbinfo, et);
-			m = tp->t_in_pkt;
-		}
-		if (in_newts_every_tcb) {
-			if (in_ts_percision)
-				microuptime(tv);
-			else
-				getmicrouptime(tv);
-		}
 		if (tp->t_fb_ptr != NULL) {
 			kern_prefetch(tp->t_fb_ptr, &did_prefetch);
 			did_prefetch = 1;
 		}
-		/* Any input work to do, if so do it first */
-		if ((m != NULL) && (m == tp->t_in_pkt)) {
-			struct tcphdr *th;
-			int32_t tlen, drop_hdrlen, nxt_pkt;
-			uint8_t iptos;
-
-			n = m->m_nextpkt;
-			tp->t_in_pkt = tp->t_tail_pkt = NULL;
-			while (m) {
-				th = (struct tcphdr *)(mtod(m, caddr_t)+m->m_pkthdr.pace_thoff);
-				tlen = m->m_pkthdr.pace_tlen;
-				drop_hdrlen = m->m_pkthdr.pace_drphdrlen;
-				iptos = m->m_pkthdr.pace_tos;
-				m->m_nextpkt = NULL;
-				if (n)
-					nxt_pkt = 1;
-				else
-					nxt_pkt = 0;
-				inp->inp_input_calls = 1;
-				if (tp->t_fb->tfb_tcp_hpts_do_segment) {
-					/* Use the hpts specific do_segment */
-					(*tp->t_fb->tfb_tcp_hpts_do_segment) (m, th, inp->inp_socket,
-					    tp, drop_hdrlen,
-					    tlen, iptos, nxt_pkt, tv);
-				} else {
-					/* Use the default do_segment */
-					(*tp->t_fb->tfb_tcp_do_segment) (m, th, inp->inp_socket,
-					    tp, drop_hdrlen,
-						tlen, iptos);
-				}
-				if (ti_locked == TI_RLOCKED)
-					INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
-				/*
-				 * Do segment returns unlocked we need the
-				 * lock again but we also need some kasserts
-				 * here.
-				 */
-				INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
-				INP_UNLOCK_ASSERT(inp);
-				m = n;
-				if (m)
-					n = m->m_nextpkt;
-				if (m != NULL &&
-				    m->m_pkthdr.pace_lock == TI_RLOCKED) {
-					INP_INFO_RLOCK_ET(&V_tcbinfo, et);
-					ti_locked = TI_RLOCKED;
-				} else
-					ti_locked = TI_UNLOCKED;
+		if ((inp->inp_flags2 & INP_SUPPORTS_MBUFQ) && tp->t_in_pkt) {
+			if (inp->inp_in_input)
+				tcp_hpts_remove(inp, HPTS_REMOVE_INPUT);
+			dropped = (*tp->t_fb->tfb_do_queued_segments)(inp->inp_socket, tp, 0);
+			if (dropped) {
+				/* Re-acquire the wlock so we can release the reference */
 				INP_WLOCK(inp);
-				/*
-				 * Since we have an opening here we must
-				 * re-check if the tcb went away while we
-				 * were getting the lock(s).
-				 */
-				if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) ||
-				    (inp->inp_flags2 & INP_FREED)) {
-					while (m) {
-						m_freem(m);
-						m = n;
-						if (m)
-							n = m->m_nextpkt;
-					}
-					goto out;
-				}
-				/*
-				 * Now that we hold the INP lock, check if
-				 * we need to upgrade our lock.
-				 */
-				if (ti_locked == TI_UNLOCKED &&
-				    (tp->t_state != TCPS_ESTABLISHED)) {
-					ti_locked = TI_RLOCKED;
-					INP_INFO_RLOCK_ET(&V_tcbinfo, et);
-				}
-			}	/** end while(m) */
-		}		/** end if ((m != NULL)  && (m == tp->t_in_pkt)) */
+			}
+		} else if (tp->t_in_pkt) {
+			/* 
+			 * We reach here only if we had a 
+			 * stack that supported INP_SUPPORTS_MBUFQ
+			 * and then somehow switched to a stack that
+			 * does not. The packets are basically stranded
+			 * and would hang with the connection until
+			 * cleanup without this code. Its not the
+			 * best way but I know of no other way to
+			 * handle it since the stack needs functions
+			 * it does not have to handle queued packets.
+			 */
+			tcp_drop_in_pkts(tp);
+		}
 		if (in_pcbrele_wlocked(inp) == 0)
 			INP_WUNLOCK(inp);
-		if (ti_locked == TI_RLOCKED)
-			INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
-		INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
 		INP_UNLOCK_ASSERT(inp);
-		ti_locked = TI_UNLOCKED;
+#ifdef VIMAGE
+		INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+		CURVNET_RESTORE();
+#endif
 		mtx_lock(&hpts->p_mtx);
 		hpts->p_inp = NULL;
-		CURVNET_RESTORE();
 	}
+#ifndef VIMAGE
+	INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+#endif
 }
 
-static int
-tcp_hpts_est_run(struct tcp_hpts_entry *hpts)
-{
-	int32_t ticks_to_run;
-
-	if (hpts->p_prevtick && (SEQ_GT(hpts->p_curtick, hpts->p_prevtick))) {
-		ticks_to_run = hpts->p_curtick - hpts->p_prevtick;
-		if (ticks_to_run >= (NUM_OF_HPTSI_SLOTS - 1)) {
-			ticks_to_run = NUM_OF_HPTSI_SLOTS - 2;
-		}
-	} else {
-		if (hpts->p_prevtick == hpts->p_curtick) {
-			/* This happens when we get woken up right away */
-			return (-1);
-		}
-		ticks_to_run = 1;
-	}
-	/* Set in where we will be when we catch up */
-	hpts->p_nxt_slot = (hpts->p_cur_slot + ticks_to_run) % NUM_OF_HPTSI_SLOTS;
-	if (hpts->p_nxt_slot == hpts->p_cur_slot) {
-		panic("Impossible math -- hpts:%p p_nxt_slot:%d p_cur_slot:%d ticks_to_run:%d",
-		    hpts, hpts->p_nxt_slot, hpts->p_cur_slot, ticks_to_run);
-	}
-	return (ticks_to_run);
-}
-
 static void
-tcp_hptsi(struct tcp_hpts_entry *hpts, struct timeval *ctick)
+tcp_hptsi(struct tcp_hpts_entry *hpts)
 {
+	struct epoch_tracker et;
 	struct tcpcb *tp;
 	struct inpcb *inp = NULL, *ninp;
 	struct timeval tv;
-	int32_t ticks_to_run, i, error, tick_now, interum_tick;
+	int32_t ticks_to_run, i, error;
 	int32_t paced_cnt = 0;
+	int32_t loop_cnt = 0;
 	int32_t did_prefetch = 0;
 	int32_t prefetch_ninp = 0;
 	int32_t prefetch_tp = 0;
-	uint32_t cts;
+	int32_t wrap_loop_cnt = 0;
 	int16_t set_cpu;
 
 	HPTS_MTX_ASSERT(hpts);
-	hpts->p_curtick = tcp_tv_to_hptstick(ctick);
-	cts = tcp_tv_to_usectick(ctick);
-	memcpy(&tv, ctick, sizeof(struct timeval));
-	hpts->p_cur_slot = hpts_tick(hpts, 1);
+	/* record previous info for any logging */
+	hpts->saved_lasttick = hpts->p_lasttick;
+	hpts->saved_curtick = hpts->p_curtick;
+	hpts->saved_curslot = hpts->p_cur_slot;
+	hpts->saved_prev_slot = hpts->p_prev_slot;
 
-	/* Figure out if we had missed ticks */
+	hpts->p_lasttick = hpts->p_curtick;
+	hpts->p_curtick = tcp_gethptstick(&tv);
+	hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
+	if ((hpts->p_on_queue_cnt == 0) ||
+	    (hpts->p_lasttick == hpts->p_curtick)) {
+		/* 
+		 * No time has yet passed, 
+		 * or nothing to do.
+		 */
+		hpts->p_prev_slot = hpts->p_cur_slot;
+		hpts->p_lasttick = hpts->p_curtick;
+		goto no_run;
+	}
 again:
+	hpts->p_wheel_complete = 0;
 	HPTS_MTX_ASSERT(hpts);
-	ticks_to_run = tcp_hpts_est_run(hpts);
-	if (!TAILQ_EMPTY(&hpts->p_input)) {
-		tcp_input_data(hpts, &tv);
+	ticks_to_run = hpts_ticks_diff(hpts->p_prev_slot, hpts->p_cur_slot);
+	if (((hpts->p_curtick - hpts->p_lasttick) > ticks_to_run) &&
+	    (hpts->p_on_queue_cnt != 0)) {
+		/* 
+		 * Wheel wrap is occurring, basically we
+		 * are behind and the distance between
+		 * runs has spread so much it has exceeded
+		 * the time on the wheel (1.024 seconds). This
+		 * is ugly and should NOT be happening. We
+		 * need to run the entire wheel. We last processed
+		 * p_prev_slot, so that needs to be the last slot
+		 * we run. The next slot after that should be our
+		 * reserved first slot for new, and then starts
+		 * the running position. Now the problem is the
+		 * reserved "not to yet" place does not exist
+		 * and there may be inp's in there that need
+		 * running. We can merge those into the
+		 * first slot at the head.
+		 */
+		wrap_loop_cnt++;
+		hpts->p_nxt_slot = hpts_tick(hpts->p_prev_slot, 1);
+		hpts->p_runningtick = hpts_tick(hpts->p_prev_slot, 2);
+		/* 
+		 * Adjust p_cur_slot to be where we are starting from
+		 * hopefully we will catch up (fat chance if something
+		 * is broken this bad :( )
+		 */
+		hpts->p_cur_slot = hpts->p_prev_slot;
+		/*
+		 * The next slot has guys to run too, and that would
+		 * be where we would normally start, lets move them into
+		 * the next slot (p_prev_slot + 2) so that we will
+		 * run them, the extra 10usecs of late (by being
+		 * put behind) does not really matter in this situation.
+		 */
+#ifdef INVARIANTS
+		/* 
+		 * To prevent a panic we need to update the inpslot to the
+		 * new location. This is safe since it takes both the
+		 * INP lock and the pacer mutex to change the inp_hptsslot.
+		 */
+		TAILQ_FOREACH(inp, &hpts->p_hptss[hpts->p_nxt_slot], inp_hpts) {
+			inp->inp_hptsslot = hpts->p_runningtick;
+		}
+#endif
+		TAILQ_CONCAT(&hpts->p_hptss[hpts->p_runningtick],
+			     &hpts->p_hptss[hpts->p_nxt_slot], inp_hpts);
+		ticks_to_run = NUM_OF_HPTSI_SLOTS - 1;
+		counter_u64_add(wheel_wrap, 1);
+	} else {
+		/* 
+		 * Nxt slot is always one after p_runningtick though
+		 * its not used usually unless we are doing wheel wrap.
+		 */
+		hpts->p_nxt_slot = hpts->p_prev_slot;
+		hpts->p_runningtick = hpts_tick(hpts->p_prev_slot, 1);
 	}
 #ifdef INVARIANTS
 	if (TAILQ_EMPTY(&hpts->p_input) &&
 	    (hpts->p_on_inqueue_cnt != 0)) {
 		panic("tp:%p in_hpts input empty but cnt:%d",
-		    hpts, hpts->p_on_inqueue_cnt);
+		      hpts, hpts->p_on_inqueue_cnt);
 	}
 #endif
 	HPTS_MTX_ASSERT(hpts);
-	/* Reset the ticks to run and time if we need too */
-	interum_tick = tcp_gethptstick(&tv);
-	if (interum_tick != hpts->p_curtick) {
-		/* Save off the new time we execute to */
-		*ctick = tv;
-		hpts->p_curtick = interum_tick;
-		cts = tcp_tv_to_usectick(&tv);
-		hpts->p_cur_slot = hpts_tick(hpts, 1);
-		ticks_to_run = tcp_hpts_est_run(hpts);
-	}
-	if (ticks_to_run == -1) {
-		goto no_run;
-	}
-	if (logging_on) {
-		tcp_hpts_log_it(hpts, inp, HPTSLOG_SETTORUN, ticks_to_run, 0);
-	}
 	if (hpts->p_on_queue_cnt == 0) {
 		goto no_one;
 	}
 	HPTS_MTX_ASSERT(hpts);
+#ifndef VIMAGE
+	INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+#endif
 	for (i = 0; i < ticks_to_run; i++) {
 		/*
 		 * Calculate our delay, if there are no extra ticks there
-		 * was not any
+		 * was not any (i.e. if ticks_to_run == 1, no delay).
 		 */
 		hpts->p_delayed_by = (ticks_to_run - (i + 1)) * HPTS_TICKS_PER_USEC;
 		HPTS_MTX_ASSERT(hpts);
-		while ((inp = TAILQ_FIRST(&hpts->p_hptss[hpts->p_cur_slot])) != NULL) {
+		while ((inp = TAILQ_FIRST(&hpts->p_hptss[hpts->p_runningtick])) != NULL) {
 			/* For debugging */
-			if (logging_on) {
-				tcp_hpts_log_it(hpts, inp, HPTSLOG_HPTSI, ticks_to_run, i);
-			}
 			hpts->p_inp = inp;
 			paced_cnt++;
-			if (hpts->p_cur_slot != inp->inp_hptsslot) {
+#ifdef INVARIANTS
+			if (hpts->p_runningtick != inp->inp_hptsslot) {
 				panic("Hpts:%p inp:%p slot mis-aligned %u vs %u",
-				    hpts, inp, hpts->p_cur_slot, inp->inp_hptsslot);
+				      hpts, inp, hpts->p_runningtick, inp->inp_hptsslot);
 			}
+#endif
 			/* Now pull it */
 			if (inp->inp_hpts_cpu_set == 0) {
 				set_cpu = 1;
 			} else {
 				set_cpu = 0;
 			}
-			hpts_sane_pace_remove(hpts, inp, &hpts->p_hptss[hpts->p_cur_slot], 0);
-			if ((ninp = TAILQ_FIRST(&hpts->p_hptss[hpts->p_cur_slot])) != NULL) {
+			hpts_sane_pace_remove(hpts, inp, &hpts->p_hptss[hpts->p_runningtick], 0);
+			if ((ninp = TAILQ_FIRST(&hpts->p_hptss[hpts->p_runningtick])) != NULL) {
 				/* We prefetch the next inp if possible */
 				kern_prefetch(ninp, &prefetch_ninp);
 				prefetch_ninp = 1;
@@ -1467,25 +1504,36 @@
 			if (inp->inp_hpts_request) {
 				/*
 				 * This guy is deferred out further in time
-				 * then our wheel had on it. Push him back
-				 * on the wheel.
+				 * than our wheel had available on it.
+				 * Push him back on the wheel or run it
+				 * depending.
 				 */
-				int32_t remaining_slots;
-
+				uint32_t maxticks, last_tick, remaining_slots;
+				
 				remaining_slots = ticks_to_run - (i + 1);
 				if (inp->inp_hpts_request > remaining_slots) {
 					/*
-					 * Keep INVARIANTS happy by clearing
-					 * the flag
+					 * How far out can we go?
 					 */
-					tcp_hpts_insert_locked(hpts, inp, inp->inp_hpts_request, cts, __LINE__, NULL, 1);
+					maxticks = max_ticks_available(hpts, hpts->p_cur_slot, &last_tick);
+					if (maxticks >= inp->inp_hpts_request) {
+						/* we can place it finally to be processed  */
+						inp->inp_hptsslot = hpts_tick(hpts->p_runningtick, inp->inp_hpts_request);
+						inp->inp_hpts_request = 0;
+					} else {
+						/* Work off some more time */
+						inp->inp_hptsslot = last_tick;
+						inp->inp_hpts_request-= maxticks;
+					}
+					hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], __LINE__, 1);
 					hpts->p_inp = NULL;
 					continue;
 				}
 				inp->inp_hpts_request = 0;
+				/* Fall through, we will do it now */
 			}
 			/*
-			 * We clear the hpts flag here after dealing with
+			 * We clear the hpts flag here after dealing with	
 			 * remaining slots. This way anyone looking with the
 			 * TCB lock will see its on the hpts until just
 			 * before we unlock.
@@ -1495,23 +1543,20 @@
 			INP_WLOCK(inp);
 			if (in_pcbrele_wlocked(inp)) {
 				mtx_lock(&hpts->p_mtx);
-				if (logging_on)
-					tcp_hpts_log_it(hpts, hpts->p_inp, HPTSLOG_INP_DONE, 0, 1);
 				hpts->p_inp = NULL;
 				continue;
 			}
-			if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
-out_now:
+			if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) ||
+			    (inp->inp_flags2 & INP_FREED)) {
+			out_now:
 #ifdef INVARIANTS
 				if (mtx_owned(&hpts->p_mtx)) {
 					panic("Hpts:%p owns mtx prior-to lock line:%d",
-					    hpts, __LINE__);
+					      hpts, __LINE__);
 				}
 #endif
 				INP_WUNLOCK(inp);
 				mtx_lock(&hpts->p_mtx);
-				if (logging_on)
-					tcp_hpts_log_it(hpts, hpts->p_inp, HPTSLOG_INP_DONE, 0, 3);
 				hpts->p_inp = NULL;
 				continue;
 			}
@@ -1539,16 +1584,14 @@
 				 */
 				tcp_set_hpts(inp);
 			}
-			if (out_newts_every_tcb) {
-				struct timeval sv;
-
-				if (out_ts_percision)
-					microuptime(&sv);
-				else
-					getmicrouptime(&sv);
-				cts = tcp_tv_to_usectick(&sv);
-			}
+#ifdef VIMAGE
 			CURVNET_SET(inp->inp_vnet);
+			INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+#endif
+			/* Lets do any logging that we might want to */
+			if (hpts_does_tp_logging && (tp->t_logstate != TCP_LOG_STATE_OFF)) {
+				tcp_hpts_log(hpts, tp, &tv, ticks_to_run, i);
+			}
 			/*
 			 * There is a hole here, we get the refcnt on the
 			 * inp so it will still be preserved but to make
@@ -1560,19 +1603,23 @@
 #ifdef INVARIANTS
 			if (mtx_owned(&hpts->p_mtx)) {
 				panic("Hpts:%p owns mtx before tcp-output:%d",
-				    hpts, __LINE__);
+				      hpts, __LINE__);
 			}
 #endif
 			if (tp->t_fb_ptr != NULL) {
 				kern_prefetch(tp->t_fb_ptr, &did_prefetch);
 				did_prefetch = 1;
 			}
-			inp->inp_hpts_calls = 1;
-			if (tp->t_fb->tfb_tcp_output_wtime != NULL) {
-				error = (*tp->t_fb->tfb_tcp_output_wtime) (tp, &tv);
-			} else {
-				error = tp->t_fb->tfb_tcp_output(tp);
+			if ((inp->inp_flags2 & INP_SUPPORTS_MBUFQ) && tp->t_in_pkt) {
+				error = (*tp->t_fb->tfb_do_queued_segments)(inp->inp_socket, tp, 0);
+				if (error) {
+					/* The input killed the connection */
+					goto skip_pacing;
+				}
 			}
+			inp->inp_hpts_calls = 1;
+			error = tp->t_fb->tfb_tcp_output(tp);
+			inp->inp_hpts_calls = 0;
 			if (ninp && ninp->inp_ppcb) {
 				/*
 				 * If we have a nxt inp, see if we can
@@ -1609,74 +1656,112 @@
 				prefetch_tp = 1;
 			}
 			INP_WUNLOCK(inp);
-			INP_UNLOCK_ASSERT(inp);
+		skip_pacing:
+#ifdef VIMAGE
+			INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
 			CURVNET_RESTORE();
+#endif
+			INP_UNLOCK_ASSERT(inp);
 #ifdef INVARIANTS
 			if (mtx_owned(&hpts->p_mtx)) {
 				panic("Hpts:%p owns mtx prior-to lock line:%d",
-				    hpts, __LINE__);
+				      hpts, __LINE__);
 			}
 #endif
 			mtx_lock(&hpts->p_mtx);
-			if (logging_on)
-				tcp_hpts_log_it(hpts, hpts->p_inp, HPTSLOG_INP_DONE, 0, 4);
 			hpts->p_inp = NULL;
 		}
 		HPTS_MTX_ASSERT(hpts);
 		hpts->p_inp = NULL;
-		hpts->p_cur_slot++;
-		if (hpts->p_cur_slot >= NUM_OF_HPTSI_SLOTS) {
-			hpts->p_cur_slot = 0;
+		hpts->p_runningtick++;
+		if (hpts->p_runningtick >= NUM_OF_HPTSI_SLOTS) {
+			hpts->p_runningtick = 0;
 		}
 	}
+#ifndef VIMAGE
+	INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+#endif
 no_one:
 	HPTS_MTX_ASSERT(hpts);
-	hpts->p_prevtick = hpts->p_curtick;
 	hpts->p_delayed_by = 0;
 	/*
 	 * Check to see if we took an excess amount of time and need to run
 	 * more ticks (if we did not hit eno-bufs).
 	 */
-	/* Re-run any input that may be there */
-	(void)tcp_gethptstick(&tv);
-	if (!TAILQ_EMPTY(&hpts->p_input)) {
-		tcp_input_data(hpts, &tv);
-	}
 #ifdef INVARIANTS
 	if (TAILQ_EMPTY(&hpts->p_input) &&
 	    (hpts->p_on_inqueue_cnt != 0)) {
 		panic("tp:%p in_hpts input empty but cnt:%d",
-		    hpts, hpts->p_on_inqueue_cnt);
+		      hpts, hpts->p_on_inqueue_cnt);
 	}
 #endif
-	tick_now = tcp_gethptstick(&tv);
-	if (SEQ_GT(tick_now, hpts->p_prevtick)) {
-		struct timeval res;
-
-		/* Did we really spend a full tick or more in here? */
-		timersub(&tv, ctick, &res);
-		if (res.tv_sec || (res.tv_usec >= HPTS_TICKS_PER_USEC)) {
+	hpts->p_prev_slot = hpts->p_cur_slot;
+	hpts->p_lasttick = hpts->p_curtick;
+	if (loop_cnt > max_pacer_loops) {	    
+		/*
+		 * Something is seriously slow; we have
+		 * looped through processing the wheel
+		 * and by the time we cleared the
+		 * needs to run max_pacer_loops time
+		 * we still needed to run. That means
+		 * the system is hopelessly behind and
+		 * can never catch up :(
+		 *
+		 * We will just lie to this thread
+		 * and let it think p_curtick is
+		 * correct. When it next awakens
+		 * it will find itself further behind.
+		 */
+		counter_u64_add(hpts_hopelessly_behind, 1);
+		goto no_run;
+	}
+	hpts->p_curtick = tcp_gethptstick(&tv);
+	hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
+	if ((wrap_loop_cnt < 2) &&
+	    (hpts->p_lasttick != hpts->p_curtick)) {
+		counter_u64_add(hpts_loops, 1);
+		loop_cnt++;
+		goto again;
+	}
+no_run:
+	/*
+	 * Set flag to tell that we are done for
+	 * any slot input that happens during
+	 * input.
+	 */
+	hpts->p_wheel_complete = 1;
+	/* 
+	 * Run any input that may be there not covered
+	 * in running data.
+	 */
+	if (!TAILQ_EMPTY(&hpts->p_input)) {
+		tcp_input_data(hpts, &tv);
+		/*
+		 * Now did we spend too long running
+		 * input and need to run more ticks?
+		 */
+		KASSERT(hpts->p_prev_slot == hpts->p_cur_slot,
+			("H:%p p_prev_slot:%u not equal to p_cur_slot:%u", hpts,
+			 hpts->p_prev_slot, hpts->p_cur_slot));
+		KASSERT(hpts->p_lasttick == hpts->p_curtick,
+			("H:%p p_lasttick:%u not equal to p_curtick:%u", hpts,
+			 hpts->p_lasttick, hpts->p_curtick));
+		hpts->p_curtick = tcp_gethptstick(&tv);
+		if (hpts->p_lasttick != hpts->p_curtick) {
 			counter_u64_add(hpts_loops, 1);
-			if (logging_on) {
-				tcp_hpts_log_it(hpts, inp, HPTSLOG_TOLONG, (uint32_t) res.tv_usec, tick_now);
-			}
-			*ctick = res;
-			hpts->p_curtick = tick_now;
+			hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
 			goto again;
 		}
 	}
-no_run:
 	{
 		uint32_t t = 0, i, fnd = 0;
 
-		if (hpts->p_on_queue_cnt) {
-
-
+		if ((hpts->p_on_queue_cnt) && (wrap_loop_cnt < 2)) {
 			/*
 			 * Find next slot that is occupied and use that to
 			 * be the sleep time.
 			 */
-			for (i = 1, t = hpts->p_nxt_slot; i < NUM_OF_HPTSI_SLOTS; i++) {
+			for (i = 0, t = hpts_tick(hpts->p_cur_slot, 1); i < NUM_OF_HPTSI_SLOTS; i++) {
 				if (TAILQ_EMPTY(&hpts->p_hptss[t]) == 0) {
 					fnd = 1;
 					break;
@@ -1684,27 +1769,23 @@
 				t = (t + 1) % NUM_OF_HPTSI_SLOTS;
 			}
 			if (fnd) {
-				hpts->p_hpts_sleep_time = i;
+				hpts->p_hpts_sleep_time = min((i + 1), hpts_sleep_max);
 			} else {
-				counter_u64_add(back_tosleep, 1);
 #ifdef INVARIANTS
-				panic("Hpts:%p cnt:%d but non found", hpts, hpts->p_on_queue_cnt);
+				panic("Hpts:%p cnt:%d but none found", hpts, hpts->p_on_queue_cnt);
 #endif
+				counter_u64_add(back_tosleep, 1);
 				hpts->p_on_queue_cnt = 0;
 				goto non_found;
 			}
-			t++;
+		} else if (wrap_loop_cnt >= 2) {
+			/* Special case handling */
+			hpts->p_hpts_sleep_time = tcp_min_hptsi_time;
 		} else {
-			/* No one on the wheel sleep for all but 2 slots  */
-non_found:
-			if (hpts_sleep_max == 0)
-				hpts_sleep_max = 1;
-			hpts->p_hpts_sleep_time = min((NUM_OF_HPTSI_SLOTS - 2), hpts_sleep_max);
-			t = 0;
+			/* No one is on the wheel, so sleep for the maximum (hpts_sleep_max) */
+		non_found:
+			hpts->p_hpts_sleep_time = hpts_sleep_max;
 		}
-		if (logging_on) {
-			tcp_hpts_log_it(hpts, inp, HPTSLOG_SLEEPSET, t, (hpts->p_hpts_sleep_time * HPTS_TICKS_PER_USEC));
-		}
 	}
 }
 
@@ -1746,33 +1827,29 @@
 	mtx_lock(&hpts->p_mtx);
 	if (hpts->p_direct_wake) {
 		/* Signaled by input */
-		if (logging_on)
-			tcp_hpts_log_it(hpts, NULL, HPTSLOG_AWAKE, 1, 1);
 		callout_stop(&hpts->co);
 	} else {
 		/* Timed out */
 		if (callout_pending(&hpts->co) ||
 		    !callout_active(&hpts->co)) {
-			if (logging_on)
-				tcp_hpts_log_it(hpts, NULL, HPTSLOG_AWAKE, 2, 2);
 			mtx_unlock(&hpts->p_mtx);
 			return;
 		}
 		callout_deactivate(&hpts->co);
-		if (logging_on)
-			tcp_hpts_log_it(hpts, NULL, HPTSLOG_AWAKE, 3, 3);
 	}
+	hpts->p_hpts_wake_scheduled = 0;
 	hpts->p_hpts_active = 1;
-	(void)tcp_gethptstick(&tv);
-	tcp_hptsi(hpts, &tv);
+	tcp_hptsi(hpts);
 	HPTS_MTX_ASSERT(hpts);
 	tv.tv_sec = 0;
 	tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_TICKS_PER_USEC;
 	if (tcp_min_hptsi_time && (tv.tv_usec < tcp_min_hptsi_time)) {
+		hpts->overidden_sleep = tv.tv_usec;
 		tv.tv_usec = tcp_min_hptsi_time;
 		hpts->p_on_min_sleep = 1;
 	} else {
 		/* Clear the min sleep flag */
+		hpts->overidden_sleep = 0;
 		hpts->p_on_min_sleep = 0;
 	}
 	hpts->p_hpts_active = 0;
@@ -1809,9 +1886,11 @@
 
 	tcp_pace.rp_proc = NULL;
 	tcp_pace.rp_num_hptss = ncpus;
+	hpts_hopelessly_behind = counter_u64_alloc(M_WAITOK);
 	hpts_loops = counter_u64_alloc(M_WAITOK);
 	back_tosleep = counter_u64_alloc(M_WAITOK);
-
+	combined_wheel_wrap = counter_u64_alloc(M_WAITOK);
+	wheel_wrap = counter_u64_alloc(M_WAITOK);
 	sz = (tcp_pace.rp_num_hptss * sizeof(struct tcp_hpts_entry *));
 	tcp_pace.rp_ent = malloc(sz, M_TCPHPTS, M_WAITOK | M_ZERO);
 	asz = sizeof(struct hptsh) * NUM_OF_HPTSI_SLOTS;
@@ -1850,7 +1929,7 @@
 		    OID_AUTO, "out_qcnt", CTLFLAG_RD,
 		    &hpts->p_on_queue_cnt, 0,
 		    "Count TCB's awaiting output processing");
-		SYSCTL_ADD_UINT(&hpts->hpts_ctx,
+		SYSCTL_ADD_U16(&hpts->hpts_ctx,
 		    SYSCTL_CHILDREN(hpts->hpts_root),
 		    OID_AUTO, "active", CTLFLAG_RD,
 		    &hpts->p_hpts_active, 0,
@@ -1859,29 +1938,23 @@
 		    SYSCTL_CHILDREN(hpts->hpts_root),
 		    OID_AUTO, "curslot", CTLFLAG_RD,
 		    &hpts->p_cur_slot, 0,
-		    "What the current slot is if active");
+		    "What the current running pacers goal");
 		SYSCTL_ADD_UINT(&hpts->hpts_ctx,
 		    SYSCTL_CHILDREN(hpts->hpts_root),
-		    OID_AUTO, "curtick", CTLFLAG_RD,
-		    &hpts->p_curtick, 0,
-		    "What the current tick on if active");
+		    OID_AUTO, "runtick", CTLFLAG_RD,
+		    &hpts->p_runningtick, 0,
+		    "What the running pacers current slot is");
 		SYSCTL_ADD_UINT(&hpts->hpts_ctx,
 		    SYSCTL_CHILDREN(hpts->hpts_root),
-		    OID_AUTO, "logsize", CTLFLAG_RD,
-		    &hpts->p_logsize, 0,
-		    "Hpts logging buffer size");
-		hpts->p_hpts_sleep_time = NUM_OF_HPTSI_SLOTS - 2;
+		    OID_AUTO, "curtick", CTLFLAG_RD,
+		    &hpts->p_curtick, 0,
+		    "What the running pacers last tick mapped to the wheel was");
+		hpts->p_hpts_sleep_time = hpts_sleep_max;
 		hpts->p_num = i;
-		hpts->p_prevtick = hpts->p_curtick = tcp_gethptstick(&tv);
-		hpts->p_prevtick -= 1;
-		hpts->p_prevtick %= NUM_OF_HPTSI_SLOTS;
+		hpts->p_curtick = tcp_gethptstick(&tv);
+		hpts->p_prev_slot = hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
 		hpts->p_cpu = 0xffff;
-		hpts->p_nxt_slot = 1;
-		hpts->p_logsize = tcp_hpts_logging_size;
-		if (hpts->p_logsize) {
-			sz = (sizeof(struct hpts_log) * hpts->p_logsize);
-			hpts->p_log = malloc(sz, M_TCPHPTS, M_WAITOK | M_ZERO);
-		}
+		hpts->p_nxt_slot = hpts_tick(hpts->p_cur_slot, 1);
 		callout_init(&hpts->co, 1);
 	}
 
Index: head/sys/netinet/tcp_log_buf.h
===================================================================
--- head/sys/netinet/tcp_log_buf.h
+++ head/sys/netinet/tcp_log_buf.h
@@ -175,7 +175,7 @@
 	TCP_LOG_BAD_RETRAN, /* Detected bad retransmission 5 */
 	TCP_LOG_PRR,	/* Doing PRR                       6 */
 	TCP_LOG_REORDER,/* Detected reorder                7 */
-	TCP_LOG_PACER,	/* Pacer sending a packet          8 */
+	TCP_LOG_HPTS,	/* Hpts sending a packet          8 */
 	BBR_LOG_BBRUPD,		/* We updated BBR info     9 */
 	BBR_LOG_BBRSND,		/* We did a slot calculation and sending is done 10 */
 	BBR_LOG_ACKCLEAR,	/* A ack clears all outstanding     11 */
@@ -194,31 +194,38 @@
 	BBR_LOG_PERSIST,        /* BBR changed to/from a persists   24 */
 	TCP_LOG_FLOWEND,        /* End of a flow                    25 */
 	BBR_LOG_RTO,            /* BBR's timeout includes BBR info  26 */
-	BBR_LOG_DOSEG_DONE,     /* pacer do_segment completes       27 */
-	BBR_LOG_EXIT_GAIN,      /* pacer do_segment completes       28 */
+	BBR_LOG_DOSEG_DONE,     /* hpts do_segment completes       27 */
+	BBR_LOG_EXIT_GAIN,      /* hpts do_segment completes       28 */
 	BBR_LOG_THRESH_CALC,    /* Doing threshold calculation      29 */
 	BBR_LOG_EXTRACWNDGAIN,	/* Removed                          30 */
 	TCP_LOG_USERSEND, 	/* User level sends data            31 */
-	UNUSED_32,	 	/* Unused                           32 */
-	UNUSED_33, 		/* Unused                           33 */
+	BBR_RSM_CLEARED,	/* RSM cleared of ACK flags         32 */
+	BBR_LOG_STATE_TARGET, 	/* Log of target at state           33 */
 	BBR_LOG_TIME_EPOCH, 	/* A timed based Epoch occured      34 */
 	BBR_LOG_TO_PROCESS,	/* A to was processed               35 */
 	BBR_LOG_BBRTSO, 	/* TSO update	                    36 */
-	BBR_LOG_PACERDIAG,	/* Pacer diag insert                37 */
+	BBR_LOG_HPTSDIAG,	/* Hpts diag insert                37 */
 	BBR_LOG_LOWGAIN,	/* Low gain accounting              38 */
 	BBR_LOG_PROGRESS,	/* Progress timer event             39 */
 	TCP_LOG_SOCKET_OPT,	/* A socket option is set	    40 */
 	BBR_LOG_TIMERPREP,	/* A BBR var to debug out TLP issues  41 */
 	BBR_LOG_ENOBUF_JMP,	/* We had a enobuf jump 42 */
-	BBR_LOG_PACING_CALC,	/* calc the pacing time 43 */
+	BBR_LOG_HPTSI_CALC,	/* calc the hptsi time 43 */
 	BBR_LOG_RTT_SHRINKS,	/* We had a log reduction of rttProp 44 */
 	BBR_LOG_BW_RED_EV,	/* B/W reduction events 45 */
 	BBR_LOG_REDUCE,		/* old bbr log reduce for 4.1 and earlier 46*/
 	TCP_LOG_RTT,		/* A rtt (in useconds) is being sampled and applied to the srtt algo 47 */
 	BBR_LOG_SETTINGS_CHG,   /* Settings changed for loss response 48 */
-	BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining 49 */
+	BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining -- now not used 49 */
 	TCP_LOG_REASS,		/* Reassembly buffer logging 50 */
-	TCP_LOG_END		/* End (keep at end)	            51 */
+	TCP_HDWR_TLS,		/* TCP Hardware TLS logs 51 */
+	BBR_LOG_HDWR_PACE,	/* TCP Hardware pacing log 52 */
+	BBR_LOG_TSTMP_VAL,	/* Temp debug timestamp validation 53 */
+	TCP_LOG_CONNEND,	/* End of connection 54 */
+	TCP_LOG_LRO,		/* LRO entry 55 */
+	TCP_SACK_FILTER_RES,	/* Results of SACK Filter 56 */
+	TCP_SAD_DETECTION,	/* Sack Attack Detection 57 */
+	TCP_LOG_END		/* End (keep at end)	   58 */
 };
 
 enum tcp_log_states {
@@ -275,8 +282,8 @@
 
 #ifdef _KERNEL
 
-#define	TCP_LOG_BUF_DEFAULT_SESSION_LIMIT	10000
-#define	TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT	1000000
+#define	TCP_LOG_BUF_DEFAULT_SESSION_LIMIT	5000
+#define	TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT	5000000
 
 /*
  * TCP_LOG_EVENT_VERBOSE: The same as TCP_LOG_EVENT, except it always
Index: head/sys/netinet/tcp_stacks/rack.c
===================================================================
--- head/sys/netinet/tcp_stacks/rack.c
+++ head/sys/netinet/tcp_stacks/rack.c
@@ -1,5 +1,6 @@
 /*-
- * Copyright (c) 2016-2019 Netflix, Inc.
+ * Copyright (c) 2016
+ *	Netflix Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -44,12 +45,16 @@
 #include <sys/mutex.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>		/* for proc0 declaration */
+#ifdef NETFLIX_STATS
+#include <sys/qmath.h>
+#endif
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
+#include <sys/tree.h>
 #ifdef NETFLIX_STATS
-#include <sys/stats.h>
+#include <sys/stats.h> /* Must come after qmath.h and tree.h */
 #endif
 #include <sys/refcount.h>
 #include <sys/queue.h>
@@ -74,8 +79,8 @@
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
-#include <netinet/tcp.h>
 #define	TCPOUTFLAGS
+#include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_log_buf.h>
 #include <netinet/tcp_seq.h>
@@ -84,9 +89,6 @@
 #include <netinet/tcp_hpts.h>
 #include <netinet/tcpip.h>
 #include <netinet/cc/cc.h>
-#ifdef NETFLIX_CWV
-#include <netinet/tcp_newcwv.h>
-#endif
 #include <netinet/tcp_fastopen.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
@@ -126,6 +128,10 @@
 struct sysctl_ctx_list rack_sysctl_ctx;
 struct sysctl_oid *rack_sysctl_root;
 
+#ifndef TCPHPTS
+fatal error missing option TCPHPTS in the build;
+#endif
+
 #define CUM_ACKED 1
 #define SACKED 2
 
@@ -178,6 +184,9 @@
 static int32_t rack_tlp_in_recovery = 1;	/* Can we do TLP in recovery? */
 static int32_t rack_verbose_logging = 0;
 static int32_t rack_ignore_data_after_close = 1;
+static int32_t rack_map_entries_limit = 1024;
+static int32_t rack_map_split_limit = 256;
+
 /*
  * Currently regular tcp has a rto_min of 30ms
  * the backoff goes 12 times so that ends up
@@ -202,7 +211,6 @@
 static int32_t rack_sack_block_limit = 128;
 static int32_t rack_use_sack_filter = 1;
 static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE;
-static uint32_t rack_map_split_limit = 0;	/* unlimited by default */
 
 /* Rack specific counters */
 counter_u64_t rack_badfr;
@@ -228,6 +236,7 @@
 counter_u64_t rack_to_alloc;
 counter_u64_t rack_to_alloc_hard;
 counter_u64_t rack_to_alloc_emerg;
+counter_u64_t rack_to_alloc_limited;
 counter_u64_t rack_alloc_limited_conns;
 counter_u64_t rack_split_limited;
 
@@ -248,12 +257,21 @@
 counter_u64_t rack_out_size[TCP_MSS_ACCT_SIZE];
 counter_u64_t rack_opts_arry[RACK_OPTS_SIZE];
 
+/*
+ * This was originally defined in tcp_timer.c, but is now reproduced here given
+ * the unification of the SYN and non-SYN retransmit timer exponents combined
+ * with wanting to retain previous behaviour for previously deployed stack
+ * versions.
+ */
+int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
+    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
+
 static void
 rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick,  int event, int line);
 
 static int
 rack_process_ack(struct mbuf *m, struct tcphdr *th,
-    struct socket *so, struct tcpcb *tp, struct tcpopt *to,
+    struct socket *so, struct tcpcb *tp, struct tcpopt *to, 
     uint32_t tiwin, int32_t tlen, int32_t * ofia, int32_t thflags, int32_t * ret_val);
 static int
 rack_process_data(struct mbuf *m, struct tcphdr *th,
@@ -351,14 +369,13 @@
 rack_do_closing(struct mbuf *m, struct tcphdr *th,
     struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
     int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt);
+static void rack_do_drop(struct mbuf *m, struct tcpcb *tp);
 static void
-rack_do_drop(struct mbuf *m, struct tcpcb *tp);
-static void
 rack_do_dropafterack(struct mbuf *m, struct tcpcb *tp,
     struct tcphdr *th, int32_t thflags, int32_t tlen, int32_t * ret_val);
 static void
 rack_do_dropwithreset(struct mbuf *m, struct tcpcb *tp,
-	struct tcphdr *th, int32_t rstreason, int32_t tlen);
+    struct tcphdr *th, int32_t rstreason, int32_t tlen);
 static int
 rack_do_established(struct mbuf *m, struct tcphdr *th,
     struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
@@ -449,6 +466,7 @@
 		counter_u64_zero(rack_sack_proc_short);
 		counter_u64_zero(rack_sack_proc_restart);
 		counter_u64_zero(rack_to_alloc);
+		counter_u64_zero(rack_to_alloc_limited);
 		counter_u64_zero(rack_alloc_limited_conns);
 		counter_u64_zero(rack_split_limited);
 		counter_u64_zero(rack_find_high);
@@ -470,6 +488,18 @@
 {
 	SYSCTL_ADD_S32(&rack_sysctl_ctx,
 	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "map_limit", CTLFLAG_RW,
+	    &rack_map_entries_limit , 1024,
+	    "Is there a limit on how big the sendmap can grow? ");
+
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "map_splitlimit", CTLFLAG_RW,
+	    &rack_map_split_limit , 256,
+	    "Is there a limit on how much splitting a peer can do?");
+
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
 	    OID_AUTO, "rate_sample_method", CTLFLAG_RW,
 	    &rack_rate_sample_method , USE_RTT_LOW,
 	    "What method should we use for rate sampling 0=high, 1=low ");
@@ -628,11 +658,6 @@
 	    OID_AUTO, "pktdelay", CTLFLAG_RW,
 	    &rack_pkt_delay, 1,
 	    "Extra RACK time (in ms) besides reordering thresh");
-	SYSCTL_ADD_U32(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_sysctl_root),
-	    OID_AUTO, "split_limit", CTLFLAG_RW,
-	    &rack_map_split_limit, 0,
-	    "Is there a limit on the number of map split entries (0=unlimited)");
 	SYSCTL_ADD_S32(&rack_sysctl_ctx,
 	    SYSCTL_CHILDREN(rack_sysctl_root),
 	    OID_AUTO, "inc_var", CTLFLAG_RW,
@@ -769,6 +794,12 @@
 	    OID_AUTO, "allocemerg", CTLFLAG_RD,
 	    &rack_to_alloc_emerg,
 	    "Total allocations done from emergency cache");
+	rack_to_alloc_limited = counter_u64_alloc(M_WAITOK);
+	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "alloc_limited", CTLFLAG_RD,
+	    &rack_to_alloc_limited,
+	    "Total allocations dropped due to limit");
 	rack_alloc_limited_conns = counter_u64_alloc(M_WAITOK);
 	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
 	    SYSCTL_CHILDREN(rack_sysctl_root),
@@ -859,6 +890,7 @@
 static inline int32_t
 rack_progress_timeout_check(struct tcpcb *tp)
 {
+#ifdef NETFLIX_PROGRESS
 	if (tp->t_maxunacktime && tp->t_acktime && TSTMP_GT(ticks, tp->t_acktime)) {
 		if ((ticks - tp->t_acktime) >= tp->t_maxunacktime) {
 			/*
@@ -869,13 +901,12 @@
 			struct tcp_rack *rack;
 			rack = (struct tcp_rack *)tp->t_fb_ptr;
 			counter_u64_add(rack_progress_drops, 1);
-#ifdef NETFLIX_STATS
 			TCPSTAT_INC(tcps_progdrops);
-#endif
 			rack_log_progress_event(rack, tp, ticks, PROGRESS_DROP, __LINE__);
 			return (1);
 		}
 	}
+#endif
 	return (0);
 }
 
@@ -962,6 +993,7 @@
 		union tcp_log_stackspecific log;
 		struct timeval tv;
 		
+		memset(&log, 0, sizeof(log));
 		/* Convert our ms to a microsecond */
 		log.u_bbr.flex1 = rtt * 1000;
 		log.u_bbr.timeStamp = tcp_get_usecs(&tv);
@@ -1021,6 +1053,8 @@
 {
 	if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) {
 		union tcp_log_stackspecific log;
+
+		memset(&log, 0, sizeof(log));
 		log.u_bbr.flex1 = did_out;
 		log.u_bbr.flex2 = nxt_pkt;
 		log.u_bbr.flex3 = way_out;
@@ -1127,6 +1161,8 @@
 	counter_u64_free(rack_sack_proc_short);
 	counter_u64_free(rack_sack_proc_restart);
 	counter_u64_free(rack_to_alloc);
+	counter_u64_free(rack_to_alloc_limited);
+	counter_u64_free(rack_split_limited);
 	counter_u64_free(rack_find_high);
 	counter_u64_free(rack_runt_sacks);
 	counter_u64_free(rack_enter_tlp_calc);
@@ -1146,9 +1182,8 @@
 
 	rsm = uma_zalloc(rack_zone, M_NOWAIT);
 	if (rsm) {
-alloc_done:
-		counter_u64_add(rack_to_alloc, 1);
 		rack->r_ctl.rc_num_maps_alloced++;
+		counter_u64_add(rack_to_alloc, 1);
 		return (rsm);
 	}
 	if (rack->rc_free_cnt) {
@@ -1156,11 +1191,26 @@
 		rsm = TAILQ_FIRST(&rack->r_ctl.rc_free);
 		TAILQ_REMOVE(&rack->r_ctl.rc_free, rsm, r_next);
 		rack->rc_free_cnt--;
-		goto alloc_done;
+		return (rsm);
 	}
 	return (NULL);
 }
 
+static struct rack_sendmap *
+rack_alloc_full_limit(struct tcp_rack *rack)
+{
+	if ((rack_map_entries_limit > 0) &&
+	    (rack->r_ctl.rc_num_maps_alloced >= rack_map_entries_limit)) {
+		counter_u64_add(rack_to_alloc_limited, 1);
+		if (!rack->alloc_limit_reported) {
+			rack->alloc_limit_reported = 1;
+			counter_u64_add(rack_alloc_limited_conns, 1);
+		}
+		return (NULL);
+	}
+	return (rack_alloc(rack));
+}
+
 /* wrapper to allocate a sendmap entry, subject to a specific limit */
 static struct rack_sendmap *
 rack_alloc_limit(struct tcp_rack *rack, uint8_t limit_type)
@@ -1196,7 +1246,6 @@
 		/* currently there is only one limit type */
 		rack->r_ctl.rc_num_split_allocs--;
 	}
-	rack->r_ctl.rc_num_maps_alloced--;
 	if (rack->r_ctl.rc_tlpsend == rsm)
 		rack->r_ctl.rc_tlpsend = NULL;
 	if (rack->r_ctl.rc_next == rsm)
@@ -1206,9 +1255,11 @@
 	if (rack->rc_free_cnt < rack_free_cache) {
 		memset(rsm, 0, sizeof(struct rack_sendmap));
 		TAILQ_INSERT_TAIL(&rack->r_ctl.rc_free, rsm, r_next);
+		rsm->r_limit_type = 0;
 		rack->rc_free_cnt++;
 		return;
 	}
+	rack->r_ctl.rc_num_maps_alloced--;
 	uma_zfree(rack_zone, rsm);
 }
 
@@ -1222,11 +1273,9 @@
 #ifdef NETFLIX_STATS
 	int32_t gput;
 #endif
-#ifdef NETFLIX_CWV
-	u_long old_cwnd = tp->snd_cwnd;
-#endif
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
+
 	tp->ccv->nsegs = nsegs;
 	tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th);
 	if ((recovery) && (rack->r_ctl.rc_early_recovery_segs)) {
@@ -1264,7 +1313,6 @@
 				    tp->t_stats_gput_prev);
 			tp->t_flags &= ~TF_GPUTINPROG;
 			tp->t_stats_gput_prev = gput;
-#ifdef NETFLIX_CWV
 			if (tp->t_maxpeakrate) {
 				/*
 				 * We update t_peakrate_thr. This gives us roughly
@@ -1272,7 +1320,6 @@
 				 */
 				tcp_update_peakrate_thr(tp);
 			}
-#endif
 		}
 #endif
 		if (tp->snd_cwnd > tp->snd_ssthresh) {
@@ -1298,39 +1345,10 @@
 	if (rack->r_ctl.rc_rack_largest_cwnd < tp->snd_cwnd) {
 		rack->r_ctl.rc_rack_largest_cwnd = tp->snd_cwnd;
 	}
-#ifdef NETFLIX_CWV
-	if (tp->cwv_enabled) {
-		/*
-		 * Per RFC 7661: The behaviour in the non-validated phase is
-		 * specified as: o  A sender determines whether to increase
-		 * the cwnd based upon whether it is cwnd-limited (see
-		 * Section 4.5.3): * A sender that is cwnd-limited MAY use
-		 * the standard TCP method to increase cwnd (i.e., the
-		 * standard method permits a TCP sender that fully utilises
-		 * the cwnd to increase the cwnd each time it receives an
-		 * ACK). * A sender that is not cwnd-limited MUST NOT
-		 * increase the cwnd when ACK packets are received in this
-		 * phase (i.e., needs to avoid growing the cwnd when it has
-		 * not recently sent using the current size of cwnd).
-		 */
-		if ((tp->snd_cwnd > old_cwnd) &&
-		    (tp->cwv_cwnd_valid == 0) &&
-		    (!(tp->ccv->flags & CCF_CWND_LIMITED))) {
-			tp->snd_cwnd = old_cwnd;
-		}
-		/* Try to update pipeAck and NCWV state */
-		if (TCPS_HAVEESTABLISHED(tp->t_state) &&
-		    !IN_RECOVERY(tp->t_flags)) {
-			uint32_t data = sbavail(&(tp->t_inpcb->inp_socket->so_snd));
-
-			tcp_newcwv_update_pipeack(tp, data);
-		}
-	}
 	/* we enforce max peak rate if it is set. */
 	if (tp->t_peakrate_thr && tp->snd_cwnd > tp->t_peakrate_thr) {
 		tp->snd_cwnd = tp->t_peakrate_thr;
 	}
-#endif
 }
 
 static void
@@ -1379,16 +1397,8 @@
 		tp->snd_cwnd += rack->r_ctl.rc_prr_sndcnt;
 		rack->r_ctl.rc_prr_sndcnt = 0;
 	}
+	tp->snd_recover = tp->snd_una;
 	EXIT_RECOVERY(tp->t_flags);
-
-
-#ifdef NETFLIX_CWV
-	if (tp->cwv_enabled) {
-		if ((tp->cwv_cwnd_valid == 0) &&
-		    (tp->snd_cwv.in_recovery))
-			tcp_newcwv_end_recovery(tp);
-	}
-#endif
 }
 
 static void
@@ -1450,16 +1460,6 @@
 			tp->ccv->curack = th->th_ack;
 		CC_ALGO(tp)->cong_signal(tp->ccv, type);
 	}
-#ifdef NETFLIX_CWV
-	if (tp->cwv_enabled) {
-		if (tp->snd_cwv.in_recovery == 0 && IN_RECOVERY(tp->t_flags)) {
-			tcp_newcwv_enter_recovery(tp);
-		}
-		if (type == CC_RTO) {
-			tcp_newcwv_reset(tp);
-		}
-	}
-#endif
 }
 
 
@@ -1479,11 +1479,21 @@
 	if (CC_ALGO(tp)->after_idle != NULL)
 		CC_ALGO(tp)->after_idle(tp->ccv);
 
-	if (tp->snd_cwnd == 1)
-		i_cwnd = tp->t_maxseg;		/* SYN(-ACK) lost */
-	else 
-		i_cwnd = tcp_compute_initwnd(tcp_maxseg(tp));
-
+	if (V_tcp_initcwnd_segments)
+		i_cwnd = min((V_tcp_initcwnd_segments * tp->t_maxseg),
+		    max(2 * tp->t_maxseg, 14600));
+	else if (V_tcp_do_rfc3390)
+		i_cwnd = min(4 * tp->t_maxseg,
+		    max(2 * tp->t_maxseg, 4380));
+	else {
+		/* Per RFC5681 Section 3.1 */
+		if (tp->t_maxseg > 2190)
+			i_cwnd = 2 * tp->t_maxseg;
+		else if (tp->t_maxseg > 1095)
+			i_cwnd = 3 * tp->t_maxseg;
+		else
+			i_cwnd = 4 * tp->t_maxseg;
+	}
 	if (reduce_largest) {
 		/*
 		 * Do we reduce the largest cwnd to make 
@@ -1549,8 +1559,7 @@
 }
 
 static void
-rack_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th,
-    int32_t rstreason, int32_t tlen)
+rack_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t rstreason, int32_t tlen)
 {
 	if (tp != NULL) {
 		tcp_dropwithreset(m, th, tp, tlen, rstreason);
@@ -1736,7 +1745,7 @@
  * TCB is still valid and locked.
  */
 static int
-rack_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * tlenp,  int32_t * thf, int32_t * drop_hdrlen, int32_t * ret_val)
+rack_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * tlenp, int32_t * thf, int32_t * drop_hdrlen, int32_t * ret_val)
 {
 	int32_t todrop;
 	int32_t thflags;
@@ -1778,17 +1787,6 @@
 			TCPSTAT_INC(tcps_rcvpartduppack);
 			TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
 		}
-		/*
-		 * DSACK - add SACK block for dropped range
-		 */
-		if (tp->t_flags & TF_SACK_PERMIT) {
-			tcp_update_sack_list(tp, th->th_seq, th->th_seq + tlen);
-			/*
-			 * ACK now, as the next in-sequence segment
-			 * will clear the DSACK block again
-			 */
-			tp->t_flags |= TF_ACKNOW;
-		}
 		*drop_hdrlen += todrop;	/* drop from the top afterwards */
 		th->th_seq += todrop;
 		tlen -= todrop;
@@ -2124,8 +2122,6 @@
 		/* We can't start any timer in persists */
 		return (rack_get_persists_timer_val(tp, rack));
 	}
-	if (tp->t_state < TCPS_ESTABLISHED)
-		goto activate_rxt;
 	rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
 	if (rsm == NULL) {
 		/* Nothing on the send map */
@@ -2184,6 +2180,12 @@
 			 */
 			goto activate_rxt;
 		}
+		if ((tp->snd_max - tp->snd_una) > tp->snd_wnd) {
+			/*
+			 * Peer collapsed rwnd, don't do TLP.
+			 */
+			goto activate_rxt;
+		}
 		rsm = TAILQ_LAST_FAST(&rack->r_ctl.rc_tmap, rack_sendmap, r_tnext);
 		if (rsm == NULL) {
 			/* We found no rsm to TLP with. */
@@ -2288,7 +2290,9 @@
 		/* A previous call is already set up */
 		return;
 	}
-	if (tp->t_state == TCPS_CLOSED) {
+
+	if ((tp->t_state == TCPS_CLOSED) ||
+	    (tp->t_state == TCPS_LISTEN)) {
 		return;
 	}
 	stopped = rack->rc_tmr_stopped;
@@ -2307,8 +2311,8 @@
 		 * We are still left on the hpts when the to goes
 		 * it will be for output.
 		 */
-		if (TSTMP_GT(cts, rack->r_ctl.rc_last_output_to))
-			slot = cts - rack->r_ctl.rc_last_output_to;
+		if (TSTMP_GT(rack->r_ctl.rc_last_output_to, cts))
+			slot = rack->r_ctl.rc_last_output_to - cts;
 		else
 			slot = 1;
 	}
@@ -2330,7 +2334,7 @@
 	}
 	hpts_timeout = rack_timer_start(tp, rack, cts);
 	if (tp->t_flags & TF_DELACK) {
-		delayed_ack = TICKS_2_MSEC(tcp_delacktime);
+		delayed_ack = tcp_delacktime;
 		rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK;
 	}
 	if (delayed_ack && ((hpts_timeout == 0) ||
@@ -2487,6 +2491,43 @@
 	return (0);
 }
 
+static struct rack_sendmap *
+rack_merge_rsm(struct tcp_rack *rack,
+	       struct rack_sendmap *l_rsm,
+	       struct rack_sendmap *r_rsm)
+{
+	/* 
+	 * We are merging two ack'd RSM's,
+	 * the l_rsm is on the left (lower seq
+	 * values) and the r_rsm is on the right
+	 * (higher seq value). The simplest way
+	 * to merge these is to move the right
+	 * one into the left. I don't think there
+	 * is any reason we need to try to find
+	 * the oldest (or last oldest retransmitted).
+	 */
+	l_rsm->r_end = r_rsm->r_end;
+	if (r_rsm->r_rtr_bytes)
+		l_rsm->r_rtr_bytes += r_rsm->r_rtr_bytes;
+	if (r_rsm->r_in_tmap) {
+		/* This really should not happen */
+		TAILQ_REMOVE(&rack->r_ctl.rc_tmap, r_rsm, r_tnext);
+	}
+	/* Now the flags */
+	if (r_rsm->r_flags & RACK_HAS_FIN)
+		l_rsm->r_flags |= RACK_HAS_FIN;
+	if (r_rsm->r_flags & RACK_TLP)
+		l_rsm->r_flags |= RACK_TLP;
+	TAILQ_REMOVE(&rack->r_ctl.rc_map, r_rsm, r_next);	
+	if ((r_rsm->r_limit_type == 0) && (l_rsm->r_limit_type != 0)) {
+		/* Transfer the split limit to the map we free */
+		r_rsm->r_limit_type = l_rsm->r_limit_type;
+		l_rsm->r_limit_type = 0;
+	}
+	rack_free(rack, r_rsm);
+	return(l_rsm);
+}
+
 /*
  * TLP Timer, here we simply setup what segment we want to
  * have the TLP expire on, the normal rack_output() will then
@@ -2590,7 +2631,7 @@
 		int32_t idx;
 		struct rack_sendmap *nrsm;
 
-		nrsm = rack_alloc(rack);
+		nrsm = rack_alloc_full_limit(rack);
 		if (nrsm == NULL) {
 			/*
 			 * No memory to split, we will just exit and punt
@@ -2937,7 +2978,7 @@
 	TCPSTAT_INC(tcps_rexmttimeo);
 	if ((tp->t_state == TCPS_SYN_SENT) ||
 	    (tp->t_state == TCPS_SYN_RECEIVED))
-		rexmt = MSEC_2_TICKS(RACK_INITIAL_RTO * tcp_backoff[tp->t_rxtshift]);
+		rexmt = MSEC_2_TICKS(RACK_INITIAL_RTO * tcp_syn_backoff[tp->t_rxtshift]);
 	else
 		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
 	TCPT_RANGESET(tp->t_rxtcur, rexmt,
@@ -3281,7 +3322,7 @@
 	 * Here we retransmitted less than the whole thing which means we
 	 * have to split this into what was transmitted and what was not.
 	 */
-	nrsm = rack_alloc(rack);
+	nrsm = rack_alloc_full_limit(rack);
 	if (nrsm == NULL) {
 		/*
 		 * We can't get memory, so lets not proceed.
@@ -3415,9 +3456,6 @@
 			 * Hmm out of memory and the tcb got destroyed while
 			 * we tried to wait.
 			 */
-#ifdef INVARIANTS
-			panic("Out of memory when we should not be rack:%p", rack);
-#endif
 			return;
 		}
 		if (th_flags & TH_FIN) {
@@ -3428,15 +3466,8 @@
 		rsm->r_tim_lastsent[0] = ts;
 		rsm->r_rtr_cnt = 1;
 		rsm->r_rtr_bytes = 0;
-		if (th_flags & TH_SYN) {
-			/* The data space is one beyond snd_una */
-			rsm->r_start = seq_out + 1;
-			rsm->r_end = rsm->r_start + (len - 1);
-		} else {
-			/* Normal case */
-			rsm->r_start = seq_out;
-			rsm->r_end = rsm->r_start + len;
-		}
+		rsm->r_start = seq_out;
+		rsm->r_end = rsm->r_start + len;
 		rsm->r_sndcnt = 0;
 		TAILQ_INSERT_TAIL(&rack->r_ctl.rc_map, rsm, r_next);
 		TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext);
@@ -3486,11 +3517,8 @@
 			 * Ok we must split off the front and then let the
 			 * update do the rest
 			 */
-			nrsm = rack_alloc(rack);
+			nrsm = rack_alloc_full_limit(rack);
 			if (nrsm == NULL) {
-#ifdef INVARIANTS
-				panic("Ran out of memory that was preallocated? rack:%p", rack);
-#endif
 				rack_update_rsm(tp, rack, rsm, ts);
 				return;
 			}
@@ -3908,6 +3936,14 @@
 		if (nrsm->r_flags & RACK_ACKED) {
 			/* Skip ack'd segments */
 			continue;
+		} 
+		if (nrsm->r_flags & RACK_SACK_PASSED) {
+			/* 
+			 * We found one that is already marked
+			 * passed, we have been here before and
+			 * so all others below this are marked.
+			 */
+			break;
 		}
 		idx = nrsm->r_rtr_cnt - 1;
 		if (ts == nrsm->r_tim_lastsent[idx]) {
@@ -4114,6 +4150,26 @@
 		rsm->r_in_tmap = 0;
 	}
 out:
+	if (rsm && (rsm->r_flags & RACK_ACKED)) {
+		/* 
+		 * Now can we merge this newly acked
+		 * block with either the previous or
+		 * next block?
+		 */
+		nrsm = TAILQ_NEXT(rsm, r_next);
+		if (nrsm &&
+		    (nrsm->r_flags & RACK_ACKED)) {
+			/* yep this and next can be merged */
+			rsm = rack_merge_rsm(rack, rsm, nrsm);
+		}
+		/* Now what about the previous? */
+		nrsm = TAILQ_PREV(rsm, rack_head, r_next);
+		if (nrsm &&
+		    (nrsm->r_flags & RACK_ACKED)) {
+			/* yep the previous and this can be merged */
+			rsm = rack_merge_rsm(rack, nrsm, rsm);
+		}
+	}
 	if (used_ref == 0) {
 		counter_u64_add(rack_sack_proc_all, 1);
 	} else {
@@ -4353,16 +4409,13 @@
 			}
 			sack_blocks[num_sack_blks] = sack;
 			num_sack_blks++;
-#ifdef NETFLIX_STATS
 		} else if (SEQ_LEQ(sack.start, th_ack) &&
 			   SEQ_LEQ(sack.end, th_ack)) {
 			/*
 			 * Its a D-SACK block.
 			 */
-			tcp_record_dsack(sack.start, sack.end);
-#endif
+/*			tcp_record_dsack(sack.start, sack.end); */
 		}
-
 	}
 	if (num_sack_blks == 0)
 		goto out;
@@ -4371,7 +4424,9 @@
 	 * just one pass.
 	 */
 	if (rack_use_sack_filter) {
-		num_sack_blks = sack_filter_blks(&rack->r_ctl.rack_sf, sack_blocks, num_sack_blks, th->th_ack);
+		num_sack_blks = sack_filter_blks(&rack->r_ctl.rack_sf, sack_blocks,
+						 num_sack_blks, th->th_ack);
+		ctf_log_sack_filter(rack->rc_tp, num_sack_blks, sack_blocks);
 	}
 	if (num_sack_blks < 2) {
 		goto do_sack_work;
@@ -4620,8 +4675,9 @@
 		return (0);
 	}
 	if (rack->r_ctl.rc_early_recovery) {
-		if (IN_FASTRECOVERY(tp->t_flags)) {
-			if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+		if (IN_RECOVERY(tp->t_flags)) {
+			if (SEQ_LT(th->th_ack, tp->snd_recover) &&
+			    (SEQ_LT(th->th_ack, tp->snd_max))) {
 				tcp_rack_partialack(tp, th);
 			} else {
 				rack_post_recovery(tp, th);
@@ -4648,8 +4704,9 @@
 	sowwakeup_locked(so);
 	m_freem(mfree);
 	if (rack->r_ctl.rc_early_recovery == 0) {
-		if (IN_FASTRECOVERY(tp->t_flags)) {
-			if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+		if (IN_RECOVERY(tp->t_flags)) {
+			if (SEQ_LT(th->th_ack, tp->snd_recover) &&
+			    (SEQ_LT(th->th_ack, tp->snd_max))) {
 				tcp_rack_partialack(tp, th);
 			} else {
 				rack_post_recovery(tp, th);
@@ -4707,7 +4764,11 @@
 	 * send garbage on first SYN.
 	 */
 	int32_t nsegs;
+#ifdef TCP_RFC7413
 	int32_t tfo_syn;
+#else
+#define	tfo_syn	(FALSE)
+#endif
 	struct tcp_rack *rack;
 
 	rack = (struct tcp_rack *)tp->t_fb_ptr;
@@ -4816,8 +4877,10 @@
 	 * PRU_RCVD).  If a FIN has already been received on this connection
 	 * then we just ignore the text.
 	 */
+#ifdef TCP_RFC7413
 	tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
-		   IS_FASTOPEN(tp->t_flags));
+	    (tp->t_flags & TF_FASTOPEN));
+#endif
 	if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		tcp_seq save_start = th->th_seq;
@@ -5024,8 +5087,9 @@
 
 
 	/* Clean receiver SACK report if present */
-	if (tp->rcv_numsacks)
-		tcp_clean_sackreport(tp);
+/*	if (tp->rcv_numsacks)
+	        tcp_clean_sackreport(tp);
+*/
 	TCPSTAT_INC(tcps_preddat);
 	tp->rcv_nxt += tlen;
 	/*
@@ -5284,8 +5348,6 @@
 	tp->irs = th->th_seq;
 	tcp_rcvseqinit(tp);
 	if (thflags & TH_ACK) {
-		int tfo_partial = 0;
-		
 		TCPSTAT_INC(tcps_connects);
 		soisconnected(so);
 #ifdef MAC
@@ -5299,19 +5361,10 @@
 		tp->rcv_adv += min(tp->rcv_wnd,
 		    TCP_MAXWIN << tp->rcv_scale);
 		/*
-		 * If not all the data that was sent in the TFO SYN
-		 * has been acked, resend the remainder right away.
-		 */
-		if (IS_FASTOPEN(tp->t_flags) &&
-		    (tp->snd_una != tp->snd_max)) {
-			tp->snd_nxt = th->th_ack;
-			tfo_partial = 1;
-		}
-		/*
 		 * If there's data, delay ACK; if there's also a FIN ACKNOW
 		 * will be turned on later.
 		 */
-		if (DELAY_ACK(tp, tlen) && tlen != 0 && (tfo_partial == 0)) {
+		if (DELAY_ACK(tp, tlen) && tlen != 0) {
 			rack_timer_cancel(tp, (struct tcp_rack *)tp->t_fb_ptr,
 					  ((struct tcp_rack *)tp->t_fb_ptr)->r_ctl.rc_rcvtime, __LINE__);
 			tp->t_flags |= TF_DELACK;
@@ -5320,26 +5373,10 @@
 			tp->t_flags |= TF_ACKNOW;
 		}
 
-		if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
-		    V_tcp_do_ecn) {
+		if ((thflags & TH_ECE) && V_tcp_do_ecn) {
 			tp->t_flags |= TF_ECN_PERMIT;
 			TCPSTAT_INC(tcps_ecn_shs);
 		}
-		if (SEQ_GT(th->th_ack, tp->snd_una)) {
-			/* 
-			 * We advance snd_una for the 
-			 * fast open case. If th_ack is
-			 * acknowledging data beyond 
-			 * snd_una we can't just call
-			 * ack-processing since the 
-			 * data stream in our send-map
-			 * will start at snd_una + 1 (one
-			 * beyond the SYN). If its just
-			 * equal we don't need to do that
-			 * and there is no send_map.
-			 */
-			tp->snd_una++;
-		}
 		/*
 		 * Received <SYN,ACK> in SYN_SENT[*] state. Transitions:
 		 * SYN_SENT  --> ESTABLISHED SYN_SENT* --> FIN_WAIT_1
@@ -5423,7 +5460,7 @@
 		}
 	}
 	return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen,
-	   tiwin, thflags, nxt_pkt));
+	    tiwin, thflags, nxt_pkt));
 }
 
 /*
@@ -5447,13 +5484,13 @@
 		rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
 		return (1);
 	}
-	if (IS_FASTOPEN(tp->t_flags)) {
+#ifdef TCP_RFC7413
+	if (tp->t_flags & TF_FASTOPEN) {
 		/*
-		 * When a TFO connection is in SYN_RECEIVED, the
-		 * only valid packets are the initial SYN, a
-		 * retransmit/copy of the initial SYN (possibly with
-		 * a subset of the original data), a valid ACK, a
-		 * FIN, or a RST.
+		 * When a TFO connection is in SYN_RECEIVED, the only valid
+		 * packets are the initial SYN, a retransmit/copy of the
+		 * initial SYN (possibly with a subset of the original
+		 * data), a valid ACK, a FIN, or a RST.
 		 */
 		if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) {
 			rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
@@ -5474,9 +5511,18 @@
 			return (0);
 		}
 	}
+#endif
 	if (thflags & TH_RST)
 		return (rack_process_rst(m, th, so, tp));
 	/*
+	 * RFC5961 Section 4.2 Send challenge ACK for any SYN in
+	 * synchronized state.
+	 */
+	if (thflags & TH_SYN) {
+		rack_challenge_ack(m, th, tp, &ret_val);
+		return (ret_val);
+	}
+	/*
 	 * RFC 1323 PAWS: If we have a timestamp reply on this segment and
 	 * it's less than ts_recent, drop it.
 	 */
@@ -5520,16 +5566,18 @@
 		tp->ts_recent_age = tcp_ts_getticks();
 		tp->ts_recent = to->to_tsval;
 	}
-	tp->snd_wnd = tiwin;
 	/*
 	 * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN flag
 	 * is on (half-synchronized state), then queue data for later
 	 * processing; else drop segment and return.
 	 */
 	if ((thflags & TH_ACK) == 0) {
-		if (IS_FASTOPEN(tp->t_flags)) {
+#ifdef TCP_RFC7413
+		if (tp->t_flags & TF_FASTOPEN) {
+			tp->snd_wnd = tiwin;
 			cc_conn_init(tp);
 		}
+#endif
 		return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen,
 		    tiwin, thflags, nxt_pkt));
 	}
@@ -5539,22 +5587,13 @@
 	if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
 	    (TF_RCVD_SCALE | TF_REQ_SCALE)) {
 		tp->rcv_scale = tp->request_r_scale;
+		tp->snd_wnd = tiwin;
 	}
 	/*
 	 * Make transitions: SYN-RECEIVED  -> ESTABLISHED SYN-RECEIVED* ->
 	 * FIN-WAIT-1
 	 */
 	tp->t_starttime = ticks;
-	if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) {
-		tcp_fastopen_decrement_counter(tp->t_tfo_pending);
-		tp->t_tfo_pending = NULL;
-
-		/*
-		 * Account for the ACK of our SYN prior to
-		 * regular ACK processing below.
-		 */ 
-		tp->snd_una++;
-	}
 	if (tp->t_flags & TF_NEEDFIN) {
 		tcp_state_change(tp, TCPS_FIN_WAIT_1);
 		tp->t_flags &= ~TF_NEEDFIN;
@@ -5562,13 +5601,25 @@
 		tcp_state_change(tp, TCPS_ESTABLISHED);
 		TCP_PROBE5(accept__established, NULL, tp,
 		    mtod(m, const char *), tp, th);
+#ifdef TCP_RFC7413
+		if (tp->t_tfo_pending) {
+			tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+			tp->t_tfo_pending = NULL;
+
+			/*
+			 * Account for the ACK of our SYN prior to regular
+			 * ACK processing below.
+			 */
+			tp->snd_una++;
+		}
 		/*
 		 * TFO connections call cc_conn_init() during SYN
 		 * processing.  Calling it again here for such connections
 		 * is not harmless as it would undo the snd_cwnd reduction
 		 * that occurs when a TFO SYN|ACK is retransmitted.
 		 */
-		if (!IS_FASTOPEN(tp->t_flags))
+		if (!(tp->t_flags & TF_FASTOPEN))
+#endif
 			cc_conn_init(tp);
 	}
 	/*
@@ -5576,7 +5627,7 @@
 	 * not, do so now to pass queued data to user.
 	 */
 	if (tlen == 0 && (thflags & TH_FIN) == 0)
-		(void) tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
+		(void)tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
 		    (struct mbuf *)0);
 	tp->snd_wl1 = th->th_seq - 1;
 	if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) {
@@ -5836,7 +5887,7 @@
 rack_check_data_after_close(struct mbuf *m, 
     struct tcpcb *tp, int32_t *tlen, struct tcphdr *th, struct socket *so)
 {
-	struct tcp_rack *rack;
+	struct tcp_rack *rack; 
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	rack = (struct tcp_rack *)tp->t_fb_ptr;
@@ -6353,7 +6404,6 @@
 	rack->r_ctl.rc_prr_sendalot = rack_send_a_lot_in_prr;
 	rack->r_ctl.rc_min_to = rack_min_to;
 	rack->r_ctl.rc_prr_inc_var = rack_inc_var;
-	rack_start_hpts_timer(rack, tp, tcp_ts_getticks(), __LINE__, 0, 0, 0);
 	if (tp->snd_una != tp->snd_max) {
 		/* Create a send map for the current outstanding data */
 		struct rack_sendmap *rsm;
@@ -6375,6 +6425,8 @@
 		TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext);
 		rsm->r_in_tmap = 1;
 	}
+	rack_stop_all_timers(tp);
+	rack_start_hpts_timer(rack, tp, tcp_ts_getticks(), __LINE__, 0, 0, 0);
 	return (0);
 }
 
@@ -6431,6 +6483,8 @@
 		uma_zfree(rack_pcb_zone, tp->t_fb_ptr);
 		tp->t_fb_ptr = NULL;
 	}
+	/* Make sure snd_nxt is correctly set */
+	tp->snd_nxt = tp->snd_max;
 }
 
 static void
@@ -6473,9 +6527,6 @@
 	case TCPS_CLOSED:
 	case TCPS_TIME_WAIT:
 	default:
-#ifdef INVARIANTS
-		panic("tcp tp:%p state:%d sees impossible state?", tp, tp->t_state);
-#endif
 		break;
 	};
 }
@@ -6585,10 +6636,6 @@
 	 * allow the tcbinfo to be in either locked or unlocked, as the
 	 * caller may have unnecessarily acquired a lock due to a race.
 	 */
-	if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
-	    tp->t_state != TCPS_ESTABLISHED) {
-		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
-	}
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
 	    __func__));
@@ -6600,37 +6647,17 @@
 		memset(&log.u_bbr, 0, sizeof(log.u_bbr));
 		log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts;
 		log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+		log.u_bbr.flex2 = rack->r_ctl.rc_num_maps_alloced;
 		TCP_LOG_EVENT(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_IN, 0,
 		    tlen, &log, true);
 	}
-	if ((thflags & TH_SYN) && (thflags & TH_FIN) && V_drop_synfin) {
-		way_out = 4;
-		goto done_with_input;
-	}
 	/*
-	 * If a segment with the ACK-bit set arrives in the SYN-SENT state
-	 * check SEQ.ACK first as described on page 66 of RFC 793, section 3.9.
-	 */
-	if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) &&
-	    (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) {
-		rack_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
-		return;
-	}
-	/*
 	 * Segment received on connection. Reset idle time and keep-alive
 	 * timer. XXX: This should be done after segment validation to
 	 * ignore broken/spoofed segs.
 	 */
 	if  (tp->t_idle_reduce && (tp->snd_max == tp->snd_una)) {
-#ifdef NETFLIX_CWV
-		if ((tp->cwv_enabled) &&
-		    ((tp->cwv_cwnd_valid == 0) &&
-		     TCPS_HAVEESTABLISHED(tp->t_state) &&
-		     (tp->snd_cwnd > tp->snd_cwv.init_cwnd))) {
-			tcp_newcwv_nvp_closedown(tp);
-		} else 
-#endif
-		       if ((ticks - tp->t_rcvtime) >= tp->t_rxtcur) {
+		if ((ticks - tp->t_rcvtime) >= tp->t_rxtcur) {
 			counter_u64_add(rack_input_idle_reduces, 1);
 			rack_cc_after_idle(tp,
 			    (rack->r_idle_reduce_largest ? 1 :0));
@@ -6639,14 +6666,6 @@
 	rack->r_ctl.rc_rcvtime = cts;
 	tp->t_rcvtime = ticks;
 
-#ifdef NETFLIX_CWV
-	if (tp->cwv_enabled) {
-		if ((tp->cwv_cwnd_valid == 0) &&
-		    TCPS_HAVEESTABLISHED(tp->t_state) &&
-		    (tp->snd_cwnd > tp->snd_cwv.init_cwnd))
-			tcp_newcwv_nvp_closedown(tp);
-	}
-#endif
 	/*
 	 * Unscale the window into a 32-bit value. For the SYN_SENT state
 	 * the scale is zero.
@@ -6737,22 +6756,6 @@
 			if ((tp->t_flags & TF_SACK_PERMIT) &&
 			    (to.to_flags & TOF_SACKPERM) == 0)
 				tp->t_flags &= ~TF_SACK_PERMIT;
-			if (IS_FASTOPEN(tp->t_flags)) {
-				if (to.to_flags & TOF_FASTOPEN) {
-					uint16_t mss;
-
-					if (to.to_flags & TOF_MSS)
-						mss = to.to_mss;
-					else
-						if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
-							mss = TCP6_MSS;
-						else
-							mss = TCP_MSS;
-					tcp_fastopen_update_cache(tp, mss,
-					    to.to_tfo_len, to.to_tfo_cookie);
-				} else
-					tcp_fastopen_disable_path(tp);
-			}
 		}
 		/*
 		 * At this point we are at the initial call. Here we decide
@@ -6769,7 +6772,6 @@
 		/* Set the flag */
 		rack->r_is_v6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
 		tcp_set_hpts(tp->t_inpcb);
-		rack_stop_all_timers(tp);
 		sack_filter_clear(&rack->r_ctl.rack_sf, th->th_ack);
 	}
 	/*
@@ -6801,24 +6803,6 @@
 		 */
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 		tcp_rack_xmit_timer_commit(rack, tp);
-		if (((tp->snd_max - tp->snd_una) > tp->snd_wnd) &&
-		    (rack->rc_in_persist == 0)){
-			/* 
-			 * The peer shrunk its window on us to the point
-			 * where we have sent too much. The only thing
-			 * we can do here is stop any timers and
-			 * enter persist. We most likely lost the last
-			 * bytes we sent but oh well, we will have to
-			 * retransmit them after the peer is caught up.
-			 */
-			if (rack->rc_inp->inp_in_hpts)
-				tcp_hpts_remove(rack->rc_inp, HPTS_REMOVE_OUTPUT);
-			rack_timer_cancel(tp, rack, cts, __LINE__);
-			rack_enter_persist(tp, rack, cts);
-			rack_start_hpts_timer(rack, tp, tcp_ts_getticks(), __LINE__, 0, 0, 0);
-			way_out = 3;
-			goto done_with_input;
-		}
 		if (nxt_pkt == 0) {
 			if (rack->r_wanted_output != 0) {
 				did_out = 1;
@@ -6848,7 +6832,6 @@
 			rack_timer_audit(tp, rack, &so->so_snd);
 			way_out = 2;
 		}
-	done_with_input:
 		rack_log_doseg_done(rack, cts, nxt_pkt, did_out, way_out);
 		if (did_out)
 			rack->r_wanted_output = 0;
@@ -6871,7 +6854,7 @@
 #ifdef RSS
 	struct tcp_function_block *tfb;
 	struct tcp_rack *rack;
-	struct epoch_tracker et;
+	struct inpcb *inp;
 
 	rack = (struct tcp_rack *)tp->t_fb_ptr;
 	if (rack->r_state == 0) {
@@ -6879,11 +6862,9 @@
 		 * Initial input (ACK to SYN-ACK etc)lets go ahead and get
 		 * it processed
 		 */
-		INP_INFO_RLOCK_ET(&V_tcbinfo, et);
 		tcp_get_usecs(&tv);
 		rack_hpts_do_segment(m, th, so, tp, drop_hdrlen,
 		    tlen, iptos, 0, &tv);
-		INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
 		return;
 	}
 	tcp_queue_to_input(tp, m, th, tlen, drop_hdrlen, iptos);
@@ -6959,13 +6940,17 @@
 #ifdef TCPDEBUG
 	struct ipovly *ipov = NULL;
 #endif
+#ifdef NETFLIX_TCP_O_UDP
 	struct udphdr *udp = NULL;
+#endif
 	struct tcp_rack *rack;
 	struct tcphdr *th;
 	uint8_t pass = 0;
-	uint8_t wanted_cookie = 0;
 	u_char opt[TCP_MAXOLEN];
-	unsigned ipoptlen, optlen, hdrlen, ulen=0;
+	unsigned ipoptlen, optlen, hdrlen;
+#ifdef NETFLIX_TCP_O_UDP
+	unsigned ulen;
+#endif	
 	uint32_t rack_seq;
 
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
@@ -7004,6 +6989,18 @@
 	if (tp->t_flags & TF_TOE)
 		return (tcp_offload_output(tp));
 #endif
+
+#ifdef TCP_RFC7413
+	/*
+	 * For TFO connections in SYN_RECEIVED, only allow the initial
+	 * SYN|ACK and those sent by the retransmit timer.
+	 */
+	if ((tp->t_flags & TF_FASTOPEN) &&
+	    (tp->t_state == TCPS_SYN_RECEIVED) &&
+	    SEQ_GT(tp->snd_max, tp->snd_una) &&	/* initial SYN|ACK sent */
+	    (tp->snd_nxt != tp->snd_una))	/* not a retransmit */
+		return (0);
+#endif
 #ifdef INET6
 	if (rack->r_state) {
 		/* Use the cache line loaded if possible */
@@ -7046,31 +7043,12 @@
 	rack->r_wanted_output = 0;
 	rack->r_timer_override = 0;
 	/*
-	 * For TFO connections in SYN_SENT or SYN_RECEIVED,
-	 * only allow the initial SYN or SYN|ACK and those sent
-	 * by the retransmit timer.
-	 */
-	if (IS_FASTOPEN(tp->t_flags) &&
-	    ((tp->t_state == TCPS_SYN_RECEIVED) ||
-	     (tp->t_state == TCPS_SYN_SENT)) &&
-	    SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN or SYN|ACK sent */
-	    (tp->t_rxtshift == 0))              /* not a retransmit */
-		return (0);
-	/*
 	 * Determine length of data that should be transmitted, and flags
 	 * that will be used. If there is some data or critical controls
 	 * (SYN, RST) to send, then transmit; otherwise, investigate
 	 * further.
 	 */
 	idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una);
-#ifdef NETFLIX_CWV
-	if (tp->cwv_enabled) {
-		if ((tp->cwv_cwnd_valid == 0) &&
-		    TCPS_HAVEESTABLISHED(tp->t_state) &&
-		    (tp->snd_cwnd > tp->snd_cwv.init_cwnd))
-			tcp_newcwv_nvp_closedown(tp);
-	} else
-#endif
 	if (tp->t_idle_reduce) {
 		if (idle && ((ticks - tp->t_rcvtime) >= tp->t_rxtcur))
 			rack_cc_after_idle(tp,
@@ -7141,10 +7119,12 @@
 		tlen = rsm->r_end - rsm->r_start;
 		if (tlen > tp->t_maxseg)
 			tlen = tp->t_maxseg;
-		KASSERT(SEQ_LEQ(tp->snd_una, rsm->r_start),
-		    ("%s:%d: r.start:%u < SND.UNA:%u; tp:%p, rack:%p, rsm:%p",
-		    __func__, __LINE__,
-		    rsm->r_start, tp->snd_una, tp, rack, rsm));
+#ifdef INVARIANTS
+		if (SEQ_GT(tp->snd_una, rsm->r_start)) {
+			panic("tp:%p rack:%p snd_una:%u rsm:%p r_start:%u",
+			    tp, rack, tp->snd_una, rsm, rsm->r_start);
+		}
+#endif
 		sb_offset = rsm->r_start - tp->snd_una;
 		cwin = min(tp->snd_wnd, tlen);
 		len = cwin;
@@ -7155,14 +7135,12 @@
 		len = rsm->r_end - rsm->r_start;
 		sack_rxmit = 1;
 		sendalot = 0;
-		KASSERT(SEQ_LEQ(tp->snd_una, rsm->r_start),
-		    ("%s:%d: r.start:%u < SND.UNA:%u; tp:%p, rack:%p, rsm:%p",
-		    __func__, __LINE__,
-		    rsm->r_start, tp->snd_una, tp, rack, rsm));
 		sb_offset = rsm->r_start - tp->snd_una;
 		if (len >= tp->t_maxseg) {
 			len = tp->t_maxseg;
 		}
+		KASSERT(sb_offset >= 0, ("%s: sack block to the left of una : %d",
+		    __func__, sb_offset));
 	} else if ((rack->rc_in_persist == 0) &&
 	    ((rsm = tcp_rack_output(tp, rack, cts)) != NULL)) {
 		long tlen;
@@ -7187,10 +7165,6 @@
 		}
 #endif
 		tlen = rsm->r_end - rsm->r_start;
-		KASSERT(SEQ_LEQ(tp->snd_una, rsm->r_start),
-		    ("%s:%d: r.start:%u < SND.UNA:%u; tp:%p, rack:%p, rsm:%p",
-		    __func__, __LINE__,
-		    rsm->r_start, tp->snd_una, tp, rack, rsm));
 		sb_offset = rsm->r_start - tp->snd_una;
 		if (tlen > rack->r_ctl.rc_prr_sndcnt) {
 			len = rack->r_ctl.rc_prr_sndcnt;
@@ -7212,6 +7186,8 @@
 				goto just_return_nolock;
 			}
 		}
+		KASSERT(sb_offset >= 0, ("%s: sack block to the left of una : %d",
+		    __func__, sb_offset));
 		if (len > 0) {
 			sub_from_prr = 1;
 			sack_rxmit = 1;
@@ -7236,6 +7212,20 @@
 	/* For debugging */
 	rack->r_ctl.rc_rsm_at_retran = rsm;
 #endif
+	/*
+	 * Enforce a connection sendmap count limit if set
+	 * as long as we are not retransmitting.
+	 */
+	if ((rsm == NULL) &&
+	    (rack_map_entries_limit > 0) &&
+	    (rack->r_ctl.rc_num_maps_alloced >= rack_map_entries_limit)) {
+		counter_u64_add(rack_to_alloc_limited, 1);
+		if (!rack->alloc_limit_reported) {
+			rack->alloc_limit_reported = 1;
+			counter_u64_add(rack_alloc_limited_conns, 1);
+		}
+		goto just_return_nolock;
+	}
 	/*
 	 * Get standard flags, and add SYN or FIN if requested by 'hidden'
 	 * state flags.
@@ -7306,7 +7296,7 @@
 		uint32_t avail;
 
 		avail = sbavail(sb);
-		if (SEQ_GT(tp->snd_nxt, tp->snd_una) && avail)
+		if (SEQ_GT(tp->snd_nxt, tp->snd_una))
 			sb_offset = tp->snd_nxt - tp->snd_una;
 		else
 			sb_offset = 0;
@@ -7347,9 +7337,18 @@
 			 * data possible so far in the scoreboard.
 			 */
 			outstanding = tp->snd_max - tp->snd_una;
-			if ((rack->r_ctl.rc_prr_sndcnt + outstanding) > tp->snd_wnd)
-				len = 0;
-			else if (avail > sb_offset)
+			if ((rack->r_ctl.rc_prr_sndcnt + outstanding) > tp->snd_wnd) {
+				if (tp->snd_wnd > outstanding) {
+					len = tp->snd_wnd - outstanding;
+					/* Check to see if we have the data */
+					if (((sb_offset + len) > avail) &&
+					    (avail > sb_offset))
+						len = avail - sb_offset;
+					else
+						len = 0;
+				} else
+					len = 0;
+			} else if (avail > sb_offset)
 				len = avail - sb_offset;
 			else
 				len = 0;
@@ -7398,18 +7397,22 @@
 	 * SYN-SENT state and if segment contains data and if we don't know
 	 * that foreign host supports TAO, suppress sending segment.
 	 */
-	if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una) &&
-	    ((sack_rxmit == 0) && (tp->t_rxtshift == 0))) {
-		if (tp->t_state != TCPS_SYN_RECEIVED)
+	if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
+		if ((tp->t_state != TCPS_SYN_RECEIVED) &&
+		    (tp->t_state != TCPS_SYN_SENT))
 			flags &= ~TH_SYN;
+#ifdef TCP_RFC7413
 		/*
 		 * When sending additional segments following a TFO SYN|ACK,
 		 * do not include the SYN bit.
 		 */
-		if (IS_FASTOPEN(tp->t_flags) &&
+		if ((tp->t_flags & TF_FASTOPEN) &&
 		    (tp->t_state == TCPS_SYN_RECEIVED))
 			flags &= ~TH_SYN;
+#endif
 		sb_offset--, len++;
+		if (sbavail(sb) == 0)
+			len = 0;
 	}
 	/*
 	 * Be careful not to send data and/or FIN on SYN segments. This
@@ -7420,29 +7423,16 @@
 		len = 0;
 		flags &= ~TH_FIN;
 	}
+#ifdef TCP_RFC7413
 	/*
-	 * On TFO sockets, ensure no data is sent in the following cases:
-	 *
-	 *  - When retransmitting SYN|ACK on a passively-created socket
-	 *
-	 *  - When retransmitting SYN on an actively created socket
-	 *
-	 *  - When sending a zero-length cookie (cookie request) on an
-	 *    actively created socket
-	 *
-	 *  - When the socket is in the CLOSED state (RST is being sent)
+	 * When retransmitting SYN|ACK on a passively-created TFO socket,
+	 * don't include data, as the presence of data may have caused the
+	 * original SYN|ACK to have been dropped by a middlebox.
 	 */
-	if (IS_FASTOPEN(tp->t_flags) &&
-	    (((flags & TH_SYN) && (tp->t_rxtshift > 0)) ||
-	     ((tp->t_state == TCPS_SYN_SENT) &&
-	      (tp->t_tfo_client_cookie_len == 0)) ||
-	     (flags & TH_RST))) {
-		sack_rxmit = 0;
+	if ((tp->t_flags & TF_FASTOPEN) &&
+	    ((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)))
 		len = 0;
-	}
-	/* Without fast-open there should never be data sent on a SYN */
-	if ((flags & TH_SYN) && (!IS_FASTOPEN(tp->t_flags)))
-		len = 0;
+#endif
 	if (len <= 0) {
 		/*
 		 * If FIN has been sent but not acked, but we haven't been
@@ -7519,7 +7509,9 @@
 	ipoptlen += ipsec_optlen;
 #endif
 	if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg &&
+#ifdef NETFLIX_TCP_O_UDP
 	    (tp->t_port == 0) &&
+#endif
 	    ((tp->t_flags & TF_SIGNATURE) == 0) &&
 	    tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
 	    ipoptlen == 0)
@@ -7688,10 +7680,13 @@
 	 * If our state indicates that FIN should be sent and we have not
 	 * yet done so, then we need to send.
 	 */
-	if ((flags & TH_FIN) &&
-	    (tp->snd_nxt == tp->snd_una)) {
-		pass = 11;
-		goto send;
+	if (flags & TH_FIN) {
+		if ((tp->t_flags & TF_SENTFIN) ||
+		    (((tp->t_flags & TF_SENTFIN) == 0) &&
+		     (tp->snd_nxt == tp->snd_una))) {
+			pass = 11;
+			goto send;
+		}
 	}
 	/*
 	 * No reason to send a segment, just return.
@@ -7750,44 +7745,27 @@
 		if (flags & TH_SYN) {
 			tp->snd_nxt = tp->iss;
 			to.to_mss = tcp_mssopt(&inp->inp_inc);
-#ifdef NETFLIX_TCPOUDP
+#ifdef NETFLIX_TCP_O_UDP
 			if (tp->t_port)
 				to.to_mss -= V_tcp_udp_tunneling_overhead;
 #endif
 			to.to_flags |= TOF_MSS;
-
+#ifdef TCP_RFC7413
 			/*
-			 * On SYN or SYN|ACK transmits on TFO connections,
-			 * only include the TFO option if it is not a
-			 * retransmit, as the presence of the TFO option may
-			 * have caused the original SYN or SYN|ACK to have
-			 * been dropped by a middlebox.
+			 * Only include the TFO option on the first
+			 * transmission of the SYN|ACK on a
+			 * passively-created TFO socket, as the presence of
+			 * the TFO option may have caused the original
+			 * SYN|ACK to have been dropped by a middlebox.
 			 */
-			if (IS_FASTOPEN(tp->t_flags) &&
+			if ((tp->t_flags & TF_FASTOPEN) &&
+			    (tp->t_state == TCPS_SYN_RECEIVED) &&
 			    (tp->t_rxtshift == 0)) {
-				if (tp->t_state == TCPS_SYN_RECEIVED) {
-					to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
-					to.to_tfo_cookie =
-					    (u_int8_t *)&tp->t_tfo_cookie.server;
-					to.to_flags |= TOF_FASTOPEN;
-					wanted_cookie = 1;
-				} else if (tp->t_state == TCPS_SYN_SENT) {
-					to.to_tfo_len =
-					    tp->t_tfo_client_cookie_len;
-					to.to_tfo_cookie =
-					    tp->t_tfo_cookie.client;
-					to.to_flags |= TOF_FASTOPEN;
-					wanted_cookie = 1;
-					/*
-					 * If we wind up having more data to
-					 * send with the SYN than can fit in
-					 * one segment, don't send any more
-					 * until the SYN|ACK comes back from
-					 * the other end.
-					 */
-					sendalot = 0;
-				}
+				to.to_tfo_len = TCP_FASTOPEN_MAX_COOKIE_LEN;
+				to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie;
+				to.to_flags |= TOF_FASTOPEN;
 			}
+#endif
 		}
 		/* Window scaling. */
 		if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
@@ -7822,15 +7800,8 @@
 
 		/* Processing the options. */
 		hdrlen += optlen = tcp_addoptions(&to, opt);
-		/*
-		 * If we wanted a TFO option to be added, but it was unable
-		 * to fit, ensure no data is sent.
-		 */
-		if (IS_FASTOPEN(tp->t_flags) && wanted_cookie &&
-		    !(to.to_flags & TOF_FASTOPEN))
-			len = 0;
 	}
-#ifdef NETFLIX_TCPOUDP
+#ifdef NETFLIX_TCP_O_UDP
 	if (tp->t_port) {
 		if (V_tcp_udp_tunneling_port == 0) {
 			/* The port was removed?? */
@@ -7996,8 +7967,8 @@
 				msb = NULL;
 			else
 				msb = sb;
-			m->m_next = tcp_m_copym(mb, moff, &len,
-			    if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb);
+			m->m_next = tcp_m_copym(/*tp, */ mb, moff, &len,
+			    if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb /*, 0, NULL*/);
 			if (len <= (tp->t_maxseg - optlen)) {
 				/* 
 				 * Must have ran out of mbufs for the copy
@@ -8031,6 +8002,8 @@
 				 * TLP should not count in retran count, but
 				 * in its own bin
 				 */
+/*				tp->t_sndtlppack++;*/
+/*				tp->t_sndtlpbyte += len;*/
 				counter_u64_add(rack_tlp_retran, 1);
 				counter_u64_add(rack_tlp_retran_bytes, len);
 			} else {
@@ -8156,7 +8129,7 @@
 #ifdef INET6
 	if (isipv6) {
 		ip6 = mtod(m, struct ip6_hdr *);
-#ifdef NETFLIX_TCPOUDP
+#ifdef NETFLIX_TCP_O_UDP
 		if (tp->t_port) {
 			udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr));
 			udp->uh_sport = htons(V_tcp_udp_tunneling_port);
@@ -8164,10 +8137,10 @@
 			ulen = hdrlen + len - sizeof(struct ip6_hdr);
 			udp->uh_ulen = htons(ulen);
 			th = (struct tcphdr *)(udp + 1);
-		} else
+		} else 
 #endif
 			th = (struct tcphdr *)(ip6 + 1);
-		tcpip_fillheaders(inp, ip6, th);
+		tcpip_fillheaders(inp, /*tp->t_port, */ ip6, th);
 	} else
 #endif				/* INET6 */
 	{
@@ -8175,7 +8148,7 @@
 #ifdef TCPDEBUG
 		ipov = (struct ipovly *)ip;
 #endif
-#ifdef NETFLIX_TCPOUDP
+#ifdef NETFLIX_TCP_O_UDP
 		if (tp->t_port) {
 			udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip));
 			udp->uh_sport = htons(V_tcp_udp_tunneling_port);
@@ -8186,7 +8159,7 @@
 		} else
 #endif
 			th = (struct tcphdr *)(ip + 1);
-		tcpip_fillheaders(inp, ip, th);
+		tcpip_fillheaders(inp,/*tp->t_port, */ ip, th);
 	}
 	/*
 	 * Fill in fields, remembering maximum advertised window for use in
@@ -8277,20 +8250,15 @@
 	/*
 	 * Calculate receive window.  Don't shrink window, but avoid silly
 	 * window syndrome.
-	 * If a RST segment is sent, advertise a window of zero.
 	 */
-	if (flags & TH_RST) {
+	if (recwin < (long)(so->so_rcv.sb_hiwat / 4) &&
+	    recwin < (long)tp->t_maxseg)
 		recwin = 0;
-	} else {
-		if (recwin < (long)(so->so_rcv.sb_hiwat / 4) &&
-		    recwin < (long)tp->t_maxseg)
-			recwin = 0;
-		if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) &&
-		    recwin < (long)(tp->rcv_adv - tp->rcv_nxt))
-			recwin = (long)(tp->rcv_adv - tp->rcv_nxt);
-		if (recwin > (long)TCP_MAXWIN << tp->rcv_scale)
-			recwin = (long)TCP_MAXWIN << tp->rcv_scale;
-	}
+	if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) &&
+	    recwin < (long)(tp->rcv_adv - tp->rcv_nxt))
+		recwin = (long)(tp->rcv_adv - tp->rcv_nxt);
+	if (recwin > (long)TCP_MAXWIN << tp->rcv_scale)
+		recwin = (long)TCP_MAXWIN << tp->rcv_scale;
 
 	/*
 	 * According to RFC1323 the window field in a SYN (i.e., a <SYN> or
@@ -8357,18 +8325,23 @@
 		 * ip6_plen is not need to be filled now, and will be filled
 		 * in ip6_output.
 		 */
+#ifdef NETFLIX_TCP_O_UDP
 		if (tp->t_port) {
 			m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
 			m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 			udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
 			th->th_sum = htons(0);
+			UDPSTAT_INC(udps_opackets);
 		} else {
+#endif
 			m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
 			m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 			th->th_sum = in6_cksum_pseudo(ip6,
 			    sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP,
 			    0);
+#ifdef NETFLIX_TCP_O_UDP
 		}
+#endif
 	}
 #endif
 #if defined(INET6) && defined(INET)
@@ -8376,19 +8349,24 @@
 #endif
 #ifdef INET
 	{
+#ifdef NETFLIX_TCP_O_UDP
 		if (tp->t_port) {
 			m->m_pkthdr.csum_flags = CSUM_UDP;
 			m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 			udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
 			   ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
 			th->th_sum = htons(0);
+			UDPSTAT_INC(udps_opackets);
 		} else {
+#endif
 			m->m_pkthdr.csum_flags = CSUM_TCP;
 			m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 			th->th_sum = in_pseudo(ip->ip_src.s_addr,
 			    ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
 			    IPPROTO_TCP + len + optlen));
+#ifdef NETFLIX_TCP_O_UDP
 		}
+#endif
 		/* IP version must be set here for ipv4/ipv6 checking later */
 		KASSERT(ip->ip_v == IPVERSION,
 		    ("%s: IP version incorrect: %d", __func__, ip->ip_v));
@@ -8559,6 +8537,10 @@
 	 * retransmit.  In persist state, just set snd_max.
 	 */
 	if (error == 0) {
+/*		if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+		    (tp->t_flags & TF_SACK_PERMIT) &&
+		    tp->rcv_numsacks > 0)
+		    tcp_clean_dsack_blocks(tp);*/
 		if (len == 0)
 			counter_u64_add(rack_out_size[TCP_MSS_ACCT_SNDACK], 1);
 		else if (len == 1) {
@@ -8574,15 +8556,19 @@
 		}
 	}
 	if (sub_from_prr && (error == 0)) {
-		rack->r_ctl.rc_prr_sndcnt -= len;
+		if (rack->r_ctl.rc_prr_sndcnt >= len)
+			rack->r_ctl.rc_prr_sndcnt -= len;
+		else
+			rack->r_ctl.rc_prr_sndcnt = 0;
 	}
 	sub_from_prr = 0;
 	rack_log_output(tp, &to, len, rack_seq, (uint8_t) flags, error, cts,
 	    pass, rsm);
 	if ((tp->t_flags & TF_FORCEDATA) == 0 ||
 	    (rack->rc_in_persist == 0)) {
+#ifdef NETFLIX_STATS
 		tcp_seq startseq = tp->snd_nxt;
-
+#endif
 		/*
 		 * Advance snd_nxt over sequence space of this segment.
 		 */
@@ -8613,17 +8599,6 @@
 				tp->t_acktime = ticks;
 			}
 			tp->snd_max = tp->snd_nxt;
-			/*
-			 * Time this transmission if not a retransmission and
-			 * not currently timing anything.
-			 * This is only relevant in case of switching back to
-			 * the base stack.
-			 */
-			if (tp->t_rtttime == 0) {
-				tp->t_rtttime = ticks;
-				tp->t_rtseq = startseq;
-				TCPSTAT_INC(tcps_segstimed);
-			}
 #ifdef NETFLIX_STATS
 			if (!(tp->t_flags & TF_GPUTINPROG) && len) {
 				tp->t_flags |= TF_GPUTINPROG;
@@ -8996,9 +8971,7 @@
 		return (tcp_default_ctloutput(so, sopt, inp, tp));
 		break;
 	}
-#ifdef NETFLIX_STATS
-	tcp_log_socket_option(tp, sopt->sopt_name, optval, error);
-#endif
+/*	tcp_log_socket_option(tp, sopt->sopt_name, optval, error);*/
 	INP_WUNLOCK(inp);
 	return (error);
 }
@@ -9131,7 +9104,6 @@
 	.tfb_tcp_block_name = __XSTRING(STACKNAME),
 	.tfb_tcp_output = rack_output,
 	.tfb_tcp_do_segment = rack_do_segment,
-	.tfb_tcp_hpts_do_segment = rack_hpts_do_segment,
 	.tfb_tcp_ctloutput = rack_ctloutput,
 	.tfb_tcp_fb_init = rack_init,
 	.tfb_tcp_fb_fini = rack_fini,
@@ -9241,4 +9213,3 @@
 
 MODULE_VERSION(MODNAME, 1);
 DECLARE_MODULE(MODNAME, tcp_rack, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
-MODULE_DEPEND(MODNAME, tcphpts, 1, 1, 1);
Index: head/sys/netinet/tcp_stacks/rack_bbr_common.h
===================================================================
--- head/sys/netinet/tcp_stacks/rack_bbr_common.h
+++ head/sys/netinet/tcp_stacks/rack_bbr_common.h
@@ -38,17 +38,8 @@
 #define TCP_MSS_ACCT_SIZE    70
 #define TCP_MSS_SMALL_MAX_SIZE_DIV (TCP_MSS_ACCT_SIZE - TCP_MSS_SMALL_SIZE_OFF)
 
+#define DUP_ACK_THRESHOLD 3
 
-/* Magic flags to tell whats cooking on the pacing wheel */
-#define PACE_PKT_OUTPUT 0x01	/* Output Packets being paced */
-#define PACE_TMR_RACK   0x02	/* RACK timer running */
-#define PACE_TMR_TLP    0x04	/* TLP timer running */
-#define PACE_TMR_RXT    0x08	/* Retransmit timer running */
-#define PACE_TMR_PERSIT 0x10	/* Persists timer running */
-#define PACE_TMR_KEEP   0x20	/* Keep alive timer running */
-#define PACE_TMR_DELACK 0x40	/* Delayed ack timer running */
-#define PACE_TMR_MASK   (PACE_TMR_KEEP|PACE_TMR_PERSIT|PACE_TMR_RXT|PACE_TMR_TLP|PACE_TMR_RACK|PACE_TMR_DELACK)
-
 /* Magic flags for tracing progress events */
 #define PROGRESS_DROP   1
 #define PROGRESS_UPDATE 2
@@ -61,8 +52,66 @@
 #define USE_RTT_LOW  1
 #define USE_RTT_AVG  2
 
+#define PACE_MAX_IP_BYTES 65536
+#define USECS_IN_SECOND 1000000
+#define MSEC_IN_SECOND 1000
+#define MS_IN_USEC 1000		/* divisor used to turn usec into msec */
+#define USEC_TO_MSEC(x) ((x) / MS_IN_USEC)	/* arg parenthesized so a + b converts as a whole */
+#define TCP_TS_OVERHEAD 12		/* Overhead of having Timestamps on */
+
 #ifdef _KERNEL
 /* We have only 7 bits in rack so assert its true */
 CTASSERT((PACE_TMR_MASK & 0x80) == 0);
+#ifdef KERN_TLS
+uint32_t ctf_get_opt_tls_size(struct socket *so, uint32_t rwnd);
+#endif
+int
+ctf_process_inbound_raw(struct tcpcb *tp, struct socket *so,
+    struct mbuf *m, int has_pkt);
+int
+ctf_do_queued_segments(struct socket *so, struct tcpcb *tp, int have_pkt);
+uint32_t ctf_outstanding(struct tcpcb *tp);
+uint32_t ctf_flight_size(struct tcpcb *tp, uint32_t rc_sacked);
+int
+ctf_drop_checks(struct tcpopt *to, struct mbuf *m,
+    struct tcphdr *th, struct tcpcb *tp, int32_t * tlenp, int32_t * thf,
+    int32_t * drop_hdrlen, int32_t * ret_val);
+void
+ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp,
+    struct tcphdr *th, int32_t thflags, int32_t tlen, int32_t * ret_val);
+void
+ctf_do_dropwithreset(struct mbuf *m, struct tcpcb *tp,
+	struct tcphdr *th, int32_t rstreason, int32_t tlen);
+void
+ctf_do_drop(struct mbuf *m, struct tcpcb *tp);
+
+int
+ctf_process_rst(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp);
+
+void
+ctf_challenge_ack(struct mbuf *m, struct tcphdr *th,
+    struct tcpcb *tp, int32_t * ret_val);
+
+int
+ctf_ts_check(struct mbuf *m, struct tcphdr *th,
+    struct tcpcb *tp, int32_t tlen, int32_t thflags, int32_t * ret_val);
+
+void
+ctf_calc_rwin(struct socket *so, struct tcpcb *tp);
+
+void
+ctf_do_dropwithreset_conn(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th,
+    int32_t rstreason, int32_t tlen);
+
+uint32_t 
+ctf_fixed_maxseg(struct tcpcb *tp);
+
+void
+ctf_log_sack_filter(struct tcpcb *tp, int num_sack_blks, struct sackblk *sack_blocks);
+
+uint32_t 
+ctf_decay_count(uint32_t count, uint32_t decay_percentage);
+
 #endif
 #endif
Index: head/sys/netinet/tcp_stacks/rack_bbr_common.c
===================================================================
--- head/sys/netinet/tcp_stacks/rack_bbr_common.c
+++ head/sys/netinet/tcp_stacks/rack_bbr_common.c
@@ -0,0 +1,859 @@
+/*-
+ * Copyright (c) 2016-2018
+ *	Netflix Inc.
+ *      All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/*
+ * Author: Randall Stewart <rrs@netflix.com>
+ * This work is based on the ACM Queue paper
+ * BBR - Congestion Based Congestion Control
+ * and also numerous discussions with Neal, Yuchung and Van.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_ipsec.h"
+#include "opt_tcpdebug.h"
+#include "opt_ratelimit.h"
+/*#include "opt_kern_tls.h"*/
+#include <sys/param.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#ifdef TCP_HHOOK
+#include <sys/hhook.h>
+#endif
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#ifdef KERN_TLS
+#include <sys/sockbuf_tls.h>
+#endif
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/tree.h>
+#include <sys/refcount.h>
+#include <sys/queue.h>
+#include <sys/smp.h>
+#include <sys/kthread.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/time.h>
+#include <vm/uma.h>
+#include <sys/kern_prefetch.h>
+
+#include <net/route.h>
+#include <net/vnet.h>
+#include <net/ethernet.h>
+#include <net/bpf.h>
+
+#define TCPSTATES		/* for logging */
+
+#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
+#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
+#include <netinet/ip_var.h>
+#include <netinet/ip6.h>
+#include <netinet6/in6_pcb.h>
+#include <netinet6/ip6_var.h>
+#define	TCPOUTFLAGS
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_hpts.h>
+#include <netinet/cc/cc.h>
+#include <netinet/tcp_log_buf.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif				/* TCPDEBUG */
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
+#include <netinet/tcp_fastopen.h>
+
+#include <netipsec/ipsec_support.h>
+#include <net/if.h>
+#include <net/if_var.h>
+
+#if defined(IPSEC) || defined(IPSEC_SUPPORT)
+#include <netipsec/ipsec.h>
+#include <netipsec/ipsec6.h>
+#endif				/* IPSEC */
+
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <machine/in_cksum.h>
+
+#ifdef MAC
+#include <security/mac/mac_framework.h>
+#endif
+#include "rack_bbr_common.h"
+
+/*
+ * Common TCP Functions - These are shared by both
+ * RACK and BBR.
+ */
+
+
+#ifdef KERN_TLS
+/*
+ * Return the size of one full TLS record for the send buffer: max
+ * payload plus TLS header and trailer lengths.  While four such
+ * records exceed rwnd, the socket's sb_maxlen is lowered in
+ * 4096-byte steps (never below 4096) and the size recomputed, so
+ * this may modify the TLS parameters as a side effect.
+ */
+uint32_t
+ctf_get_opt_tls_size(struct socket *so, uint32_t rwnd)
+{
+	struct sbtls_info *tls;
+	uint32_t rec_len;
+
+	for (;;) {
+		tls = so->so_snd.sb_tls_info;
+		rec_len = tls->sb_params.sb_maxlen;	/* max tls payload */
+		rec_len += tls->sb_params.sb_tls_hlen;	/* tls header len */
+		rec_len += tls->sb_params.sb_tls_tlen;	/* tls trailer len */
+		/* Done once 4 records fit, or the payload is at its floor. */
+		if ((rec_len * 4) <= rwnd || tls->sb_params.sb_maxlen <= 4096)
+			return (rec_len);
+		/* XXX: should bump a counter; unclear what else TCP can do. */
+		tls->sb_params.sb_maxlen -= 4096;
+		if (tls->sb_params.sb_maxlen < 4096)
+			tls->sb_params.sb_maxlen = 4096;
+	}
+}
+#endif
+
+int
+ctf_process_inbound_raw(struct tcpcb *tp, struct socket *so, struct mbuf *m, int has_pkt)
+{
+	/*
+	 * We are passed a raw chain of mbuf packets
+	 * that arrived in LRO. They are linked via
+	 * the m_nextpkt link in the pkt-headers.
+	 *
+	 * We process each one by:
+	 * a) saving off the next
+	 * b) stripping off the ether-header
+	 * c) formulating the arguments for tfb_do_segment_nounlock
+	 * d) calling each mbuf to tfb_do_segment_nounlock
+	 *    after adjusting the time to match the arrival time.
+	 * Note that the LRO code assures no IP options are present.
+	 * Packets failing validation are freed and skipped.  Returns 0
+	 * unless the handler returned non-zero, which is passed through.
+	 *
+	 * The semantics for calling tfb_do_segment_nounlock are:
+	 * 1) It returns 0 if all went well and you (the caller) need
+	 *    to release the lock.
+	 * 2) If nxt_pkt is set, then the function will suppress calls
+	 *    to tfb_tcp_output() since you are promising to call again
+	 *    with another packet.
+	 * 3) If it returns 1, then you must free all the packets being
+	 *    shipped in, the tcb has been destroyed (or about to be destroyed).
+	 */
+	struct mbuf *m_save;
+	struct ether_header *eh;
+	struct epoch_tracker et;
+	struct tcphdr *th;
+#ifdef INET6
+	struct ip6_hdr *ip6 = NULL;	/* Keep compiler happy. */
+#endif
+#ifdef INET
+	struct ip *ip = NULL;		/* Keep compiler happy. */
+#endif
+	struct ifnet *ifp;
+	struct timeval tv;
+	int32_t retval = 0, nxt_pkt, tlen, off;
+	uint16_t etype;
+	uint16_t drop_hdrlen;
+	uint8_t iptos, no_vn=0, bpf_req=0;
+
+	/*
+	 * This is a bit deceptive, we get the
+	 * "info epoch" which is really the network
+	 * epoch. This covers us on both any INP
+	 * type change but also if the ifp goes
+	 * away it covers us as well.
+	 */
+	INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+	if (m && m->m_pkthdr.rcvif)
+		ifp = m->m_pkthdr.rcvif;
+	else
+		ifp = NULL;
+	if (ifp) {
+		bpf_req = bpf_peers_present(ifp->if_bpf);
+	} else  {
+		/*
+		 * We probably should not work around
+		 * but kassert, since lro always sets rcvif.
+		 */
+		no_vn = 1;
+		goto skip_vnet;
+	}
+	CURVNET_SET(ifp->if_vnet);
+skip_vnet:
+	while (m) {
+		m_save = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+		eh = mtod(m, struct ether_header *);
+		etype = ntohs(eh->ether_type);
+		/* Let the BPF see the packet */
+		if (bpf_req && ifp)
+			ETHER_BPF_MTAP(ifp, m);
+		/* Trim off the ethernet header; parse L3 from the mbuf, not eh. */
+		m_adj(m,  sizeof(*eh));
+		switch (etype) {
+#ifdef INET6
+		case ETHERTYPE_IPV6:
+		{
+			if (m->m_len < (sizeof(*ip6) + sizeof(*th))) {
+				m = m_pullup(m, sizeof(*ip6) + sizeof(*th));
+				if (m == NULL) {
+					/* m_pullup() already freed the chain. */
+					TCPSTAT_INC(tcps_rcvshort);
+					goto skipped_pkt;
+				}
+			}
+			ip6 = mtod(m, struct ip6_hdr *);
+			th = (struct tcphdr *)(ip6 + 1);
+			tlen = ntohs(ip6->ip6_plen);
+			drop_hdrlen = sizeof(*ip6);
+			if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
+				if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
+					th->th_sum = m->m_pkthdr.csum_data;
+				else
+					th->th_sum = in6_cksum_pseudo(ip6, tlen,
+								      IPPROTO_TCP, m->m_pkthdr.csum_data);
+				th->th_sum ^= 0xffff;
+			} else
+				th->th_sum = in6_cksum(m, IPPROTO_TCP, drop_hdrlen, tlen);
+			if (th->th_sum) {
+				TCPSTAT_INC(tcps_rcvbadsum);
+				m_freem(m);
+				goto skipped_pkt;
+			}
+			/*
+			 * Be proactive about unspecified IPv6 address in source.
+			 * As we use all-zero to indicate unbounded/unconnected pcb,
+			 * unspecified IPv6 address can be used to confuse us.
+			 *
+			 * Note that packets with unspecified IPv6 destination is
+			 * already dropped in ip6_input.
+			 */
+			if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
+				/* XXX stat */
+				m_freem(m);
+				goto skipped_pkt;
+			}
+			iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+			break;
+		}
+#endif
+#ifdef INET
+		case ETHERTYPE_IP:
+		{
+			if (m->m_len < sizeof (struct tcpiphdr)) {
+				m = m_pullup(m, sizeof (struct tcpiphdr));
+				if (m == NULL) {
+					/* m_pullup() already freed the chain. */
+					TCPSTAT_INC(tcps_rcvshort);
+					goto skipped_pkt;
+				}
+			}
+			ip = mtod(m, struct ip *);
+			th = (struct tcphdr *)(ip + 1);
+			drop_hdrlen = sizeof(*ip);
+			iptos = ip->ip_tos;
+			tlen = ntohs(ip->ip_len) - sizeof(struct ip);
+			if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
+				if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
+					th->th_sum = m->m_pkthdr.csum_data;
+				else
+					th->th_sum = in_pseudo(ip->ip_src.s_addr,
+							       ip->ip_dst.s_addr,
+							       htonl(m->m_pkthdr.csum_data + tlen +
+								     IPPROTO_TCP));
+				th->th_sum ^= 0xffff;
+			} else {
+				int len;
+				struct ipovly *ipov = (struct ipovly *)ip;
+				/*
+				 * Checksum extended TCP header and data.
+				 */
+				len = drop_hdrlen + tlen;
+				bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
+				ipov->ih_len = htons(tlen);
+				th->th_sum = in_cksum(m, len);
+				/* Reset length for SDT probes. */
+				ip->ip_len = htons(len);
+				/* Reset TOS bits */
+				ip->ip_tos = iptos;
+				/* Re-initialization for later version check */
+				ip->ip_v = IPVERSION;
+				ip->ip_hl = sizeof(*ip) >> 2;
+			}
+			if (th->th_sum) {
+				TCPSTAT_INC(tcps_rcvbadsum);
+				m_freem(m);
+				goto skipped_pkt;
+			}
+			break;
+		}
+#endif
+		default:
+			/* Not IPv4/IPv6: th was never set up, so drop it. */
+			m_freem(m);
+			goto skipped_pkt;
+		}
+		/*
+		 * Convert TCP protocol specific fields to host format.
+		 */
+		tcp_fields_to_host(th);
+
+		off = th->th_off << 2;
+		if (off < sizeof (struct tcphdr) || off > tlen) {
+			TCPSTAT_INC(tcps_rcvbadoff);
+			m_freem(m);
+			goto skipped_pkt;
+		}
+		tlen -= off;
+		drop_hdrlen += off;
+		/*
+		 * Now lets setup the timeval to be when we should
+		 * have been called (if we can).
+		 */
+		m->m_pkthdr.lro_nsegs = 1;
+		if (m->m_flags & M_TSTMP_LRO) {
+			tv.tv_sec = m->m_pkthdr.rcv_tstmp / 1000000000;
+			tv.tv_usec = (m->m_pkthdr.rcv_tstmp % 1000000000) / 1000;
+		} else {
+			/* Should not be should we kassert instead? */
+			tcp_get_usecs(&tv);
+		}
+		/* More input pending (queued here or held by the caller)? */
+		nxt_pkt = (m_save != NULL || has_pkt) ? 1 : 0;
+		retval = (*tp->t_fb->tfb_do_segment_nounlock)(m, th, so, tp, drop_hdrlen, tlen,
+							      iptos, nxt_pkt, &tv);
+		if (retval) {
+			/* We lost the lock and tcb probably */
+			m = m_save;
+			while (m) {
+				m_save = m->m_nextpkt;
+				m->m_nextpkt = NULL;
+				m_freem(m);
+				m = m_save;
+			}
+			if (no_vn == 0)
+				CURVNET_RESTORE();
+			INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+			return (retval);
+		}
+skipped_pkt:
+		m = m_save;
+	}
+	if (no_vn == 0)
+		CURVNET_RESTORE();
+	INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+	return (retval);
+}
+
+/*
+ * Detach any mbufs queued on the tcpcb and run them through
+ * ctf_process_inbound_raw().  Returns 1 if that call reported a
+ * non-zero result (we lost the tcpcb, maybe a RST came in), else 0.
+ */
+int
+ctf_do_queued_segments(struct socket *so, struct tcpcb *tp, int have_pkt)
+{
+	struct mbuf *queued = tp->t_in_pkt;
+
+	if (queued == NULL)
+		return (0);
+	/* Take the whole queue off the tcpcb before processing it. */
+	tp->t_in_pkt = NULL;
+	tp->t_tail_pkt = NULL;
+	return (ctf_process_inbound_raw(tp, so, queued, have_pkt) ? 1 : 0);
+}
+
+uint32_t
+ctf_outstanding(struct tcpcb *tp)
+{
+	return (tp->snd_max - tp->snd_una);	/* sent but unacknowledged */
+}
+
+/* Outstanding sequence space minus rc_sacked; 0 if rc_sacked exceeds it. */
+uint32_t
+ctf_flight_size(struct tcpcb *tp, uint32_t rc_sacked)
+{
+	uint32_t out = ctf_outstanding(tp);
+
+	if (rc_sacked <= out)
+		return (out - rc_sacked);
+	/* TSNH: both values are unsigned, so print them with %u. */
+#ifdef INVARIANTS
+	panic("tp:%p rc_sacked:%u > out:%u", tp, rc_sacked, out);
+#endif
+	return (0);
+}
+
+/* Pass the segment to tcp_dropwithreset(); unlock the inpcb if tp is set. */
+void
+ctf_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th,
+    int32_t rstreason, int32_t tlen)
+{
+	tcp_dropwithreset(m, th, tp, tlen, rstreason);
+	if (tp == NULL)
+		return;
+	INP_WUNLOCK(tp->t_inpcb);
+}
+
+/*
+ * ctf_drop_checks returns 1 for you should not proceed. It places in
+ * ret_val what should be returned 1/0 by the caller: 1 indicates the
+ * TCB is unlocked and probably dropped, 0 that it is still valid and
+ * locked.  On 0, *tlenp/*thf (and *drop_hdrlen) reflect any trimming.
+ */
+int
+ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * tlenp,  int32_t * thf, int32_t * drop_hdrlen, int32_t * ret_val)
+{
+	int32_t todrop;
+	int32_t thflags;
+	int32_t tlen;
+
+	thflags = *thf;
+	tlen = *tlenp;
+	todrop = tp->rcv_nxt - th->th_seq;
+	if (todrop > 0) {
+		if (thflags & TH_SYN) {
+			thflags &= ~TH_SYN;
+			th->th_seq++;
+			if (th->th_urp > 1)
+				th->th_urp--;
+			else
+				thflags &= ~TH_URG;
+			todrop--;
+		}
+		/*
+		 * Following if statement from Stevens, vol. 2, p. 960.
+		 */
+		if (todrop > tlen
+		    || (todrop == tlen && (thflags & TH_FIN) == 0)) {
+			/*
+			 * Any valid FIN must be to the left of the window.
+			 * At this point the FIN must be a duplicate or out
+			 * of sequence; drop it.
+			 */
+			thflags &= ~TH_FIN;
+			/*
+			 * Send an ACK to resynchronize and drop any data.
+			 * But keep on processing for RST or ACK.
+			 */
+			tp->t_flags |= TF_ACKNOW;
+			todrop = tlen;
+			TCPSTAT_INC(tcps_rcvduppack);
+			TCPSTAT_ADD(tcps_rcvdupbyte, todrop);
+		} else {
+			TCPSTAT_INC(tcps_rcvpartduppack);
+			TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
+		}
+		/*
+		 * DSACK - report only the duplicate (dropped) range
+		 */
+		if (tp->t_flags & TF_SACK_PERMIT) {
+			tcp_update_sack_list(tp, th->th_seq, th->th_seq + todrop);
+			/*
+			 * ACK now, as the next in-sequence segment
+			 * will clear the DSACK block again
+			 */
+			tp->t_flags |= TF_ACKNOW;
+		}
+		*drop_hdrlen += todrop;	/* drop from the top afterwards */
+		th->th_seq += todrop;
+		tlen -= todrop;
+		if (th->th_urp > todrop)
+			th->th_urp -= todrop;
+		else {
+			thflags &= ~TH_URG;
+			th->th_urp = 0;
+		}
+	}
+	/*
+	 * If segment ends after window, drop trailing data (and PUSH and
+	 * FIN); if nothing left, just ACK.
+	 */
+	todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd);
+	if (todrop > 0) {
+		TCPSTAT_INC(tcps_rcvpackafterwin);
+		if (todrop >= tlen) {
+			TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen);
+			/*
+			 * If window is closed can only take segments at
+			 * window edge, and have to drop data and PUSH from
+			 * incoming segments.  Continue processing, but
+			 * remember to ack.  Otherwise, drop segment and
+			 * ack.
+			 */
+			if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
+				tp->t_flags |= TF_ACKNOW;
+				TCPSTAT_INC(tcps_rcvwinprobe);
+			} else {
+				ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val);
+				return (1);
+			}
+		} else
+			TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
+		m_adj(m, -todrop);
+		tlen -= todrop;
+		thflags &= ~(TH_PUSH | TH_FIN);
+	}
+	*thf = thflags;
+	*tlenp = tlen;
+	return (0);
+}
+
+/*
+ * Drop the segment but arrange for an ACK.  *ret_val tells the caller
+ * whether we dropped the tcb (and lock): 1 = the reset path ran and
+ * it was dropped, 0 = the TCB is still locked and valid.
+ */
+void
+ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t thflags, int32_t tlen, int32_t * ret_val)
+{
+	int bad_synrcvd_ack;
+
+	/*
+	 * Generate an ACK dropping incoming segment if it occupies sequence
+	 * space, where the ACK reflects our state.  No RST test is needed:
+	 * every path here runs after RST segments have been dropped.
+	 *
+	 * In the SYN-RECEIVED state, don't send an ACK unless the segment
+	 * we received passes the SYN-RECEIVED ACK test. If it fails send a
+	 * RST.  This breaks the loop in the "LAND" DoS attack, and also
+	 * prevents an ACK storm between two listening ports that have been
+	 * sent forged SYN segments, each with the source address of the
+	 * other.
+	 */
+	bad_synrcvd_ack = tp->t_state == TCPS_SYN_RECEIVED &&
+	    (thflags & TH_ACK) &&
+	    (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max));
+	if (bad_synrcvd_ack) {
+		*ret_val = 1;
+		ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen);
+		return;
+	}
+	*ret_val = 0;
+	tp->t_flags |= TF_ACKNOW;
+	if (m != NULL)
+		m_freem(m);
+}
+
+/* Unlock the inpcb (when tp is given) and free the segment, if any. */
+void
+ctf_do_drop(struct mbuf *m, struct tcpcb *tp)
+{
+
+	if (tp != NULL) {
+		INP_WUNLOCK(tp->t_inpcb);
+	}
+	if (m == NULL)
+		return;
+	m_freem(m);
+}
+
+int
+ctf_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp)
+{
+	/*
+	 * RST handling per RFC5961 Section 3.2.  Returns 1 if the
+	 * connection was closed (tcp_close() ran), otherwise 0.
+	 *
+	 * - RST drops connection only if SEG.SEQ == RCV.NXT.
+	 * - If RST is in window, we send challenge ACK.
+	 * Note: to take into account delayed ACKs, we should test against
+	 * last_ack_sent instead of rcv_nxt. Note 2: we handle special case
+	 * of closed window, not covered by the RFC.
+	 */
+	int dropped = 0;
+
+	if ((SEQ_GEQ(th->th_seq, (tp->last_ack_sent - 1)) &&
+	    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
+	    (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
+		/* RST is in the window, or exactly at a closed window's edge. */
+		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+		KASSERT(tp->t_state != TCPS_SYN_SENT,
+		    ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
+		    __func__, th, tp));
+
+		if (V_tcp_insecure_rst ||
+		    (tp->last_ack_sent == th->th_seq) ||
+		    (tp->rcv_nxt == th->th_seq) ||
+		    ((tp->last_ack_sent - 1) == th->th_seq)) {
+			TCPSTAT_INC(tcps_drops);
+			/* Drop the connection; so_error depends on the state. */
+			switch (tp->t_state) {
+			case TCPS_SYN_RECEIVED:
+				so->so_error = ECONNREFUSED;
+				goto close;
+			case TCPS_ESTABLISHED:
+			case TCPS_FIN_WAIT_1:
+			case TCPS_FIN_WAIT_2:
+			case TCPS_CLOSE_WAIT:
+			case TCPS_CLOSING:
+			case TCPS_LAST_ACK:
+				so->so_error = ECONNRESET;
+		close:
+				tcp_state_change(tp, TCPS_CLOSED);
+				/* FALLTHROUGH */
+			default:
+				tp = tcp_close(tp);
+			}
+			dropped = 1;
+			ctf_do_drop(m, tp);
+		} else {
+			TCPSTAT_INC(tcps_badrst);
+			/* Send challenge ACK. */
+			tcp_respond(tp, mtod(m, void *), th, m,
+			    tp->rcv_nxt, tp->snd_nxt, TH_ACK);
+			tp->last_ack_sent = tp->rcv_nxt;
+		}
+	} else {
+		m_freem(m);	/* RST outside the window: just discard it */
+	}
+	return (dropped);
+}
+
+/*
+ * Count the segment as tcps_badsyn, then either drop the connection or
+ * answer with a challenge ACK.  *ret_val tells the caller whether we
+ * dropped the tcb (and lock): 1 = dropped, 0 = still locked and valid.
+ */
+void
+ctf_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ret_val)
+{
+	int in_window;
+
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+	TCPSTAT_INC(tcps_badsyn);
+	in_window = SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
+	    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd);
+	if (V_tcp_insecure_syn && in_window) {
+		tp = tcp_drop(tp, ECONNRESET);
+		*ret_val = 1;
+		ctf_do_drop(m, tp);
+		return;
+	}
+	/* Send challenge ACK. */
+	tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt,
+	    tp->snd_nxt, TH_ACK);
+	tp->last_ack_sent = tp->rcv_nxt;
+	*ret_val = 0;
+	/* m was handed to tcp_respond() and the lock is kept: a no-op drop. */
+	ctf_do_drop(NULL, NULL);
+}
+
+/*
+ * ctf_ts_check returns 1 for you should not proceed, the state
+ * machine should return. It places in ret_val what should
+ * be returned 1/0 by the caller: 1 indicates that the TCB is
+ * unlocked and probably dropped, 0 that the TCB is still valid
+ * and locked.  It returns 0 (after invalidating ts_recent) when
+ * ts_recent is older than TCP_PAWS_IDLE.
+ */
+int
+ctf_ts_check(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
+    int32_t tlen, int32_t thflags, int32_t * ret_val)
+{
+
+	if (tcp_ts_getticks() - tp->ts_recent_age <= TCP_PAWS_IDLE) {
+		/* ts_recent is still current: count a PAWS drop. */
+		TCPSTAT_INC(tcps_rcvduppack);
+		TCPSTAT_ADD(tcps_rcvdupbyte, tlen);
+		TCPSTAT_INC(tcps_pawsdrop);
+		*ret_val = 0;
+		if (tlen != 0)
+			ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val);
+		else
+			ctf_do_drop(m, NULL);
+		return (1);
+	}
+	/*
+	 * Invalidate ts_recent.  If this segment updates ts_recent,
+	 * the age will be reset later and ts_recent will get a
+	 * valid value.  If it does not, setting ts_recent to zero
+	 * will at least satisfy the requirement that zero be placed
+	 * in the timestamp echo reply when ts_recent isn't valid.
+	 * The age isn't reset until we get a valid ts_recent
+	 * because we don't want out-of-order segments to be dropped
+	 * when ts_recent is old.
+	 */
+	tp->ts_recent = 0;
+	return (0);
+}
+
+/*
+ * Calculate the receive window: the amount of space in the socket
+ * receive queue (never negative), but not less than the window
+ * already advertised (rcv_adv - rcv_nxt).  The result is stored in
+ * tp->rcv_wnd.
+ */
+void
+ctf_calc_rwin(struct socket *so, struct tcpcb *tp)
+{
+	int32_t space, advertised;
+
+	space = sbspace(&so->so_rcv);
+	advertised = (int)(tp->rcv_adv - tp->rcv_nxt);
+	tp->rcv_wnd = imax(space < 0 ? 0 : space, advertised);
+}
+
+/* Flag the inpcb to be dropped (ETIMEDOUT), send the reset, then unlock. */
+void
+ctf_do_dropwithreset_conn(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th,
+    int32_t rstreason, int32_t tlen)
+{
+	if (tp->t_inpcb != NULL)
+		tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT);
+	tcp_dropwithreset(m, th, tp, tlen, rstreason);
+	/* NOTE(review): unlike the check above, this assumes t_inpcb is set. */
+	INP_WUNLOCK(tp->t_inpcb);
+}
+
+/*
+ * Return t_maxseg less the space taken by the fixed TCP options,
+ * i.e. those we would send every time.  This is simplified code
+ * from tcp_addoptions(), without a proper loop and with most of
+ * the paddings hardcoded; SACK is not considered.  With TF_NOOPT
+ * set, t_maxseg is returned as is.
+ */
+uint32_t
+ctf_fixed_maxseg(struct tcpcb *tp)
+{
+	int opt_bytes;
+
+	if ((tp->t_flags & TF_NOOPT) != 0)
+		return (tp->t_maxseg);
+
+/* Round an option length up to a multiple of 4. */
+#define	PAD(len)	((((len) / 4) + !!((len) % 4)) * 4)
+	if (!TCPS_HAVEESTABLISHED(tp->t_state)) {
+		/* Not established yet: count the TF_REQ_* options. */
+		opt_bytes = (tp->t_flags & TF_REQ_TSTMP) ?
+		    TCPOLEN_TSTAMP_APPA : PAD(TCPOLEN_MAXSEG);
+		if (tp->t_flags & TF_REQ_SCALE)
+			opt_bytes += PAD(TCPOLEN_WINDOW);
+#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
+		if (tp->t_flags & TF_SIGNATURE)
+			opt_bytes += PAD(TCPOLEN_SIGNATURE);
+#endif
+		if (tp->t_flags & TF_SACK_PERMIT)
+			opt_bytes += PAD(TCPOLEN_SACK_PERMITTED);
+	} else {
+		/* Established: timestamps count only with TF_RCVD_TSTMP. */
+		opt_bytes = (tp->t_flags & TF_RCVD_TSTMP) ?
+		    TCPOLEN_TSTAMP_APPA : 0;
+#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
+		if (tp->t_flags & TF_SIGNATURE)
+			opt_bytes += PAD(TCPOLEN_SIGNATURE);
+#endif
+	}
+#undef PAD
+	/* Cap the total at TCP_MAXOLEN. */
+	opt_bytes = min(opt_bytes, TCP_MAXOLEN);
+	return (tp->t_maxseg - opt_bytes);
+}
+
+/*
+ * Log the SACK block count and up to four blocks (TCP_SACK_FILTER_RES).
+ */
+void
+ctf_log_sack_filter(struct tcpcb *tp, int num_sack_blks, struct sackblk *sack_blocks)
+{
+	union tcp_log_stackspecific rec;
+	struct timeval now;
+	if (tp->t_logstate == TCP_LOG_STATE_OFF)
+		return;
+	memset(&rec, 0, sizeof(rec));
+	rec.u_bbr.timeStamp = tcp_get_usecs(&now);
+	rec.u_bbr.flex8 = num_sack_blks;
+	if (num_sack_blks >= 1) {
+		rec.u_bbr.flex1 = sack_blocks[0].start;
+		rec.u_bbr.flex2 = sack_blocks[0].end;
+	}
+	if (num_sack_blks >= 2) {
+		rec.u_bbr.flex3 = sack_blocks[1].start;
+		rec.u_bbr.flex4 = sack_blocks[1].end;
+	}
+	if (num_sack_blks >= 3) {
+		rec.u_bbr.flex5 = sack_blocks[2].start;
+		rec.u_bbr.flex6 = sack_blocks[2].end;
+	}
+	if (num_sack_blks >= 4) {
+		rec.u_bbr.applimited = sack_blocks[3].start;
+		rec.u_bbr.pkts_out = sack_blocks[3].end;
+	}
+	TCP_LOG_EVENTP(tp, NULL, &tp->t_inpcb->inp_socket->so_rcv,
+	    &tp->t_inpcb->inp_socket->so_snd, TCP_SACK_FILTER_RES, 0, 0,
+	    &rec, false, &now);
+}
+
+/*
+ * Decay a counter by a fraction of its current value.
+ *
+ * count - value to decay.
+ * decay - fraction to remove, in thousandths (1000 = 100%,
+ *         193 = 19.3%).  Above 1000 the count is returned as is.
+ *
+ * Returns count - (count * decay / 1000); the product is formed in
+ * 64 bits and the division truncates.
+ */
+uint32_t
+ctf_decay_count(uint32_t count, uint32_t decay)
+{
+	uint64_t reduction;
+
+	if (decay > 1000) {
+		/* We don't raise it */
+		return (count);
+	}
+	/* Widen first so count * decay cannot overflow 32 bits. */
+	reduction = (uint64_t)count;
+	reduction *= decay;
+	reduction /= 1000;
+	return (count - (uint32_t)reduction);
+}
Index: head/sys/netinet/tcp_var.h
===================================================================
--- head/sys/netinet/tcp_var.h
+++ head/sys/netinet/tcp_var.h
@@ -102,7 +102,8 @@
 		t_state:4,		/* state of this connection */
 		t_idle_reduce : 1,
 		t_delayed_ack: 7,	/* Delayed ack variable */
-		bits_spare : 4;
+		t_fin_is_rst: 1,	/* Are fin's treated as resets */
+		bits_spare : 3;
 	u_int	t_flags;
 	tcp_seq	snd_una;		/* sent but unacknowledged */
 	tcp_seq	snd_max;		/* highest sequence number sent;
@@ -271,6 +272,11 @@
 	void	(*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *,
 			    struct socket *, struct tcpcb *,
 		        int, int, uint8_t);
+	int     (*tfb_do_queued_segments)(struct socket *, struct tcpcb *, int);
+	int      (*tfb_do_segment_nounlock)(struct mbuf *, struct tcphdr *,
+			    struct socket *, struct tcpcb *,
+			    int, int, uint8_t,
+			    int, struct timeval *);
 	void	(*tfb_tcp_hpts_do_segment)(struct mbuf *, struct tcphdr *,
 			    struct socket *, struct tcpcb *,
 			    int, int, uint8_t,
Index: head/sys/sys/mbuf.h
===================================================================
--- head/sys/sys/mbuf.h
+++ head/sys/sys/mbuf.h
@@ -407,6 +407,7 @@
 #define	M_TSTMP_HPREC	0x00000800 /* rcv_tstmp is high-prec, typically
 				      hw-stamped on port (useful for IEEE 1588
 				      and 802.1AS) */
+#define M_TSTMP_LRO	0x00001000 /* Time LRO pushed in pkt is valid in (PH_loc) */
 
 #define	M_PROTO1	0x00001000 /* protocol-specific */
 #define	M_PROTO2	0x00002000 /* protocol-specific */