diff --git a/sys/modules/tcp/rack/Makefile b/sys/modules/tcp/rack/Makefile --- a/sys/modules/tcp/rack/Makefile +++ b/sys/modules/tcp/rack/Makefile @@ -5,7 +5,7 @@ STACKNAME= rack KMOD= tcp_${STACKNAME} -SRCS= rack.c sack_filter.c rack_bbr_common.c tailq_hash.c +SRCS= rack.c sack_filter.c rack_bbr_common.c tailq_hash.c rack_pcm.c SRCS+= opt_inet.h opt_inet6.h opt_ipsec.h SRCS+= opt_kern_tls.h diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h --- a/sys/netinet/tcp.h +++ b/sys/netinet/tcp.h @@ -334,9 +334,22 @@ #define TCP_RACK_PACING_DIVISOR 1146 /* Pacing divisor given to rate-limit code for burst sizing */ #define TCP_RACK_PACE_MIN_SEG 1147 /* Pacing min seg size rack will use */ #define TCP_RACK_DGP_IN_REC 1148 /* Do we use full DGP in recovery? */ -#define TCP_RXT_CLAMP 1149 /* Do we apply a threshold to rack so if excess rxt clamp cwnd? */ +#define TCP_POLICER_DETECT 1149 /* Do we apply a thresholds to rack to detect and compensate for policers? */ +#define TCP_RXT_CLAMP TCP_POLICER_DETECT #define TCP_HYBRID_PACING 1150 /* Hybrid pacing enablement */ #define TCP_PACING_DND 1151 /* When pacing with rr_config=3 can sacks disturb us */ +#define TCP_SS_EEXIT 1152 /* Do we do early exit from slowtart if no b/w growth */ +#define TCP_DGP_UPPER_BOUNDS 1153 /* SS and CA upper bound in percentage */ +#define TCP_NO_TIMELY 1154 /* Disable/enable Timely */ +#define TCP_HONOR_HPTS_MIN 1155 /* Do we honor hpts min to */ +#define TCP_REC_IS_DYN 1156 /* Do we allow timely to change recovery multiplier? */ +#define TCP_SIDECHAN_DIS 1157 /* Disable/enable the side-channel */ +#define TCP_FILLCW_RATE_CAP 1158 /* Set a cap for DGP's fillcw */ +#define TCP_POLICER_MSS 1159 /* Policer MSS requirement */ +#define TCP_STACK_SPEC_INFO 1160 /* Get stack specific information (if present) */ +#define RACK_CSPR_IS_FCC 1161 +#define TCP_GP_USE_LTBW 1162 /* how we use lt_bw 0=not, 1=min, 2=max */ + /* Start of reserved space for third-party user-settable options. */ #define TCP_VENDOR SO_VENDOR @@ -447,6 +460,7 @@ u_int32_t tcpi_rcv_adv; /* Peer advertised window */ u_int32_t tcpi_dupacks; /* Consecutive dup ACKs recvd */ + u_int32_t tcpi_rttmin; /* Min observed RTT */ /* Padding to grow without breaking ABI. */ u_int32_t __tcpi_pad[14]; /* Padding. 
*/ }; @@ -463,6 +477,20 @@ #define TCP_FUNCTION_NAME_LEN_MAX 32 +struct stack_specific_info { + char stack_name[TCP_FUNCTION_NAME_LEN_MAX]; + uint64_t policer_last_bw; /* Only valid if detection enabled and policer detected */ + uint64_t bytes_transmitted; + uint64_t bytes_retransmitted; + uint32_t policer_detection_enabled: 1, + policer_detected : 1, /* transport thinks a policer is on path */ + highly_buffered : 1, /* transport considers the path highly buffered */ + spare : 29; + uint32_t policer_bucket_size; /* Only valid if detection enabled and policer detected */ + uint32_t current_round; + uint32_t _rack_i_pad[18]; +}; + struct tcp_function_set { char function_set_name[TCP_FUNCTION_NAME_LEN_MAX]; uint32_t pcbcnt; @@ -488,6 +516,7 @@ uint64_t start; uint64_t end; uint32_t flags; + uint32_t playout_ms; }; union tcp_log_userdata { @@ -518,9 +547,12 @@ #define TCP_HYBRID_PACING_H_MS 0x0008 /* A client hint for maxseg is present */ #define TCP_HYBRID_PACING_ENABLE 0x0010 /* We are enabling hybrid pacing else disable */ #define TCP_HYBRID_PACING_S_MSS 0x0020 /* Clent wants us to set the mss overriding gp est in CU */ -#define TCP_HYBRID_PACING_SETMSS 0x1000 /* Internal flag that tellsus we set the mss on this entry */ +#define TCP_HAS_PLAYOUT_MS 0x0040 /* The client included the chunk playout milliseconds: deprecate */ +/* the below are internal only flags */ +#define TCP_HYBRID_PACING_USER_MASK 0x0FFF /* Non-internal flags mask */ +#define TCP_HYBRID_PACING_SETMSS 0x1000 /* Internal flag that tells us we set the mss on this entry */ #define TCP_HYBRID_PACING_WASSET 0x2000 /* We init to this to know if a hybrid command was issued */ - +#define TCP_HYBRID_PACING_SENDTIME 0x4000 /* Duplicate tm to last, use sendtime for catch up mode */ struct tcp_hybrid_req { struct tcp_snd_req req; diff --git a/sys/netinet/tcp_log_buf.h b/sys/netinet/tcp_log_buf.h --- a/sys/netinet/tcp_log_buf.h +++ b/sys/netinet/tcp_log_buf.h @@ -267,7 +267,9 @@ TCP_RACK_TP_TRIGGERED, /* A rack tracepoint is triggered 68 */ TCP_HYBRID_PACING_LOG, /* Hybrid pacing log 69 */ TCP_LOG_PRU, /* TCP protocol user request 70 */ - TCP_LOG_END /* End (keep at end) 71 */ + TCP_POLICER_DET, /* TCP Policer detectionn 71 */ + TCP_PCM_MEASURE, /* TCP Path Capacity Measurement 72 */ + TCP_LOG_END /* End (keep at end) 72 */ }; enum tcp_log_states { @@ -371,10 +373,11 @@ #define TCP_TP_COLLAPSED_RXT 0x00000004 /* When we actually retransmit a collapsed window rsm */ #define TCP_TP_REQ_LOG_FAIL 0x00000005 /* We tried to allocate a Request log but had no space */ #define TCP_TP_RESET_RCV 0x00000006 /* Triggers when we receive a RST */ -#define TCP_TP_EXCESS_RXT 0x00000007 /* When we get excess RXT's clamping the cwnd */ +#define TCP_TP_POLICER_DET 0x00000007 /* When we detect a policer */ +#define TCP_TP_EXCESS_RXT TCP_TP_POLICER_DET /* alias */ #define TCP_TP_SAD_TRIGGERED 0x00000008 /* Sack Attack Detection triggers */ - #define TCP_TP_SAD_SUSPECT 0x0000000a /* A sack has supicious information in it */ +#define TCP_TP_PACED_BOTTOM 0x0000000b /* We have paced at the bottom */ #ifdef _KERNEL diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -11529,7 +11529,9 @@ bbr_set_pktepoch(bbr, cts, __LINE__); bbr_check_bbr_for_state(bbr, cts, __LINE__, (bbr->r_ctl.rc_lost - lost)); if (nxt_pkt == 0) { - if (bbr->r_wanted_output != 0) { + if ((bbr->r_wanted_output != 0) || + (tp->t_flags & TF_ACKNOW)) { + bbr->rc_output_starts_timer = 0; did_out = 1; if 
(tcp_output(tp) < 0) diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -142,9 +142,12 @@ #define V_newreno_beta VNET(newreno_beta) #define V_newreno_beta_ecn VNET(newreno_beta_ecn) +#define M_TCPFSB __CONCAT(M_TCPFSB, STACKNAME) +#define M_TCPDO __CONCAT(M_TCPDO, STACKNAME) -MALLOC_DEFINE(M_TCPFSB, "tcp_fsb", "TCP fast send block"); -MALLOC_DEFINE(M_TCPDO, "tcp_do", "TCP deferred options"); +MALLOC_DEFINE(M_TCPFSB, "tcp_fsb_" __XSTRING(STACKNAME), "TCP fast send block"); +MALLOC_DEFINE(M_TCPDO, "tcp_do_" __XSTRING(STACKNAME), "TCP deferred options"); +MALLOC_DEFINE(M_TCPPCM, "tcp_pcm_" __XSTRING(STACKNAME), "TCP PCM measurement information"); struct sysctl_ctx_list rack_sysctl_ctx; struct sysctl_oid *rack_sysctl_root; @@ -190,12 +193,24 @@ static int32_t rack_reorder_thresh = 2; static int32_t rack_reorder_fade = 60000000; /* 0 - never fade, def 60,000,000 * - 60 seconds */ -static uint32_t rack_clamp_ss_upper = 110; -static uint32_t rack_clamp_ca_upper = 105; -static uint32_t rack_rxt_min_rnds = 10; /* Min rounds if drastic rxt clamp is in place */ -static uint32_t rack_unclamp_round_thresh = 100; /* number of perfect rounds before we unclamp */ -static uint32_t rack_unclamp_rxt_thresh = 5; /* .5% and under */ -static uint64_t rack_rxt_clamp_thresh = 0; /* Do we do the rxt clamp thing */ +static uint16_t rack_policer_rxt_thresh= 0; /* 499 = 49.9%, 0 is off */ +static uint8_t rack_policer_avg_thresh = 0; /* 3.2 */ +static uint8_t rack_policer_med_thresh = 0; /* 1 - 16 */ +static uint16_t rack_policer_bucket_reserve = 20; /* How much % is reserved in the bucket */ +static uint64_t rack_pol_min_bw = 125000; /* 1mbps in Bytes per sec */ +static uint32_t rack_policer_data_thresh = 64000; /* 64,000 bytes must be sent before we engage */ +static uint32_t rack_policing_do_bw_comp = 1; +static uint32_t rack_pcm_every_n_rounds = 100; +static uint32_t rack_pcm_blast = 0; +static uint32_t rack_pcm_is_enabled = 1; +static uint8_t rack_req_del_mss = 18; /* How many segments need to be sent in a recovery episode to do policer_detection */ +static uint8_t rack_ssthresh_rest_rto_rec = 0; /* Do we restore ssthresh when we have rec -> rto -> rec */ + +static uint32_t rack_gp_gain_req = 1200; /* Amount percent wise required to gain to record a round has "gaining" */ +static uint32_t rack_rnd_cnt_req = 0x10005; /* Default number of rounds if we are below rack_gp_gain_req where we exit ss */ + + +static int32_t rack_rxt_scoreboard_clear_thresh = 2; static int32_t rack_dnd_default = 0; /* For rr_conf = 3, what is the default for dnd */ static int32_t rack_rxt_controls = 0; static int32_t rack_fill_cw_state = 0; @@ -217,9 +232,8 @@ static int32_t rack_apply_rtt_with_reduced_conf = 0; static int32_t rack_hibeta_setting = 0; static int32_t rack_default_pacing_divisor = 250; -static int32_t rack_uses_full_dgp_in_rec = 1; static uint16_t rack_pacing_min_seg = 0; - +static int32_t rack_timely_off = 0; static uint32_t sad_seg_size_per = 800; /* 80.0 % */ static int32_t rack_pkt_delay = 1000; @@ -235,7 +249,7 @@ static int32_t rack_max_abc_post_recovery = 2; static int32_t rack_client_low_buf = 0; static int32_t rack_dsack_std_based = 0x3; /* bit field bit 1 sets rc_rack_tmr_std_based and bit 2 sets rc_rack_use_dsack */ -static int32_t rack_bw_multipler = 2; /* Limit on fill cw's jump up to be this x gp_est */ +static int32_t rack_bw_multipler = 0; /* Limit on fill cw's jump up to be this x gp_est */ #ifdef 
TCP_ACCOUNTING static int32_t rack_tcp_accounting = 0; #endif @@ -247,8 +261,9 @@ static int32_t rack_non_rxt_use_cr = 0; /* does a non-rxt in recovery use the configured rate (ss/ca)? */ static int32_t rack_persist_min = 250000; /* 250usec */ static int32_t rack_persist_max = 2000000; /* 2 Second in usec's */ +static int32_t rack_honors_hpts_min_to = 1; /* Do we honor the hpts minimum time out for pacing timers */ +static uint32_t rack_max_reduce = 10; /* Percent we can reduce slot by */ static int32_t rack_sack_not_required = 1; /* set to one to allow non-sack to use rack */ -static int32_t rack_default_init_window = 0; /* Use system default */ static int32_t rack_limit_time_with_srtt = 0; static int32_t rack_autosndbuf_inc = 20; /* In percentage form */ static int32_t rack_enobuf_hw_boost_mult = 0; /* How many times the hw rate we boost slot using time_between */ @@ -282,7 +297,6 @@ static int32_t rack_def_profile = 0; static int32_t rack_lower_cwnd_at_tlp = 0; -static int32_t rack_limited_retran = 0; static int32_t rack_always_send_oldest = 0; static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE; @@ -356,6 +370,7 @@ static int32_t rack_down_raise_thresh = 100; static int32_t rack_req_segs = 1; static uint64_t rack_bw_rate_cap = 0; +static uint64_t rack_fillcw_bw_cap = 3750000; /* Cap fillcw at 30Mbps */ /* Rack specific counters */ @@ -377,6 +392,7 @@ counter_u64_t rack_tlp_retran_bytes; counter_u64_t rack_to_tot; counter_u64_t rack_hot_alloc; +counter_u64_t tcp_policer_detected; counter_u64_t rack_to_alloc; counter_u64_t rack_to_alloc_hard; counter_u64_t rack_to_alloc_emerg; @@ -440,7 +456,7 @@ static int rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, - uint32_t tiwin, int32_t tlen, int32_t * ofia, int32_t thflags, int32_t * ret_val); + uint32_t tiwin, int32_t tlen, int32_t * ofia, int32_t thflags, int32_t * ret_val, int32_t orig_tlen); static int rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, @@ -454,6 +470,8 @@ static struct rack_sendmap * rack_check_recovery_mode(struct tcpcb *tp, uint32_t tsused); +static uint32_t +rack_grab_rtt(struct tcpcb *tp, struct tcp_rack *rack); static void rack_cong_signal(struct tcpcb *tp, uint32_t type, uint32_t ack, int ); @@ -504,13 +522,14 @@ static void rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len, uint32_t seq_out, uint16_t th_flags, int32_t err, uint64_t ts, - struct rack_sendmap *hintrsm, uint16_t add_flags, struct mbuf *s_mb, uint32_t s_moff, int hw_tls, int segsiz); + struct rack_sendmap *hintrsm, uint32_t add_flags, struct mbuf *s_mb, uint32_t s_moff, int hw_tls, int segsiz); static uint64_t rack_get_gp_est(struct tcp_rack *rack); + static void rack_log_sack_passed(struct tcpcb *tp, struct tcp_rack *rack, - struct rack_sendmap *rsm); + struct rack_sendmap *rsm, uint32_t cts); static void rack_log_to_event(struct tcp_rack *rack, int32_t to_num, struct rack_sendmap *rsm); static int32_t rack_output(struct tcpcb *tp); @@ -526,10 +545,10 @@ static void rack_timer_cancel(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, int line); static uint32_t rack_update_entry(struct tcpcb *tp, struct tcp_rack *rack, - struct rack_sendmap *rsm, uint64_t ts, int32_t * lenp, uint16_t add_flag, int segsiz); + struct rack_sendmap *rsm, uint64_t ts, int32_t * lenp, uint32_t add_flag, int segsiz); static void rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, - struct rack_sendmap *rsm, uint64_t 
ts, uint16_t add_flag, int segsiz); + struct rack_sendmap *rsm, uint64_t ts, uint32_t add_flag, int segsiz); static int rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, struct tcpopt *to, uint32_t cts, int32_t ack_type, tcp_seq th_ack); @@ -538,6 +557,10 @@ rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos); + +static void +rack_peg_rxt(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t segsiz); + static int rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, @@ -720,6 +743,22 @@ rack_swap_beta_values(rack, 4); } +static void +rack_remove_pacing(struct tcp_rack *rack) +{ + if (rack->rc_pacing_cc_set) + rack_undo_cc_pacing(rack); + if (rack->r_ctl.pacing_method & RACK_REG_PACING) + tcp_decrement_paced_conn(); + if (rack->r_ctl.pacing_method & RACK_DGP_PACING) + tcp_dec_dgp_pacing_cnt(); + rack->rc_always_pace = 0; + rack->r_ctl.pacing_method = RACK_PACING_NONE; + rack->dgp_on = 0; + rack->rc_hybrid_mode = 0; + rack->use_fixed_rate = 0; +} + static void rack_log_gpset(struct tcp_rack *rack, uint32_t seq_end, uint32_t ack_end_t, uint32_t send_end_t, int line, uint8_t mode, struct rack_sendmap *rsm) @@ -742,6 +781,8 @@ log.u_bbr.pkts_out = line; log.u_bbr.cwnd_gain = rack->app_limited_needs_set; log.u_bbr.pkt_epoch = rack->r_ctl.rc_app_limited_cnt; + log.u_bbr.epoch = rack->r_ctl.current_round; + log.u_bbr.lt_epoch = rack->r_ctl.rc_considered_lost; if (rsm != NULL) { log.u_bbr.applimited = rsm->r_start; log.u_bbr.delivered = rsm->r_end; @@ -857,6 +898,7 @@ struct sysctl_oid *rack_measure; struct sysctl_oid *rack_probertt; struct sysctl_oid *rack_hw_pacing; + struct sysctl_oid *rack_policing; rack_attack = SYSCTL_ADD_NODE(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), @@ -994,11 +1036,36 @@ "pacing", CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Pacing related Controls"); + SYSCTL_ADD_U32(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_pacing), + OID_AUTO, "pcm_enabled", CTLFLAG_RW, + &rack_pcm_is_enabled, 1, + "Do we by default do PCM measurements?"); + SYSCTL_ADD_U32(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_pacing), + OID_AUTO, "pcm_rnds", CTLFLAG_RW, + &rack_pcm_every_n_rounds, 100, + "How many rounds before we need to do a PCM measurement"); + SYSCTL_ADD_U32(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_pacing), + OID_AUTO, "pcm_blast", CTLFLAG_RW, + &rack_pcm_blast, 0, + "Blast out the full cwnd/rwnd when doing a PCM measurement"); + SYSCTL_ADD_U32(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_pacing), + OID_AUTO, "rnd_gp_gain", CTLFLAG_RW, + &rack_gp_gain_req, 1200, + "How much do we have to increase the GP to record the round 1200 = 120.0"); + SYSCTL_ADD_U32(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_pacing), + OID_AUTO, "dgp_out_of_ss_at", CTLFLAG_RW, + &rack_rnd_cnt_req, 0x10005, + "How many rounds less than rnd_gp_gain will drop us out of SS"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_pacing), - OID_AUTO, "fulldgpinrec", CTLFLAG_RW, - &rack_uses_full_dgp_in_rec, 1, - "Do we use all DGP features in recovery (fillcw, timely et.al.)?"); + OID_AUTO, "no_timely", CTLFLAG_RW, + &rack_timely_off, 0, + "Do we not use timely in DGP?"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_pacing), OID_AUTO, "fullbufdisc", CTLFLAG_RW, @@ -1017,13 +1084,13 @@ SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_pacing), OID_AUTO, 
"divisor", CTLFLAG_RW, - &rack_default_pacing_divisor, 4, + &rack_default_pacing_divisor, 250, "What is the default divisor given to the rl code?"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_pacing), OID_AUTO, "fillcw_max_mult", CTLFLAG_RW, - &rack_bw_multipler, 2, - "What is the multiplier of the current gp_est that fillcw can increase the b/w too?"); + &rack_bw_multipler, 0, + "What is the limit multiplier of the current gp_est that fillcw can increase the b/w too, 200 == 200% (0 = off)?"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_pacing), OID_AUTO, "max_pace_over", CTLFLAG_RW, @@ -1039,11 +1106,6 @@ OID_AUTO, "limit_wsrtt", CTLFLAG_RW, &rack_limit_time_with_srtt, 0, "Do we limit pacing time based on srtt"); - SYSCTL_ADD_S32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_pacing), - OID_AUTO, "init_win", CTLFLAG_RW, - &rack_default_init_window, 0, - "Do we have a rack initial window 0 = system default"); SYSCTL_ADD_U16(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_pacing), OID_AUTO, "gp_per_ss", CTLFLAG_RW, @@ -1079,6 +1141,11 @@ OID_AUTO, "rate_cap", CTLFLAG_RW, &rack_bw_rate_cap, 0, "If set we apply this value to the absolute rate cap used by pacing"); + SYSCTL_ADD_U64(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_pacing), + OID_AUTO, "fillcw_cap", CTLFLAG_RW, + &rack_fillcw_bw_cap, 3750000, + "Do we have an absolute cap on the amount of b/w fillcw can specify (0 = no)?"); SYSCTL_ADD_U8(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "req_measure_cnt", CTLFLAG_RW, @@ -1317,11 +1384,6 @@ OID_AUTO, "send_oldest", CTLFLAG_RW, &rack_always_send_oldest, 0, "Should we always send the oldest TLP and RACK-TLP"); - SYSCTL_ADD_S32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_tlp), - OID_AUTO, "rack_tlimit", CTLFLAG_RW, - &rack_limited_retran, 0, - "How many times can a rack timeout drive out sends"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_tlp), OID_AUTO, "tlp_cwnd_flag", CTLFLAG_RW, @@ -1355,6 +1417,26 @@ "timers", CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Timer related controls"); + SYSCTL_ADD_U8(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_timers), + OID_AUTO, "reset_ssth_rec_rto", CTLFLAG_RW, + &rack_ssthresh_rest_rto_rec, 0, + "When doing recovery -> rto -> recovery do we reset SSthresh?"); + SYSCTL_ADD_U32(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_timers), + OID_AUTO, "scoreboard_thresh", CTLFLAG_RW, + &rack_rxt_scoreboard_clear_thresh, 2, + "How many RTO's are allowed before we clear the scoreboard"); + SYSCTL_ADD_U32(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_timers), + OID_AUTO, "honor_hpts_min", CTLFLAG_RW, + &rack_honors_hpts_min_to, 1, + "Do rack pacing timers honor hpts min timeout"); + SYSCTL_ADD_U32(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_timers), + OID_AUTO, "hpts_max_reduce", CTLFLAG_RW, + &rack_max_reduce, 10, + "Max percentage we will reduce slot by for pacing when we are behind"); SYSCTL_ADD_U32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_timers), OID_AUTO, "persmin", CTLFLAG_RW, @@ -1434,11 +1516,6 @@ "features", CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Feature controls"); - SYSCTL_ADD_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_features), - OID_AUTO, "rxt_clamp_thresh", CTLFLAG_RW, - &rack_rxt_clamp_thresh, 0, - "Bit encoded clamping setup bits CCCC CCCCC UUUU UULF PPPP PPPP PPPP PPPP"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_features), OID_AUTO, "hybrid_set_maxseg", CTLFLAG_RW, @@ -1474,6 +1551,53 @@ OID_AUTO, "hystartplusplus", CTLFLAG_RW, &rack_do_hystart, 0, "Should RACK enable HyStart++ on connections?"); + /* Policer detection */ + rack_policing = 
SYSCTL_ADD_NODE(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_sysctl_root), + OID_AUTO, + "policing", + CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "policer detection"); + SYSCTL_ADD_U16(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_policing), + OID_AUTO, "rxt_thresh", CTLFLAG_RW, + &rack_policer_rxt_thresh, 0, + "Percentage of retransmits we need to be a possible policer (499 = 49.9 percent)"); + SYSCTL_ADD_U8(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_policing), + OID_AUTO, "avg_thresh", CTLFLAG_RW, + &rack_policer_avg_thresh, 0, + "What threshold of average retransmits needed to recover a lost packet (1 - 169 aka 21 = 2.1)?"); + SYSCTL_ADD_U8(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_policing), + OID_AUTO, "med_thresh", CTLFLAG_RW, + &rack_policer_med_thresh, 0, + "What threshold of Median retransmits needed to recover a lost packet (1 - 16)?"); + SYSCTL_ADD_U32(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_policing), + OID_AUTO, "data_thresh", CTLFLAG_RW, + &rack_policer_data_thresh, 64000, + "How many bytes must have gotten through before we can start doing policer detection?"); + SYSCTL_ADD_U32(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_policing), + OID_AUTO, "bwcomp", CTLFLAG_RW, + &rack_policing_do_bw_comp, 1, + "Do we raise up low b/w so that at least pace_max_seg can be sent in the srtt?"); + SYSCTL_ADD_U8(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_policing), + OID_AUTO, "recmss", CTLFLAG_RW, + &rack_req_del_mss, 18, + "How many MSS must be delivered during recovery to engage policer detection?"); + SYSCTL_ADD_U16(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_policing), + OID_AUTO, "res_div", CTLFLAG_RW, + &rack_policer_bucket_reserve, 20, + "What percentage is reserved in the policer bucket?"); + SYSCTL_ADD_U64(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_policing), + OID_AUTO, "min_comp_bw", CTLFLAG_RW, + &rack_pol_min_bw, 125000, + "Do we have a min b/w for b/w compensation (0 = no)?"); /* Misc rack controls */ rack_misc = SYSCTL_ADD_NODE(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), @@ -1578,31 +1702,8 @@ OID_AUTO, "autoscale", CTLFLAG_RW, &rack_autosndbuf_inc, 20, "What percentage should rack scale up its snd buffer by?"); - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_misc), - OID_AUTO, "rnds_for_rxt_clamp", CTLFLAG_RW, - &rack_rxt_min_rnds, 10, - "Number of rounds needed between RTT clamps due to high loss rates"); - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_misc), - OID_AUTO, "rnds_for_unclamp", CTLFLAG_RW, - &rack_unclamp_round_thresh, 100, - "Number of rounds needed with no loss to unclamp"); - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_misc), - OID_AUTO, "rxt_threshs_for_unclamp", CTLFLAG_RW, - &rack_unclamp_rxt_thresh, 5, - "Percentage of retransmits we need to be under to unclamp (5 = .5 percent)\n"); - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_misc), - OID_AUTO, "clamp_ss_upper", CTLFLAG_RW, - &rack_clamp_ss_upper, 110, - "Clamp percentage ceiling in SS?"); - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_misc), - OID_AUTO, "clamp_ca_upper", CTLFLAG_RW, - &rack_clamp_ca_upper, 110, - "Clamp percentage ceiling in CA?"); + + /* Sack Attacker detection stuff */ SYSCTL_ADD_U32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_attack), @@ -1779,6 +1880,13 @@ OID_AUTO, "alloc_hot", CTLFLAG_RD, &rack_hot_alloc, "Total allocations from the top of our list"); + tcp_policer_detected = counter_u64_alloc(M_WAITOK); + SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_counters), + OID_AUTO, "policer_detected", CTLFLAG_RD, + &tcp_policer_detected, + 
"Total policer_detections"); + rack_to_alloc = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_counters), @@ -1957,17 +2065,8 @@ static uint32_t rc_init_window(struct tcp_rack *rack) { - uint32_t win; + return (tcp_compute_initwnd(tcp_maxseg(rack->rc_tp))); - if (rack->rc_init_win == 0) { - /* - * Nothing set by the user, use the system stack - * default. - */ - return (tcp_compute_initwnd(tcp_maxseg(rack->rc_tp))); - } - win = ctf_fixed_maxseg(rack->rc_tp) * rack->rc_init_win; - return (win); } static uint64_t @@ -2071,6 +2170,7 @@ off = (uint64_t)(cur) - (uint64_t)(&rack->rc_tp->t_tcpreq_info[0]); log.u_bbr.bbr_substate = (uint8_t)(off / sizeof(struct tcp_sendfile_track)); #endif + log.u_bbr.inhpts = 1; log.u_bbr.flex4 = (uint32_t)(rack->rc_tp->t_sndbytes - cur->sent_at_fs); log.u_bbr.flex5 = (uint32_t)(rack->rc_tp->t_snd_rxt_bytes - cur->rxt_at_fs); log.u_bbr.flex7 = (uint16_t)cur->hybrid_flags; @@ -2116,9 +2216,24 @@ memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); - log.u_bbr.cur_del_rate = rack->rc_tp->t_sndbytes; log.u_bbr.delRate = cur->sent_at_fs; - log.u_bbr.rttProp = rack->rc_tp->t_snd_rxt_bytes; + + if ((cur->flags & TCP_TRK_TRACK_FLG_LSND) == 0) { + /* + * We did not get a new Rules Applied to set so + * no overlapping send occured, this means the + * current byte counts are correct. + */ + log.u_bbr.cur_del_rate = rack->rc_tp->t_sndbytes; + log.u_bbr.rttProp = rack->rc_tp->t_snd_rxt_bytes; + } else { + /* + * Overlapping send case, we switched to a new + * send and did a rules applied. + */ + log.u_bbr.cur_del_rate = cur->sent_at_ls; + log.u_bbr.rttProp = cur->rxt_at_ls; + } log.u_bbr.bw_inuse = cur->rxt_at_fs; log.u_bbr.cwnd_gain = line; off = (uint64_t)(cur) - (uint64_t)(&rack->rc_tp->t_tcpreq_info[0]); @@ -2138,6 +2253,7 @@ log.u_bbr.lt_epoch = (uint32_t)((cur->timestamp >> 32) & 0x00000000ffffffff); /* now set all the flags in */ log.u_bbr.pkts_out = cur->hybrid_flags; + log.u_bbr.lost = cur->playout_ms; log.u_bbr.flex6 = cur->flags; /* * Last send time = note we do not distinguish cases @@ -2146,6 +2262,20 @@ */ log.u_bbr.pkt_epoch = (uint32_t)(rack->r_ctl.last_tmit_time_acked & 0x00000000ffffffff); log.u_bbr.flex5 = (uint32_t)((rack->r_ctl.last_tmit_time_acked >> 32) & 0x00000000ffffffff); + /* + * Compose bbr_state to be a bit wise 0000ADHF + * where A is the always_pace flag + * where D is the dgp_on flag + * where H is the hybrid_mode on flag + * where F is the use_fixed_rate flag. 
+ */ + log.u_bbr.bbr_state = rack->rc_always_pace; + log.u_bbr.bbr_state <<= 1; + log.u_bbr.bbr_state |= rack->dgp_on; + log.u_bbr.bbr_state <<= 1; + log.u_bbr.bbr_state |= rack->rc_hybrid_mode; + log.u_bbr.bbr_state <<= 1; + log.u_bbr.bbr_state |= rack->use_fixed_rate; log.u_bbr.flex8 = HYBRID_LOG_SENT_LOST; tcp_log_event(rack->rc_tp, NULL, @@ -2299,6 +2429,7 @@ #ifdef TCP_REQUEST_TRK if (rack->rc_hybrid_mode && rack->rc_catch_up && + (rack->r_ctl.rc_last_sft != NULL) && (rack->r_ctl.rc_last_sft->hybrid_flags & TCP_HYBRID_PACING_S_MSS) && (rack_hybrid_allow_set_maxseg == 1) && ((rack->r_ctl.rc_last_sft->hybrid_flags & TCP_HYBRID_PACING_SETMSS) == 0)) { @@ -2338,7 +2469,10 @@ */ uint64_t srtt; - lt_bw = rack_get_lt_bw(rack); + if (rack->dis_lt_bw == 1) + lt_bw = 0; + else + lt_bw = rack_get_lt_bw(rack); if (lt_bw) { /* * No goodput bw but a long-term b/w does exist @@ -2374,19 +2508,22 @@ /* Still doing initial average must calculate */ bw = rack->r_ctl.gp_bw / max(rack->r_ctl.num_measurements, 1); } + if (rack->dis_lt_bw) { + /* We are not using lt-bw */ + ret_bw = bw; + goto compensate; + } lt_bw = rack_get_lt_bw(rack); if (lt_bw == 0) { /* If we don't have one then equate it to the gp_bw */ lt_bw = rack->r_ctl.gp_bw; } - if ((rack->r_cwnd_was_clamped == 1) && (rack->r_clamped_gets_lower > 0)){ - /* if clamped take the lowest */ + if (rack->use_lesser_lt_bw) { if (lt_bw < bw) ret_bw = lt_bw; else ret_bw = bw; } else { - /* If not set for clamped to get lowest, take the highest */ if (lt_bw > bw) ret_bw = lt_bw; else @@ -2487,6 +2624,8 @@ log.u_bbr.flex7 = rack->r_ctl.dsack_persist; log.u_bbr.flex8 = mod; log.u_bbr.timeStamp = tcp_get_usecs(&tv); + log.u_bbr.epoch = rack->r_ctl.current_round; + log.u_bbr.lt_epoch = rack->r_ctl.rc_considered_lost; TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, @@ -2535,6 +2674,8 @@ else log.u_bbr.cur_del_rate = 0; log.u_bbr.rttProp = rack->r_ctl.last_hw_bw_req; + log.u_bbr.epoch = rack->r_ctl.current_round; + log.u_bbr.lt_epoch = rack->r_ctl.rc_considered_lost; TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, @@ -2552,28 +2693,9 @@ uint64_t bw_est, high_rate; uint64_t gain; - if ((rack->r_pacing_discount == 0) || - (rack_full_buffer_discount == 0)) { - /* - * No buffer level based discount from client buffer - * level is enabled or the feature is disabled. - */ - gain = (uint64_t)rack_get_output_gain(rack, rsm); - bw_est = bw * gain; - bw_est /= (uint64_t)100; - } else { - /* - * We have a discount in place apply it with - * just a 100% gain (we get no boost if the buffer - * is full). 
- */ - uint64_t discount; - - discount = bw * (uint64_t)(rack_full_buffer_discount * rack->r_ctl.pacing_discount_amm); - discount /= 100; - /* What %% of the b/w do we discount */ - bw_est = bw - discount; - } + gain = (uint64_t)rack_get_output_gain(rack, rsm); + bw_est = bw * gain; + bw_est /= (uint64_t)100; /* Never fall below the minimum (def 64kbps) */ if (bw_est < RACK_MIN_BW) bw_est = RACK_MIN_BW; @@ -2659,6 +2781,8 @@ log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto; log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto; log.u_bbr.pacing_gain = rack->r_must_retran; + log.u_bbr.epoch = rack->r_ctl.current_round; + log.u_bbr.lt_epoch = rack->r_ctl.rc_considered_lost; TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, @@ -2698,6 +2822,10 @@ log.u_bbr.lt_epoch = rack->rc_tp->t_rxtshift; log.u_bbr.lost = rack_rto_min; log.u_bbr.epoch = rack->r_ctl.roundends; + log.u_bbr.bw_inuse = rack->r_ctl.current_round; + log.u_bbr.bw_inuse <<= 32; + log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost; + log.u_bbr.applimited = rack->rc_tp->t_flags2; TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, @@ -2731,6 +2859,9 @@ log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto; log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto; log.u_bbr.pacing_gain = rack->r_must_retran; + log.u_bbr.bw_inuse = rack->r_ctl.current_round; + log.u_bbr.bw_inuse <<= 32; + log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost; TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, @@ -2780,6 +2911,9 @@ log.u_bbr.lost = 0; else log.u_bbr.lost = rack->r_ctl.rc_prr_sndcnt; + log.u_bbr.bw_inuse = rack->r_ctl.current_round; + log.u_bbr.bw_inuse <<= 32; + log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost; TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, @@ -2927,6 +3061,9 @@ log.u_bbr.flex4 = where; log.u_bbr.flex7 = 2; log.u_bbr.timeStamp = tcp_get_usecs(&tv); + log.u_bbr.bw_inuse = rack->r_ctl.current_round; + log.u_bbr.bw_inuse <<= 32; + log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost; TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, @@ -2939,7 +3076,7 @@ static void rack_log_rtt_sendmap(struct tcp_rack *rack, uint32_t idx, uint64_t tsv, uint32_t tsecho) { - if (tcp_bblogging_on(rack->rc_tp)) { + if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2951,6 +3088,9 @@ log.u_bbr.flex7 = 3; log.u_bbr.rttProp = tsv; log.u_bbr.timeStamp = tcp_get_usecs(&tv); + log.u_bbr.bw_inuse = rack->r_ctl.current_round; + log.u_bbr.bw_inuse <<= 32; + log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost; TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, @@ -2979,6 +3119,9 @@ log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto; log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto; log.u_bbr.pacing_gain = rack->r_must_retran; + log.u_bbr.bw_inuse = rack->r_ctl.current_round; + log.u_bbr.bw_inuse <<= 32; + log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost; TCP_LOG_EVENTP(tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, @@ -3051,6 +3194,13 @@ log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto; log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto; log.u_bbr.pacing_gain = rack->r_must_retran; + log.u_bbr.bw_inuse = rack->r_ctl.current_round; + 
log.u_bbr.bw_inuse <<= 32; + log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost; + log.u_bbr.epoch = rack->rc_inp->inp_socket->so_snd.sb_hiwat; + log.u_bbr.lt_epoch = rack->rc_inp->inp_socket->so_rcv.sb_hiwat; + log.u_bbr.lost = rack->rc_tp->t_srtt; + log.u_bbr.pkt_epoch = rack->rc_tp->rfbuf_cnt; TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, @@ -3112,6 +3262,9 @@ log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto; log.u_bbr.pacing_gain = rack->r_must_retran; log.u_bbr.cwnd_gain = rack->rc_has_collapsed; + log.u_bbr.bw_inuse = rack->r_ctl.current_round; + log.u_bbr.bw_inuse <<= 32; + log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost; TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, @@ -3146,6 +3299,9 @@ log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto; log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto; log.u_bbr.pacing_gain = rack->r_must_retran; + log.u_bbr.bw_inuse = rack->r_ctl.current_round; + log.u_bbr.bw_inuse <<= 32; + log.u_bbr.bw_inuse |= rack->r_ctl.rc_considered_lost; TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, @@ -3314,6 +3470,7 @@ counter_u64_free(rack_saw_enobuf_hw); counter_u64_free(rack_saw_enetunreach); counter_u64_free(rack_hot_alloc); + counter_u64_free(tcp_policer_detected); counter_u64_free(rack_to_alloc); counter_u64_free(rack_to_alloc_hard); counter_u64_free(rack_to_alloc_emerg); @@ -3475,6 +3632,8 @@ rack->r_ctl.rc_num_split_allocs--; } if (rsm == rack->r_ctl.rc_first_appl) { + rack->r_ctl.cleared_app_ack_seq = rsm->r_start + (rsm->r_end - rsm->r_start); + rack->r_ctl.cleared_app_ack = 1; if (rack->r_ctl.rc_app_limited_cnt == 0) rack->r_ctl.rc_first_appl = NULL; else @@ -3490,7 +3649,7 @@ rack->r_ctl.rc_sacklast = NULL; memset(rsm, 0, sizeof(struct rack_sendmap)); /* Make sure we are not going to overrun our count limit of 0xff */ - if ((rack->rc_free_cnt + 1) > 0xff) { + if ((rack->rc_free_cnt + 1) > RACK_FREE_CNT_MAX) { rack_free_trim(rack); } TAILQ_INSERT_HEAD(&rack->r_ctl.rc_free, rsm, r_tnext); @@ -3806,6 +3965,8 @@ logged = 0; + if (rack->rc_skip_timely) + return; if (override) { /* * override is passed when we are @@ -3976,6 +4137,8 @@ uint64_t logvar, logvar2, logvar3; uint32_t logged, new_per, ss_red, ca_red, rec_red, alt, val; + if (rack->rc_skip_timely) + return; if (rack->rc_gp_incr) { /* Turn off increment counting */ rack->rc_gp_incr = 0; @@ -4177,6 +4340,7 @@ */ uint32_t segsiz; + rack->r_ctl.rc_lower_rtt_us_cts = us_cts; if (rack->rc_gp_dyn_mul == 0) return; @@ -4203,7 +4367,6 @@ rack->r_ctl.rc_pace_min_segs); rack->in_probe_rtt = 1; rack->measure_saw_probe_rtt = 1; - rack->r_ctl.rc_lower_rtt_us_cts = us_cts; rack->r_ctl.rc_time_probertt_starts = 0; rack->r_ctl.rc_entry_gp_rtt = rack->r_ctl.rc_gp_srtt; if (rack_probertt_use_min_rtt_entry) @@ -4387,6 +4550,7 @@ rack_check_probe_rtt(struct tcp_rack *rack, uint32_t us_cts) { /* Check in on probe-rtt */ + if (rack->rc_gp_filled == 0) { /* We do not do p-rtt unless we have gp measurements */ return; @@ -4431,7 +4595,10 @@ if (calc) { /* Maybe */ calc *= rack_per_of_gp_probertt_reduce; - rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_probertt - calc; + if (calc > rack_per_of_gp_probertt) + rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_lowthresh; + else + rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_probertt - calc; /* Limit it too */ if (rack->r_ctl.rack_per_of_gp_probertt < rack_per_of_gp_lowthresh) 
rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_lowthresh; @@ -4472,7 +4639,9 @@ rack_exit_probertt(rack, us_cts); } - } else if ((us_cts - rack->r_ctl.rc_lower_rtt_us_cts) >= rack_time_between_probertt) { + } else if ((rack->rc_skip_timely == 0) && + (TSTMP_GT(us_cts, rack->r_ctl.rc_lower_rtt_us_cts)) && + ((us_cts - rack->r_ctl.rc_lower_rtt_us_cts) >= rack_time_between_probertt)) { /* Go into probertt, its been too long since we went lower */ rack_enter_probertt(rack, us_cts); } @@ -4831,6 +5000,32 @@ } } +static void +rack_log_gp_calc(struct tcp_rack *rack, uint32_t add_part, uint32_t sub_part, uint32_t srtt, uint64_t meas_bw, uint64_t utim, uint8_t meth, uint32_t line) +{ + if (tcp_bblogging_on(rack->rc_tp)) { + union tcp_log_stackspecific log; + struct timeval tv; + + memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + log.u_bbr.timeStamp = tcp_get_usecs(&tv); + log.u_bbr.flex1 = add_part; + log.u_bbr.flex2 = sub_part; + log.u_bbr.flex3 = rack_wma_divisor; + log.u_bbr.flex4 = srtt; + log.u_bbr.flex7 = (uint16_t)line; + log.u_bbr.flex8 = meth; + log.u_bbr.delRate = rack->r_ctl.gp_bw; + log.u_bbr.cur_del_rate = meas_bw; + log.u_bbr.rttProp = utim; + TCP_LOG_EVENTP(rack->rc_tp, NULL, + &rack->rc_inp->inp_socket->so_rcv, + &rack->rc_inp->inp_socket->so_snd, + BBR_LOG_THRESH_CALC, 0, + 0, &log, false, &rack->r_ctl.act_rcv_time); + } +} + static void rack_do_goodput_measurement(struct tcpcb *tp, struct tcp_rack *rack, tcp_seq th_ack, int line, uint8_t quality) @@ -5046,6 +5241,8 @@ * other hand if we get a measurement over 1ms with a * 10ms rtt we only want to take a much smaller portion. */ + uint8_t meth; + if (rack->r_ctl.num_measurements < 0xff) { rack->r_ctl.num_measurements++; } @@ -5086,6 +5283,7 @@ */ addpart = bytes_ps * utim; addpart /= (srtt * 8); + meth = 1; } else { /* * Don't allow a single measurement @@ -5098,7 +5296,9 @@ */ subpart = rack->r_ctl.gp_bw / 2; addpart = bytes_ps / 2; + meth = 2; } + rack_log_gp_calc(rack, addpart, subpart, srtt, bytes_ps, utim, meth, __LINE__); resid_bw = rack->r_ctl.gp_bw - subpart; rack->r_ctl.gp_bw = resid_bw + addpart; did_add = 1; @@ -5116,6 +5316,7 @@ subpart /= (srtt * rack_wma_divisor); addpart = bytes_ps * utim; addpart /= (srtt * rack_wma_divisor); + meth = 3; } else { /* * The scaled measurement was long @@ -5124,6 +5325,7 @@ */ subpart = rack->r_ctl.gp_bw / rack_wma_divisor; addpart = bytes_ps / rack_wma_divisor; + meth = 4; } if ((rack->measure_saw_probe_rtt == 0) || (bytes_ps > rack->r_ctl.gp_bw)) { @@ -5133,12 +5335,83 @@ * add in. */ did_add = 1; + rack_log_gp_calc(rack, addpart, subpart, srtt, bytes_ps, utim, meth, __LINE__); resid_bw = rack->r_ctl.gp_bw - subpart; rack->r_ctl.gp_bw = resid_bw + addpart; } } rack_set_pace_segments(tp, rack, __LINE__, NULL); } + /* + * We only watch the growth of the GP during the initial startup + * or first-slowstart that ensues. If we ever needed to watch + * growth of gp outside of that period all we need to do is + * remove the first clause of this if (rc_initial_ss_comp). 
+ */ + if ((rack->rc_initial_ss_comp == 0) && + (rack->r_ctl.num_measurements >= RACK_REQ_AVG)) { + uint64_t gp_est; + + gp_est = bytes_ps; + if (tcp_bblogging_on(rack->rc_tp)) { + union tcp_log_stackspecific log; + struct timeval tv; + + memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + log.u_bbr.timeStamp = tcp_get_usecs(&tv); + log.u_bbr.flex1 = rack->r_ctl.current_round; + log.u_bbr.flex2 = rack->r_ctl.last_rnd_of_gp_rise; + log.u_bbr.delRate = gp_est; + log.u_bbr.cur_del_rate = rack->r_ctl.last_gpest; + log.u_bbr.flex8 = 41; + (void)tcp_log_event(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0, + 0, &log, false, NULL, __func__, __LINE__,&tv); + } + if ((rack->r_ctl.num_measurements == RACK_REQ_AVG) || + (rack->r_ctl.last_gpest == 0)) { + /* + * The round we get our measurement averaging going + * is the base round so it always is the source point + * for when we had our first increment. From there on + * we only record the round that had a rise. + */ + rack->r_ctl.last_rnd_of_gp_rise = rack->r_ctl.current_round; + rack->r_ctl.last_gpest = rack->r_ctl.gp_bw; + } else if (gp_est >= rack->r_ctl.last_gpest) { + /* + * Test to see if its gone up enough + * to set the round count up to now. Note + * that on the seeding of the 4th measurement we + */ + gp_est *= 1000; + gp_est /= rack->r_ctl.last_gpest; + if ((uint32_t)gp_est > rack->r_ctl.gp_gain_req) { + /* + * We went up enough to record the round. + */ + if (tcp_bblogging_on(rack->rc_tp)) { + union tcp_log_stackspecific log; + struct timeval tv; + + memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + log.u_bbr.timeStamp = tcp_get_usecs(&tv); + log.u_bbr.flex1 = rack->r_ctl.current_round; + log.u_bbr.flex2 = (uint32_t)gp_est; + log.u_bbr.flex3 = rack->r_ctl.gp_gain_req; + log.u_bbr.delRate = gp_est; + log.u_bbr.cur_del_rate = rack->r_ctl.last_gpest; + log.u_bbr.flex8 = 42; + (void)tcp_log_event(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0, + 0, &log, false, NULL, __func__, __LINE__,&tv); + } + rack->r_ctl.last_rnd_of_gp_rise = rack->r_ctl.current_round; + if (rack->r_ctl.use_gp_not_last == 1) + rack->r_ctl.last_gpest = rack->r_ctl.gp_bw; + else + rack->r_ctl.last_gpest = bytes_ps; + } + } + } if ((rack->gp_ready == 0) && (rack->r_ctl.num_measurements >= rack->r_ctl.req_measurements)) { /* We have enough measurements now */ @@ -5152,10 +5425,15 @@ rack_log_pacing_delay_calc(rack, subpart, addpart, bytes_ps, stim, rack_get_bw(rack), 22, did_add, NULL, quality); /* We do not update any multipliers if we are in or have seen a probe-rtt */ - if ((rack->measure_saw_probe_rtt == 0) && rack->rc_gp_rtt_set) - rack_update_multiplier(rack, timely_says, bytes_ps, - rack->r_ctl.rc_gp_srtt, - rack->r_ctl.rc_rtt_diff); + + if ((rack->measure_saw_probe_rtt == 0) && + rack->rc_gp_rtt_set) { + if (rack->rc_skip_timely == 0) { + rack_update_multiplier(rack, timely_says, bytes_ps, + rack->r_ctl.rc_gp_srtt, + rack->r_ctl.rc_rtt_diff); + } + } rack_log_pacing_delay_calc(rack, bytes, tim, bytes_ps, stim, rack_get_bw(rack), 3, line, NULL, quality); rack_log_pacing_delay_calc(rack, @@ -5179,7 +5457,6 @@ rack->rc_gp_saw_ca = 0; rack->rc_gp_saw_ss = 0; rack->rc_dragged_bottom = 0; - if (quality == RACK_QUALITY_HIGH) { /* * Gput in the stats world is in kbps where bytes_ps is @@ -5326,7 +5603,7 @@ */ static void rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, uint32_t th_ack, uint16_t nsegs, - uint16_t type, int32_t recovery) + uint16_t type, int32_t post_recovery) { uint32_t prior_cwnd, acked; struct tcp_log_buffer *lgb = NULL; @@ -5335,7 +5612,7 @@ INP_WLOCK_ASSERT(tptoinpcb(tp)); 
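/*
 * Editor's note (not part of the patch): the slow-start exit logic added
 * above only records a round as "gaining" when the new goodput estimate
 * exceeds gp_gain_req thousandths of the previous one, so the sysctl
 * default of 1200 means the estimate must grow past 120.0%.  In the patch
 * this pairs with rack_rnd_cnt_req ("dgp_out_of_ss_at"): too many rounds
 * without such a gain drops the connection out of slow start early.  The
 * stand-alone sketch below restates the fixed-point comparison; the
 * function name and parameters are illustrative assumptions, not RACK API.
 */
#include <stdint.h>
#include <stdbool.h>

static bool
round_counts_as_gaining(uint64_t new_gp_est, uint64_t last_gpest,
    uint32_t gp_gain_req)		/* e.g. 1200 == 120.0% */
{
        uint64_t ratio;

        if (last_gpest == 0)
                return (true);		/* no baseline yet; the real code seeds last_gpest here */
        /* Scale to thousandths so 1200 means "more than 1.2x the prior estimate". */
        ratio = (new_gp_est * 1000) / last_gpest;
        return (ratio > gp_gain_req);
}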
tp->t_ccv.nsegs = nsegs; acked = tp->t_ccv.bytes_this_ack = (th_ack - tp->snd_una); - if ((recovery) && (rack->r_ctl.rc_early_recovery_segs)) { + if ((post_recovery) && (rack->r_ctl.rc_early_recovery_segs)) { uint32_t max; max = rack->r_ctl.rc_early_recovery_segs * ctf_fixed_maxseg(tp); @@ -5348,17 +5625,21 @@ ((int32_t)rack->r_ctl.cwnd_to_use) - tp->snd_wnd); #endif if ((th_ack == tp->snd_max) && rack->lt_bw_up) { - /* We will ack all, time - * to end any lt_bw_up we - * have running until something - * new is sent. + /* + * We will ack all the data, time to end any + * lt_bw_up we have running until something + * new is sent. Note we need to use the actual + * ack_rcv_time which with pacing may be different. */ - struct timeval tv; + uint64_t tmark; rack->r_ctl.lt_bw_bytes += (tp->snd_max - rack->r_ctl.lt_seq); rack->r_ctl.lt_seq = tp->snd_max; - (void)tcp_get_usecs(&tv); - rack->r_ctl.lt_bw_time += (tcp_tv_to_lusectick(&tv) - rack->r_ctl.lt_timemark); + tmark = tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time); + if (tmark >= rack->r_ctl.lt_timemark) { + rack->r_ctl.lt_bw_time += (tmark - rack->r_ctl.lt_timemark); + } + rack->r_ctl.lt_timemark = tmark; rack->lt_bw_up = 0; } quality = RACK_QUALITY_NONE; @@ -5385,7 +5666,7 @@ tp->t_bytes_acked = 0; } prior_cwnd = tp->snd_cwnd; - if ((recovery == 0) || (rack_max_abc_post_recovery == 0) || rack->r_use_labc_for_rec || + if ((post_recovery == 0) || (rack_max_abc_post_recovery == 0) || rack->r_use_labc_for_rec || (rack_client_low_buf && rack->client_bufferlvl && (rack->client_bufferlvl < rack_client_low_buf))) labc_to_use = rack->rc_labc; @@ -5446,6 +5727,14 @@ if (rack->r_ctl.rc_rack_largest_cwnd < rack->r_ctl.cwnd_to_use) { rack->r_ctl.rc_rack_largest_cwnd = rack->r_ctl.cwnd_to_use; } + if ((rack->rc_initial_ss_comp == 0) && + (tp->snd_cwnd >= tp->snd_ssthresh)) { + /* + * The cwnd has grown beyond ssthresh we have + * entered ca and completed our first Slowstart. + */ + rack->rc_initial_ss_comp = 1; + } } static void @@ -5467,180 +5756,64 @@ rack->r_wanted_output = 1; } -static inline void -rack_set_most_aggr(struct tcp_rack *rack) -{ - rack->r_fill_less_agg = 0; - /* Once the cwnd as been clamped we don't do fill_cw */ - if (rack->r_cwnd_was_clamped == 0) - rack->rc_pace_to_cwnd = 1; - rack->r_pacing_discount = 0; -} - -static inline void -rack_limit_fillcw(struct tcp_rack *rack) -{ - rack->r_fill_less_agg = 1; - /* Once the cwnd as been clamped we don't do fill_cw */ - if (rack->r_cwnd_was_clamped == 0) - rack->rc_pace_to_cwnd = 1; - rack->r_pacing_discount = 0; -} - -static inline void -rack_disable_fillcw(struct tcp_rack *rack) +static inline uint64_t +rack_get_rxt_per(uint64_t snds, uint64_t rxts) { - rack->r_fill_less_agg = 1; - rack->rc_pace_to_cwnd = 0; - rack->r_pacing_discount = 0; -} + uint64_t rxt_per; -static void -rack_client_buffer_level_set(struct tcp_rack *rack) -{ - /* - * Only if DGP is on do we do anything that - * changes stack behavior. If DGP is off all - * we will do is issue a BB log (if BB logging is - * on) and return. - */ - if (rack->dgp_on == 0) { - rack_log_pacing_delay_calc(rack, 0, rack->client_bufferlvl, - 0, 0, 0, 30, __LINE__, NULL, 0); - return; - } - if (IN_RECOVERY(rack->rc_tp->t_flags) && rack->r_ctl.full_dgp_in_rec) { - goto set_most_agg; - } - /* - * We are in DGP so what setting should we - * apply based on where the client is? 
- */ - switch(rack->r_ctl.rc_dgp_bl_agg) { - default: - case DGP_LEVEL0: -set_most_agg: - rack_set_most_aggr(rack); - break; - case DGP_LEVEL1: - if (rack->client_bufferlvl == 4) - rack_limit_fillcw(rack); - else if (rack->client_bufferlvl == 5) - rack_disable_fillcw(rack); - else - rack_set_most_aggr(rack); - break; - case DGP_LEVEL2: - if (rack->client_bufferlvl == 3) - rack_limit_fillcw(rack); - else if (rack->client_bufferlvl == 4) - rack_disable_fillcw(rack); - else if (rack->client_bufferlvl == 5) { - rack_disable_fillcw(rack); - rack->r_pacing_discount = 1; - rack->r_ctl.pacing_discount_amm = 1; - } else - rack_set_most_aggr(rack); - break; - case DGP_LEVEL3: - if (rack->client_bufferlvl == 2) - rack_limit_fillcw(rack); - else if (rack->client_bufferlvl == 3) - rack_disable_fillcw(rack); - else if (rack->client_bufferlvl == 4) { - rack_disable_fillcw(rack); - rack->r_pacing_discount = 1; - rack->r_ctl.pacing_discount_amm = 1; - } else if (rack->client_bufferlvl == 5) { - rack_disable_fillcw(rack); - rack->r_pacing_discount = 1; - rack->r_ctl.pacing_discount_amm = 2; - } else - rack_set_most_aggr(rack); - break; + if (snds > 0) { + rxt_per = rxts * 1000; + rxt_per /= snds; + } else { + /* This is an unlikely path */ + if (rxts) { + /* Its the max it was all re-transmits */ + rxt_per = 0xffffffffffffffff; + } else { + rxt_per = 0; + } } - rack_log_pacing_delay_calc(rack, rack->r_ctl.rc_dgp_bl_agg, rack->client_bufferlvl, 0, - 0, 0, 30, __LINE__, NULL, 0); + return (rxt_per); } static void -do_rack_check_for_unclamp(struct tcpcb *tp, struct tcp_rack *rack) +policer_detection_log(struct tcp_rack *rack, uint32_t flex1, uint32_t flex2, uint32_t flex3, uint32_t flex4, uint8_t flex8) { - /* - * Can we unclamp. We unclamp if more than - * N rounds have transpired with no loss. - */ - uint64_t snds, rxts, rxt_per; - uint32_t rnds; - - rnds = rack->r_ctl.current_round - rack->r_ctl.last_rnd_rxt_clamped; - if ((rack_unclamp_round_thresh > 0) && - (rnds >= rack_unclamp_round_thresh)) { - snds = tp->t_sndbytes - rack->r_ctl.last_sndbytes; - KASSERT ((snds > 0), ("rack:%p tp:%p snds:%ju is 0", rack, tp, - (uintmax_t)snds)); - rxts = tp->t_snd_rxt_bytes - rack->r_ctl.last_snd_rxt_bytes; - rxt_per = rxts * 1000; - rxt_per /= snds; - if ((uint32_t)rxt_per <= rack_unclamp_rxt_thresh) { - /* Unclamp */ - if (tcp_bblogging_on(rack->rc_tp)) { - union tcp_log_stackspecific log; - struct timeval tv; - - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); - log.u_bbr.timeStamp = tcp_get_usecs(&tv); - log.u_bbr.flex3 = rnds; - log.u_bbr.flex4 = rack_unclamp_round_thresh; - log.u_bbr.flex5 = (uint32_t)rxt_per; - log.u_bbr.flex8 = 6; - log.u_bbr.pkt_epoch = rack->r_ctl.rc_pace_max_segs; - log.u_bbr.bbr_state = rack->rc_pace_to_cwnd; - log.u_bbr.delivered = rack->r_ctl.num_of_clamps_applied; - log.u_bbr.applimited = rack->r_ctl.max_clamps; - log.u_bbr.epoch = rack->r_ctl.clamp_options; - log.u_bbr.cur_del_rate = rxts; - log.u_bbr.bw_inuse = rack_get_lt_bw(rack); - log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); - log.u_bbr.lt_epoch = (uint32_t)((rack->r_ctl.gp_bw >> 32) & 0x00000000ffffffff); - log.u_bbr.pkts_out = (uint32_t)(rack->r_ctl.gp_bw & 0x00000000ffffffff); - tcp_log_event(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0, - 0, &log, false, NULL, NULL, 0, &tv); - } - rack->r_ctl.num_of_clamps_applied = 0; - rack->r_cwnd_was_clamped = 0; - rack->excess_rxt_on = 1; - if (rack->r_ctl.clamp_options) { - /* - * We only allow fillcw to be toggled - * if you are setting a max seg too. 
- */ - if (rack->r_ctl.clamp_options & 0x1) { - if ((rack->rc_pace_to_cwnd == 0) && (rack->dgp_on == 0)) { - /* turn on fill cw for non-dgp*/ - rack->rc_pace_to_cwnd = 0; - } else if ((rack->dgp_on == 1) && (rack->rc_pace_to_cwnd == 1)) { - /* For DGP we want it off */ - rack->rc_pace_to_cwnd = 1; - } - } - } - if (rack->dgp_on) { - /* Reset all multipliers to 100.0 so just the measured bw */ - /* Crash any per boosts down to 100% */ - rack->r_ctl.rack_per_of_gp_rec = 100; - rack->r_ctl.rack_per_of_gp_ss = 100; - rack->r_ctl.rack_per_of_gp_ca = 100; - /* Set in an upper bound for ss/ca % increase */ - rack->r_ctl.rack_per_upper_bound_ss = (uint8_t)rack_per_upper_bound_ss; - rack->r_ctl.rack_per_upper_bound_ca = (uint8_t)rack_per_upper_bound_ca; - } - } + if (tcp_bblogging_on(rack->rc_tp)) { + union tcp_log_stackspecific log; + struct timeval tv; + + memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + log.u_bbr.timeStamp = tcp_get_usecs(&tv); + log.u_bbr.flex1 = flex1; + log.u_bbr.flex2 = flex2; + log.u_bbr.flex3 = flex3; + log.u_bbr.flex4 = flex4; + log.u_bbr.flex5 = rack->r_ctl.current_policer_bucket; + log.u_bbr.flex6 = rack->r_ctl.policer_bucket_size; + log.u_bbr.flex7 = 0; + log.u_bbr.flex8 = flex8; + log.u_bbr.bw_inuse = rack->r_ctl.policer_bw; + log.u_bbr.applimited = rack->r_ctl.current_round; + log.u_bbr.epoch = rack->r_ctl.policer_max_seg; + log.u_bbr.delivered = (uint32_t)rack->r_ctl.bytes_acked_in_recovery; + log.u_bbr.cur_del_rate = rack->rc_tp->t_sndbytes; + log.u_bbr.delRate = rack->rc_tp->t_snd_rxt_bytes; + log.u_bbr.rttProp = rack->r_ctl.gp_bw; + log.u_bbr.bbr_state = rack->rc_policer_detected; + log.u_bbr.bbr_substate = 0; + log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); + log.u_bbr.use_lt_bw = rack->policer_detect_on; + log.u_bbr.lt_epoch = 0; + log.u_bbr.pkts_out = 0; + tcp_log_event(rack->rc_tp, NULL, NULL, NULL, TCP_POLICER_DET, 0, + 0, &log, false, NULL, NULL, 0, &tv); } + } static void -do_rack_excess_rxt(struct tcpcb *tp, struct tcp_rack *rack) +policer_detection(struct tcpcb *tp, struct tcp_rack *rack, int post_recovery) { /* * Rack excess rxt accounting is turned on. If we @@ -5648,166 +5821,395 @@ * rounds, then back off the cwnd and ssthresh * to fit into the long-term b/w. */ - uint64_t snds, rxts, rxt_per, lt_bw, bdp; - uint32_t rnds, new_cwnd, new_ssthresh, rtt, shared_cwnd_was_enabled = 0; - /* Is it shut off by 0 rounds? */ - if (rack_rxt_min_rnds == 0) - return; - if ((rack->r_ctl.max_clamps > 0) && - (rack->r_ctl.num_of_clamps_applied >= rack->r_ctl.max_clamps)) { - /* - * The idea, if max_clamps is set, is that if clamping it - * N times did not work again, then there is no sense - * clamping it again. The link is just a lossy link and - * our clamps are doing no good. Turn it off so we don't come - * back here again. - */ - rack->excess_rxt_on = 0; - rack->r_cwnd_was_clamped = 0; - rack->r_ctl.num_of_clamps_applied = 0; - return; - } - snds = tp->t_sndbytes - rack->r_ctl.last_sndbytes; - rxts = tp->t_snd_rxt_bytes - rack->r_ctl.last_snd_rxt_bytes; - rnds = rack->r_ctl.current_round - rack->r_ctl.last_rnd_rxt_clamped; - /* Has enough rounds progressed for us to re-measure? */ - if ((rnds >= rack_rxt_min_rnds) && - (rack->r_ctl.rxt_threshold > 0)){ - rxt_per = rxts * 1000; - rxt_per /= snds; - if (rxt_per >= rack->r_ctl.rxt_threshold) { - /* - * Action required: - * We are above our excess retransmit level, lets - * cut down the cwnd and ssthresh to match the long-term - * b/w we are getting. 
- */
- /* First disable scwnd if enabled */
-#ifdef NETFLIX_SHARED_CWND
- rack->rack_enable_scwnd = 0;
- if (rack->r_ctl.rc_scw) {
- uint32_t limit;
+ uint32_t pkts, mid, med, alt_med, avg, segsiz, tot_retran_pkt_count = 0;
+ uint32_t cnt_of_mape_rxt = 0;
+ uint64_t snds, rxts, rxt_per, tim, del, del_bw;
+ int i;
+ struct timeval tv;
- shared_cwnd_was_enabled = 1;
- if (rack->r_limit_scw)
- limit = max(1, rack->r_ctl.rc_lowest_us_rtt);
- else
- limit = 0;
- tcp_shared_cwnd_free_full(tp, rack->r_ctl.rc_scw,
- rack->r_ctl.rc_scw_index,
- limit);
- rack->r_ctl.rc_scw = NULL;
- }
-#endif
- /* Calculate what the cwnd and ssthresh should be */
- tcp_trace_point(rack->rc_tp, TCP_TP_EXCESS_RXT);
- lt_bw = rack_get_lt_bw(rack);
- if (lt_bw == 0) {
- /*
- * No lt_bw, lets chop things to one MSS
- * and the ssthresh to the iwnd.
- */
-reset_to_iw:
- new_cwnd = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
- new_ssthresh = tcp_compute_initwnd(tcp_maxseg(tp));
- } else {
- rtt = rack->rc_rack_rtt;
- if (rtt == 0) {
- /* If we have no rack_rtt drop to the IW situation */
- goto reset_to_iw;
- }
- bdp = lt_bw * (uint64_t)rtt;
- bdp /= HPTS_USEC_IN_SEC;
- new_cwnd = (uint32_t)bdp;
- new_ssthresh = new_cwnd - 1;
- if (new_cwnd < ctf_fixed_maxseg(tp)) {
- /* Rock bottom, goto IW settings */
- goto reset_to_iw;
- }
- }
- rack->r_cwnd_was_clamped = 1;
- rack->r_ctl.num_of_clamps_applied++;
- /* Reset the counter fromn now */
- tp->t_bytes_acked = 0;
+ /*
+ * First, are there enough packets delivered during recovery to make
+ * a determination of b/w?
+ */
+ segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
+ if ((rack->rc_policer_detected == 0) &&
+ (rack->r_ctl.policer_del_mss > 0) &&
+ ((uint32_t)rack->r_ctl.policer_del_mss > ((rack->r_ctl.bytes_acked_in_recovery + segsiz - 1)/segsiz))) {
+ /*
+ * Not enough data sent in recovery for initial detection. Once
+ * we have detected a policer we allow less than the threshold (policer_del_mss)
+ * amount of data in a recovery to let us fall through and double check
+ * our policer settings and possibly expand or collapse the bucket size and
+ * the policer b/w.
+ *
+ * Once you are declared to be policed, this block of code cannot be
+ * reached; instead blocks further down will re-check the policer detection
+ * triggers and possibly reset the measurements if somehow we have let the
+ * policer bucket size grow too large.
+ */
+ if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
+ policer_detection_log(rack, rack->r_ctl.policer_del_mss,
+ ((rack->r_ctl.bytes_acked_in_recovery + segsiz - 1)/segsiz),
+ rack->r_ctl.bytes_acked_in_recovery, segsiz, 18);
+ }
+ return;
+ }
+ tcp_get_usecs(&tv);
+ tim = tcp_tv_to_lusectick(&tv) - rack->r_ctl.time_entered_recovery;
+ del = rack->r_ctl.bytes_acked_in_recovery;
+ if (tim > 0)
+ del_bw = (del * (uint64_t)1000000) / tim;
+ else
+ del_bw = 0;
+ /* B/W compensation? */
+
+ if (rack->r_ctl.pol_bw_comp && ((rack->r_ctl.policer_bw > 0) ||
+ (del_bw > 0))) {
+ /*
+ * Sanity check now that the data is in. How long does it
+ * take for us to pace out two of our policer_max_seg's?
+ *
+ * If it is longer than the RTT then we are set
+ * too slow, maybe because of not enough data
+ * sent during recovery.
+ */ + uint64_t lentime, res, srtt, max_delbw, alt_bw; + + srtt = (uint64_t)rack_grab_rtt(tp, rack); + if ((tp->t_srtt > 0) && (srtt > tp->t_srtt)) + srtt = tp->t_srtt; + lentime = rack->r_ctl.policer_max_seg * (uint64_t)HPTS_USEC_IN_SEC * 2; + if (del_bw > rack->r_ctl.policer_bw) { + max_delbw = del_bw; + } else { + max_delbw = rack->r_ctl.policer_bw; + } + res = lentime / max_delbw; + if ((srtt > 0) && (res > srtt)) { /* - * Now what about options? - * We look at the bottom 8 bits: - * F = fill cw bit (toggle it if set) - * S = Segment bits - * M = set max segment bit + * At this rate we can not get two policer_maxsegs + * out before the ack arrives back. * - * SSSS SSMF + * Lets at least get it raised up so that + * we can be a bit faster than that if possible. */ - if (rack->r_ctl.clamp_options) { - if (rack->r_ctl.clamp_options & 0x1) { - if ((rack->rc_pace_to_cwnd == 0) && (rack->dgp_on == 0)) { - /* turn on fill cw for non-dgp*/ - rack->rc_pace_to_cwnd = 1; - } else if ((rack->dgp_on == 1) && (rack->rc_pace_to_cwnd == 1)) { - /* For DGP we want it off */ - rack->rc_pace_to_cwnd = 0; - } + lentime = (rack->r_ctl.policer_max_seg * 2); + tim = srtt; + alt_bw = (lentime * (uint64_t)HPTS_USEC_IN_SEC) / tim; + if (alt_bw > max_delbw) { + uint64_t cap_alt_bw; + + cap_alt_bw = (max_delbw + (max_delbw * rack->r_ctl.pol_bw_comp)); + if ((rack_pol_min_bw > 0) && (cap_alt_bw < rack_pol_min_bw)) { + /* We place a min on the cap which defaults to 1Mbps */ + cap_alt_bw = rack_pol_min_bw; + } + if (alt_bw <= cap_alt_bw) { + /* It should be */ + del_bw = alt_bw; + policer_detection_log(rack, + (uint32_t)tim, + rack->r_ctl.policer_max_seg, + 0, + 0, + 16); + } else { + /* + * This is an odd case where likely the RTT is very very + * low. And yet it is still being policed. We don't want + * to get more than (rack_policing_do_bw_comp+1) x del-rate + * where del-rate is what we got in recovery for either the + * first Policer Detection(PD) or this PD we are on now. 
+ */ + del_bw = cap_alt_bw; + policer_detection_log(rack, + (uint32_t)tim, + rack->r_ctl.policer_max_seg, + (uint32_t)max_delbw, + (rack->r_ctl.pol_bw_comp + 1), + 16); } } - if (rack->dgp_on) { - /* Reset all multipliers to 100.0 so just the measured bw */ - /* Crash any per boosts down to 100% */ - rack->r_ctl.rack_per_of_gp_rec = 100; - rack->r_ctl.rack_per_of_gp_ss = 100; - rack->r_ctl.rack_per_of_gp_ca = 100; - /* Set in an upper bound for ss/ca % increase */ - rack->r_ctl.rack_per_upper_bound_ss = (uint8_t)rack_clamp_ss_upper; - rack->r_ctl.rack_per_upper_bound_ca = (uint8_t)rack_clamp_ca_upper; - /* Now move to the lt_bw */ - rack->r_ctl.gp_bw = lt_bw; - rack->rc_gp_filled = 1; - rack->r_ctl.num_measurements = RACK_REQ_AVG; - } - if (tcp_bblogging_on(rack->rc_tp)) { - union tcp_log_stackspecific log; - struct timeval tv; - - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); - log.u_bbr.timeStamp = tcp_get_usecs(&tv); - log.u_bbr.flex1 = new_cwnd; - log.u_bbr.flex2 = new_ssthresh; - log.u_bbr.flex3 = rnds; - log.u_bbr.flex4 = rack_rxt_min_rnds; - log.u_bbr.flex5 = rtt; - log.u_bbr.flex6 = shared_cwnd_was_enabled; - log.u_bbr.flex8 = 5; - log.u_bbr.pkt_epoch = rack->r_ctl.rc_pace_max_segs; - log.u_bbr.bbr_state = rack->rc_pace_to_cwnd; - log.u_bbr.delivered = rack->r_ctl.num_of_clamps_applied; - log.u_bbr.applimited = rack->r_ctl.max_clamps; - log.u_bbr.epoch = rack->r_ctl.clamp_options; - log.u_bbr.cur_del_rate = rxts; - log.u_bbr.delRate = snds; - log.u_bbr.rttProp = rack->r_ctl.rxt_threshold; - log.u_bbr.bw_inuse = lt_bw; - log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); - log.u_bbr.lt_epoch = (uint32_t)((rack->r_ctl.gp_bw >> 32) & 0x00000000ffffffff); - log.u_bbr.pkts_out = (uint32_t)(rack->r_ctl.gp_bw & 0x00000000ffffffff); - tcp_log_event(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0, - 0, &log, false, NULL, NULL, 0, &tv); - } - /* Update our point where we did it */ - if (rack->r_ctl.already_had_a_excess == 0) { - rack->r_ctl.already_had_a_excess = 1; - counter_u64_add(rack_rxt_clamps_cwnd_uniq, 1); + } + } + snds = tp->t_sndbytes - rack->r_ctl.last_policer_sndbytes; + rxts = tp->t_snd_rxt_bytes - rack->r_ctl.last_policer_snd_rxt_bytes; + rxt_per = rack_get_rxt_per(snds, rxts); + /* Figure up the average and median */ + for(i = 0; i < RETRAN_CNT_SIZE; i++) { + if (rack->r_ctl.rc_cnt_of_retran[i] > 0) { + tot_retran_pkt_count += (i + 1) * rack->r_ctl.rc_cnt_of_retran[i]; + cnt_of_mape_rxt += rack->r_ctl.rc_cnt_of_retran[i]; + } + } + if (cnt_of_mape_rxt) + avg = (tot_retran_pkt_count * 10)/cnt_of_mape_rxt; + else + avg = 0; + alt_med = med = 0; + mid = tot_retran_pkt_count/2; + for(i = 0; i < RETRAN_CNT_SIZE; i++) { + pkts = (i + 1) * rack->r_ctl.rc_cnt_of_retran[i]; + if (mid > pkts) { + mid -= pkts; + continue; + } + med = (i + 1); + break; + } + mid = cnt_of_mape_rxt / 2; + for(i = 0; i < RETRAN_CNT_SIZE; i++) { + if (mid > rack->r_ctl.rc_cnt_of_retran[i]) { + mid -= rack->r_ctl.rc_cnt_of_retran[i]; + continue; + } + alt_med = (i + 1); + break; + } + if (rack->r_ctl.policer_alt_median) { + /* Swap the medians */ + uint32_t swap; + + swap = med; + med = alt_med; + alt_med = swap; + } + if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { + union tcp_log_stackspecific log; + struct timeval tv; + + memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + log.u_bbr.timeStamp = tcp_get_usecs(&tv); + log.u_bbr.flex1 = avg; + log.u_bbr.flex2 = med; + log.u_bbr.flex3 = (uint32_t)rxt_per; + log.u_bbr.flex4 = rack->r_ctl.policer_avg_threshold; + log.u_bbr.flex5 = 
rack->r_ctl.policer_med_threshold;
+ log.u_bbr.flex6 = rack->r_ctl.policer_rxt_threshold;
+ log.u_bbr.flex7 = rack->r_ctl.policer_alt_median;
+ log.u_bbr.flex8 = 1;
+ log.u_bbr.delivered = rack->r_ctl.policer_bucket_size;
+ log.u_bbr.applimited = rack->r_ctl.current_round;
+ log.u_bbr.epoch = rack->r_ctl.policer_max_seg;
+ log.u_bbr.bw_inuse = del_bw;
+ log.u_bbr.cur_del_rate = rxts;
+ log.u_bbr.delRate = snds;
+ log.u_bbr.rttProp = rack->r_ctl.gp_bw;
+ log.u_bbr.bbr_state = rack->rc_policer_detected;
+ log.u_bbr.bbr_substate = 0;
+ log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
+ log.u_bbr.use_lt_bw = rack->policer_detect_on;
+ log.u_bbr.lt_epoch = (uint32_t)tim;
+ log.u_bbr.pkts_out = rack->r_ctl.bytes_acked_in_recovery;
+ tcp_log_event(tp, NULL, NULL, NULL, TCP_POLICER_DET, 0,
+ 0, &log, false, NULL, NULL, 0, &tv);
+ }
+ if (med == RETRAN_CNT_SIZE) {
+ /*
+ * If the median is the maximum, then what we
+ * likely have here is a network breakage. Either that
+ * or we are so unlucky that all of our traffic is being
+ * dropped and having to be retransmitted the maximum times
+ * and this just is not how a policer works.
+ *
+ * If it is truly a policer eventually we will come
+ * through and it won't be the maximum.
+ */
+ return;
+ }
+ /* Have enough rounds progressed for us to re-measure? */
+ if ((rxt_per >= (uint64_t)rack->r_ctl.policer_rxt_threshold) &&
+ (avg >= rack->r_ctl.policer_avg_threshold) &&
+ (med >= rack->r_ctl.policer_med_threshold)) {
+ /*
+ * We hit all thresholds that indicate we are
+ * being policed. Now we may be doing this from a rack timeout
+ * which then means the rest of recovery will hopefully go
+ * smoother as we pace. At the end of recovery we will
+ * fall back in here and reset the values using the
+ * results of the entire recovery episode (we could also
+ * hit this as we exit recovery, which means only
+ * one time in here).
+ *
+ * This is done explicitly so that if we hit the thresholds
+ * again in a second recovery we overwrite the values. We do
+ * that because, over time, as we pace the policer_bucket_size may
+ * continue to grow. This then provides more and more times when
+ * we are not pacing to the policer rate. This lets us compensate
+ * for when we hit a false positive and those flows continue to
+ * increase. However if it's a real policer we will then get over its
+ * limit, over time, again and thus end up back here hitting the
+ * thresholds again.
+ *
+ * The alternative to this is to instead, whenever we pace due to
+ * policing in rack_policed_sending, add the amount paced (len) to the
+ * idle_snd_una value (which decreases the amount in last_amount_before_rec
+ * since that is always [th_ack - idle_snd_una]). This would then prevent
+ * the policer_bucket_size from growing in additional recovery episodes,
+ * which would then mean false positives would be pretty much stuck
+ * after things got back to normal (assuming that what caused the
+ * false positive was a small network outage).
+ *
+ */
+ tcp_trace_point(rack->rc_tp, TCP_TP_POLICER_DET);
+ if (rack->rc_policer_detected == 0) {
+ /*
+ * Increment the stat that tells us we identified
+ * a policer only once. Note that if we ever allow
+ * the flag to be cleared (reverted) then we need
+ * to adjust this to not do multi-counting.
+ */ + counter_u64_add(tcp_policer_detected, 1); + } + rack->r_ctl.last_policer_sndbytes = tp->t_sndbytes; + rack->r_ctl.last_policer_snd_rxt_bytes = tp->t_snd_rxt_bytes; + rack->r_ctl.policer_bw = del_bw; + rack->r_ctl.policer_max_seg = tcp_get_pacing_burst_size_w_divisor(rack->rc_tp, + rack->r_ctl.policer_bw, + min(ctf_fixed_maxseg(rack->rc_tp), + rack->r_ctl.rc_pace_min_segs), + 0, NULL, + NULL, rack->r_ctl.pace_len_divisor); + /* Now what about the policer bucket size */ + rack->r_ctl.policer_bucket_size = rack->r_ctl.last_amount_before_rec; + if (rack->r_ctl.policer_bucket_size < rack->r_ctl.policer_max_seg) { + /* We must be able to send our max-seg or else chaos ensues */ + rack->r_ctl.policer_bucket_size = rack->r_ctl.policer_max_seg * 2; + } + if (rack->rc_policer_detected == 0) + rack->r_ctl.current_policer_bucket = 0; + if (tcp_bblogging_on(rack->rc_tp)) { + union tcp_log_stackspecific log; + struct timeval tv; + + memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + log.u_bbr.timeStamp = tcp_get_usecs(&tv); + log.u_bbr.flex1 = avg; + log.u_bbr.flex2 = med; + log.u_bbr.flex3 = rxt_per; + log.u_bbr.flex4 = rack->r_ctl.policer_avg_threshold; + log.u_bbr.flex5 = rack->r_ctl.policer_med_threshold; + log.u_bbr.flex6 = rack->r_ctl.policer_rxt_threshold; + log.u_bbr.flex7 = rack->r_ctl.policer_alt_median; + log.u_bbr.flex8 = 2; + log.u_bbr.applimited = rack->r_ctl.current_round; + log.u_bbr.bw_inuse = del_bw; + log.u_bbr.delivered = rack->r_ctl.policer_bucket_size; + log.u_bbr.cur_del_rate = rxts; + log.u_bbr.delRate = snds; + log.u_bbr.rttProp = rack->r_ctl.gp_bw; + log.u_bbr.bbr_state = rack->rc_policer_detected; + log.u_bbr.bbr_substate = 0; + log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); + log.u_bbr.use_lt_bw = rack->policer_detect_on; + log.u_bbr.epoch = rack->r_ctl.policer_max_seg; + log.u_bbr.lt_epoch = (uint32_t)tim; + log.u_bbr.pkts_out = rack->r_ctl.bytes_acked_in_recovery; + tcp_log_event(tp, NULL, NULL, NULL, TCP_POLICER_DET, 0, + 0, &log, false, NULL, NULL, 0, &tv); + /* + * Put out an added log, 19, for the sole purpose + * of getting the txt/rxt so that we can benchmark + * in read-bbrlog the ongoing rxt rate after our + * policer invocation in the HYSTART announcments. + */ + memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + log.u_bbr.timeStamp = tcp_tv_to_usectick(&tv); + log.u_bbr.flex1 = alt_med; + log.u_bbr.flex8 = 19; + log.u_bbr.cur_del_rate = tp->t_sndbytes; + log.u_bbr.delRate = tp->t_snd_rxt_bytes; + tcp_log_event(tp, NULL, NULL, NULL, TCP_POLICER_DET, 0, + 0, &log, false, NULL, NULL, 0, &tv); + } + /* Turn off any fast output, thats ended */ + rack->r_fast_output = 0; + /* Mark the time for credits */ + rack->r_ctl.last_sendtime = tcp_get_u64_usecs(NULL); + if (rack->r_rr_config < 2) { + /* + * We need to be stricter on the RR config so + * the pacing has priority. + */ + rack->r_rr_config = 2; + } + policer_detection_log(rack, + rack->r_ctl.idle_snd_una, + rack->r_ctl.ack_for_idle, + 0, + (uint32_t)tim, + 14); + rack->rc_policer_detected = 1; + } else if ((rack->rc_policer_detected == 1) && + (post_recovery == 1)) { + /* + * If we are exiting recovery and have already detected + * we need to possibly update the values. + * + * First: Update the idle -> recovery sent value. 
+ */ + uint32_t srtt; + + if (rack->r_ctl.last_amount_before_rec > rack->r_ctl.policer_bucket_size) { + rack->r_ctl.policer_bucket_size = rack->r_ctl.last_amount_before_rec; + } + srtt = (uint64_t)rack_grab_rtt(tp, rack); + if ((tp->t_srtt > 0) && (srtt > tp->t_srtt)) + srtt = tp->t_srtt; + if ((srtt != 0) && + (tim < (uint64_t)srtt)) { + /* + * Not long enough. + */ + if (rack_verbose_logging) + policer_detection_log(rack, + (uint32_t)tim, + 0, + 0, + 0, + 15); + return; + } + /* + * Finally update the b/w if its grown. + */ + if (del_bw > rack->r_ctl.policer_bw) { + rack->r_ctl.policer_bw = del_bw; + rack->r_ctl.policer_max_seg = tcp_get_pacing_burst_size_w_divisor(rack->rc_tp, + rack->r_ctl.policer_bw, + min(ctf_fixed_maxseg(rack->rc_tp), + rack->r_ctl.rc_pace_min_segs), + 0, NULL, + NULL, rack->r_ctl.pace_len_divisor); + if (rack->r_ctl.policer_bucket_size < rack->r_ctl.policer_max_seg) { + /* We must be able to send our max-seg or else chaos ensues */ + rack->r_ctl.policer_bucket_size = rack->r_ctl.policer_max_seg * 2; } - counter_u64_add(rack_rxt_clamps_cwnd, 1); - rack->r_ctl.last_sndbytes = tp->t_sndbytes; - rack->r_ctl.last_snd_rxt_bytes = tp->t_snd_rxt_bytes; - rack->r_ctl.last_rnd_rxt_clamped = rack->r_ctl.current_round; - if (new_cwnd < tp->snd_cwnd) - tp->snd_cwnd = new_cwnd; - if (new_ssthresh < tp->snd_ssthresh) - tp->snd_ssthresh = new_ssthresh; } + policer_detection_log(rack, + rack->r_ctl.idle_snd_una, + rack->r_ctl.ack_for_idle, + 0, + (uint32_t)tim, + 3); + } +} + +static void +rack_exit_recovery(struct tcpcb *tp, struct tcp_rack *rack, int how) +{ + /* now check with the policer if on */ + if (rack->policer_detect_on == 1) { + policer_detection(tp, rack, 1); } + /* + * Now exit recovery, note we must do the idle set after the policer_detection + * to get the amount acked prior to recovery correct. + */ + rack->r_ctl.idle_snd_una = tp->snd_una; + EXIT_RECOVERY(tp->t_flags); } static void @@ -5882,9 +6284,12 @@ } rack_log_dsack_event(rack, 1, __LINE__, 0, 0); } - EXIT_RECOVERY(tp->t_flags); - if (rack->r_ctl.full_dgp_in_rec) - rack_client_buffer_level_set(rack); + if (rack->rto_from_rec == 1) { + rack->rto_from_rec = 0; + if (rack->r_ctl.rto_ssthresh > tp->snd_ssthresh) + tp->snd_ssthresh = rack->r_ctl.rto_ssthresh; + } + rack_exit_recovery(tp, rack, 1); } static void @@ -5909,12 +6314,69 @@ tp->t_flags &= ~TF_WASFRECOVERY; tp->t_flags &= ~TF_WASCRECOVERY; if (!IN_FASTRECOVERY(tp->t_flags)) { - if (rack->dgp_on && rack->r_cwnd_was_clamped) { - /* Reset the gains so that on exit we will be softer longer */ - rack->r_ctl.rack_per_of_gp_rec = 100; - rack->r_ctl.rack_per_of_gp_ss = 98; - rack->r_ctl.rack_per_of_gp_ca = 98; + struct rack_sendmap *rsm; + struct timeval tv; + uint32_t segsiz; + + /* Check if this is the end of the initial Start-up i.e. initial slow-start */ + if (rack->rc_initial_ss_comp == 0) { + /* Yep it is the end of the initial slowstart */ + rack->rc_initial_ss_comp = 1; + } + microuptime(&tv); + rack->r_ctl.time_entered_recovery = tcp_tv_to_lusectick(&tv); + if (SEQ_GEQ(ack, tp->snd_una)) { + /* + * The ack is above snd_una. Lets see + * if we can establish a postive distance from + * our idle mark. + */ + rack->r_ctl.ack_for_idle = ack; + if (SEQ_GT(ack, rack->r_ctl.idle_snd_una)) { + rack->r_ctl.last_amount_before_rec = ack - rack->r_ctl.idle_snd_una; + } else { + /* No data thru yet */ + rack->r_ctl.last_amount_before_rec = 0; + } + } else if (SEQ_GT(tp->snd_una, rack->r_ctl.idle_snd_una)) { + /* + * The ack is out of order and behind the snd_una. 
It may + * have contained SACK information which we processed else + * we would have rejected it. + */ + rack->r_ctl.ack_for_idle = tp->snd_una; + rack->r_ctl.last_amount_before_rec = tp->snd_una - rack->r_ctl.idle_snd_una; + } else { + rack->r_ctl.ack_for_idle = ack; + rack->r_ctl.last_amount_before_rec = 0; + } + if (rack->rc_policer_detected) { + /* + * If we are being policed and we have a loss, it + * means our bucket is now empty. This can happen + * where some other flow on the same host sends + * that this connection is not aware of. + */ + rack->r_ctl.current_policer_bucket = 0; + if (rack_verbose_logging) + policer_detection_log(rack, rack->r_ctl.last_amount_before_rec, 0, 0, 0, 4); + if (rack->r_ctl.last_amount_before_rec > rack->r_ctl.policer_bucket_size) { + rack->r_ctl.policer_bucket_size = rack->r_ctl.last_amount_before_rec; + } + } + memset(rack->r_ctl.rc_cnt_of_retran, 0, sizeof(rack->r_ctl.rc_cnt_of_retran)); + segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs); + TAILQ_FOREACH(rsm, &rack->r_ctl.rc_tmap, r_tnext) { + /* + * Go through the outstanding and re-peg + * any that should have been left in the + * retransmit list (on a double recovery). + */ + if (rsm->r_act_rxt_cnt > 0) { + rack_peg_rxt(rack, rsm, segsiz); + } } + rack->r_ctl.bytes_acked_in_recovery = 0; rack->r_ctl.rc_prr_delivered = 0; rack->r_ctl.rc_prr_out = 0; rack->r_fast_output = 0; @@ -5947,15 +6409,19 @@ tp->t_dupacks = 0; tp->t_bytes_acked = 0; rack->r_fast_output = 0; - EXIT_RECOVERY(tp->t_flags); - if (tp->t_rxtshift == 1) { + if (IN_RECOVERY(tp->t_flags)) + rack_exit_recovery(tp, rack, 2); + rack->r_ctl.bytes_acked_in_recovery = 0; + rack->r_ctl.time_entered_recovery = 0; + orig_cwnd = tp->snd_cwnd; + rack_log_to_prr(rack, 16, orig_cwnd, line); + if (CC_ALGO(tp)->cong_signal == NULL) { + /* TSNH */ tp->snd_ssthresh = max(2, min(tp->snd_wnd, rack->r_ctl.cwnd_to_use) / 2 / ctf_fixed_maxseg(tp)) * ctf_fixed_maxseg(tp); + tp->snd_cwnd = ctf_fixed_maxseg(tp); } - orig_cwnd = tp->snd_cwnd; - tp->snd_cwnd = ctf_fixed_maxseg(tp); - rack_log_to_prr(rack, 16, orig_cwnd, line); if (tp->t_flags2 & TF2_ECN_PERMIT) tp->t_flags2 |= TF2_ECN_SND_CWR; break; @@ -5984,8 +6450,6 @@ } if ((in_rec_at_entry == 0) && IN_RECOVERY(tp->t_flags)) { rack_log_to_prr(rack, 15, cwnd_enter, line); - if (rack->r_ctl.full_dgp_in_rec) - rack_client_buffer_level_set(rack); rack->r_ctl.dsack_byte_cnt = 0; rack->r_ctl.retran_during_recovery = 0; rack->r_ctl.rc_cwnd_at_erec = cwnd_enter; @@ -6078,7 +6542,7 @@ } static uint32_t -rack_calc_thresh_rack(struct tcp_rack *rack, uint32_t srtt, uint32_t cts) +rack_calc_thresh_rack(struct tcp_rack *rack, uint32_t srtt, uint32_t cts, int line, int log_allowed) { int32_t lro; uint32_t thresh; @@ -6149,7 +6613,8 @@ * have seen reordering we have a DSACK count. 
*/ thresh += rack->r_ctl.num_dsack * (srtt >> 2); - rack_log_dsack_event(rack, 4, __LINE__, srtt, thresh); + if (log_allowed) + rack_log_dsack_event(rack, 4, line, srtt, thresh); } /* SRTT * 2 is the ceiling */ if (thresh > (srtt * 2)) { @@ -6159,7 +6624,8 @@ if (thresh > rack_rto_max) { thresh = rack_rto_max; } - rack_log_dsack_event(rack, 6, __LINE__, srtt, thresh); + if (log_allowed) + rack_log_dsack_event(rack, 6, line, srtt, thresh); return (thresh); } @@ -6294,7 +6760,7 @@ } idx = rsm->r_rtr_cnt - 1; srtt = rack_grab_rtt(tp, rack); - thresh = rack_calc_thresh_rack(rack, srtt, tsused); + thresh = rack_calc_thresh_rack(rack, srtt, tsused, __LINE__, 1); if (TSTMP_LT(tsused, ((uint32_t)rsm->r_tim_lastsent[idx]))) { return (NULL); } @@ -6456,7 +6922,7 @@ goto activate_tlp; } srtt = rack_grab_rtt(tp, rack); - thresh = rack_calc_thresh_rack(rack, srtt, cts); + thresh = rack_calc_thresh_rack(rack, srtt, cts, __LINE__, 1); idx = rsm->r_rtr_cnt - 1; exp = ((uint32_t)rsm->r_tim_lastsent[idx]) + thresh; if (SEQ_GEQ(exp, cts)) { @@ -6563,8 +7029,6 @@ static void rack_enter_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, tcp_seq snd_una) { - struct timeval tv; - if (rack->rc_in_persist == 0) { if (tp->t_flags & TF_GPUTINPROG) { /* @@ -6580,21 +7044,23 @@ rack->rack_scwnd_is_idle = 1; } #endif - rack->r_ctl.rc_went_idle_time = tcp_get_usecs(&tv); + rack->r_ctl.rc_went_idle_time = cts; + if (rack->r_ctl.rc_went_idle_time == 0) + rack->r_ctl.rc_went_idle_time = 1; if (rack->lt_bw_up) { /* Suspend our LT BW measurement */ uint64_t tmark; rack->r_ctl.lt_bw_bytes += (snd_una - rack->r_ctl.lt_seq); rack->r_ctl.lt_seq = snd_una; - tmark = tcp_tv_to_lusectick(&tv); - rack->r_ctl.lt_bw_time += (tmark - rack->r_ctl.lt_timemark); + tmark = tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time); + if (tmark >= rack->r_ctl.lt_timemark) { + rack->r_ctl.lt_bw_time += (tmark - rack->r_ctl.lt_timemark); + } rack->r_ctl.lt_timemark = tmark; rack->lt_bw_up = 0; rack->r_persist_lt_bw_off = 1; } - if (rack->r_ctl.rc_went_idle_time == 0) - rack->r_ctl.rc_went_idle_time = 1; rack_timer_cancel(tp, rack, cts, __LINE__); rack->r_ctl.persist_lost_ends = 0; rack->probe_not_answered = 0; @@ -6609,9 +7075,6 @@ static void rack_exit_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { - struct timeval tv; - uint32_t t_time; - if (tcp_in_hpts(rack->rc_tp)) { tcp_hpts_remove(rack->rc_tp); rack->r_ctl.rc_hpts_flags = 0; @@ -6622,7 +7085,6 @@ rack->rack_scwnd_is_idle = 0; } #endif - t_time = tcp_get_usecs(&tv); if (rack->rc_gp_dyn_mul && (rack->use_fixed_rate == 0) && (rack->rc_always_pace)) { @@ -6632,7 +7094,7 @@ */ uint32_t time_idle, idle_min; - time_idle = t_time - rack->r_ctl.rc_went_idle_time; + time_idle = cts - rack->r_ctl.rc_went_idle_time; idle_min = rack_min_probertt_hold; if (rack_probertt_gpsrtt_cnt_div) { uint64_t extra; @@ -6658,10 +7120,11 @@ } if (rack->r_persist_lt_bw_off) { /* Continue where we left off */ - rack->r_ctl.lt_timemark = tcp_tv_to_lusectick(&tv); + rack->r_ctl.lt_timemark = tcp_get_u64_usecs(NULL); rack->lt_bw_up = 1; rack->r_persist_lt_bw_off = 0; } + rack->r_ctl.idle_snd_una = tp->snd_una; rack->rc_in_persist = 0; rack->r_ctl.rc_went_idle_time = 0; tp->t_rxtshift = 0; @@ -6734,7 +7197,7 @@ } static void -rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, +rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, int32_t slot, uint32_t tot_len_this_send, int sup_rack) { struct hpts_diag diag; @@ -6778,7 +7241,8 @@ rack->r_early = 0; 
rack->r_ctl.rc_agg_early = 0; } - if (rack->r_late) { + if ((rack->r_late) && + ((rack->r_use_hpts_min == 0) || (rack->dgp_on == 0))) { /* * This is harder, we can * compensate some but it @@ -6812,6 +7276,32 @@ if (rack->r_ctl.rc_agg_delayed == 0) rack->r_late = 0; } + } else if (rack->r_late) { + /* r_use_hpts_min is on and so is DGP */ + uint32_t max_red; + + max_red = (slot * rack->r_ctl.max_reduction) / 100; + if (max_red >= rack->r_ctl.rc_agg_delayed) { + slot -= rack->r_ctl.rc_agg_delayed; + rack->r_ctl.rc_agg_delayed = 0; + } else { + slot -= max_red; + rack->r_ctl.rc_agg_delayed -= max_red; + } + } + if ((rack->r_use_hpts_min == 1) && + (slot > 0) && + (rack->dgp_on == 1)) { + /* + * We are enforcing a min pacing timer + * based on our hpts min timeout. + */ + uint32_t min; + + min = get_hpts_min_sleep_time(); + if (min > slot) { + slot = min; + } } hpts_timeout = rack_timer_start(tp, rack, cts, sup_rack); #ifdef TCP_SAD_DETECTION @@ -7041,6 +7531,34 @@ rack_log_type_bbrsnd(rack, tot_len_this_send, slot, us_cts, &tv, __LINE__); } +static void +rack_mark_lost(struct tcpcb *tp, + struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t cts) +{ + struct rack_sendmap *nrsm; + uint32_t thresh, exp; + + thresh = rack_calc_thresh_rack(rack, rack_grab_rtt(tp, rack), cts, __LINE__, 0); + nrsm = rsm; + TAILQ_FOREACH_FROM(nrsm, &rack->r_ctl.rc_tmap, r_tnext) { + if ((nrsm->r_flags & RACK_SACK_PASSED) == 0) { + /* Got up to all that were marked sack-passed */ + break; + } + if ((nrsm->r_flags & RACK_WAS_LOST) == 0) { + exp = ((uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)]) + thresh; + if (TSTMP_LT(exp, cts) || (exp == cts)) { + /* We now consider it lost */ + nrsm->r_flags |= RACK_WAS_LOST; + rack->r_ctl.rc_considered_lost += nrsm->r_end - nrsm->r_start; + } else { + /* Past here it won't be lost so stop */ + break; + } + } + } +} + /* * RACK Timer, here we simply do logging and house keeping. * the normal rack_output() function will call the @@ -7067,6 +7585,8 @@ rsm = rack_check_recovery_mode(tp, cts); rack_log_to_event(rack, RACK_TO_FRM_RACK, rsm); if (rsm) { + /* We need to stroke any lost that are now declared as lost */ + rack_mark_lost(tp, rack, rsm, cts); rack->r_ctl.rc_resend = rsm; rack->r_timer_override = 1; if (rack->use_rack_rr) { @@ -7088,6 +7608,16 @@ 0, 0, 0); return (1); } + if ((rack->policer_detect_on == 1) && + (rack->rc_policer_detected == 0)) { + /* + * We do this early if we have not + * deteceted to attempt to detect + * quicker. Normally we want to do this + * as recovery exits (and we will again). 
+ */ + policer_detection(tp, rack, 0); + } return (0); } @@ -7189,13 +7719,14 @@ nrsm->r_start = start; nrsm->r_end = rsm->r_end; nrsm->r_rtr_cnt = rsm->r_rtr_cnt; + nrsm->r_act_rxt_cnt = rsm->r_act_rxt_cnt; nrsm->r_flags = rsm->r_flags; nrsm->r_dupack = rsm->r_dupack; nrsm->r_no_rtt_allowed = rsm->r_no_rtt_allowed; nrsm->r_rtr_bytes = 0; nrsm->r_fas = rsm->r_fas; nrsm->r_bas = rsm->r_bas; - rsm->r_end = nrsm->r_start; + tqhash_update_end(rack->r_ctl.tqh, rsm, nrsm->r_start); nrsm->r_just_ret = rsm->r_just_ret; for (idx = 0; idx < nrsm->r_rtr_cnt; idx++) { nrsm->r_tim_lastsent[idx] = rsm->r_tim_lastsent[idx]; @@ -7242,7 +7773,7 @@ */ rack_log_map_chg(rack->rc_tp, rack, NULL, l_rsm, r_rsm, MAP_MERGE, r_rsm->r_end, __LINE__); - l_rsm->r_end = r_rsm->r_end; + tqhash_update_end(rack->r_ctl.tqh, l_rsm, r_rsm->r_end); if (l_rsm->r_dupack < r_rsm->r_dupack) l_rsm->r_dupack = r_rsm->r_dupack; if (r_rsm->r_rtr_bytes) @@ -7344,6 +7875,7 @@ */ rack_log_to_event(rack, RACK_TO_FRM_TLP, NULL); rack->r_ctl.retran_during_recovery = 0; + rack->r_might_revert = 0; rack->r_ctl.dsack_byte_cnt = 0; counter_u64_add(rack_tlp_tot, 1); if (rack->r_state && (rack->r_state != tp->t_state)) @@ -7517,6 +8049,32 @@ return (0); } +static inline int +rack_send_ack_challange(struct tcp_rack *rack) +{ + struct tcptemp *t_template; + + t_template = tcpip_maketemplate(rack->rc_inp); + if (t_template) { + if (rack->forced_ack == 0) { + rack->forced_ack = 1; + rack->r_ctl.forced_ack_ts = tcp_get_usecs(NULL); + } else { + rack->probe_not_answered = 1; + } + tcp_respond(rack->rc_tp, t_template->tt_ipgen, + &t_template->tt_t, (struct mbuf *)NULL, + rack->rc_tp->rcv_nxt, rack->rc_tp->snd_una - 1, 0); + free(t_template, M_TEMP); + /* This does send an ack so kill any D-ack timer */ + if (rack->rc_tp->t_flags & TF_DELACK) + rack->rc_tp->t_flags &= ~TF_DELACK; + return(1); + } else + return (0); + +} + /* * Persists timer, here we simply send the * same thing as a keepalive will. @@ -7528,7 +8086,6 @@ static int rack_timeout_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { - struct tcptemp *t_template; int32_t retval = 1; if (rack->rc_in_persist == 0) @@ -7575,26 +8132,14 @@ retval = -ETIMEDOUT; /* tcp_drop() */ goto out; } - t_template = tcpip_maketemplate(rack->rc_inp); - if (t_template) { + if (rack_send_ack_challange(rack)) { /* only set it if we were answered */ - if (rack->forced_ack == 0) { - rack->forced_ack = 1; - rack->r_ctl.forced_ack_ts = tcp_get_usecs(NULL); - } else { - rack->probe_not_answered = 1; + if (rack->probe_not_answered) { counter_u64_add(rack_persists_loss, 1); rack->r_ctl.persist_lost_ends++; } counter_u64_add(rack_persists_sends, 1); counter_u64_add(rack_out_size[TCP_MSS_ACCT_PERSIST], 1); - tcp_respond(tp, t_template->tt_ipgen, - &t_template->tt_t, (struct mbuf *)NULL, - tp->rcv_nxt, tp->snd_una - 1, 0); - /* This sends an ack */ - if (tp->t_flags & TF_DELACK) - tp->t_flags &= ~TF_DELACK; - free(t_template, M_TEMP); } if (tp->t_rxtshift < V_tcp_retries) tp->t_rxtshift++; @@ -7614,7 +8159,6 @@ static int rack_timeout_keepalive(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { - struct tcptemp *t_template; struct inpcb *inp = tptoinpcb(tp); rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_KEEP; @@ -7641,19 +8185,7 @@ * respond. 
*/ KMOD_TCPSTAT_INC(tcps_keepprobe); - t_template = tcpip_maketemplate(inp); - if (t_template) { - if (rack->forced_ack == 0) { - rack->forced_ack = 1; - rack->r_ctl.forced_ack_ts = tcp_get_usecs(NULL); - } else { - rack->probe_not_answered = 1; - } - tcp_respond(tp, t_template->tt_ipgen, - &t_template->tt_t, (struct mbuf *)NULL, - tp->rcv_nxt, tp->snd_una - 1, 0); - free(t_template, M_TEMP); - } + rack_send_ack_challange(rack); } rack_start_hpts_timer(rack, tp, cts, 0, 0, 0); return (1); @@ -7680,8 +8212,26 @@ rack = (struct tcp_rack *)tp->t_fb_ptr; rack_timer_cancel(tp, rack, tcp_get_usecs(NULL), __LINE__); rack_log_to_event(rack, RACK_TO_FRM_TMR, NULL); + rack->r_timer_override = 1; + rack->r_ctl.rc_snd_max_at_rto = tp->snd_max; + rack->r_ctl.rc_last_timeout_snduna = tp->snd_una; + rack->r_late = 0; + rack->r_early = 0; + rack->r_ctl.rc_agg_delayed = 0; + rack->r_ctl.rc_agg_early = 0; if (rack->r_state && (rack->r_state != tp->t_state)) rack_set_state(tp, rack); + if (tp->t_rxtshift <= rack_rxt_scoreboard_clear_thresh) { + /* + * We do not clear the scoreboard until we have had + * more than rack_rxt_scoreboard_clear_thresh time-outs. + */ + rack->r_ctl.rc_resend = TAILQ_FIRST(&rack->r_ctl.rc_tmap); + if (rack->r_ctl.rc_resend != NULL) + rack->r_ctl.rc_resend->r_flags |= RACK_TO_REXT; + + return; + } /* * Ideally we would like to be able to * mark SACK-PASS on anything not acked here. @@ -7714,27 +8264,26 @@ trsm = rsm; if (rsm->r_flags & RACK_ACKED) rsm->r_flags |= RACK_WAS_ACKED; - rsm->r_flags &= ~(RACK_ACKED | RACK_SACK_PASSED | RACK_WAS_SACKPASS | RACK_RWND_COLLAPSED); + rsm->r_flags &= ~(RACK_ACKED | RACK_SACK_PASSED | RACK_WAS_SACKPASS | RACK_RWND_COLLAPSED | RACK_WAS_LOST); rsm->r_flags |= RACK_MUST_RXT; } + /* zero the lost since it's all gone */ + rack->r_ctl.rc_considered_lost = 0; /* Clear the count (we just un-acked them) */ - rack->r_ctl.rc_last_timeout_snduna = tp->snd_una; rack->r_ctl.rc_sacked = 0; rack->r_ctl.rc_sacklast = NULL; - rack->r_ctl.rc_agg_delayed = 0; - rack->r_early = 0; - rack->r_ctl.rc_agg_early = 0; - rack->r_late = 0; /* Clear the tlp rtx mark */ rack->r_ctl.rc_resend = tqhash_min(rack->r_ctl.tqh); if (rack->r_ctl.rc_resend != NULL) rack->r_ctl.rc_resend->r_flags |= RACK_TO_REXT; rack->r_ctl.rc_prr_sndcnt = 0; rack_log_to_prr(rack, 6, 0, __LINE__); - rack->r_timer_override = 1; + rack->r_ctl.rc_resend = tqhash_min(rack->r_ctl.tqh); + if (rack->r_ctl.rc_resend != NULL) + rack->r_ctl.rc_resend->r_flags |= RACK_TO_REXT; if ((((tp->t_flags & TF_SACK_PERMIT) == 0) #ifdef TCP_SAD_DETECTION - || (rack->sack_attack_disable != 0) + || (rack->sack_attack_disable != 0) #endif ) && ((tp->t_flags & TF_SENTFIN) == 0)) { /* @@ -7744,9 +8293,8 @@ */ rack->r_must_retran = 1; rack->r_ctl.rc_out_at_rto = ctf_flight_size(rack->rc_tp, - rack->r_ctl.rc_sacked); + rack->r_ctl.rc_sacked); } - rack->r_ctl.rc_snd_max_at_rto = tp->snd_max; } static void @@ -7829,6 +8377,17 @@ rack->r_ctl.retran_during_recovery = 0; rack->rc_ack_required = 1; rack->r_ctl.dsack_byte_cnt = 0; + if (IN_RECOVERY(tp->t_flags) && + (rack->rto_from_rec == 0)) { + /* + * Mark that we had a rto while in recovery + * and save the ssthresh so if we go back + * into recovery we will have a chance + * to slowstart back to the level. + */ + rack->rto_from_rec = 1; + rack->r_ctl.rto_ssthresh = tp->snd_ssthresh; + } if (IN_FASTRECOVERY(tp->t_flags)) tp->t_flags |= TF_WASFRECOVERY; else @@ -7877,7 +8436,6 @@ * retransmit interval. Back off to a longer retransmit interval * and retransmit one segment. 
 */
- rack_remxt_tmr(tp);
 if ((rack->r_ctl.rc_resend == NULL) ||
 ((rack->r_ctl.rc_resend->r_flags & RACK_RWND_COLLAPSED) == 0)) {
 /*
@@ -7888,6 +8446,7 @@
 */
 tp->t_rxtshift++;
 }
+ rack_remxt_tmr(tp);
 if (tp->t_rxtshift > V_tcp_retries) {
 tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
drop_it:
@@ -8240,23 +8799,124 @@
 }
 }
+/*
+ * We maintain an array of 16 (RETRAN_CNT_SIZE) entries. This
+ * array is zeroed at the start of recovery. Each time a segment
+ * is retransmitted, we translate that into a number of packets
+ * (based on segsiz) and, based on how many times it has been retransmitted,
+ * increment by the number of packets the counter that represents
+ * retransmitted N times. Index 0 is retransmitted 1 time, index 1
+ * is retransmitted 2 times etc.
+ *
+ * So for example when we send a 4344 byte transmission with a 1448
+ * byte segsize, and it is the third time we have retransmitted this
+ * segment, we would add to the rc_cnt_of_retran[2] the value of
+ * 3. That represents 3 MSS that were retransmitted 3 times (index is
+ * the number of times retransmitted minus 1).
+ */
+static void
+rack_peg_rxt(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t segsiz)
+{
+ int idx;
+ uint32_t peg;
+
+ peg = ((rsm->r_end - rsm->r_start) + segsiz) - 1;
+ peg /= segsiz;
+ idx = rsm->r_act_rxt_cnt - 1;
+ if (idx >= RETRAN_CNT_SIZE)
+ idx = RETRAN_CNT_SIZE - 1;
+ /* Max of a uint16_t retransmits in a bucket */
+ if ((rack->r_ctl.rc_cnt_of_retran[idx] + peg) < 0xffff)
+ rack->r_ctl.rc_cnt_of_retran[idx] += peg;
+ else
+ rack->r_ctl.rc_cnt_of_retran[idx] = 0xffff;
+}
+
+/*
+ * We maintain an array of 16 (RETRAN_CNT_SIZE) entries. This
+ * array is zeroed at the start of recovery. Each time a segment
+ * is retransmitted, we translate that into a number of packets
+ * (based on segsiz) and, based on how many times it has been retransmitted,
+ * increment by the number of packets the counter that represents
+ * retransmitted N times. Index 0 is retransmitted 1 time, index 1
+ * is retransmitted 2 times etc.
+ *
+ * The rack_unpeg_rxt is used when we go to retransmit a segment
+ * again. Basically if the segment is being retransmitted, say for
+ * the 3rd time (as in our previous example in the comment above
+ * rack_peg_rxt()), then prior to calling that and incrementing
+ * r_act_rxt_cnt we would have called rack_unpeg_rxt(), which would
+ * subtract back the previous add from its last rxt (in this
+ * example r_act_rxt_cnt would have been 2 for 2 retransmissions). So
+ * we would have subtracted 3 from rc_cnt_of_retran[1] to remove
+ * those 3 segments. You will see this in the rack_update_rsm()
+ * below where we do:
+ * if (rsm->r_act_rxt_cnt > 0) {
+ * rack_unpeg_rxt(rack, rsm, segsiz);
+ * }
+ * rsm->r_act_rxt_cnt++;
+ * rack_peg_rxt(rack, rsm, segsiz);
+ *
+ * This effectively moves the count from rc_cnt_of_retran[1] to
+ * rc_cnt_of_retran[2].
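+ *
+ * Worked numbers for that move (editor's sketch reusing the example
+ * above): the 4344 byte rsm with a 1448 byte segsiz pegs
+ * peg = ((4344 + 1448) - 1) / 1448 = 3 packets. Entering the 3rd
+ * retransmission r_act_rxt_cnt is still 2, so rack_unpeg_rxt()
+ * subtracts 3 from rc_cnt_of_retran[1]; after the increment to 3,
+ * rack_peg_rxt() adds the same 3 to rc_cnt_of_retran[2].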
+ */ +static void +rack_unpeg_rxt(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t segsiz) +{ + int idx; + uint32_t peg; + + idx = rsm->r_act_rxt_cnt - 1; + if (idx >= RETRAN_CNT_SIZE) + idx = RETRAN_CNT_SIZE - 1; + peg = ((rsm->r_end - rsm->r_start) + segsiz) - 1; + peg /= segsiz; + if (peg < rack->r_ctl.rc_cnt_of_retran[idx]) + rack->r_ctl.rc_cnt_of_retran[idx] -= peg; + else { + /* TSNH */ + rack->r_ctl.rc_cnt_of_retran[idx] = 0; + } +} + static void rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, - struct rack_sendmap *rsm, uint64_t ts, uint16_t add_flag, int segsiz) + struct rack_sendmap *rsm, uint64_t ts, uint32_t add_flag, int segsiz) { int32_t idx; rsm->r_rtr_cnt++; - rack_log_retran_reason(rack, rsm, __LINE__, 0, 2); - rsm->r_dupack = 0; if (rsm->r_rtr_cnt > RACK_NUM_OF_RETRANS) { rsm->r_rtr_cnt = RACK_NUM_OF_RETRANS; rsm->r_flags |= RACK_OVERMAX; } + if (rsm->r_act_rxt_cnt > 0) { + /* Drop the count back for this, its retransmitting again */ + rack_unpeg_rxt(rack, rsm, segsiz); + } + rsm->r_act_rxt_cnt++; + /* Peg the count/index */ + rack_peg_rxt(rack, rsm, segsiz); + rack_log_retran_reason(rack, rsm, __LINE__, 0, 2); + rsm->r_dupack = 0; if ((rsm->r_rtr_cnt > 1) && ((rsm->r_flags & RACK_TLP) == 0)) { rack->r_ctl.rc_holes_rxt += (rsm->r_end - rsm->r_start); rsm->r_rtr_bytes += (rsm->r_end - rsm->r_start); } + if (rsm->r_flags & RACK_WAS_LOST) { + /* + * We retransmitted it putting it back in flight + * remove the lost desgination and reduce the + * bytes considered lost. + */ + rsm->r_flags &= ~RACK_WAS_LOST; + KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), + ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); + if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) + rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; + else + rack->r_ctl.rc_considered_lost = 0; + } idx = rsm->r_rtr_cnt - 1; rsm->r_tim_lastsent[idx] = ts; /* @@ -8304,7 +8964,7 @@ static uint32_t rack_update_entry(struct tcpcb *tp, struct tcp_rack *rack, - struct rack_sendmap *rsm, uint64_t ts, int32_t *lenp, uint16_t add_flag, int segsiz) + struct rack_sendmap *rsm, uint64_t ts, int32_t *lenp, uint32_t add_flag, int segsiz) { /* * We (re-)transmitted starting at rsm->r_start for some length @@ -8381,7 +9041,7 @@ static void rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len, uint32_t seq_out, uint16_t th_flags, int32_t err, uint64_t cts, - struct rack_sendmap *hintrsm, uint16_t add_flag, struct mbuf *s_mb, + struct rack_sendmap *hintrsm, uint32_t add_flag, struct mbuf *s_mb, uint32_t s_moff, int hw_tls, int segsiz) { struct tcp_rack *rack; @@ -8440,13 +9100,6 @@ len++; if (th_flags & TH_FIN) len++; - if (SEQ_LT(snd_max, tp->snd_nxt)) { - /* - * The add/update as not been done for the FIN/SYN - * yet. - */ - snd_max = tp->snd_nxt; - } } if (SEQ_LEQ((seq_out + len), snd_una)) { /* Are sending an old segment to induce an ack (keep-alive)? 
*/ @@ -8492,6 +9145,7 @@ rsm->r_hw_tls = 1; rsm->r_tim_lastsent[0] = cts; rsm->r_rtr_cnt = 1; + rsm->r_act_rxt_cnt = 0; rsm->r_rtr_bytes = 0; if (th_flags & TH_SYN) { /* The data space is one beyond snd_una */ @@ -8515,6 +9169,10 @@ rsm->r_fas = (ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked) + (rsm->r_end - rsm->r_start)); + if ((rack->rc_initial_ss_comp == 0) && + (rack->r_ctl.ss_hi_fs < rsm->r_fas)) { + rack->r_ctl.ss_hi_fs = rsm->r_fas; + } /* rsm->m will be NULL if RACK_HAS_SYN or RACK_HAS_FIN is set */ if (rsm->m) { if (rsm->m->m_len <= rsm->soff) { @@ -8558,6 +9216,13 @@ #endif TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext); rsm->r_in_tmap = 1; + if (rsm->r_flags & RACK_IS_PCM) { + rack->r_ctl.pcm_i.send_time = cts; + rack->r_ctl.pcm_i.eseq = rsm->r_end; + /* First time through we set the start too */ + if (rack->pcm_in_progress == 0) + rack->r_ctl.pcm_i.sseq = rsm->r_start; + } /* * Special case detection, is there just a single * packet outstanding when we are not in recovery? @@ -8886,6 +9551,7 @@ } stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_PATHRTT, imax(0, rack->r_ctl.rack_rs.rs_us_rtt)); #endif + rack->r_ctl.last_rcv_tstmp_for_rtt = tcp_tv_to_mssectick(&rack->r_ctl.act_rcv_time); /* * the retransmit should happen at rtt + 4 * rttvar. Because of the * way we do the smoothing, srtt and rttvar will each average +1/2 @@ -8939,6 +9605,7 @@ val = rack_probertt_lower_within * rack_time_between_probertt; val /= 100; if ((rack->in_probe_rtt == 0) && + (rack->rc_skip_timely == 0) && ((us_cts - rack->r_ctl.rc_lower_rtt_us_cts) >= (rack_time_between_probertt - val))) { rack_enter_probertt(rack, us_cts); } @@ -9051,7 +9718,7 @@ (!IN_FASTRECOVERY(tp->t_flags))) { /* Segment was a TLP and our retrans matched */ if (rack->r_ctl.rc_tlp_cwnd_reduce) { - rack_cong_signal(tp, CC_NDUPACK, tp->snd_una, __LINE__); + rack_cong_signal(tp, CC_NDUPACK, th_ack, __LINE__); } } if ((rack->r_ctl.rc_rack_tmit_time == 0) || @@ -9198,10 +9865,14 @@ */ static void rack_log_sack_passed(struct tcpcb *tp, - struct tcp_rack *rack, struct rack_sendmap *rsm) + struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t cts) { struct rack_sendmap *nrsm; + uint32_t thresh; + /* Get our rxt threshold for lost consideration */ + thresh = rack_calc_thresh_rack(rack, rack_grab_rtt(tp, rack), cts, __LINE__, 0); + /* Now start looking at rsm's */ nrsm = rsm; TAILQ_FOREACH_REVERSE_FROM(nrsm, &rack->r_ctl.rc_tmap, rack_head, r_tnext) { @@ -9224,6 +9895,17 @@ */ continue; } + /* Check lost state */ + if ((nrsm->r_flags & RACK_WAS_LOST) == 0) { + uint32_t exp; + + exp = ((uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)]) + thresh; + if (TSTMP_LT(exp, cts) || (exp == cts)) { + /* We consider it lost */ + nrsm->r_flags |= RACK_WAS_LOST; + rack->r_ctl.rc_considered_lost += nrsm->r_end - nrsm->r_start; + } + } if (nrsm->r_flags & RACK_SACK_PASSED) { /* * We found one that is already marked @@ -9407,8 +10089,6 @@ return (1); } - - static uint32_t rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack, struct sackblk *sack, struct tcpopt *to, struct rack_sendmap **prsm, uint32_t cts, @@ -9625,16 +10305,11 @@ (rsm->bindex == next->bindex) && ((rsm->r_flags & RACK_STRADDLE) == 0) && ((next->r_flags & RACK_STRADDLE) == 0) && + ((rsm->r_flags & RACK_IS_PCM) == 0) && + ((next->r_flags & RACK_IS_PCM) == 0) && (rsm->r_flags & RACK_IN_GP_WIN) && (next->r_flags & RACK_IN_GP_WIN)) can_use_hookery = 1; - else if (next && - (rsm->bindex == next->bindex) && - ((rsm->r_flags & RACK_STRADDLE) == 0) && - ((next->r_flags & 
RACK_STRADDLE) == 0) && - ((rsm->r_flags & RACK_IN_GP_WIN) == 0) && - ((next->r_flags & RACK_IN_GP_WIN) == 0)) - can_use_hookery = 1; else can_use_hookery = 0; if (next && can_use_hookery && @@ -9661,7 +10336,7 @@ nrsm = &stack_map; memcpy(nrsm, rsm, sizeof(struct rack_sendmap)); /* Now adjust our tree blocks */ - rsm->r_end = start; + tqhash_update_end(rack->r_ctl.tqh, rsm, start); next->r_start = start; rsm->r_flags |= RACK_SHUFFLED; next->r_flags |= RACK_SHUFFLED; @@ -9712,6 +10387,17 @@ if ((nrsm->r_end - nrsm->r_start) >= segsiz) rack->r_ctl.ack_count += ((nrsm->r_end - nrsm->r_start) / segsiz); rack->r_ctl.rc_sacked += (nrsm->r_end - nrsm->r_start); + if (rsm->r_flags & RACK_WAS_LOST) { + int my_chg; + + my_chg = (nrsm->r_end - nrsm->r_start); + KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), + ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); + if (my_chg <= rack->r_ctl.rc_considered_lost) + rack->r_ctl.rc_considered_lost -= my_chg; + else + rack->r_ctl.rc_considered_lost = 0; + } if (nrsm->r_flags & RACK_SACK_PASSED) { rack->r_ctl.rc_reorder_ts = cts; if (rack->r_ctl.rc_reorder_ts == 0) @@ -9734,7 +10420,7 @@ * one walk backwards from there. */ if (nrsm && nrsm->r_in_tmap) - rack_log_sack_passed(tp, rack, nrsm); + rack_log_sack_passed(tp, rack, nrsm, cts); } /* Now are we done? */ if (SEQ_LT(end, next->r_end) || @@ -9875,9 +10561,21 @@ /* You get a count for acking a whole segment or more */ if ((rsm->r_end - rsm->r_start) >= segsiz) rack->r_ctl.ack_count += ((rsm->r_end - rsm->r_start) / segsiz); + if (rsm->r_flags & RACK_WAS_LOST) { + int my_chg; + + my_chg = (rsm->r_end - rsm->r_start); + rsm->r_flags &= ~RACK_WAS_LOST; + KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), + ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); + if (my_chg <= rack->r_ctl.rc_considered_lost) + rack->r_ctl.rc_considered_lost -= my_chg; + else + rack->r_ctl.rc_considered_lost = 0; + } rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); if (rsm->r_in_tmap) /* should be true */ - rack_log_sack_passed(tp, rack, rsm); + rack_log_sack_passed(tp, rack, rsm, cts); /* Is Reordering occuring? 
*/ if (rsm->r_flags & RACK_SACK_PASSED) { rsm->r_flags &= ~RACK_SACK_PASSED; @@ -9889,6 +10587,7 @@ rack_need_set_test(tp, rack, rsm, tp->snd_una, __LINE__, RACK_USE_END); rsm->r_ack_arrival = rack_to_usec_ts(&rack->r_ctl.act_rcv_time); rsm->r_flags |= RACK_ACKED; + rack_update_pcm_ack(rack, 0, rsm->r_start, rsm->r_end); if (rsm->r_in_tmap) { TAILQ_REMOVE(&rack->r_ctl.rc_tmap, rsm, r_tnext); rsm->r_in_tmap = 0; @@ -9968,19 +10667,13 @@ (rsm->bindex == prev->bindex) && ((rsm->r_flags & RACK_STRADDLE) == 0) && ((prev->r_flags & RACK_STRADDLE) == 0) && + ((rsm->r_flags & RACK_IS_PCM) == 0) && + ((prev->r_flags & RACK_IS_PCM) == 0) && (rsm->r_flags & RACK_IN_GP_WIN) && (prev->r_flags & RACK_IN_GP_WIN)) can_use_hookery = 1; - else if (prev && - (rsm->bindex == prev->bindex) && - ((rsm->r_flags & RACK_STRADDLE) == 0) && - ((prev->r_flags & RACK_STRADDLE) == 0) && - ((rsm->r_flags & RACK_IN_GP_WIN) == 0) && - ((prev->r_flags & RACK_IN_GP_WIN) == 0)) - can_use_hookery = 1; else can_use_hookery = 0; - if (prev && can_use_hookery && (prev->r_flags & RACK_ACKED)) { /** @@ -10003,7 +10696,7 @@ noextra++; nrsm = &stack_map; memcpy(nrsm, rsm, sizeof(struct rack_sendmap)); - prev->r_end = end; + tqhash_update_end(rack->r_ctl.tqh, prev, end); rsm->r_start = end; rsm->r_flags |= RACK_SHUFFLED; prev->r_flags |= RACK_SHUFFLED; @@ -10064,6 +10757,17 @@ rack->r_ctl.ack_count += ((nrsm->r_end - nrsm->r_start) / segsiz); rack->r_ctl.rc_sacked += (nrsm->r_end - nrsm->r_start); + if (rsm->r_flags & RACK_WAS_LOST) { + int my_chg; + + my_chg = (nrsm->r_end - nrsm->r_start); + KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), + ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); + if (my_chg <= rack->r_ctl.rc_considered_lost) + rack->r_ctl.rc_considered_lost -= my_chg; + else + rack->r_ctl.rc_considered_lost = 0; + } if (nrsm->r_flags & RACK_SACK_PASSED) { rack->r_ctl.rc_reorder_ts = cts; if (rack->r_ctl.rc_reorder_ts == 0) @@ -10160,10 +10864,22 @@ /* You get a count for acking a whole segment or more */ if ((rsm->r_end - rsm->r_start) >= segsiz) rack->r_ctl.ack_count += ((rsm->r_end - rsm->r_start) / segsiz); - + if (rsm->r_flags & RACK_WAS_LOST) { + int my_chg; + + my_chg = (rsm->r_end - rsm->r_start); + rsm->r_flags &= ~RACK_WAS_LOST; + KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), + ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); + if (my_chg <= rack->r_ctl.rc_considered_lost) + rack->r_ctl.rc_considered_lost -= my_chg; + else + rack->r_ctl.rc_considered_lost = 0; + } rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); + if (rsm->r_in_tmap) /* should be true */ - rack_log_sack_passed(tp, rack, rsm); + rack_log_sack_passed(tp, rack, rsm, cts); /* Is Reordering occuring? 
*/ if (rsm->r_flags & RACK_SACK_PASSED) { rsm->r_flags &= ~RACK_SACK_PASSED; @@ -10175,6 +10891,7 @@ rack_need_set_test(tp, rack, rsm, tp->snd_una, __LINE__, RACK_USE_END); rsm->r_ack_arrival = rack_to_usec_ts(&rack->r_ctl.act_rcv_time); rsm->r_flags |= RACK_ACKED; + rack_update_pcm_ack(rack, 0, rsm->r_start, rsm->r_end); rack_log_map_chg(tp, rack, NULL, rsm, nrsm, MAP_SACK_M5, end, __LINE__); if (rsm->r_in_tmap) { TAILQ_REMOVE(&rack->r_ctl.rc_tmap, rsm, r_tnext); @@ -10214,8 +10931,12 @@ break; if (rsm->r_flags & RACK_STRADDLE) break; + if (rsm->r_flags & RACK_IS_PCM) + break; if (next->r_flags & RACK_STRADDLE) break; + if (next->r_flags & RACK_IS_PCM) + break; if (next->r_flags & RACK_ACKED) { /* yep this and next can be merged */ rsm = rack_merge_rsm(rack, rsm, next); @@ -10242,8 +10963,12 @@ break; if (rsm->r_flags & RACK_STRADDLE) break; + if (rsm->r_flags & RACK_IS_PCM) + break; if (prev->r_flags & RACK_STRADDLE) break; + if (prev->r_flags & RACK_IS_PCM) + break; if (prev->r_flags & RACK_ACKED) { /* yep the previous and this can be merged */ rsm = rack_merge_rsm(rack, prev, rsm); @@ -10264,6 +10989,9 @@ /* Pass back the moved. */ *moved_two = moved; *no_extra = noextra; + if (IN_RECOVERY(tp->t_flags)) { + rack->r_ctl.bytes_acked_in_recovery += changed; + } return (changed); } @@ -10464,6 +11192,17 @@ * RTT's. */ + if (sack_filter_blks_used(&rack->r_ctl.rack_sf)) { + /* + * If we have some sack blocks in the filter + * lets prune them out by calling sfb with no blocks. + */ + sack_filter_blks(&rack->r_ctl.rack_sf, NULL, 0, th_ack); + } + if (SEQ_GT(th_ack, tp->snd_una)) { + /* Clear any app ack remembered settings */ + rack->r_ctl.cleared_app_ack = 0; + } rack->r_wanted_output = 1; if (SEQ_GT(th_ack, tp->snd_una)) rack->r_ctl.last_cumack_advance = acktime; @@ -10533,10 +11272,10 @@ return; } #ifdef INVARIANTS - panic("No rack map tp:%p for state:%d ack:%u rack:%p snd_una:%u snd_max:%u snd_nxt:%u\n", + panic("No rack map tp:%p for state:%d ack:%u rack:%p snd_una:%u snd_max:%u\n", tp, tp->t_state, th_ack, rack, - tp->snd_una, tp->snd_max, tp->snd_nxt); + tp->snd_una, tp->snd_max); #endif return; } @@ -10599,6 +11338,20 @@ uint32_t left; uint8_t newly_acked; + if (rsm->r_flags & RACK_WAS_LOST) { + /* + * This can happen when we marked it as lost + * and yet before retransmitting we get an ack + * which can happen due to reordering. + */ + rsm->r_flags &= ~RACK_WAS_LOST; + KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), + ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); + if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) + rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; + else + rack->r_ctl.rc_considered_lost = 0; + } rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_FREE, rsm->r_end, __LINE__); rack->r_ctl.rc_holes_rxt -= rsm->r_rtr_bytes; rsm->r_rtr_bytes = 0; @@ -10613,6 +11366,10 @@ rsm->r_in_tmap = 0; } newly_acked = 1; + if (((rsm->r_flags & RACK_ACKED) == 0) && + (IN_RECOVERY(tp->t_flags))) { + rack->r_ctl.bytes_acked_in_recovery += (rsm->r_end - rsm->r_start); + } if (rsm->r_flags & RACK_ACKED) { /* * It was acked on the scoreboard -- remove @@ -10639,6 +11396,9 @@ */ rack->r_might_revert = 1; } + rack_update_pcm_ack(rack, 1, rsm->r_start, rsm->r_end); + } else { + rack_update_pcm_ack(rack, 1, rsm->r_start, rsm->r_end); } if ((rsm->r_flags & RACK_TO_REXT) && (tp->t_flags & TF_RCVD_TSTMP) && @@ -10691,6 +11451,27 @@ * total for the part being cum-acked. 
*/ rack->r_ctl.rc_sacked -= (th_ack - rsm->r_start); + } else { + if (((rsm->r_flags & RACK_ACKED) == 0) && + (IN_RECOVERY(tp->t_flags))) { + rack->r_ctl.bytes_acked_in_recovery += (th_ack - rsm->r_start); + } + rack_update_pcm_ack(rack, 1, rsm->r_start, th_ack); + } + /* And what about the lost flag? */ + if (rsm->r_flags & RACK_WAS_LOST) { + /* + * This can happen when we marked it as lost + * and yet before retransmitting we get an ack + * which can happen due to reordering. In this + * case its only a partial ack of the send. + */ + KASSERT((rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)), + ("rsm:%p rack:%p rc_considered_lost goes negative th_ack:%u", rsm, rack, th_ack)); + if (rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)) + rack->r_ctl.rc_considered_lost -= th_ack - rsm->r_start; + else + rack->r_ctl.rc_considered_lost = 0; } /* * Clear the dup ack count for @@ -10807,7 +11588,26 @@ tp->snd_ssthresh = rack->r_ctl.rc_ssthresh_at_erec; tp->snd_recover = tp->snd_una; rack_log_to_prr(rack, 14, orig_cwnd, __LINE__); - EXIT_RECOVERY(tp->t_flags); + if (IN_RECOVERY(tp->t_flags)) { + rack_exit_recovery(tp, rack, 3); + if ((rack->rto_from_rec == 1) && (rack_ssthresh_rest_rto_rec != 0) ){ + /* + * We were in recovery, had an RTO + * and then re-entered recovery (more sack's arrived) + * and we have properly recorded the old ssthresh from + * the first recovery. We want to be able to slow-start + * back to this level. The ssthresh from the timeout + * and then back into recovery will end up most likely + * to be min(cwnd=1mss, 2mss). Which makes it basically + * so we get no slow-start after our RTO. + */ + rack->rto_from_rec = 0; + if (rack->r_ctl.rto_ssthresh > tp->snd_ssthresh) + tp->snd_ssthresh = rack->r_ctl.rto_ssthresh; + } + } + rack->r_ctl.bytes_acked_in_recovery = 0; + rack->r_ctl.time_entered_recovery = 0; } rack->r_might_revert = 0; } @@ -11062,7 +11862,8 @@ static uint32_t do_rack_compute_pipe(struct tcpcb *tp, struct tcp_rack *rack, uint32_t snd_una) { - return (((tp->snd_max - snd_una) - rack->r_ctl.rc_sacked) + rack->r_ctl.rc_holes_rxt); + return (((tp->snd_max - snd_una) - + (rack->r_ctl.rc_sacked + rack->r_ctl.rc_considered_lost)) + rack->r_ctl.rc_holes_rxt); } static int32_t @@ -11505,7 +12306,7 @@ ((rsm->r_flags & RACK_MUST_RXT) == 0)) { /* Enter recovery */ entered_recovery = 1; - rack_cong_signal(tp, CC_NDUPACK, tp->snd_una, __LINE__); + rack_cong_signal(tp, CC_NDUPACK, th_ack, __LINE__); /* * When we enter recovery we need to assure we send * one packet. @@ -11547,7 +12348,7 @@ } static void -rack_strike_dupack(struct tcp_rack *rack) +rack_strike_dupack(struct tcp_rack *rack, tcp_seq th_ack) { struct rack_sendmap *rsm; @@ -11581,7 +12382,7 @@ if (rack->r_ctl.rc_resend != NULL) { if (!IN_FASTRECOVERY(rack->rc_tp->t_flags)) { rack_cong_signal(rack->rc_tp, CC_NDUPACK, - rack->rc_tp->snd_una, __LINE__); + th_ack, __LINE__); } rack->r_wanted_output = 1; rack->r_timer_override = 1; @@ -11598,6 +12399,25 @@ struct tcp_rack *rack, struct socket *so) { + /* + * So what is dragging bottom? + * + * Dragging bottom means you were under pacing and had a + * delay in processing inbound acks waiting on our pacing + * timer to expire. While you were waiting all of the acknowledgments + * for the packets you sent have arrived. 
This means we are pacing + * way underneath the bottleneck to the point where our Goodput + * measurements stop working, since they require more than one + * ack (usually at least 8 packets worth with multiple acks so we can + * gauge the inter-ack times). If that occurs we have a real problem + * since we are stuck in a hole that we can't get out of without + * something speeding us up. + * + * We also check to see if we are widdling down to just one segment + * outstanding. If this occurs and we have room to send in our cwnd/rwnd + * then we are adding the delayed ack interval into our measurments and + * we need to speed up slightly. + */ uint32_t segsiz, minseg; segsiz = ctf_fixed_maxseg(tp); @@ -11614,10 +12434,13 @@ */ uint64_t lt_bw; + tcp_trace_point(rack->rc_tp, TCP_TP_PACED_BOTTOM); lt_bw = rack_get_lt_bw(rack); rack->rc_dragged_bottom = 1; rack_validate_multipliers_at_or_above100(rack); if ((rack->r_ctl.rack_rs.rs_flags & RACK_RTT_VALID) && + (rack->dis_lt_bw == 0) && + (rack->use_lesser_lt_bw == 0) && (lt_bw > 0)) { /* * Lets use the long-term b/w we have @@ -11729,7 +12552,7 @@ log.u_bbr.delivered = (uint32_t)((cur->end >> 32) & 0x00000000ffffffff) ; log.u_bbr.epoch = (uint32_t)(cur->deadline & 0x00000000ffffffff); log.u_bbr.lt_epoch = (uint32_t)((cur->deadline >> 32) & 0x00000000ffffffff) ; - log.u_bbr.bbr_state = 1; + log.u_bbr.inhpts = 1; #ifdef TCP_REQUEST_TRK off = (uint64_t)(cur) - (uint64_t)(&rack->rc_tp->t_tcpreq_info[0]); log.u_bbr.use_lt_bw = (uint8_t)(off / sizeof(struct tcp_sendfile_track)); @@ -11745,6 +12568,20 @@ log.u_bbr.flex7 |= rack->rc_hybrid_mode; log.u_bbr.flex7 <<= 1; log.u_bbr.flex7 |= rack->dgp_on; + /* + * Compose bbr_state to be a bit wise 0000ADHF + * where A is the always_pace flag + * where D is the dgp_on flag + * where H is the hybrid_mode on flag + * where F is the use_fixed_rate flag. 
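+ *
+ * For example (editor's note, values assumed): always_pace = 1,
+ * dgp_on = 1, hybrid_mode = 0 and use_fixed_rate = 0 compose to
+ * 0000 1100 (0xc) after the shift/or steps below.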
+ */ + log.u_bbr.bbr_state = rack->rc_always_pace; + log.u_bbr.bbr_state <<= 1; + log.u_bbr.bbr_state |= rack->dgp_on; + log.u_bbr.bbr_state <<= 1; + log.u_bbr.bbr_state |= rack->rc_hybrid_mode; + log.u_bbr.bbr_state <<= 1; + log.u_bbr.bbr_state |= rack->use_fixed_rate; log.u_bbr.flex8 = mod; log.u_bbr.delRate = rack->r_ctl.bw_rate_cap; log.u_bbr.bbr_substate = rack->r_ctl.client_suggested_maxseg; @@ -11763,12 +12600,13 @@ #ifdef TCP_REQUEST_TRK static void -rack_set_dgp_hybrid_mode(struct tcp_rack *rack, tcp_seq seq, uint32_t len) +rack_set_dgp_hybrid_mode(struct tcp_rack *rack, tcp_seq seq, uint32_t len, uint64_t cts) { - struct tcp_sendfile_track *rc_cur; + struct tcp_sendfile_track *rc_cur, *orig_ent; struct tcpcb *tp; int err = 0; + orig_ent = rack->r_ctl.rc_last_sft; rc_cur = tcp_req_find_req_for_seq(rack->rc_tp, seq); if (rc_cur == NULL) { /* If not in the beginning what about the end piece */ @@ -11781,11 +12619,17 @@ /* If we find no parameters we are in straight DGP mode */ if(rc_cur == NULL) { /* None found for this seq, just DGP for now */ - rack->r_ctl.client_suggested_maxseg = 0; - rack->rc_catch_up = 0; - rack->r_ctl.bw_rate_cap = 0; - if (rack->rc_hybrid_mode) + if (rack->rc_hybrid_mode) { + rack->r_ctl.client_suggested_maxseg = 0; + rack->rc_catch_up = 0; + if (rack->cspr_is_fcc == 0) + rack->r_ctl.bw_rate_cap = 0; + else + rack->r_ctl.fillcw_cap = rack_fillcw_bw_cap; + } + if (rack->rc_hybrid_mode) { rack_log_hybrid(rack, (seq + len - 1), NULL, HYBRID_LOG_NO_RANGE, __LINE__, err); + } if (rack->r_ctl.rc_last_sft) { rack->r_ctl.rc_last_sft = NULL; } @@ -11793,6 +12637,20 @@ } if ((rc_cur->hybrid_flags & TCP_HYBRID_PACING_WASSET) == 0) { /* This entry was never setup for hybrid pacing on/off etc */ + if (rack->rc_hybrid_mode) { + rack->r_ctl.client_suggested_maxseg = 0; + rack->rc_catch_up = 0; + rack->r_ctl.bw_rate_cap = 0; + } + if (rack->r_ctl.rc_last_sft) { + rack->r_ctl.rc_last_sft = NULL; + } + if ((rc_cur->flags & TCP_TRK_TRACK_FLG_FSND) == 0) { + rc_cur->flags |= TCP_TRK_TRACK_FLG_FSND; + rc_cur->first_send = cts; + rc_cur->sent_at_fs = rack->rc_tp->t_sndbytes; + rc_cur->rxt_at_fs = rack->rc_tp->t_snd_rxt_bytes; + } return; } /* @@ -11812,18 +12670,40 @@ } if (rack->rc_hybrid_mode == 0) { rack->r_ctl.rc_last_sft = rc_cur; + if (orig_ent) { + orig_ent->sent_at_ls = rack->rc_tp->t_sndbytes; + orig_ent->rxt_at_ls = rack->rc_tp->t_snd_rxt_bytes; + orig_ent->flags |= TCP_TRK_TRACK_FLG_LSND; + } rack_log_hybrid(rack, seq, rc_cur, HYBRID_LOG_RULES_APP, __LINE__, 0); return; } if ((rc_cur->hybrid_flags & TCP_HYBRID_PACING_CSPR) && rc_cur->cspr){ /* Compensate for all the header overhead's */ - rack->r_ctl.bw_rate_cap = rack_compensate_for_linerate(rack, rc_cur->cspr); - } else - rack->r_ctl.bw_rate_cap = 0; + if (rack->cspr_is_fcc == 0) + rack->r_ctl.bw_rate_cap = rack_compensate_for_linerate(rack, rc_cur->cspr); + else + rack->r_ctl.fillcw_cap = rack_compensate_for_linerate(rack, rc_cur->cspr); + } else { + if (rack->rc_hybrid_mode) { + if (rack->cspr_is_fcc == 0) + rack->r_ctl.bw_rate_cap = 0; + else + rack->r_ctl.fillcw_cap = rack_fillcw_bw_cap; + } + } if (rc_cur->hybrid_flags & TCP_HYBRID_PACING_H_MS) rack->r_ctl.client_suggested_maxseg = rc_cur->hint_maxseg; else rack->r_ctl.client_suggested_maxseg = 0; + if (rc_cur->timestamp == rack->r_ctl.last_tm_mark) { + /* + * It is the same timestamp as the previous one + * add the hybrid flag that will indicate we use + * sendtime not arrival time for catch-up mode. 
+ */ + rc_cur->hybrid_flags |= TCP_HYBRID_PACING_SENDTIME; + } if ((rc_cur->hybrid_flags & TCP_HYBRID_PACING_CU) && (rc_cur->cspr > 0)) { uint64_t len; @@ -11833,7 +12713,20 @@ * Calculate the deadline time, first set the * time to when the request arrived. */ - rc_cur->deadline = rc_cur->localtime; + if (rc_cur->hybrid_flags & TCP_HYBRID_PACING_SENDTIME) { + /* + * For cases where its a duplicate tm (we received more + * than one request for a tm) we want to use now, the point + * where we are just sending the first bit of the request. + */ + rc_cur->deadline = cts; + } else { + /* + * Here we have a different tm from the last request + * so we want to use arrival time as our base. + */ + rc_cur->deadline = rc_cur->localtime; + } /* * Next calculate the length and compensate for * TLS if need be. @@ -11867,9 +12760,15 @@ */ rack_set_pace_segments(tp, rack, __LINE__, NULL); } + if (orig_ent) { + orig_ent->sent_at_ls = rack->rc_tp->t_sndbytes; + orig_ent->rxt_at_ls = rack->rc_tp->t_snd_rxt_bytes; + orig_ent->flags |= TCP_TRK_TRACK_FLG_LSND; + } rack_log_hybrid(rack, seq, rc_cur, HYBRID_LOG_RULES_APP, __LINE__, 0); /* Remember it for next time and for CU mode */ rack->r_ctl.rc_last_sft = rc_cur; + rack->r_ctl.last_tm_mark = rc_cur->timestamp; } #endif @@ -11884,7 +12783,7 @@ (ent->flags == TCP_TRK_TRACK_FLG_EMPTY) || (SEQ_GEQ(seq, ent->end_seq))) { /* Time to update the track. */ - rack_set_dgp_hybrid_mode(rack, seq, len); + rack_set_dgp_hybrid_mode(rack, seq, len, cts); ent = rack->r_ctl.rc_last_sft; } /* Out of all */ @@ -12116,8 +13015,17 @@ * if so be sure to NULL the pointer so we know we are no longer * set to anything. */ - if (ent == rack->r_ctl.rc_last_sft) + if (ent == rack->r_ctl.rc_last_sft) { rack->r_ctl.rc_last_sft = NULL; + if (rack->rc_hybrid_mode) { + rack->rc_catch_up = 0; + if (rack->cspr_is_fcc == 0) + rack->r_ctl.bw_rate_cap = 0; + else + rack->r_ctl.fillcw_cap = rack_fillcw_bw_cap; + rack->r_ctl.client_suggested_maxseg = 0; + } + } /* Generate the log that the tcp_netflix call would have */ tcp_req_log_req_info(rack->rc_tp, ent, i, TCP_TRK_REQ_LOG_FREED, 0, 0); @@ -12139,7 +13047,7 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, uint32_t tiwin, int32_t tlen, - int32_t * ofia, int32_t thflags, int32_t *ret_val) + int32_t * ofia, int32_t thflags, int32_t *ret_val, int32_t orig_tlen) { int32_t ourfinisacked = 0; int32_t nsegs, acked_amount; @@ -12147,7 +13055,8 @@ struct mbuf *mfree; struct tcp_rack *rack; int32_t under_pacing = 0; - int32_t recovery = 0; + int32_t post_recovery = 0; + uint32_t p_cwnd; INP_WLOCK_ASSERT(tptoinpcb(tp)); @@ -12176,8 +13085,9 @@ if ((th->th_ack == tp->snd_una) && (tiwin == tp->snd_wnd) && + (orig_tlen == 0) && ((to->to_flags & TOF_SACK) == 0)) { - rack_strike_dupack(rack); + rack_strike_dupack(rack, th->th_ack); dup_ack_struck = 1; } rack_log_ack(tp, to, th, ((in_rec == 0) && IN_FASTRECOVERY(tp->t_flags)), @@ -12185,6 +13095,7 @@ if ((rack->sack_attack_disable > 0) && (th->th_ack == tp->snd_una) && (tiwin == tp->snd_wnd) && + (orig_tlen == 0) && (dsack_seen == 0) && (sacks_seen > 0)) { /* @@ -12197,7 +13108,7 @@ * were we are ignoring sacks from this guy due to * it being a suspected attacker. 
*/ - rack_strike_dupack(rack); + rack_strike_dupack(rack, th->th_ack); } } @@ -12306,15 +13217,37 @@ tcp_rack_partialack(tp); } else { rack_post_recovery(tp, th->th_ack); - recovery = 1; + post_recovery = 1; + /* + * Grab the segsiz, multiply by 2 and add the snd_cwnd + * that is the max the CC should add if we are exiting + * recovery and doing a late add. + */ + p_cwnd = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs); + p_cwnd <<= 1; + p_cwnd += tp->snd_cwnd; } + } else if ((rack->rto_from_rec == 1) && + SEQ_GEQ(th->th_ack, tp->snd_recover)) { + /* + * We were in recovery, hit a rxt timeout + * and never re-entered recovery. The timeout(s) + * made up all the lost data. In such a case + * we need to clear the rto_from_rec flag. + */ + rack->rto_from_rec = 0; } /* * Let the congestion control algorithm update congestion control * related information. This typically means increasing the * congestion window. */ - rack_ack_received(tp, rack, th->th_ack, nsegs, CC_ACK, recovery); + rack_ack_received(tp, rack, th->th_ack, nsegs, CC_ACK, post_recovery); + if (post_recovery && + (tp->snd_cwnd > p_cwnd)) { + /* Must be non-newreno (cubic) getting too ahead of itself */ + tp->snd_cwnd = p_cwnd; + } SOCKBUF_LOCK(&so->so_snd); acked_amount = min(acked, (int)sbavail(&so->so_snd)); tp->snd_wnd -= acked_amount; @@ -12338,13 +13271,6 @@ rack_log_wakeup(tp,rack, &so->so_snd, acked, 2); /* NB: sowwakeup_locked() does an implicit unlock. */ sowwakeup_locked(so); - /* now check the rxt clamps */ - if ((recovery == 1) && - (rack->excess_rxt_on) && - (rack->r_cwnd_was_clamped == 0)) { - do_rack_excess_rxt(tp, rack); - } else if (rack->r_cwnd_was_clamped) - do_rack_check_for_unclamp(tp, rack); m_freem(mfree); if (SEQ_GT(tp->snd_una, tp->snd_recover)) tp->snd_recover = tp->snd_una; @@ -12363,11 +13289,12 @@ if (tp->snd_una == tp->snd_max) { /* Nothing left outstanding */ tp->t_flags &= ~TF_PREVVALID; + rack->r_ctl.idle_snd_una = tp->snd_una; rack->r_ctl.rc_went_idle_time = tcp_get_usecs(NULL); - rack->r_ctl.retran_during_recovery = 0; - rack->r_ctl.dsack_byte_cnt = 0; if (rack->r_ctl.rc_went_idle_time == 0) rack->r_ctl.rc_went_idle_time = 1; + rack->r_ctl.retran_during_recovery = 0; + rack->r_ctl.dsack_byte_cnt = 0; rack_log_progress_event(rack, tp, 0, PROGRESS_CLEAR, __LINE__); if (sbavail(&tptosocket(tp)->so_snd) == 0) tp->t_acktime = 0; @@ -12562,7 +13489,6 @@ } } - /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still @@ -12713,12 +13639,20 @@ SOCKBUF_LOCK(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { m_freem(m); - } else + } else { + int32_t newsize; + + if (tlen > 0) { + newsize = tcp_autorcvbuf(m, th, so, tp, tlen); + if (newsize) + if (!sbreserve_locked(so, SO_RCV, newsize, NULL)) + so->so_rcv.sb_flags &= ~SB_AUTOSIZE; + } #ifdef NETFLIX_SB_LIMITS appended = #endif sbappendstream_locked(&so->so_rcv, m, 0); - + } rack_log_wakeup(tp,rack, &so->so_rcv, tlen, 1); /* NB: sorwakeup_locked() does an implicit unlock. */ sorwakeup_locked(so); @@ -12877,9 +13811,6 @@ if (__predict_false(th->th_seq != tp->rcv_nxt)) { return (0); } - if (__predict_false(tp->snd_nxt != tp->snd_max)) { - return (0); - } if (tiwin && tiwin != tp->snd_wnd) { return (0); } @@ -13005,10 +13936,6 @@ /* Above what we have sent? 
*/ return (0); } - if (__predict_false(tp->snd_nxt != tp->snd_max)) { - /* We are retransmitting */ - return (0); - } if (__predict_false(tiwin == 0)) { /* zero window */ return (0); @@ -13176,6 +14103,7 @@ rack->r_ctl.retran_during_recovery = 0; rack->rc_suspicious = 0; rack->r_ctl.dsack_byte_cnt = 0; + rack->r_ctl.idle_snd_una = tp->snd_una; rack->r_ctl.rc_went_idle_time = tcp_get_usecs(NULL); if (rack->r_ctl.rc_went_idle_time == 0) rack->r_ctl.rc_went_idle_time = 1; @@ -13203,6 +14131,7 @@ uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) { int32_t ret_val = 0; + int32_t orig_tlen = tlen; int32_t todrop; int32_t ourfinisacked = 0; struct tcp_rack *rack; @@ -13267,8 +14196,9 @@ */ if (IS_FASTOPEN(tp->t_flags) && (tp->snd_una != tp->snd_max)) { - tp->snd_nxt = th->th_ack; - tfo_partial = 1; + /* Was it a partial ack? */ + if (SEQ_LT(th->th_ack, tp->snd_max)) + tfo_partial = 1; } /* * If there's data, delay ACK; if there's also a FIN ACKNOW @@ -13299,6 +14229,24 @@ * and there is no send_map. */ tp->snd_una++; + if (tfo_partial && (SEQ_GT(tp->snd_max, tp->snd_una))) { + /* + * We sent a SYN with data, and thus have a + * sendmap entry with a SYN set. Lets find it + * and take off the send bit and the byte and + * set it up to be what we send (send it next). + */ + struct rack_sendmap *rsm; + + rsm = tqhash_min(rack->r_ctl.tqh); + if (rsm) { + if (rsm->r_flags & RACK_HAS_SYN) { + rsm->r_flags &= ~RACK_HAS_SYN; + rsm->r_start++; + } + rack->r_ctl.rc_resend = rsm; + } + } } /* * Received in SYN_SENT[*] state. Transitions: @@ -13361,7 +14309,7 @@ tcp_rack_xmit_timer(rack, t + 1, 1, t, 0, NULL, 2); tcp_rack_xmit_timer_commit(rack, tp); } - if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val, orig_tlen)) return (ret_val); /* We may have changed to FIN_WAIT_1 above */ if (tp->t_state == TCPS_FIN_WAIT_1) { @@ -13407,6 +14355,7 @@ uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) { struct tcp_rack *rack; + int32_t orig_tlen = tlen; int32_t ret_val = 0; int32_t ourfinisacked = 0; @@ -13579,7 +14528,7 @@ tcp_rack_xmit_timer(rack, t + 1, 1, t, 0, NULL, 2); tcp_rack_xmit_timer_commit(rack, tp); } - if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val, orig_tlen)) { return (ret_val); } if (tp->t_state == TCPS_FIN_WAIT_1) { @@ -13624,6 +14573,7 @@ uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) { int32_t ret_val = 0; + int32_t orig_tlen = tlen; struct tcp_rack *rack; /* @@ -13730,7 +14680,7 @@ /* * Ack processing. */ - if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, NULL, thflags, &ret_val)) { + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, NULL, thflags, &ret_val, orig_tlen)) { return (ret_val); } if (sbavail(&so->so_snd)) { @@ -13756,6 +14706,7 @@ uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) { int32_t ret_val = 0; + int32_t orig_tlen = tlen; struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; @@ -13830,7 +14781,7 @@ /* * Ack processing. 
*/ - if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, NULL, thflags, &ret_val)) { + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, NULL, thflags, &ret_val, orig_tlen)) { return (ret_val); } if (sbavail(&so->so_snd)) { @@ -13884,6 +14835,7 @@ uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) { int32_t ret_val = 0; + int32_t orig_tlen = tlen; int32_t ourfinisacked = 0; struct tcp_rack *rack; @@ -13966,7 +14918,7 @@ /* * Ack processing. */ - if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val, orig_tlen)) { return (ret_val); } if (ourfinisacked) { @@ -14011,6 +14963,7 @@ uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) { int32_t ret_val = 0; + int32_t orig_tlen = tlen; int32_t ourfinisacked = 0; struct tcp_rack *rack; @@ -14093,7 +15046,7 @@ /* * Ack processing. */ - if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val, orig_tlen)) { return (ret_val); } if (ourfinisacked) { @@ -14124,6 +15077,7 @@ uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) { int32_t ret_val = 0; + int32_t orig_tlen; int32_t ourfinisacked = 0; struct tcp_rack *rack; @@ -14152,6 +15106,7 @@ if (ctf_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } + orig_tlen = tlen; if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val, &rack->r_ctl.challenge_ack_ts, &rack->r_ctl.challenge_ack_cnt)) { @@ -14206,7 +15161,7 @@ /* * case TCPS_LAST_ACK: Ack processing. */ - if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val, orig_tlen)) { return (ret_val); } if (ourfinisacked) { @@ -14237,6 +15192,7 @@ uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) { int32_t ret_val = 0; + int32_t orig_tlen = tlen; int32_t ourfinisacked = 0; struct tcp_rack *rack; @@ -14320,7 +15276,7 @@ /* * Ack processing. */ - if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { + if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val, orig_tlen)) { return (ret_val); } if (sbavail(&so->so_snd)) { @@ -14919,65 +15875,43 @@ } static void -rack_translate_clamp_value(struct tcp_rack *rack, uint32_t optval) +rack_translate_policer_detect(struct tcp_rack *rack, uint32_t optval) { /* - * P = percent bits - * F = fill cw bit -- Toggle fillcw if this bit is set. - * S = Segment bits - * M = set max segment bit - * U = Unclamined - * C = If set to non-zero override the max number of clamps. - * L = Bit to indicate if clamped gets lower. + * P = Percent of retransmits 499 = 49.9% + * A = Average number 1 (.1%) -> 169 (16.9%) + * M = Median number of retrans 1 - 16 + * MMMM MMMM AAAA AAAA PPPP PPPP PPPP PPPP * - * CCCC CCCCC UUUU UULF PPPP PPPP PPPP PPPP - * - * The lowest 3 nibbles is the perentage .1 - 6553.5% - * where 10.1 = 101, max 6553.5 - * The upper 16 bits holds some options. - * The F bit will turn on fill-cw on if you are - * not pacing, it will turn it off if dgp is on. - * The L bit will change it so when clamped we get - * the min(gp, lt-bw) for dgp. 
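+ * For example (illustrative value only) an optval of 0x0408012C decodes
+ * to M = 4, A = 8 (0.8%) and P = 300 (30.0%); detection is enabled
+ * only when all three fields are non-zero.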
*/ - uint16_t per; + uint16_t per, upp; - rack->r_ctl.saved_rxt_clamp_val = optval; per = optval & 0x0000ffff; - rack->r_ctl.rxt_threshold = (uint64_t)(per & 0xffff); - if (optval > 0) { - uint16_t clamp_opt; - - rack->excess_rxt_on = 1; - clamp_opt = ((optval & 0xffff0000) >> 16); - rack->r_ctl.clamp_options = clamp_opt & 0x00ff; - if (clamp_opt & 0xff00) { - /* A max clamps is also present */ - rack->r_ctl.max_clamps = (clamp_opt >> 8); - } else { - /* No specified clamps means no limit */ - rack->r_ctl.max_clamps = 0; - } - if (rack->r_ctl.clamp_options & 0x0002) { - rack->r_clamped_gets_lower = 1; - } else { - rack->r_clamped_gets_lower = 0; - } + rack->r_ctl.policer_rxt_threshold = (uint32_t)(per & 0xffff); + upp = ((optval & 0xffff0000) >> 16); + rack->r_ctl.policer_avg_threshold = (0x00ff & upp); + rack->r_ctl.policer_med_threshold = ((upp >> 8) & 0x00ff); + if ((rack->r_ctl.policer_rxt_threshold > 0) && + (rack->r_ctl.policer_avg_threshold > 0) && + (rack->r_ctl.policer_med_threshold > 0)) { + rack->policer_detect_on = 1; } else { - /* Turn it off back to default */ - rack->excess_rxt_on = 0; - rack->r_clamped_gets_lower = 0; + rack->policer_detect_on = 0; } - + rack->r_ctl.saved_policer_val = optval; + policer_detection_log(rack, optval, + rack->r_ctl.policer_avg_threshold, + rack->r_ctl.policer_med_threshold, + rack->r_ctl.policer_rxt_threshold, 11); } - static int32_t rack_init(struct tcpcb *tp, void **ptr) { struct inpcb *inp = tptoinpcb(tp); struct tcp_rack *rack = NULL; uint32_t iwin, snt, us_cts; + size_t sz; int err, no_query; tcp_hpts_init(tp); @@ -15036,16 +15970,22 @@ rack->rc_new_rnd_needed = 1; rack->r_ctl.rc_split_limit = V_tcp_map_split_limit; /* We want abe like behavior as well */ + rack->r_ctl.rc_saved_beta.newreno_flags |= CC_NEWRENO_BETA_ECN_ENABLED; rack->r_ctl.rc_reorder_fade = rack_reorder_fade; rack->rc_allow_data_af_clo = rack_ignore_data_after_close; rack->r_ctl.rc_tlp_threshold = rack_tlp_thresh; - if (rack_rxt_clamp_thresh) { - rack_translate_clamp_value(rack, rack_rxt_clamp_thresh); - rack->excess_rxt_on = 1; + rack->r_ctl.policer_del_mss = rack_req_del_mss; + if ((rack_policer_rxt_thresh > 0) && + (rack_policer_avg_thresh > 0) && + (rack_policer_med_thresh > 0)) { + rack->r_ctl.policer_rxt_threshold = rack_policer_rxt_thresh; + rack->r_ctl.policer_avg_threshold = rack_policer_avg_thresh; + rack->r_ctl.policer_med_threshold = rack_policer_med_thresh; + rack->policer_detect_on = 1; + } else { + rack->policer_detect_on = 0; } - if (rack_uses_full_dgp_in_rec) - rack->r_ctl.full_dgp_in_rec = 1; if (rack_fill_cw_state) rack->rc_pace_to_cwnd = 1; if (rack_pacing_min_seg) @@ -15063,6 +16003,15 @@ if (rack_tcp_accounting) { tp->t_flags2 |= TF2_TCP_ACCOUNTING; } +#endif + rack->r_ctl.pcm_i.cnt_alloc = RACK_DEFAULT_PCM_ARRAY; + sz = (sizeof(struct rack_pcm_stats) * rack->r_ctl.pcm_i.cnt_alloc); + rack->r_ctl.pcm_s = malloc(sz,M_TCPPCM, M_NOWAIT); + if (rack->r_ctl.pcm_s == NULL) { + rack->r_ctl.pcm_i.cnt_alloc = 0; + } +#ifdef NETFLIX_STATS + rack->r_ctl.side_chan_dis_mask = tcp_sidechannel_disable_mask; #endif rack->r_ctl.rack_per_upper_bound_ss = (uint8_t)rack_per_upper_bound_ss; rack->r_ctl.rack_per_upper_bound_ca = (uint8_t)rack_per_upper_bound_ca; @@ -15070,6 +16019,7 @@ rack->rack_enable_scwnd = 1; rack->r_ctl.pace_len_divisor = rack_default_pacing_divisor; rack->rc_user_set_max_segs = rack_hptsi_segments; + rack->r_ctl.max_reduction = rack_max_reduce; rack->rc_force_max_seg = 0; TAILQ_INIT(&rack->r_ctl.opt_list); rack->r_ctl.rc_saved_beta.beta = 
V_newreno_beta_ecn; @@ -15084,12 +16034,22 @@ } else { rack->r_ctl.saved_hibeta = 50; } + /* + * We initialize to all ones so we never match 0 + * just in case the client sends in 0, it hopefully + * will never have all 1's in ms :-) + */ + rack->r_ctl.last_tm_mark = 0xffffffffffffffff; rack->r_ctl.rc_reorder_shift = rack_reorder_thresh; rack->r_ctl.rc_pkt_delay = rack_pkt_delay; + rack->r_ctl.pol_bw_comp = rack_policing_do_bw_comp; rack->r_ctl.rc_tlp_cwnd_reduce = rack_lower_cwnd_at_tlp; rack->r_ctl.rc_lowest_us_rtt = 0xffffffff; rack->r_ctl.rc_highest_us_rtt = 0; rack->r_ctl.bw_rate_cap = rack_bw_rate_cap; + rack->pcm_enabled = rack_pcm_is_enabled; + if (rack_fillcw_bw_cap) + rack->r_ctl.fillcw_cap = rack_fillcw_bw_cap; rack->r_ctl.timer_slop = TICKS_2_USEC(tcp_rexmit_slop); if (rack_use_cmp_acks) rack->r_use_cmp_ack = 1; @@ -15098,6 +16058,7 @@ if (rack_gp_no_rec_chg) rack->rc_gp_no_rec_chg = 1; if (rack_pace_every_seg && tcp_can_enable_pacing()) { + rack->r_ctl.pacing_method |= RACK_REG_PACING; rack->rc_always_pace = 1; if (rack->rack_hibeta) rack_set_cc_pacing(rack); @@ -15114,13 +16075,31 @@ rack->r_limit_scw = 0; rack_init_retransmit_value(rack, rack_rxt_controls); rack->rc_labc = V_tcp_abc_l_var; + if (rack_honors_hpts_min_to) + rack->r_use_hpts_min = 1; + if (tp->snd_una != 0) { + rack->r_ctl.idle_snd_una = tp->snd_una; + rack->rc_sendvars_notset = 0; + /* + * Make sure any TCP timers are not running. + */ + tcp_timer_stop(tp); + } else { + /* + * Server side, we are called from the + * syn-cache. This means none of the + * snd_una/max are set yet so we have + * to defer this until the first send. + */ + rack->rc_sendvars_notset = 1; + } + rack->r_ctl.rc_rate_sample_method = rack_rate_sample_method; rack->rack_tlp_threshold_use = rack_tlp_threshold_use; rack->r_ctl.rc_prr_sendalot = rack_send_a_lot_in_prr; rack->r_ctl.rc_min_to = rack_min_to; microuptime(&rack->r_ctl.act_rcv_time); rack->r_ctl.rc_last_time_decay = rack->r_ctl.act_rcv_time; - rack->rc_init_win = rack_default_init_window; rack->r_ctl.rack_per_of_gp_ss = rack_per_of_gp_ss; if (rack_hw_up_only) rack->r_up_only = 1; @@ -15132,15 +16111,34 @@ } else rack->r_ctl.rack_per_of_gp_ca = rack_per_of_gp_ca; rack->r_ctl.rack_per_of_gp_rec = rack_per_of_gp_rec; + if (rack_timely_off) { + rack->rc_skip_timely = 1; + } + if (rack->rc_skip_timely) { + rack->r_ctl.rack_per_of_gp_rec = 90; + rack->r_ctl.rack_per_of_gp_ca = 100; + rack->r_ctl.rack_per_of_gp_ss = 250; + } rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_probertt; rack->r_ctl.rc_tlp_rxt_last_time = tcp_tv_to_mssectick(&rack->r_ctl.act_rcv_time); + rack->r_ctl.last_rcv_tstmp_for_rtt = tcp_tv_to_mssectick(&rack->r_ctl.act_rcv_time); + setup_time_filter_small(&rack->r_ctl.rc_gp_min_rtt, FILTER_TYPE_MIN, rack_probertt_filter_life); us_cts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time); rack->r_ctl.rc_lower_rtt_us_cts = us_cts; rack->r_ctl.rc_time_of_last_probertt = us_cts; - rack->r_ctl.challenge_ack_ts = tcp_ts_getticks(); + rack->r_ctl.rc_went_idle_time = us_cts; + rack->r_ctl.challenge_ack_ts = tcp_ts_getticks() - (tcp_ack_war_time_window + 1); rack->r_ctl.rc_time_probertt_starts = 0; + + rack->r_ctl.gp_rnd_thresh = rack_rnd_cnt_req & 0xff; + if (rack_rnd_cnt_req & 0x10000) + rack->r_ctl.gate_to_fs = 1; + rack->r_ctl.gp_gain_req = rack_gp_gain_req; + if ((rack_rnd_cnt_req & 0x100) > 0) { + + } if (rack_dsack_std_based & 0x1) { /* Basically this means all rack timers are at least (srtt + 1/4 srtt) */ rack->rc_rack_tmr_std_based = 1; @@ -15449,10 +16447,8 @@ 
rack->r_ctl.fsb.tcp_ip_hdr = NULL; rack->r_ctl.fsb.th = NULL; } - if (rack->rc_always_pace) { - tcp_decrement_paced_conn(); - rack_undo_cc_pacing(rack); - rack->rc_always_pace = 0; + if (rack->rc_always_pace == 1) { + rack_remove_pacing(rack); } /* Clean up any options if they were not applied */ while (!TAILQ_EMPTY(&rack->r_ctl.opt_list)) { @@ -15492,6 +16488,12 @@ uma_zfree(rack_zone, rsm); rsm = TAILQ_FIRST(&rack->r_ctl.rc_free); } + if (rack->r_ctl.pcm_s != NULL) { + free(rack->r_ctl.pcm_s, M_TCPPCM); + rack->r_ctl.pcm_s = NULL; + rack->r_ctl.pcm_i.cnt_alloc = 0; + rack->r_ctl.pcm_i.cnt = 0; + } if ((rack->r_ctl.rc_num_maps_alloced > 0) && (tcp_bblogging_on(tp))) { union tcp_log_stackspecific log; @@ -15593,6 +16595,16 @@ int tmr_up; tmr_up = rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK; + if (tcp_in_hpts(rack->rc_tp) == 0) { + /* + * Ok we probably need some timer up, but no + * matter what the mask we are not in hpts. We + * may have received an old ack and thus did nothing. + */ + rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); + rack_start_hpts_timer(rack, tp, tcp_get_usecs(NULL), 0, 0, 0); + return; + } if (rack->rc_in_persist && (tmr_up == PACE_TMR_PERSIT)) return; rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); @@ -15916,6 +16928,134 @@ } } +static void +rack_new_round_starts(struct tcpcb *tp, struct tcp_rack *rack, uint32_t high_seq) +{ + /* + * The next send has occurred mark the end of the round + * as when that data gets acknowledged. We can + * also do common things we might need to do when + * a round begins. + */ + rack->r_ctl.roundends = tp->snd_max; + rack->rc_new_rnd_needed = 0; + rack_log_hystart_event(rack, tp->snd_max, 4); +} + + +static void +rack_log_pcm(struct tcp_rack *rack, uint8_t mod, uint32_t flex1, uint32_t flex2, + uint32_t flex3) +{ + if (tcp_bblogging_on(rack->rc_tp)) { + union tcp_log_stackspecific log; + struct timeval tv; + + (void)tcp_get_usecs(&tv); + memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + log.u_bbr.timeStamp = tcp_tv_to_usectick(&tv); + log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); + log.u_bbr.flex8 = mod; + log.u_bbr.flex1 = flex1; + log.u_bbr.flex2 = flex2; + log.u_bbr.flex3 = flex3; + log.u_bbr.flex4 = rack_pcm_every_n_rounds; + log.u_bbr.flex5 = rack->r_ctl.pcm_idle_rounds; + log.u_bbr.bbr_substate = rack->pcm_needed; + log.u_bbr.bbr_substate <<= 1; + log.u_bbr.bbr_substate |= rack->pcm_in_progress; + log.u_bbr.bbr_substate <<= 1; + log.u_bbr.bbr_substate |= rack->pcm_enabled; /* bits are NIE for Needed, Inprogress, Enabled */ + (void)tcp_log_event(rack->rc_tp, NULL, NULL, NULL, TCP_PCM_MEASURE, ERRNO_UNK, + 0, &log, false, NULL, NULL, 0, &tv); + } +} + +static void +rack_new_round_setup(struct tcpcb *tp, struct tcp_rack *rack, uint32_t high_seq) +{ + /* + * The round (current_round) has ended. We now + * setup for the next round by incrementing the + * round numnber and doing any round specific + * things. + */ + rack_log_hystart_event(rack, high_seq, 21); + rack->r_ctl.current_round++; + /* New round (current_round) begins at next send */ + rack->rc_new_rnd_needed = 1; + if ((rack->pcm_enabled == 1) && + (rack->pcm_needed == 0) && + (rack->pcm_in_progress == 0)) { + /* + * If we have enabled PCM, then we need to + * check if the round has adanced to the state + * where one is required. 
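+	 * A measurement becomes due once the rounds elapsed since the last
+	 * PCM, plus any idle rounds credited, reach rack_pcm_every_n_rounds.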
+ */ + int rnds; + + rnds = rack->r_ctl.current_round - rack->r_ctl.last_pcm_round; + if ((rnds + rack->r_ctl.pcm_idle_rounds) >= rack_pcm_every_n_rounds) { + rack->pcm_needed = 1; + rack_log_pcm(rack, 3, rack->r_ctl.last_pcm_round, rack_pcm_every_n_rounds, rack->r_ctl.current_round ); + } else if (rack_verbose_logging) { + rack_log_pcm(rack, 3, rack->r_ctl.last_pcm_round, rack_pcm_every_n_rounds, rack->r_ctl.current_round ); + } + } + if (tp->t_ccv.flags & CCF_HYSTART_ALLOWED) { + /* We have hystart enabled send the round info in */ + if (CC_ALGO(tp)->newround != NULL) { + CC_ALGO(tp)->newround(&tp->t_ccv, rack->r_ctl.current_round); + } + } + /* + * For DGP an initial startup check. We want to validate + * that we are not just pushing on slow-start and just + * not gaining.. i.e. filling buffers without getting any + * boost in b/w during the inital slow-start. + */ + if (rack->dgp_on && + (rack->rc_initial_ss_comp == 0) && + (tp->snd_cwnd < tp->snd_ssthresh) && + (rack->r_ctl.num_measurements >= RACK_REQ_AVG) && + (rack->r_ctl.gp_rnd_thresh > 0) && + ((rack->r_ctl.current_round - rack->r_ctl.last_rnd_of_gp_rise) >= rack->r_ctl.gp_rnd_thresh)) { + + /* + * We are in the initial SS and we have hd rack_rnd_cnt_req rounds(def:5) where + * we have not gained the required amount in the gp_est (120.0% aka 1200). Lets + * exit SS. + * + * Pick up the flight size now as we enter slowstart (not the + * cwnd which may be inflated). + */ + rack->rc_initial_ss_comp = 1; + + if (tcp_bblogging_on(rack->rc_tp)) { + union tcp_log_stackspecific log; + struct timeval tv; + + memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + log.u_bbr.timeStamp = tcp_get_usecs(&tv); + log.u_bbr.flex1 = rack->r_ctl.current_round; + log.u_bbr.flex2 = rack->r_ctl.last_rnd_of_gp_rise; + log.u_bbr.flex3 = rack->r_ctl.gp_rnd_thresh; + log.u_bbr.flex5 = rack->r_ctl.gate_to_fs; + log.u_bbr.flex5 = rack->r_ctl.ss_hi_fs; + log.u_bbr.flex8 = 40; + (void)tcp_log_event(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0, + 0, &log, false, NULL, __func__, __LINE__,&tv); + } + if ((rack->r_ctl.gate_to_fs == 1) && + (tp->snd_cwnd > rack->r_ctl.ss_hi_fs)) { + tp->snd_cwnd = rack->r_ctl.ss_hi_fs; + } + tp->snd_ssthresh = tp->snd_cwnd - 1; + /* Turn off any fast output running */ + rack->r_fast_output = 0; + } +} + static int rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mbuf *m, int nxt_pkt, struct timeval *tv) { @@ -15949,7 +17089,7 @@ #endif int nsegs = 0; int under_pacing = 0; - int recovery = 0; + int post_recovery = 0; #ifdef TCP_ACCOUNTING sched_pin(); #endif @@ -16122,7 +17262,7 @@ } } else if (ae->ack_val_set == ACK_DUPACK) { /* Case D */ - rack_strike_dupack(rack); + rack_strike_dupack(rack, ae->ack); } else if (ae->ack_val_set == ACK_RWND) { /* Case C */ if ((ae->flags & TSTMP_LRO) || (ae->flags & TSTMP_HDWR)) { @@ -16172,8 +17312,6 @@ } #endif high_seq = ae->ack; - if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) - rack_log_hystart_event(rack, high_seq, 8); /* Setup our act_rcv_time */ if ((ae->flags & TSTMP_LRO) || (ae->flags & TSTMP_HDWR)) { ts.tv_sec = ae->timestamp / 1000000000; @@ -16239,13 +17377,11 @@ if (SEQ_GEQ(high_seq, rack->r_ctl.roundends) && (rack->rc_new_rnd_needed == 0) && (nxt_pkt == 0)) { - rack_log_hystart_event(rack, high_seq, 21); - rack->r_ctl.current_round++; - /* Force the next send to setup the next round */ - rack->rc_new_rnd_needed = 1; - if (CC_ALGO(tp)->newround != NULL) { - CC_ALGO(tp)->newround(&tp->t_ccv, rack->r_ctl.current_round); - } + /* + * We have crossed into a new 
round with + * this th_ack value. + */ + rack_new_round_setup(tp, rack, high_seq); } /* * Clear the probe not answered flag @@ -16306,8 +17442,17 @@ tcp_rack_partialack(tp); } else { rack_post_recovery(tp, high_seq); - recovery = 1; + post_recovery = 1; } + } else if ((rack->rto_from_rec == 1) && + SEQ_GEQ(high_seq, tp->snd_recover)) { + /* + * We were in recovery, hit a rxt timeout + * and never re-entered recovery. The timeout(s) + * made up all the lost data. In such a case + * we need to clear the rto_from_rec flag. + */ + rack->rto_from_rec = 0; } /* Handle the rack-log-ack part (sendmap) */ if ((sbused(&so->so_snd) == 0) && @@ -16340,9 +17485,24 @@ KMOD_TCPSTAT_ADD(tcps_rcvackpack, 1); KMOD_TCPSTAT_ADD(tcps_rcvackbyte, acked); if (acked_amount > 0) { + uint32_t p_cwnd; struct mbuf *mfree; - rack_ack_received(tp, rack, high_seq, nsegs, CC_ACK, recovery); + if (post_recovery) { + /* + * Grab the segsiz, multiply by 2 and add the snd_cwnd + * that is the max the CC should add if we are exiting + * recovery and doing a late add. + */ + p_cwnd = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs); + p_cwnd <<= 1; + p_cwnd += tp->snd_cwnd; + } + rack_ack_received(tp, rack, high_seq, nsegs, CC_ACK, post_recovery); + if (post_recovery && (tp->snd_cwnd > p_cwnd)) { + /* Must be non-newreno (cubic) getting too ahead of itself */ + tp->snd_cwnd = p_cwnd; + } SOCKBUF_LOCK(&so->so_snd); mfree = sbcut_locked(&so->so_snd, acked_amount); tp->snd_una = high_seq; @@ -16351,12 +17511,6 @@ /* Wake up the socket if we have room to write more */ rack_log_wakeup(tp,rack, &so->so_snd, acked, 2); sowwakeup_locked(so); - if ((recovery == 1) && - (rack->excess_rxt_on) && - (rack->r_cwnd_was_clamped == 0)) { - do_rack_excess_rxt(tp, rack); - } else if (rack->r_cwnd_was_clamped) - do_rack_check_for_unclamp(tp, rack); m_freem(mfree); } /* update progress */ @@ -16587,7 +17741,9 @@ } rack_handle_might_revert(tp, rack); ctf_calc_rwin(so, tp); - if ((rack->r_wanted_output != 0) || (rack->r_fast_output != 0)) { + if ((rack->r_wanted_output != 0) || + (rack->r_fast_output != 0) || + (tp->t_flags & TF_ACKNOW )) { send_out_a_rst: if (tcp_output(tp) < 0) { #ifdef TCP_ACCOUNTING @@ -16630,7 +17786,7 @@ * us_cts - is the time that LRO or hardware actually got the packet in microseconds. */ uint32_t cts, us_cts, ms_cts; - uint32_t tiwin, high_seq; + uint32_t tiwin; struct timespec ts; struct tcpopt to; struct tcp_rack *rack; @@ -16818,7 +17974,6 @@ tp->t_flags &= ~TF_GPUTINPROG; } } - high_seq = th->th_ack; if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval ltv; @@ -16938,7 +18093,6 @@ m_freem(m); goto done_with_input; } - /* * Segment received on connection. Reset idle time and keep-alive * timer. XXX: This should be done after segment validation to @@ -16975,7 +18129,28 @@ if (TSTMP_GT(to.to_tsecr, ms_cts)) to.to_tsecr = 0; } - + if ((rack->r_rcvpath_rtt_up == 1) && + (to.to_flags & TOF_TS) && + (TSTMP_GEQ(to.to_tsecr, rack->r_ctl.last_rcv_tstmp_for_rtt))) { + uint32_t rtt = 0; + + /* + * We are receiving only and thus not sending + * data to do an RTT. We set a flag when we first + * sent this TS to the peer. We now have it back + * and have an RTT to share. We log it as a conf + * 4, we are not so sure about it.. since we + * may have lost an ack. 
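+		 * The sample is simply now (cts) minus the time the probe was
+		 * armed (last_time_of_arm_rcv), and it is only committed when
+		 * that difference is positive.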
+ */ + if (TSTMP_GT(cts, rack->r_ctl.last_time_of_arm_rcv)) + rtt = (cts - rack->r_ctl.last_time_of_arm_rcv); + rack->r_rcvpath_rtt_up = 0; + /* Submit and commit the timer */ + if (rtt > 0) { + tcp_rack_xmit_timer(rack, rtt, 0, rtt, 4, NULL, 1); + tcp_rack_xmit_timer_commit(rack, tp); + } + } /* * If its the first time in we need to take care of options and * verify we can do SACK for rack! @@ -17069,7 +18244,7 @@ (rack->use_fixed_rate == 0) && (rack->rc_always_pace)) { /* Check in on probertt */ - rack_check_probe_rtt(rack, us_cts); + rack_check_probe_rtt(rack, cts); } rack_clear_rate_sample(rack); if ((rack->forced_ack) && @@ -17113,7 +18288,7 @@ * If we are going for target, lets recheck before * we output. */ - rack_check_probe_rtt(rack, us_cts); + rack_check_probe_rtt(rack, cts); } if (rack->set_pacing_done_a_iw == 0) { /* How much has been acked? */ @@ -17144,7 +18319,10 @@ } #endif if ((nxt_pkt == 0) && (no_output == 0)) { - if ((rack->r_wanted_output != 0) || (rack->r_fast_output != 0)) { + if ((rack->r_wanted_output != 0) || + (tp->t_flags & TF_ACKNOW) || + (rack->r_fast_output != 0)) { + do_output_now: if (tcp_output(tp) < 0) { #ifdef TCP_ACCOUNTING @@ -17156,6 +18334,8 @@ } rack_start_hpts_timer(rack, tp, cts, 0, 0, 0); rack_free_trim(rack); + } else if ((nxt_pkt == 0) && (tp->t_flags & TF_ACKNOW)) { + goto do_output_now; } else if ((no_output == 1) && (nxt_pkt == 0) && (tcp_in_hpts(rack->rc_tp) == 0)) { @@ -17170,9 +18350,6 @@ /* Clear the flag, it may have been cleared by output but we may not have */ if ((nxt_pkt == 0) && (tp->t_flags2 & TF2_HPTS_CALLS)) tp->t_flags2 &= ~TF2_HPTS_CALLS; - /* Update any rounds needed */ - if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) - rack_log_hystart_event(rack, high_seq, 8); /* * The draft (v3) calls for us to use SEQ_GEQ, but that * causes issues when we are just going app limited. Lets @@ -17186,13 +18363,11 @@ if (SEQ_GEQ(tp->snd_una, rack->r_ctl.roundends) && (rack->rc_new_rnd_needed == 0) && (nxt_pkt == 0)) { - rack_log_hystart_event(rack, tp->snd_una, 21); - rack->r_ctl.current_round++; - /* Force the next send to setup the next round */ - rack->rc_new_rnd_needed = 1; - if (CC_ALGO(tp)->newround != NULL) { - CC_ALGO(tp)->newround(&tp->t_ccv, rack->r_ctl.current_round); - } + /* + * We have crossed into a new round with + * the new snd_unae. 
+ */ + rack_new_round_setup(tp, rack, tp->snd_una); } if ((nxt_pkt == 0) && ((rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) == 0) && @@ -17242,6 +18417,7 @@ if (did_out) rack->r_wanted_output = 0; } + #ifdef TCP_ACCOUNTING sched_unpin(); #endif @@ -17325,7 +18501,7 @@ srtt = rack_grab_rtt(tp, rack); idx = rsm->r_rtr_cnt - 1; ts_low = (uint32_t)rsm->r_tim_lastsent[idx]; - thresh = rack_calc_thresh_rack(rack, srtt, tsused); + thresh = rack_calc_thresh_rack(rack, srtt, tsused, __LINE__, 1); if ((tsused == ts_low) || (TSTMP_LT(tsused, ts_low))) { /* No time since sending */ @@ -17354,7 +18530,7 @@ } static void -rack_log_pacing_delay_calc(struct tcp_rack *rack, uint32_t len, uint32_t slot, +rack_log_pacing_delay_calc (struct tcp_rack *rack, uint32_t len, uint32_t slot, uint64_t bw_est, uint64_t bw, uint64_t len_time, int method, int line, struct rack_sendmap *rsm, uint8_t quality) { @@ -17370,6 +18546,7 @@ if ((method != 2) && (method != 3) && (method != 7) && + (method != 89) && (method != 14) && (method != 20)) { return; @@ -17429,12 +18606,8 @@ log.u_bbr.bbr_substate = quality; log.u_bbr.bbr_state = rack->dgp_on; log.u_bbr.bbr_state <<= 1; - log.u_bbr.bbr_state |= rack->r_fill_less_agg; - log.u_bbr.bbr_state <<= 1; log.u_bbr.bbr_state |= rack->rc_pace_to_cwnd; log.u_bbr.bbr_state <<= 2; - log.u_bbr.bbr_state |= rack->r_pacing_discount; - log.u_bbr.flex7 = ((rack->r_ctl.pacing_discount_amm << 1) | log.u_bbr.flex7); TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, @@ -17537,7 +18710,6 @@ { uint64_t lentim, fill_bw; - /* Lets first see if we are full, if so continue with normal rate */ rack->r_via_fill_cw = 0; if (ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked) > rack->r_ctl.cwnd_to_use) return (slot); @@ -17551,6 +18723,8 @@ /* The rtt is huge, N * smallest, lets not fill */ return (slot); } + if (rack->r_ctl.fillcw_cap && *rate_wanted >= rack->r_ctl.fillcw_cap) + return (slot); /* * first lets calculate the b/w based on the last us-rtt * and the the smallest send window. @@ -17570,26 +18744,47 @@ /* Now lets make it into a b/w */ fill_bw *= (uint64_t)HPTS_USEC_IN_SEC; fill_bw /= (uint64_t)rack->r_ctl.rc_last_us_rtt; + /* Adjust to any cap */ + if (rack->r_ctl.fillcw_cap && fill_bw >= rack->r_ctl.fillcw_cap) + fill_bw = rack->r_ctl.fillcw_cap; + at_lt_bw: - if (rack->r_fill_less_agg) { + if (rack_bw_multipler > 0) { /* - * We want the average of the rate_wanted - * and our fill-cw calculated bw. We also want - * to cap any increase to be no more than - * X times the lt_bw (where X is the rack_bw_multipler). + * We want to limit fill-cw to the some multiplier + * of the max(lt_bw, gp_est). The normal default + * is 0 for off, so a sysctl has enabled it. 
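+		 * The value is a percentage, e.g. (illustrative) 300 caps
+		 * fill_bw at 3 x max(lt_bw, gp_est), while 50 would cap it
+		 * at half of that maximum.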
*/ - uint64_t lt_bw, rate; + uint64_t lt_bw, gp, rate; + gp = rack_get_gp_est(rack); lt_bw = rack_get_lt_bw(rack); - if (lt_bw > *rate_wanted) + if (lt_bw > gp) rate = lt_bw; else - rate = *rate_wanted; - fill_bw += rate; - fill_bw /= 2; - if (rack_bw_multipler && (fill_bw > (rate * rack_bw_multipler))) { - fill_bw = rate * rack_bw_multipler; - } + rate = gp; + rate *= rack_bw_multipler; + rate /= 100; + if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { + union tcp_log_stackspecific log; + struct timeval tv; + + memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + log.u_bbr.timeStamp = tcp_get_usecs(&tv); + log.u_bbr.flex1 = rack_bw_multipler; + log.u_bbr.flex2 = len; + log.u_bbr.cur_del_rate = gp; + log.u_bbr.delRate = lt_bw; + log.u_bbr.bw_inuse = rate; + log.u_bbr.rttProp = fill_bw; + log.u_bbr.flex8 = 44; + tcp_log_event(rack->rc_tp, NULL, NULL, NULL, + BBR_LOG_CWND, 0, + 0, &log, false, NULL, + __func__, __LINE__, &tv); + } + if (fill_bw > rate) + fill_bw = rate; } /* We are below the min b/w */ if (non_paced) @@ -17638,9 +18833,8 @@ } } if (rack->r_ctl.bw_rate_cap && (fill_bw > rack->r_ctl.bw_rate_cap)) { - if (rack->rc_hybrid_mode) - rack_log_hybrid_bw(rack, rack->rc_tp->snd_max, - fill_bw, 0, 0, HYBRID_LOG_RATE_CAP, 2, NULL, __LINE__); + rack_log_hybrid_bw(rack, rack->rc_tp->snd_max, + fill_bw, 0, 0, HYBRID_LOG_RATE_CAP, 2, NULL, __LINE__); fill_bw = rack->r_ctl.bw_rate_cap; } /* @@ -17659,11 +18853,121 @@ return (slot); } -static int32_t -rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, struct rack_sendmap *rsm, uint32_t segsiz) +static uint32_t +rack_policer_check_send(struct tcp_rack *rack, uint32_t len, uint32_t segsiz, uint32_t *needs) { - uint64_t srtt; - int32_t slot = 0; + uint64_t calc; + + rack->rc_policer_should_pace = 0; + calc = rack_policer_bucket_reserve * rack->r_ctl.policer_bucket_size; + calc /= 100; + /* + * Now lets look at if we want more than is in the bucket + * we want more than is reserved in the bucket. + */ + if (rack_verbose_logging > 0) + policer_detection_log(rack, len, segsiz, calc, rack->r_ctl.current_policer_bucket, 8); + if ((calc > rack->r_ctl.current_policer_bucket) || + (len >= (rack->r_ctl.current_policer_bucket - calc))) { + /* + * We may want to pace depending on if we are going + * into the reserve or not. + */ + uint32_t newlen; + + if (calc > rack->r_ctl.current_policer_bucket) { + /* + * This will eat into the reserve if we + * don't have room at all some lines + * below will catch it. + */ + newlen = rack->r_ctl.policer_max_seg; + rack->rc_policer_should_pace = 1; + } else { + /* + * We have all of the reserve plus something in the bucket + * that we can give out. + */ + newlen = rack->r_ctl.current_policer_bucket - calc; + if (newlen < rack->r_ctl.policer_max_seg) { + /* + * Into the reserve to get a full policer_max_seg + * so we set the len to that and eat into + * the reserve. If we go over the code + * below will make us wait. 
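+			 * (The reserve, calc above, is rack_policer_bucket_reserve
+			 * percent of the bucket; e.g. a 64 KB bucket with a 20
+			 * percent reserve keeps roughly 13 KB back -- illustrative
+			 * numbers only.)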
+ */ + newlen = rack->r_ctl.policer_max_seg; + rack->rc_policer_should_pace = 1; + } + } + if (newlen > rack->r_ctl.current_policer_bucket) { + /* We have to wait some */ + *needs = newlen - rack->r_ctl.current_policer_bucket; + return (0); + } + if (rack_verbose_logging > 0) + policer_detection_log(rack, len, segsiz, newlen, 0, 9); + len = newlen; + } /* else we have all len available above the reserve */ + if (rack_verbose_logging > 0) + policer_detection_log(rack, len, segsiz, calc, 0, 10); + return (len); +} + +static uint32_t +rack_policed_sending(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, uint32_t segsiz, int call_line) +{ + /* + * Given a send of len, and a token bucket set at current_policer_bucket_size + * are we close enough to the end of the bucket that we need to pace? If so + * calculate out a time and return it. Otherwise subtract the tokens from + * the bucket. + */ + uint64_t calc; + + if ((rack->r_ctl.policer_bw == 0) || + (rack->r_ctl.policer_bucket_size < segsiz)) { + /* + * We should have an estimate here... + */ + return (0); + } + calc = (uint64_t)rack_policer_bucket_reserve * (uint64_t)rack->r_ctl.policer_bucket_size; + calc /= 100; + if ((rack->r_ctl.current_policer_bucket < len) || + (rack->rc_policer_should_pace == 1) || + ((rack->r_ctl.current_policer_bucket - len) <= (uint32_t)calc)) { + /* we need to pace */ + uint64_t lentim, res; + uint32_t slot; + + lentim = (uint64_t)len * (uint64_t)HPTS_USEC_IN_SEC; + res = lentim / rack->r_ctl.policer_bw; + slot = (uint32_t)res; + if (rack->r_ctl.current_policer_bucket > len) + rack->r_ctl.current_policer_bucket -= len; + else + rack->r_ctl.current_policer_bucket = 0; + policer_detection_log(rack, len, slot, (uint32_t)rack_policer_bucket_reserve, call_line, 5); + rack->rc_policer_should_pace = 0; + return(slot); + } + /* Just take tokens out of the bucket and let rack do whatever it would have */ + policer_detection_log(rack, len, 0, (uint32_t)rack_policer_bucket_reserve, call_line, 6); + if (len < rack->r_ctl.current_policer_bucket) { + rack->r_ctl.current_policer_bucket -= len; + } else { + rack->r_ctl.current_policer_bucket = 0; + } + return (0); +} + + +static int32_t +rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, struct rack_sendmap *rsm, uint32_t segsiz, int line) +{ + uint64_t srtt; + int32_t slot = 0; int32_t minslot = 0; int can_start_hw_pacing = 1; int err; @@ -17674,6 +18978,25 @@ pace_one = 1; else pace_one = 0; + if (rack->rc_policer_detected == 1) { + /* + * A policer has been detected and we + * have all of our data (policer-bw and + * policer bucket size) calculated. Call + * into the function to find out if we are + * overriding the time. + */ + slot = rack_policed_sending(rack, tp, len, segsiz, line); + if (slot) { + uint64_t logbw; + + logbw = rack->r_ctl.current_policer_bucket; + logbw <<= 32; + logbw |= rack->r_ctl.policer_bucket_size; + rack_log_pacing_delay_calc(rack, len, slot, rack->r_ctl.policer_bw, logbw, 0, 89, __LINE__, NULL, 0); + return(slot); + } + } if (rack->rc_always_pace == 0) { /* * We use the most optimistic possible cwnd/srtt for @@ -18214,6 +19537,16 @@ rack->r_ctl.rc_gp_output_ts = my_rsm->r_tim_lastsent[0]; tp->gput_ack = tp->gput_seq + rack_get_measure_window(tp, rack); rack->r_ctl.rc_gp_cumack_ts = 0; + if ((rack->r_ctl.cleared_app_ack == 1) && + (SEQ_GEQ(rack->r_ctl.cleared_app_ack, tp->gput_seq))) { + /* + * We just cleared an application limited period + * so the next seq out needs to skip the first + * ack. 
+ */ + rack->app_limited_needs_set = 1; + rack->r_ctl.cleared_app_ack = 0; + } rack_log_pacing_delay_calc(rack, tp->gput_seq, tp->gput_ack, @@ -19132,7 +20465,6 @@ rack->r_late = 0; rack->r_ctl.rc_agg_early = 0; } - rack_log_output(tp, &to, len, rsm->r_start, flags, error, rack_to_usec_ts(tv), rsm, RACK_SENT_FP, rsm->m, rsm->soff, rsm->r_hw_tls, segsiz); if (doing_tlp) { @@ -19189,17 +20521,8 @@ tcp_rl_log_enobuf(rack->r_ctl.crte); } counter_u64_add(rack_saw_enobuf, 1); - } else - slot = rack_get_pacing_delay(rack, tp, len, NULL, segsiz); - if ((slot == 0) || - (rack->rc_always_pace == 0) || - (rack->r_rr_config == 1)) { - /* - * We have no pacing set or we - * are using old-style rack or - * we are overridden to use the old 1ms pacing. - */ - slot = rack->r_ctl.rc_min_to; + } else { + slot = rack_get_pacing_delay(rack, tp, len, NULL, segsiz, __LINE__); } rack_start_hpts_timer(rack, tp, cts, slot, len, 0); #ifdef TCP_ACCOUNTING @@ -19261,7 +20584,7 @@ (so->so_snd.sb_hiwat / 8 * 7) && sbused(&so->so_snd) < V_tcp_autosndbuf_max && sendwin >= (sbused(&so->so_snd) - - (tp->snd_nxt - tp->snd_una))) { + (tp->snd_max - tp->snd_una))) { if (rack_autosndbuf_inc) scaleup = (rack_autosndbuf_inc * so->so_snd.sb_hiwat) / 100; else @@ -19313,7 +20636,7 @@ uint32_t s_soff; uint32_t if_hw_tsomaxsegcount = 0, startseq; uint32_t if_hw_tsomaxsegsize; - uint16_t add_flag = RACK_SENT_FP; + uint32_t add_flag = RACK_SENT_FP; #ifdef INET6 struct ip6_hdr *ip6 = NULL; @@ -19680,6 +21003,22 @@ rack->r_ctl.lt_timemark = tcp_tv_to_lusectick(tv); rack->r_ctl.lt_seq = tp->snd_una; rack->lt_bw_up = 1; + } else if ((error == 0) && + (((tp->snd_max + len) - rack->r_ctl.lt_seq) > 0x7fffffff)) { + /* + * Need to record what we have since we are + * approaching seq wrap. + */ + struct timeval tv; + uint64_t tmark; + + rack->r_ctl.lt_bw_bytes += (tp->snd_una - rack->r_ctl.lt_seq); + rack->r_ctl.lt_seq = tp->snd_una; + tmark = tcp_get_u64_usecs(&tv); + if (tmark > rack->r_ctl.lt_timemark) { + rack->r_ctl.lt_bw_time += (tmark - rack->r_ctl.lt_timemark); + rack->r_ctl.lt_timemark = tmark; + } } rack_log_output(tp, &to, len, tp->snd_max, flags, error, rack_to_usec_ts(tv), NULL, add_flag, s_mb, s_soff, rack->r_ctl.fsb.hw_tls, segsiz); @@ -19699,13 +21038,7 @@ tp->snd_max += len; tp->snd_nxt = tp->snd_max; if (rack->rc_new_rnd_needed) { - /* - * Update the rnd to start ticking not - * that from a time perspective all of - * the preceding idle time is "in the round" - */ - rack->rc_new_rnd_needed = 0; - rack->r_ctl.roundends = tp->snd_max; + rack_new_round_starts(tp, rack, tp->snd_max); } { int idx; @@ -19746,7 +21079,7 @@ } tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); counter_u64_add(rack_fto_send, 1); - slot = rack_get_pacing_delay(rack, tp, tot_len, NULL, segsiz); + slot = rack_get_pacing_delay(rack, tp, tot_len, NULL, segsiz, __LINE__); rack_start_hpts_timer(rack, tp, cts, slot, tot_len, 0); #ifdef TCP_ACCOUNTING crtsc = get_cyclecount(); @@ -19856,7 +21189,7 @@ goto restart; } /* Now has it been long enough ? 
*/ - thresh = rack_calc_thresh_rack(rack, rack_grab_rtt(rack->rc_tp, rack), cts); + thresh = rack_calc_thresh_rack(rack, rack_grab_rtt(rack->rc_tp, rack), cts, __LINE__, 1); if ((cts - ((uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)])) > thresh) { rack_log_collapse(rack, rsm->r_start, (cts - ((uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)])), @@ -19870,6 +21203,25 @@ return (NULL); } +static void +rack_credit_back_policer_idle_time(struct tcp_rack *rack, uint64_t idle_t, int line) +{ + /* + * We were idle some time (idle_t) and so our policer bucket + * needs to grow. It can go no higher than policer_bucket_size. + */ + uint64_t len; + + len = idle_t * rack->r_ctl.policer_bw; + len /= HPTS_USEC_IN_SEC; + rack->r_ctl.current_policer_bucket += (uint32_t)len; + if (rack->r_ctl.policer_bucket_size < rack->r_ctl.current_policer_bucket) { + rack->r_ctl.current_policer_bucket = rack->r_ctl.policer_bucket_size; + } + if (rack_verbose_logging > 0) + policer_detection_log(rack, (uint32_t)len, line, (uint32_t)idle_t, 0, 7); +} + static inline void rack_validate_sizes(struct tcp_rack *rack, int32_t *len, int32_t segsiz, uint32_t pace_max_seg) { @@ -19931,7 +21283,7 @@ unsigned ipsec_optlen = 0; #endif - int32_t idle, sendalot; + int32_t idle, sendalot, tot_idle; int32_t sub_from_prr = 0; volatile int32_t sack_rxmit; struct rack_sendmap *rsm = NULL; @@ -19940,7 +21292,7 @@ int32_t slot = 0; int32_t sup_rack = 0; uint32_t cts, ms_cts, delayed, early; - uint16_t add_flag = RACK_SENT_SP; + uint32_t add_flag = RACK_SENT_SP; /* The doing_tlp flag will be set by the actual rack_timeout_tlp() */ uint8_t doing_tlp = 0; uint32_t cwnd_to_use, pace_max_seg; @@ -20101,12 +21453,16 @@ early = rack->r_ctl.rc_last_output_to - cts; } else early = 0; - if (delayed) { + if (delayed && (rack->rc_always_pace == 1)) { rack->r_ctl.rc_agg_delayed += delayed; rack->r_late = 1; - } else if (early) { + } else if (early && (rack->rc_always_pace == 1)) { rack->r_ctl.rc_agg_early += early; rack->r_early = 1; + } else if (rack->rc_always_pace == 0) { + /* Non-paced we are not late */ + rack->r_ctl.rc_agg_delayed = rack->r_ctl.rc_agg_early = 0; + rack->r_early = rack->r_late = 0; } /* Now that early/late accounting is done turn off the flag */ rack->r_ctl.rc_hpts_flags &= ~PACE_PKT_OUTPUT; @@ -20168,9 +21524,9 @@ } if ((tp->snd_una == tp->snd_max) && rack->r_ctl.rc_went_idle_time && - TSTMP_GT(cts, rack->r_ctl.rc_went_idle_time)) { - idle = cts - rack->r_ctl.rc_went_idle_time; - if (idle > rack_min_probertt_hold) { + (cts > rack->r_ctl.rc_went_idle_time)) { + tot_idle = idle = (cts - rack->r_ctl.rc_went_idle_time); + if (idle > (uint64_t)rack_min_probertt_hold) { /* Count as a probe rtt */ if (rack->in_probe_rtt == 0) { rack->r_ctl.rc_lower_rtt_us_cts = cts; @@ -20183,17 +21539,75 @@ } idle = 0; } + if(rack->policer_detect_on) { + /* + * If we are doing policer detetion we at a minium + * record the time but if possible add back to + * the bucket based on the idle time. 
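+		 * (e.g. 100 ms of idle time at a 1 Mbps policer_bw credits about
+		 * 12.5 KB back, never growing the bucket past
+		 * policer_bucket_size -- illustrative numbers only.)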
+ */ + uint64_t idle_t, u64_cts; + + segsiz = min(ctf_fixed_maxseg(tp), + rack->r_ctl.rc_pace_min_segs); + u64_cts = tcp_tv_to_lusectick(&tv); + if ((rack->rc_policer_detected == 1) && + (rack->r_ctl.policer_bucket_size > segsiz) && + (rack->r_ctl.policer_bw > 0) && + (u64_cts > rack->r_ctl.last_sendtime)) { + /* We are being policed add back the time */ + idle_t = u64_cts - rack->r_ctl.last_sendtime; + rack_credit_back_policer_idle_time(rack, idle_t, __LINE__); + } + rack->r_ctl.last_sendtime = u64_cts; + } if (rack_use_fsb && (rack->r_ctl.fsb.tcp_ip_hdr) && (rack->r_fsb_inited == 0) && (rack->r_state != TCPS_CLOSED)) rack_init_fsb_block(tp, rack, tcp_outflags[tp->t_state]); + if (rack->rc_sendvars_notset == 1) { + rack->r_ctl.idle_snd_una = tp->snd_una; + rack->rc_sendvars_notset = 0; + /* + * Make sure any TCP timers (keep-alive) is not running. + */ + tcp_timer_stop(tp); + } + if ((rack->rack_no_prr == 1) && + (rack->rc_always_pace == 0)) { + /* + * Sanity check before sending, if we have + * no-pacing enabled and prr is turned off that + * is a logistics error. Correct this by turnning + * prr back on. A user *must* set some form of + * pacing in order to turn PRR off. We do this + * in the output path so that we can avoid socket + * option ordering issues that would occur if we + * tried to do it while setting rack_no_prr on. + */ + rack->rack_no_prr = 0; + } + if ((rack->pcm_enabled == 1) && + (rack->pcm_needed == 0) && + (tot_idle > 0)) { + /* + * We have been idle some micro seconds. We need + * to factor this in to see if a PCM is needed. + */ + uint32_t rtts_idle, rnds; + + if (tp->t_srtt) + rtts_idle = tot_idle / tp->t_srtt; + else + rtts_idle = 0; + rnds = rack->r_ctl.current_round - rack->r_ctl.last_pcm_round; + rack->r_ctl.pcm_idle_rounds += rtts_idle; + if ((rnds + rack->r_ctl.pcm_idle_rounds) >= rack_pcm_every_n_rounds) { + rack->pcm_needed = 1; + rack_log_pcm(rack, 8, rack->r_ctl.last_pcm_round, rtts_idle, rack->r_ctl.current_round ); + } + } again: - /* - * If we've recently taken a timeout, snd_max will be greater than - * snd_nxt. There may be SACK information that allows us to avoid - * resending already delivered data. Adjust snd_nxt accordingly. - */ sendalot = 0; cts = tcp_get_usecs(&tv); ms_cts = tcp_tv_to_mssectick(&tv); @@ -20205,6 +21619,44 @@ pace_max_seg = rack->rc_user_set_max_segs * segsiz; else pace_max_seg = rack->r_ctl.rc_pace_max_segs; + if (TCPS_HAVEESTABLISHED(tp->t_state) && + (rack->r_ctl.pcm_max_seg == 0)) { + /* + * We set in our first send so we know that the ctf_fixed_maxseg + * has been fully set. If we do it in rack_init() we most likely + * see 512 bytes so we end up at 5120, not desirable. + */ + rack->r_ctl.pcm_max_seg = rc_init_window(rack); + if (rack->r_ctl.pcm_max_seg < (ctf_fixed_maxseg(tp) * 10)) { + /* + * Assure our initial PCM probe is at least 10 MSS. 
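+			 * (e.g. with a typical 1448 byte MSS that floor works out
+			 * to about 14.5 KB -- illustrative number only.)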
+ */ + rack->r_ctl.pcm_max_seg = ctf_fixed_maxseg(tp) * 10; + } + } + if ((rack->r_ctl.pcm_max_seg != 0) && (rack->pcm_needed == 1)) { + uint32_t rw_avail, cwa; + + if (tp->snd_wnd > ctf_outstanding(tp)) + rw_avail = tp->snd_wnd - ctf_outstanding(tp); + else + rw_avail = 0; + if (tp->snd_cwnd > ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked)) + cwa = tp->snd_cwnd -ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); + else + cwa = 0; + if ((cwa >= rack->r_ctl.pcm_max_seg) && + (rw_avail > rack->r_ctl.pcm_max_seg)) { + /* Raise up the max seg for this trip through */ + pace_max_seg = rack->r_ctl.pcm_max_seg; + /* Disable any fast output */ + rack->r_fast_output = 0; + } + if (rack_verbose_logging) { + rack_log_pcm(rack, 4, + cwa, rack->r_ctl.pcm_max_seg, rw_avail); + } + } sb_offset = tp->snd_max - tp->snd_una; cwnd_to_use = rack->r_ctl.cwnd_to_use = tp->snd_cwnd; flags = tcp_outflags[tp->t_state]; @@ -20431,10 +21883,19 @@ ((rsm->r_flags & RACK_HAS_FIN) == 0)) { int ret; + if ((rack->rc_policer_detected == 1) && + (rack->r_ctl.policer_bucket_size > segsiz) && + (rack->r_ctl.policer_bw > 0)) { + /* Check to see if there is room */ + if (rack->r_ctl.current_policer_bucket < len) { + goto skip_fast_output; + } + } ret = rack_fast_rsm_output(tp, rack, rsm, ts_val, cts, ms_cts, &tv, len, doing_tlp); if (ret == 0) return (0); } +skip_fast_output: so = inp->inp_socket; sb = &so->so_snd; if (do_a_prefetch == 0) { @@ -20487,28 +21948,19 @@ prefetch_rsm = 1; } SOCKBUF_LOCK(sb); - /* - * If snd_nxt == snd_max and we have transmitted a FIN, the - * sb_offset will be > 0 even if so_snd.sb_cc is 0, resulting in a - * negative length. This can also occur when TCP opens up its - * congestion window while receiving additional duplicate acks after - * fast-retransmit because TCP will reset snd_nxt to snd_max after - * the fast-retransmit. - * - * In the normal retransmit-FIN-only case, however, snd_nxt will be - * set to snd_una, the sb_offset will be 0, and the length may wind - * up 0. - * - * If sack_rxmit is true we are retransmitting from the scoreboard - * in which case len is already set. - */ if ((sack_rxmit == 0) && (TCPS_HAVEESTABLISHED(tp->t_state) || IS_FASTOPEN(tp->t_flags))) { + /* + * We are not retransmitting (sack_rxmit is 0) so we + * are sending new data. This is always based on snd_max. + * Now in theory snd_max may be equal to snd_una, if so + * then nothing is outstanding and the offset would be 0. + */ uint32_t avail; avail = sbavail(sb); - if (SEQ_GT(tp->snd_nxt, tp->snd_una) && avail) - sb_offset = tp->snd_nxt - tp->snd_una; + if (SEQ_GT(tp->snd_max, tp->snd_una) && avail) + sb_offset = tp->snd_max - tp->snd_una; else sb_offset = 0; if ((IN_FASTRECOVERY(tp->t_flags) == 0) || rack->rack_no_prr) { @@ -20632,13 +22084,53 @@ kern_prefetch(so, &prefetch_so_done); prefetch_so_done = 1; } + orig_len = len; + if ((rack->rc_policer_detected == 1) && + (rack->r_ctl.policer_bucket_size > segsiz) && + (rack->r_ctl.policer_bw > 0) && + (len > 0)) { + /* + * Ok we believe we have a policer watching + * what we send, can we send len? If not can + * we tune it down to a smaller value? + */ + uint32_t plen, buck_needs; + + plen = rack_policer_check_send(rack, len, segsiz, &buck_needs); + if (plen == 0) { + /* + * We are not allowed to send. How long + * do we need to pace for i.e. how long + * before len is available to send? 
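+			 * The wait is buck_needs / policer_bw, e.g. needing 3000 more
+			 * bytes against a 125000 byte/sec (1 Mbps) policer_bw is
+			 * roughly a 24 ms slot -- illustrative numbers only.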
+ */ + uint64_t lentime; + + lentime = buck_needs; + lentime *= HPTS_USEC_IN_SEC; + lentime /= rack->r_ctl.policer_bw; + slot = (uint32_t)lentime; + tot_len_this_send = 0; + SOCKBUF_UNLOCK(sb); + if (rack_verbose_logging > 0) + policer_detection_log(rack, len, slot, buck_needs, 0, 12); + rack_start_hpts_timer(rack, tp, cts, slot, 0, 0); + rack_log_type_just_return(rack, cts, 0, slot, hpts_calling, 0, cwnd_to_use); + goto just_return_clean; + } + if (plen < len) { + sendalot = 0; + len = plen; + } + } /* * Lop off SYN bit if it has already been sent. However, if this is * SYN-SENT state and if segment contains data and if we don't know * that foreign host supports TAO, suppress sending segment. */ - if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una) && - ((sack_rxmit == 0) && (tp->t_rxtshift == 0))) { + if ((flags & TH_SYN) && + SEQ_GT(tp->snd_max, tp->snd_una) && + ((sack_rxmit == 0) && + (tp->t_rxtshift == 0))) { /* * When sending additional segments following a TFO SYN|ACK, * do not include the SYN bit. @@ -20678,7 +22170,6 @@ } /* Without fast-open there should never be data sent on a SYN */ if ((flags & TH_SYN) && (!IS_FASTOPEN(tp->t_flags))) { - tp->snd_nxt = tp->iss; len = 0; } if ((len > segsiz) && (tcp_dsack_block_exists(tp))) { @@ -20686,22 +22177,10 @@ add_flag |= RACK_SENT_W_DSACK; len = segsiz; } - orig_len = len; if (len <= 0) { /* - * If FIN has been sent but not acked, but we haven't been - * called to retransmit, len will be < 0. Otherwise, window - * shrank after we sent into it. If window shrank to 0, - * cancel pending retransmit, pull snd_nxt back to (closed) - * window, and set the persist timer if it isn't already - * going. If the window didn't close completely, just wait - * for an ACK. - * - * We also do a general check here to ensure that we will - * set the persist timer when we have data to send, but a - * 0-byte window. This makes sure the persist timer is set - * even if the packet hits one of the "goto send" lines - * below. + * We have nothing to send, or the window shrank, or + * is closed, do we need to go into persists? */ len = 0; if ((tp->snd_wnd == 0) && @@ -20859,10 +22338,6 @@ if (sack_rxmit) { if ((rsm->r_flags & RACK_HAS_FIN) == 0) flags &= ~TH_FIN; - } else { - if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + - sbused(sb))) - flags &= ~TH_FIN; } } recwin = lmin(lmax(sbspace(&so->so_rcv), 0), @@ -20903,10 +22378,6 @@ pass = 4; goto send; } - if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { /* retransmit case */ - pass = 5; - goto send; - } if (sack_rxmit) { pass = 6; goto send; @@ -21014,7 +22485,7 @@ * yet done so, then we need to send. */ if ((flags & TH_FIN) && - (tp->snd_nxt == tp->snd_una)) { + (tp->snd_max == tp->snd_una)) { pass = 11; goto send; } @@ -21027,15 +22498,32 @@ { int app_limited = CTF_JR_SENT_DATA; + if ((IS_FASTOPEN(tp->t_flags) == 0) && + (flags & TH_FIN) && + (len == 0) && + (sbused(sb) == (tp->snd_max - tp->snd_una)) && + ((tp->snd_max - tp->snd_una) <= segsiz)) { + /* + * Ok less than or right at a MSS is + * outstanding. The original FreeBSD stack would + * have sent a FIN, which can speed things up for + * a transactional application doing a MSG_WAITALL. + * To speed things up since we do *not* send a FIN + * if data is outstanding, we send a "challenge ack". + * The idea behind that is instead of having to have + * the peer wait for the delayed-ack timer to run off + * we send an ack that makes the peer send us an ack. 
+ */ + rack_send_ack_challange(rack); + } if (tot_len_this_send > 0) { - /* Make sure snd_nxt is up to max */ rack->r_ctl.fsb.recwin = recwin; - slot = rack_get_pacing_delay(rack, tp, tot_len_this_send, NULL, segsiz); + slot = rack_get_pacing_delay(rack, tp, tot_len_this_send, NULL, segsiz, __LINE__); if ((error == 0) && + (rack->rc_policer_detected == 0) && rack_use_rfo && ((flags & (TH_SYN|TH_FIN)) == 0) && (ipoptlen == 0) && - (tp->snd_nxt == tp->snd_max) && (tp->rcv_numsacks == 0) && rack->r_fsb_inited && TCPS_HAVEESTABLISHED(tp->t_state) && @@ -21052,11 +22540,10 @@ segsiz, pace_max_seg, hw_tls, flags); } else rack->r_fast_output = 0; - - rack_log_fsb(rack, tp, so, flags, ipoptlen, orig_len, len, 0, 1, optlen, __LINE__, 1); + /* Assure when we leave that snd_nxt will point to top */ if (SEQ_GT(tp->snd_max, tp->snd_nxt)) tp->snd_nxt = tp->snd_max; } else { @@ -21218,6 +22705,7 @@ rack_start_hpts_timer(rack, tp, cts, slot, tot_len_this_send, sup_rack); rack_log_type_just_return(rack, cts, tot_len_this_send, slot, hpts_calling, app_limited, cwnd_to_use); } +just_return_clean: #ifdef NETFLIX_SHARED_CWND if ((sbavail(sb) == 0) && rack->r_ctl.rc_scw) { @@ -21284,13 +22772,39 @@ * is acked first. */ flags &= ~TH_FIN; + if ((sbused(sb) == (tp->snd_max - tp->snd_una)) && + ((tp->snd_max - tp->snd_una) <= segsiz)) { + /* + * Ok less than or right at a MSS is + * outstanding. The original FreeBSD stack would + * have sent a FIN, which can speed things up for + * a transactional application doing a MSG_WAITALL. + * To speed things up since we do *not* send a FIN + * if data is outstanding, we send a "challenge ack". + * The idea behind that is instead of having to have + * the peer wait for the delayed-ack timer to run off + * we send an ack that makes the peer send us an ack. + */ + rack_send_ack_challange(rack); + } } /* Enforce stack imposed max seg size if we have one */ - if (rack->r_ctl.rc_pace_max_segs && - (len > rack->r_ctl.rc_pace_max_segs)) { + if (pace_max_seg && + (len > pace_max_seg)) { mark = 1; - len = rack->r_ctl.rc_pace_max_segs; + len = pace_max_seg; + } + if ((rsm == NULL) && + (rack->pcm_in_progress == 0) && + (rack->r_ctl.pcm_max_seg > 0) && + (len >= rack->r_ctl.pcm_max_seg)) { + /* It is large enough for a measurement */ + add_flag |= RACK_IS_PCM; + rack_log_pcm(rack, 5, len, rack->r_ctl.pcm_max_seg, add_flag); + } else if (rack_verbose_logging) { + rack_log_pcm(rack, 6, len, rack->r_ctl.pcm_max_seg, add_flag); } + SOCKBUF_LOCK_ASSERT(sb); if (len > 0) { if (len >= segsiz) @@ -21313,6 +22827,24 @@ #endif hdrlen = sizeof(struct tcpiphdr); + /* + * Ok what seq are we sending from. If we have + * no rsm to use, then we look at various bits, + * if we are putting out a SYN it will be ISS. + * If we are retransmitting a FIN it will + * be snd_max-1 else its snd_max. + */ + if (rsm == NULL) { + if (flags & TH_SYN) + rack_seq = tp->iss; + else if ((flags & TH_FIN) && + (tp->t_flags & TF_SENTFIN)) + rack_seq = tp->snd_max - 1; + else + rack_seq = tp->snd_max; + } else { + rack_seq = rsm->r_start; + } /* * Compute options for segment. We only have to care about SYN and * established connection segments. Options for SYN-ACK segments @@ -21322,7 +22854,6 @@ if ((tp->t_flags & TF_NOOPT) == 0) { /* Maximum segment size. */ if (flags & TH_SYN) { - tp->snd_nxt = tp->iss; to.to_mss = tcp_mssopt(&inp->inp_inc); if (tp->t_port) to.to_mss -= V_tcp_udp_tunneling_overhead; @@ -21369,14 +22900,47 @@ /* Timestamps. 
*/ if ((tp->t_flags & TF_RCVD_TSTMP) || ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) { - to.to_tsval = ms_cts + tp->ts_offset; + uint32_t ts_to_use; + + if ((rack->r_rcvpath_rtt_up == 1) && + (ms_cts == rack->r_ctl.last_rcv_tstmp_for_rtt)) { + /* + * When we are doing a rcv_rtt probe all + * other timestamps use the next msec. This + * is safe since our previous ack is in the + * air and we will just have a few more + * on the next ms. This assures that only + * the one ack has the ms_cts that was on + * our ack-probe. + */ + ts_to_use = ms_cts + 1; + } else { + ts_to_use = ms_cts; + } + to.to_tsval = ts_to_use + tp->ts_offset; to.to_tsecr = tp->ts_recent; to.to_flags |= TOF_TS; + if ((len == 0) && + (TCPS_HAVEESTABLISHED(tp->t_state)) && + ((ms_cts - rack->r_ctl.last_rcv_tstmp_for_rtt) > RCV_PATH_RTT_MS) && + (tp->snd_una == tp->snd_max) && + (flags & TH_ACK) && + (sbavail(sb) == 0) && + (rack->r_ctl.current_round != 0) && + ((flags & (TH_SYN|TH_FIN)) == 0) && + (rack->r_rcvpath_rtt_up == 0)) { + rack->r_ctl.last_rcv_tstmp_for_rtt = ms_cts; + rack->r_ctl.last_time_of_arm_rcv = cts; + rack->r_rcvpath_rtt_up = 1; + /* Subtract 1 from seq to force a response */ + rack_seq--; + } } /* Set receive buffer autosizing timestamp. */ if (tp->rfbuf_ts == 0 && - (so->so_rcv.sb_flags & SB_AUTOSIZE)) - tp->rfbuf_ts = tcp_ts_getticks(); + (so->so_rcv.sb_flags & SB_AUTOSIZE)) { + tp->rfbuf_ts = ms_cts; + } /* Selective ACK's. */ if (tp->t_flags & TF_SACK_PERMIT) { if (flags & TH_SYN) @@ -21544,7 +23108,24 @@ (sbused(sb))) { /* * We have outstanding data, don't send a fin by itself!. + * + * Check to see if we need to send a challenge ack. */ + if ((sbused(sb) == (tp->snd_max - tp->snd_una)) && + ((tp->snd_max - tp->snd_una) <= segsiz)) { + /* + * Ok less than or right at a MSS is + * outstanding. The original FreeBSD stack would + * have sent a FIN, which can speed things up for + * a transactional application doing a MSG_WAITALL. + * To speed things up since we do *not* send a FIN + * if data is outstanding, we send a "challenge ack". + * The idea behind that is instead of having to have + * the peer wait for the delayed-ack timer to run off + * we send an ack that makes the peer send us an ack. + */ + rack_send_ack_challange(rack); + } goto just_return; } /* @@ -21557,10 +23138,8 @@ uint32_t max_val; uint32_t moff; - if (rack->r_ctl.rc_pace_max_segs) - max_val = rack->r_ctl.rc_pace_max_segs; - else if (rack->rc_user_set_max_segs) - max_val = rack->rc_user_set_max_segs * segsiz; + if (pace_max_seg) + max_val = pace_max_seg; else max_val = len; /* @@ -21596,16 +23175,28 @@ if (len <= MHLEN - hdrlen - max_linkhdr && !hw_tls) { m_copydata(mb, moff, (int)len, mtod(m, caddr_t)+hdrlen); - if (SEQ_LT(tp->snd_nxt, tp->snd_max)) + /* + * If we are not retransmitting advance the + * sndptr to help remember the next place in + * the sb. + */ + if (rsm == NULL) sbsndptr_adv(sb, mb, len); m->m_len += len; } else { struct sockbuf *msb; - if (SEQ_LT(tp->snd_nxt, tp->snd_max)) - msb = NULL; - else + /* + * If we are not retransmitting pass in msb so + * the socket buffer can be advanced. Otherwise + * set it to NULL if its a retransmission since + * we don't want to change the sb remembered + * location. 
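The timestamp handling above arms a receive-path RTT probe at most once per RCV_PATH_RTT_MS: exactly one pure ACK goes out with the current millisecond tsval (and a sequence one below snd_max to force a reply), while anything else sent in the same millisecond uses tsval+1, so the echoed TSecr can be matched unambiguously to the probe. A hedged sketch of the matching side, assuming the arm time is kept in microseconds as last_time_of_arm_rcv is; this helper is illustrative, not the stack's actual code:

#include <stdint.h>

/*
 * When a segment arrives echoing the probe's tsval, the time since
 * the probe was armed approximates the RTT of the receive path
 * (our ACK out, the peer's response back).
 */
static uint32_t
rcv_path_rtt_usecs(uint32_t echoed_tsecr, uint32_t probe_tsval,
    uint64_t now_usecs, uint64_t armed_at_usecs)
{
	if (echoed_tsecr != probe_tsval)
		return (0);		/* not a reply to the probe */
	if (now_usecs <= armed_at_usecs)
		return (0);		/* clock did not advance, ignore */
	return ((uint32_t)(now_usecs - armed_at_usecs));
}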
+ */ + if (rsm == NULL) msb = sb; + else + msb = NULL; m->m_next = tcp_m_copym( mb, moff, &len, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb, @@ -21631,7 +23222,7 @@ goto out; } } - if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) { + if (sack_rxmit) { if (rsm && (rsm->r_flags & RACK_TLP)) { /* * TLP should not count in retran count, but @@ -21750,14 +23341,6 @@ #endif } } - /* - * Fill in fields, remembering maximum advertised window for use in - * delaying messages about window sizes. If resending a FIN, be sure - * not to use a new sequence number. - */ - if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && - tp->snd_nxt == tp->snd_max) - tp->snd_nxt--; /* * If we are starting a connection, send ECN setup SYN packet. If we * are on a retransmit, we may resend those bits a number of times @@ -21787,29 +23370,7 @@ #endif } } - /* - * If we are doing retransmissions, then snd_nxt will not reflect - * the first unsent octet. For ACK only packets, we do not want the - * sequence number of the retransmitted packet, we want the sequence - * number of the next unsent octet. So, if there is no data (and no - * SYN or FIN), use snd_max instead of snd_nxt when filling in - * ti_seq. But if we are in persist state, snd_max might reflect - * one byte beyond the right edge of the window, so use snd_nxt in - * that case, since we know we aren't doing a retransmission. - * (retransmit and persist are mutually exclusive...) - */ - if (sack_rxmit == 0) { - if (len || (flags & (TH_SYN | TH_FIN))) { - th->th_seq = htonl(tp->snd_nxt); - rack_seq = tp->snd_nxt; - } else { - th->th_seq = htonl(tp->snd_max); - rack_seq = tp->snd_max; - } - } else { - th->th_seq = htonl(rsm->r_start); - rack_seq = rsm->r_start; - } + th->th_seq = htonl(rack_seq); th->th_ack = htonl(tp->rcv_nxt); tcp_set_flags(th, flags); /* @@ -22170,6 +23731,13 @@ rack_to_usec_ts(&tv), rsm, add_flag, s_mb, s_moff, hw_tls, segsiz); if (error == 0) { + if (add_flag & RACK_IS_PCM) { + /* We just launched a PCM */ + /* rrs here log */ + rack->pcm_in_progress = 1; + rack->pcm_needed = 0; + rack_log_pcm(rack, 7, len, rack->r_ctl.pcm_max_seg, add_flag); + } if (rsm == NULL) { if (rack->lt_bw_up == 0) { rack->r_ctl.lt_timemark = tcp_tv_to_lusectick(&tv); @@ -22184,9 +23752,11 @@ rack->r_ctl.lt_bw_bytes += (tp->snd_una - rack->r_ctl.lt_seq); rack->r_ctl.lt_seq = tp->snd_una; - tmark = tcp_tv_to_lusectick(&tv); - rack->r_ctl.lt_bw_time += (tmark - rack->r_ctl.lt_timemark); - rack->r_ctl.lt_timemark = tmark; + tmark = tcp_get_u64_usecs(&tv); + if (tmark > rack->r_ctl.lt_timemark) { + rack->r_ctl.lt_bw_time += (tmark - rack->r_ctl.lt_timemark); + rack->r_ctl.lt_timemark = tmark; + } } } rack->forced_ack = 0; /* If we send something zap the FA flag */ @@ -22256,15 +23826,17 @@ (len > 0) && (tp->snd_una == tp->snd_max)) rack->r_ctl.rc_tlp_rxt_last_time = cts; + { - tcp_seq startseq = tp->snd_nxt; + /* + * This block is not associated with the above error == 0 test. + * It is used to advance snd_max if we have a new transmit. + */ + tcp_seq startseq = tp->snd_max; + - /* Track our lost count */ if (rsm && (doing_tlp == 0)) rack->r_ctl.rc_loss_count += rsm->r_end - rsm->r_start; - /* - * Advance snd_nxt over sequence space of this segment. 
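The long-term bandwidth bookkeeping in this hunk now takes its mark from tcp_get_u64_usecs() and only folds the interval in when the new mark is strictly later, so a stale or equal timestamp can no longer inflate lt_bw_time or push it backwards. A standalone sketch of the accumulator and the resulting bytes-per-second estimate, with illustrative names:

#include <stdint.h>

struct lt_bw_gauge {
	uint64_t bytes;		/* bytes newly acked while sending */
	uint64_t time_usecs;	/* time spent actively sending */
	uint64_t timemark;	/* last sample time, in microseconds */
	uint32_t seq_mark;	/* snd_una at the last sample */
};

static void
lt_bw_sample(struct lt_bw_gauge *g, uint32_t snd_una, uint64_t now_usecs)
{
	g->bytes += (uint32_t)(snd_una - g->seq_mark);
	g->seq_mark = snd_una;
	/* Only credit time that actually moved forward. */
	if (now_usecs > g->timemark) {
		g->time_usecs += now_usecs - g->timemark;
		g->timemark = now_usecs;
	}
}

static uint64_t
lt_bw_bytes_per_sec(const struct lt_bw_gauge *g)
{
	if (g->time_usecs == 0)
		return (0);
	return (g->bytes * 1000000ULL / g->time_usecs);
}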
- */ if (error) /* We don't log or do anything with errors */ goto nomore; @@ -22287,53 +23859,53 @@ rack->rc_tlp_in_progress = 1; rack->r_ctl.rc_tlp_cnt_out++; } - if (flags & (TH_SYN | TH_FIN)) { - if (flags & TH_SYN) - tp->snd_nxt++; - if (flags & TH_FIN) { - tp->snd_nxt++; - tp->t_flags |= TF_SENTFIN; - } - } - /* In the ENOBUFS case we do *not* update snd_max */ + /* + * If we are retransmitting we are done, snd_max + * does not get updated. + */ if (sack_rxmit) goto nomore; - - tp->snd_nxt += len; - if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { - if (tp->snd_una == tp->snd_max) { - /* - * Update the time we just added data since - * none was outstanding. - */ - rack_log_progress_event(rack, tp, ticks, PROGRESS_START, __LINE__); - tp->t_acktime = ticks; - } - tp->snd_max = tp->snd_nxt; - if (rack->rc_new_rnd_needed) { - /* - * Update the rnd to start ticking not - * that from a time perspective all of - * the preceding idle time is "in the round" - */ - rack->rc_new_rnd_needed = 0; - rack->r_ctl.roundends = tp->snd_max; - } + if ((tp->snd_una == tp->snd_max) && (len > 0)) { /* - * Time this transmission if not a retransmission and - * not currently timing anything. - * This is only relevant in case of switching back to - * the base stack. + * Update the time we just added data since + * nothing was outstanding. */ - if (tp->t_rtttime == 0) { - tp->t_rtttime = ticks; - tp->t_rtseq = startseq; - KMOD_TCPSTAT_INC(tcps_segstimed); + rack_log_progress_event(rack, tp, ticks, PROGRESS_START, __LINE__); + tp->t_acktime = ticks; + } + /* + * Now for special SYN/FIN handling. + */ + if (flags & (TH_SYN | TH_FIN)) { + if ((flags & TH_SYN) && + ((tp->t_flags & TF_SENTSYN) == 0)) { + tp->snd_max++; + tp->t_flags |= TF_SENTSYN; } - if (len && - ((tp->t_flags & TF_GPUTINPROG) == 0)) - rack_start_gp_measurement(tp, rack, startseq, sb_offset); + if ((flags & TH_FIN) && + ((tp->t_flags & TF_SENTFIN) == 0)) { + tp->snd_max++; + tp->t_flags |= TF_SENTFIN; + } + } + tp->snd_max += len; + if (rack->rc_new_rnd_needed) { + rack_new_round_starts(tp, rack, tp->snd_max); + } + /* + * Time this transmission if not a retransmission and + * not currently timing anything. + * This is only relevant in case of switching back to + * the base stack. + */ + if (tp->t_rtttime == 0) { + tp->t_rtttime = ticks; + tp->t_rtseq = startseq; + KMOD_TCPSTAT_INC(tcps_segstimed); } + if (len && + ((tp->t_flags & TF_GPUTINPROG) == 0)) + rack_start_gp_measurement(tp, rack, startseq, sb_offset); /* * If we are doing FO we need to update the mbuf position and subtract * this happens when the peer sends us duplicate information and @@ -22356,6 +23928,47 @@ rack->r_ctl.fsb.o_t_len = M_TRAILINGROOM(rack->r_ctl.fsb.m); } } + if (rack_pcm_blast == 0) { + if ((orig_len > len) && + (add_flag & RACK_IS_PCM) && + (len < pace_max_seg) && + ((pace_max_seg - len) > segsiz)) { + /* + * We are doing a PCM measurement and we did + * not get enough data in the TSO to meet the + * burst requirement. + */ + uint32_t n_len; + + n_len = (orig_len - len); + orig_len -= len; + pace_max_seg -= len; + len = n_len; + sb_offset = tp->snd_max - tp->snd_una; + /* Re-lock for the next spin */ + SOCKBUF_LOCK(sb); + goto send; + } + } else { + if ((orig_len > len) && + (add_flag & RACK_IS_PCM) && + ((orig_len - len) > segsiz)) { + /* + * We are doing a PCM measurement and we did + * not get enough data in the TSO to meet the + * burst requirement. 
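The rewritten transmit accounting above advances snd_max directly instead of stepping snd_nxt: SYN and FIN each consume one sequence number, and the TF_SENTSYN/TF_SENTFIN guards keep a retransmitted control bit from being counted twice. A compact sketch of that accounting:

#include <stdint.h>
#include <stdbool.h>

struct send_state {
	uint32_t snd_max;	/* highest sequence sent, plus one */
	bool	sent_syn;	/* TF_SENTSYN analogue */
	bool	sent_fin;	/* TF_SENTFIN analogue */
};

/*
 * Account a newly transmitted (not retransmitted) segment: the SYN
 * and FIN bits take one sequence number each, but only the first
 * time they go out; the payload adds its length.
 */
static void
advance_snd_max(struct send_state *s, bool syn, bool fin, uint32_t len)
{
	if (syn && !s->sent_syn) {
		s->snd_max++;
		s->sent_syn = true;
	}
	if (fin && !s->sent_fin) {
		s->snd_max++;
		s->sent_fin = true;
	}
	s->snd_max += len;
}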
+ */ + uint32_t n_len; + + n_len = (orig_len - len); + orig_len -= len; + len = n_len; + sb_offset = tp->snd_max - tp->snd_una; + /* Re-lock for the next spin */ + SOCKBUF_LOCK(sb); + goto send; + } + } } nomore: if (error) { @@ -22488,14 +24101,10 @@ enobufs: if (sendalot) { /* Do we need to turn off sendalot? */ - if (rack->r_ctl.rc_pace_max_segs && - (tot_len_this_send >= rack->r_ctl.rc_pace_max_segs)) { + if (pace_max_seg && + (tot_len_this_send >= pace_max_seg)) { /* We hit our max. */ sendalot = 0; - } else if ((rack->rc_user_set_max_segs) && - (tot_len_this_send >= (rack->rc_user_set_max_segs * segsiz))) { - /* We hit the user defined max */ - sendalot = 0; } } if ((error == 0) && (flags & TH_FIN)) @@ -22515,22 +24124,7 @@ * hit the else if with slot preset. Other * errors return. */ - slot = rack_get_pacing_delay(rack, tp, tot_len_this_send, rsm, segsiz); - } - if (rsm && - (rsm->r_flags & RACK_HAS_SYN) == 0 && - rack->use_rack_rr) { - /* Its a retransmit and we use the rack cheat? */ - if ((slot == 0) || - (rack->rc_always_pace == 0) || - (rack->r_rr_config == 1)) { - /* - * We have no pacing set or we - * are using old-style rack or - * we are overridden to use the old 1ms pacing. - */ - slot = rack->r_ctl.rc_min_to; - } + slot = rack_get_pacing_delay(rack, tp, tot_len_this_send, rsm, segsiz, __LINE__); } /* We have sent clear the flag */ rack->r_ent_rec_ns = 0; @@ -22568,9 +24162,9 @@ rack_use_rfo && ((flags & (TH_SYN|TH_FIN)) == 0) && (rsm == NULL) && - (tp->snd_nxt == tp->snd_max) && (ipoptlen == 0) && (tp->rcv_numsacks == 0) && + (rack->rc_policer_detected == 0) && rack->r_fsb_inited && TCPS_HAVEESTABLISHED(tp->t_state) && ((IN_RECOVERY(tp->t_flags)) == 0) && @@ -22599,7 +24193,6 @@ (rsm == NULL) && (ipoptlen == 0) && (tp->rcv_numsacks == 0) && - (tp->snd_nxt == tp->snd_max) && (rack->r_must_retran == 0) && rack->r_fsb_inited && TCPS_HAVEESTABLISHED(tp->t_state) && @@ -22625,8 +24218,8 @@ } goto again; } - /* Assure when we leave that snd_nxt will point to top */ skip_all_send: + /* Assure when we leave that snd_nxt will point to top */ if (SEQ_GT(tp->snd_max, tp->snd_nxt)) tp->snd_nxt = tp->snd_max; rack_start_hpts_timer(rack, tp, cts, slot, tot_len_this_send, 0); @@ -22705,14 +24298,26 @@ static int rack_set_dgp(struct tcp_rack *rack) { - /* pace_always=1 */ - if (rack->rc_always_pace == 0) { - if (tcp_can_enable_pacing() == 0) - return (EBUSY); + if (rack->dgp_on == 1) + return(0); + if ((rack->use_fixed_rate == 1) && + (rack->rc_always_pace == 1)) { + /* + * We are already pacing another + * way. 
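Both PCM branches above implement the same idea: a path-capacity measurement only means anything if the whole burst goes out, so when TSO trimming left the send more than a segment short, the remainder is recomputed and the code loops back to send again. A small sketch of that decision, with pcm_blast standing in for the rack_pcm_blast knob and the names otherwise illustrative:

#include <stdint.h>
#include <stdbool.h>

/*
 * After a PCM-tagged transmit, decide whether another pass is
 * needed to complete the burst.  Returns the length to send next,
 * or 0 when the burst is considered complete.
 */
static uint32_t
pcm_remaining(uint32_t wanted, uint32_t sent, uint32_t pace_max_seg,
    uint32_t segsiz, bool pcm_blast)
{
	if (sent >= wanted)
		return (0);
	if (pcm_blast) {
		/* Blast mode: keep going while a full segment is left. */
		return ((wanted - sent) > segsiz ? wanted - sent : 0);
	}
	/* Default: only respin while well short of the pacing maximum. */
	if (sent < pace_max_seg && (pace_max_seg - sent) > segsiz)
		return (wanted - sent);
	return (0);
}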
+ */ + return (EBUSY); + } + if (rack->rc_always_pace == 1) { + rack_remove_pacing(rack); } + if (tcp_incr_dgp_pacing_cnt() == 0) + return (ENOSPC); + rack->r_ctl.pacing_method |= RACK_DGP_PACING; rack->rc_fillcw_apply_discount = 0; rack->dgp_on = 1; rack->rc_always_pace = 1; + rack->rc_pace_dnd = 1; rack->use_fixed_rate = 0; if (rack->gp_ready) rack_set_cc_pacing(rack); @@ -22737,14 +24342,7 @@ /* npush=2 */ rack->r_ctl.rc_no_push_at_mrtt = 2; /* fillcw=1 */ - if (rack->r_cwnd_was_clamped == 0) { - rack->rc_pace_to_cwnd = 1; - } else { - rack->rc_pace_to_cwnd = 0; - /* Reset all multipliers to 100.0 so just the measured bw */ - rack->r_ctl.rack_per_of_gp_ss = 100; - rack->r_ctl.rack_per_of_gp_ca = 100; - } + rack->rc_pace_to_cwnd = 1; rack->rc_pace_fill_if_rttin_range = 0; rack->rtt_limit_mul = 0; /* noprr=1 */ @@ -22753,12 +24351,9 @@ rack->r_limit_scw = 1; /* gp_inc_rec */ rack->r_ctl.rack_per_of_gp_rec = 90; - rack_client_buffer_level_set(rack); return (0); } - - static int rack_set_profile(struct tcp_rack *rack, int prof) { @@ -22768,72 +24363,37 @@ * Profile 1 is "standard" DGP. It ignores * client buffer level. */ - rack->r_ctl.rc_dgp_bl_agg = DGP_LEVEL0; err = rack_set_dgp(rack); if (err) return (err); - } else if (prof == 2) { - /* - * Profile 2 is DGP. Less aggressive with - * respect to client buffer level. - */ - rack->r_ctl.rc_dgp_bl_agg = DGP_LEVEL1; + } else if (prof == 6) { err = rack_set_dgp(rack); if (err) return (err); - } else if (prof == 3) { /* - * Profile 3 is DGP. Even Less aggressive with - * respect to client buffer level. - */ - rack->r_ctl.rc_dgp_bl_agg = DGP_LEVEL2; - err = rack_set_dgp(rack); - if (err) - return (err); - } else if (prof == 4) { - /* - * Profile 4 is DGP with the most responsiveness - * to client buffer level. - */ - rack->r_ctl.rc_dgp_bl_agg = DGP_LEVEL3; - err = rack_set_dgp(rack); - if (err) - return (err); - } else if (prof == 5) { - err = rack_set_dgp(rack); - if (err) - return (err); - /* - * By turning DGP off we change the rate - * picked to be only the one the cwnd and rtt - * get us. - */ - rack->dgp_on = 0; - } else if (prof == 6) { - err = rack_set_dgp(rack); - if (err) - return (err); - /* - * Profile 6 tweaks DGP so that it will apply to - * fill-cw the same settings that profile5 does - * to replace DGP. It gets then the max(dgp-rate, fillcw(discounted). + * Profile 6 tweaks DGP so that it will apply to + * fill-cw the same settings that profile5 does + * to replace DGP. It gets then the max(dgp-rate, fillcw(discounted). 
*/ rack->rc_fillcw_apply_discount = 1; } else if (prof == 0) { /* This changes things back to the default settings */ - rack->dgp_on = 0; - rack->rc_hybrid_mode = 0; + if (rack->rc_always_pace == 1) { + rack_remove_pacing(rack); + } else { + /* Make sure any stray flags are off */ + rack->dgp_on = 0; + rack->rc_hybrid_mode = 0; + rack->use_fixed_rate = 0; + } err = 0; if (rack_fill_cw_state) rack->rc_pace_to_cwnd = 1; else rack->rc_pace_to_cwnd = 0; - if (rack->rc_always_pace) { - tcp_decrement_paced_conn(); - rack_undo_cc_pacing(rack); - rack->rc_always_pace = 0; - } + if (rack_pace_every_seg && tcp_can_enable_pacing()) { + rack->r_ctl.pacing_method |= RACK_REG_PACING; rack->rc_always_pace = 1; if (rack->rack_hibeta) rack_set_cc_pacing(rack); @@ -22883,7 +24443,6 @@ } rack->r_rr_config = 0; rack->r_ctl.rc_no_push_at_mrtt = 0; - rack->rc_pace_to_cwnd = 0; rack->rc_pace_fill_if_rttin_range = 0; rack->rtt_limit_mul = 0; @@ -22911,7 +24470,7 @@ struct deferred_opt_list *dol; dol = malloc(sizeof(struct deferred_opt_list), - M_TCPFSB, M_NOWAIT|M_ZERO); + M_TCPDO, M_NOWAIT|M_ZERO); if (dol == NULL) { /* * No space yikes -- fail out.. @@ -22935,19 +24494,6 @@ microuptime(&tv); - /* - * If BB logging is not on we need to look at the DTL flag. - * If its on already then those reasons override the DTL input. - * We do this with any request, you can turn DTL on, but it does - * not turn off at least from hybrid pacing requests. - */ - if (tcp_bblogging_on(rack->rc_tp) == 0) { - if (hybrid->hybrid_flags & TCP_HYBRID_PACING_DTL) { - /* Turn on BB point logging */ - tcp_set_bblog_state(rack->rc_tp, TCP_LOG_VIA_BBPOINTS, - TCP_BBPOINT_REQ_LEVEL_LOGGING); - } - } /* Make sure no fixed rate is on */ rack->use_fixed_rate = 0; rack->r_ctl.rc_fixed_pacing_rate_rec = 0; @@ -22962,6 +24508,8 @@ rack_log_hybrid(rack, seq, NULL, HYBRID_LOG_NO_ROOM, __LINE__, 0); return (ENOSPC); } + /* mask our internal flags */ + hybrid->hybrid_flags &= TCP_HYBRID_PACING_USER_MASK; /* The seq will be snd_una + everything in the buffer */ seq = sft->start_seq; if ((hybrid->hybrid_flags & TCP_HYBRID_PACING_ENABLE) == 0) { @@ -22986,6 +24534,26 @@ return (err); } } + /* + * Now we must switch to hybrid mode as well which also + * means moving to regular pacing. + */ + if (rack->rc_hybrid_mode == 0) { + /* First time */ + if (tcp_can_enable_pacing()) { + rack->r_ctl.pacing_method |= RACK_REG_PACING; + rack->rc_hybrid_mode = 1; + } else { + return (ENOSPC); + } + if (rack->r_ctl.pacing_method & RACK_DGP_PACING) { + /* + * This should be true. + */ + tcp_dec_dgp_pacing_cnt(); + rack->r_ctl.pacing_method &= ~RACK_DGP_PACING; + } + } /* Now set in our flags */ sft->hybrid_flags = hybrid->hybrid_flags | TCP_HYBRID_PACING_WASSET; if (hybrid->hybrid_flags & TCP_HYBRID_PACING_CSPR) @@ -22996,7 +24564,6 @@ sft->hint_maxseg = hybrid->hint_maxseg; else sft->hint_maxseg = 0; - rack->rc_hybrid_mode = 1; rack->rc_tp->tcp_hybrid_start++; rack_log_hybrid(rack, seq, sft, HYBRID_LOG_RULES_SET, __LINE__,0); return (0); @@ -23005,6 +24572,36 @@ #endif } +static int +rack_stack_information(struct tcpcb *tp, struct stack_specific_info *si) +{ + /* + * Gather rack specific information. 
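Enabling hybrid pacing on a connection that was counted against the DGP pacer pool has to move the accounting: take a slot in the regular pacing count first, and only then release the DGP slot and clear RACK_DGP_PACING. The same hand-off appears again below for TCP_PACING_RATE_CAP and TCP_RACK_PACE_MAX_SEG. A sketch of the pattern with illustrative counter helpers (the real ones are tcp_can_enable_pacing() and tcp_dec_dgp_pacing_cnt()):

#include <stdint.h>
#include <stdbool.h>

#define PACING_DGP	0x01	/* counted in the DGP pool */
#define PACING_REG	0x02	/* counted in the regular pool */

static bool
reg_pool_acquire(void)
{
	return (true);		/* pretend the regular pool has room */
}

static void
dgp_pool_release(void)
{
}

/*
 * Move a connection from DGP-pool accounting to regular-pool
 * accounting.  Acquire before release, so a failure leaves the
 * original accounting untouched.
 */
static int
switch_dgp_to_regular(uint8_t *pacing_method)
{
	if ((*pacing_method & PACING_DGP) == 0)
		return (0);		/* nothing to move */
	if (!reg_pool_acquire())
		return (-1);		/* pool full, keep DGP accounting */
	*pacing_method |= PACING_REG;
	dgp_pool_release();
	*pacing_method &= ~PACING_DGP;
	return (0);
}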
+ */ + struct tcp_rack *rack; + + rack = (struct tcp_rack *)tp->t_fb_ptr; + /* We pulled a SSI info log out what was there */ + policer_detection_log(rack, rack->rc_highly_buffered, 0, 0, 0, 20); + if (rack->policer_detect_on) { + si->policer_detection_enabled = 1; + if (rack->rc_policer_detected) { + si->policer_detected = 1; + si->policer_bucket_size = rack->r_ctl.policer_bucket_size; + si->policer_last_bw = rack->r_ctl.policer_bw; + } else { + si->policer_detected = 0; + si->policer_bucket_size = 0; + si->policer_last_bw = 0; + } + si->current_round = rack->r_ctl.current_round; + si->highly_buffered = rack->rc_highly_buffered; + } + si->bytes_transmitted = tp->t_sndbytes; + si->bytes_retransmitted = tp->t_snd_rxt_bytes; + return (0); +} + static int rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name, uint32_t optval, uint64_t loptval, struct tcp_hybrid_req *hybrid) @@ -23077,34 +24674,7 @@ } break; case TCP_RACK_PACING_BETA: - RACK_OPTS_INC(tcp_rack_beta); - if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0) { - /* This only works for newreno. */ - error = EINVAL; - break; - } - if (rack->rc_pacing_cc_set) { - /* - * Set them into the real CC module - * whats in the rack pcb is the old values - * to be used on restoral/ - */ - sopt.sopt_dir = SOPT_SET; - opt.name = CC_NEWRENO_BETA; - opt.val = optval; - if (CC_ALGO(tp)->ctl_output != NULL) - error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, &sopt, &opt); - else { - error = ENOENT; - break; - } - } else { - /* - * Not pacing yet so set it into our local - * rack pcb storage. - */ - rack->r_ctl.rc_saved_beta.beta = optval; - } + error = EINVAL; break; case TCP_RACK_TIMER_SLOP: RACK_OPTS_INC(tcp_rack_timer_slop); @@ -23188,8 +24758,29 @@ else rack->r_up_only = 0; break; + case TCP_FILLCW_RATE_CAP: /* URL:fillcw_cap */ + RACK_OPTS_INC(tcp_fillcw_rate_cap); + rack->r_ctl.fillcw_cap = loptval; + break; case TCP_PACING_RATE_CAP: RACK_OPTS_INC(tcp_pacing_rate_cap); + if ((rack->dgp_on == 1) && + (rack->r_ctl.pacing_method & RACK_DGP_PACING)) { + /* + * If we are doing DGP we need to switch + * to using the pacing limit. + */ + if (tcp_can_enable_pacing() == 0) { + error = ENOSPC; + break; + } + /* + * Now change up the flags and counts to be correct. 
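TCP_FILLCW_RATE_CAP above stores loptval, i.e. it is a 64-bit bytes-per-second cap on DGP's fill-cw rate; the option-processing code later in this patch copies in a uint64_t for it, alongside TCP_PACING_RATE_CAP. A minimal userspace sketch of setting it, assuming headers from a tree with this patch applied and a connection already switched to the rack stack:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>	/* needs the patched header for TCP_FILLCW_RATE_CAP */
#include <stdint.h>
#include <stdio.h>

/*
 * Cap fill-cw at the given rate on an already connected socket
 * 'fd'.  The option takes a 64-bit bytes-per-second value, e.g.
 * 12500000 for roughly 100 Mbit/s.
 */
static int
set_fillcw_cap(int fd, uint64_t bytes_per_sec)
{
	if (setsockopt(fd, IPPROTO_TCP, TCP_FILLCW_RATE_CAP,
	    &bytes_per_sec, sizeof(bytes_per_sec)) == -1) {
		perror("setsockopt(TCP_FILLCW_RATE_CAP)");
		return (-1);
	}
	return (0);
}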
+ */ + rack->r_ctl.pacing_method |= RACK_REG_PACING; + tcp_dec_dgp_pacing_cnt(); + rack->r_ctl.pacing_method &= ~RACK_DGP_PACING; + } rack->r_ctl.bw_rate_cap = loptval; break; case TCP_HYBRID_PACING: @@ -23197,8 +24788,18 @@ error = EINVAL; break; } + if (rack->r_ctl.side_chan_dis_mask & HYBRID_DIS_MASK) { + error = EPERM; + break; + } error = process_hybrid_pacing(rack, hybrid); break; + case TCP_SIDECHAN_DIS: /* URL:scodm */ + if (optval) + rack->r_ctl.side_chan_dis_mask = optval; + else + rack->r_ctl.side_chan_dis_mask = 0; + break; case TCP_RACK_PROFILE: RACK_OPTS_INC(tcp_profile); error = rack_set_profile(rack, optval); @@ -23224,15 +24825,37 @@ rack->r_limit_scw = 0; break; case TCP_RACK_DGP_IN_REC: - RACK_OPTS_INC(tcp_dgp_in_rec); - if (optval) - rack->r_ctl.full_dgp_in_rec = 1; - else - rack->r_ctl.full_dgp_in_rec = 0; + error = EINVAL; + break; + case TCP_POLICER_DETECT: /* URL:pol_det */ + RACK_OPTS_INC(tcp_pol_detect); + rack_translate_policer_detect(rack, optval); break; - case TCP_RXT_CLAMP: - RACK_OPTS_INC(tcp_rxt_clamp); - rack_translate_clamp_value(rack, optval); + case TCP_POLICER_MSS: + RACK_OPTS_INC(tcp_pol_mss); + rack->r_ctl.policer_del_mss = (uint8_t)optval; + if (optval & 0x00000100) { + /* + * Value is setup like so: + * VVVV VVVV VVVV VVVV VVVV VVAI MMMM MMMM + * Where MMMM MMMM is MSS setting + * I (9th bit) is the Postive value that + * says it is being set (if its 0 then the + * upper bits 11 - 32 have no meaning. + * This allows setting it off with + * 0x000001MM. + * + * The 10th bit is used to turn on the + * alternate median (not the expanded one). + * + */ + rack->r_ctl.pol_bw_comp = (optval >> 10); + } + if (optval & 0x00000200) { + rack->r_ctl.policer_alt_median = 1; + } else { + rack->r_ctl.policer_alt_median = 0; + } break; case TCP_RACK_PACE_TO_FILL: RACK_OPTS_INC(tcp_fillcw); @@ -23240,8 +24863,6 @@ rack->rc_pace_to_cwnd = 0; else { rack->rc_pace_to_cwnd = 1; - if (optval > 1) - rack->r_fill_less_agg = 1; } if ((optval >= rack_gp_rtt_maxmul) && rack_gp_rtt_maxmul && @@ -23299,6 +24920,12 @@ else error = EINVAL; break; + case RACK_CSPR_IS_FCC: /* URL:csprisfcc */ + if (optval > 0) + rack->cspr_is_fcc = 1; + else + rack->cspr_is_fcc = 0; + break; case TCP_TIMELY_DYN_ADJ: RACK_OPTS_INC(tcp_timely_dyn); if (optval == 0) @@ -23341,11 +24968,16 @@ * method using a pacing rate. */ RACK_OPTS_INC(tcp_rack_pace_always); + if (rack->r_ctl.side_chan_dis_mask & CCSP_DIS_MASK) { + error = EPERM; + break; + } if (optval > 0) { if (rack->rc_always_pace) { error = EALREADY; break; } else if (tcp_can_enable_pacing()) { + rack->r_ctl.pacing_method |= RACK_REG_PACING; rack->rc_always_pace = 1; if (rack->rack_hibeta) rack_set_cc_pacing(rack); @@ -23355,10 +24987,8 @@ break; } } else { - if (rack->rc_always_pace) { - tcp_decrement_paced_conn(); - rack->rc_always_pace = 0; - rack_undo_cc_pacing(rack); + if (rack->rc_always_pace == 1) { + rack_remove_pacing(rack); } } if (rack->r_mbuf_queue || rack->rc_always_pace || rack->r_use_cmp_ack) @@ -23375,58 +25005,11 @@ val *= 1000; val /= 8; rack->r_ctl.init_rate = val; - if (rack->rc_init_win != rack_default_init_window) { - uint32_t win, snt; - - /* - * Options don't always get applied - * in the order you think. So in order - * to assure we update a cwnd we need - * to check and see if we are still - * where we should raise the cwnd. 
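The TCP_POLICER_MSS comment above packs three things into one optval: the low byte is the MSS count required during recovery, the 0x100 bit ("I") says the upper bits are meaningful, the 0x200 bit ("A") selects the alternate median, and everything from bit 10 up is the bandwidth-compensation value. A standalone decode sketch mirroring that handling:

#include <stdint.h>

struct policer_mss_opt {
	uint8_t  del_mss;	/* MSS count required during recovery */
	uint8_t  alt_median;	/* use the alternate (non-expanded) median */
	uint32_t bw_comp;	/* bandwidth compensation, bits 10 and up */
};

/*
 * Decode an optval laid out as
 *   VVVV VVVV VVVV VVVV VVVV VVAI MMMM MMMM
 * where M is the MSS byte, I (0x100) marks the upper bits as valid
 * and A (0x200) selects the alternate median.
 */
static struct policer_mss_opt
decode_policer_mss(uint32_t optval)
{
	struct policer_mss_opt o;

	o.del_mss = (uint8_t)optval;
	o.bw_comp = (optval & 0x00000100) ? (optval >> 10) : 0;
	o.alt_median = (optval & 0x00000200) ? 1 : 0;
	return (o);
}
/* Example: 0x0000350a gives del_mss 10, I set, A clear, bw_comp 0xd. */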
- */ - win = rc_init_window(rack); - if (SEQ_GT(tp->snd_max, tp->iss)) - snt = tp->snd_max - tp->iss; - else - snt = 0; - if ((snt < win) && - (tp->snd_cwnd < win)) - tp->snd_cwnd = win; - } if (rack->rc_always_pace) rack_update_seg(rack); break; case TCP_BBR_IWINTSO: - RACK_OPTS_INC(tcp_initial_win); - if (optval && (optval <= 0xff)) { - uint32_t win, snt; - - rack->rc_init_win = optval; - win = rc_init_window(rack); - if (SEQ_GT(tp->snd_max, tp->iss)) - snt = tp->snd_max - tp->iss; - else - snt = 0; - if ((snt < win) && - (tp->t_srtt | - rack->r_ctl.init_rate)) { - /* - * We are not past the initial window - * and we have some bases for pacing, - * so we need to possibly adjust up - * the cwnd. Note even if we don't set - * the cwnd, its still ok to raise the rc_init_win - * which can be used coming out of idle when we - * would have a rate. - */ - if (tp->snd_cwnd < win) - tp->snd_cwnd = win; - } - if (rack->rc_always_pace) - rack_update_seg(rack); - } else - error = EINVAL; + error = EINVAL; break; case TCP_RACK_FORCE_MSEG: RACK_OPTS_INC(tcp_rack_force_max_seg); @@ -23443,6 +25026,24 @@ case TCP_RACK_PACE_MAX_SEG: /* Max segments size in a pace in bytes */ RACK_OPTS_INC(tcp_rack_max_seg); + if ((rack->dgp_on == 1) && + (rack->r_ctl.pacing_method & RACK_DGP_PACING)) { + /* + * If we set a max-seg and are doing DGP then + * we now fall under the pacing limits not the + * DGP ones. + */ + if (tcp_can_enable_pacing() == 0) { + error = ENOSPC; + break; + } + /* + * Now change up the flags and counts to be correct. + */ + rack->r_ctl.pacing_method |= RACK_REG_PACING; + tcp_dec_dgp_pacing_cnt(); + rack->r_ctl.pacing_method &= ~RACK_DGP_PACING; + } if (optval <= MAX_USER_SET_SEG) rack->rc_user_set_max_segs = optval; else @@ -23452,6 +25053,18 @@ case TCP_RACK_PACE_RATE_REC: /* Set the fixed pacing rate in Bytes per second ca */ RACK_OPTS_INC(tcp_rack_pace_rate_rec); + if (rack->r_ctl.side_chan_dis_mask & CCSP_DIS_MASK) { + error = EPERM; + break; + } + if (rack->dgp_on) { + /* + * We are already pacing another + * way. + */ + error = EBUSY; + break; + } rack->r_ctl.rc_fixed_pacing_rate_rec = optval; if (rack->r_ctl.rc_fixed_pacing_rate_ca == 0) rack->r_ctl.rc_fixed_pacing_rate_ca = optval; @@ -23470,6 +25083,18 @@ case TCP_RACK_PACE_RATE_SS: /* Set the fixed pacing rate in Bytes per second ca */ RACK_OPTS_INC(tcp_rack_pace_rate_ss); + if (rack->r_ctl.side_chan_dis_mask & CCSP_DIS_MASK) { + error = EPERM; + break; + } + if (rack->dgp_on) { + /* + * We are already pacing another + * way. + */ + error = EBUSY; + break; + } rack->r_ctl.rc_fixed_pacing_rate_ss = optval; if (rack->r_ctl.rc_fixed_pacing_rate_ca == 0) rack->r_ctl.rc_fixed_pacing_rate_ca = optval; @@ -23488,6 +25113,18 @@ case TCP_RACK_PACE_RATE_CA: /* Set the fixed pacing rate in Bytes per second ca */ RACK_OPTS_INC(tcp_rack_pace_rate_ca); + if (rack->r_ctl.side_chan_dis_mask & CCSP_DIS_MASK) { + error = EPERM; + break; + } + if (rack->dgp_on) { + /* + * We are already pacing another + * way. 
+ */ + error = EBUSY; + break; + } rack->r_ctl.rc_fixed_pacing_rate_ca = optval; if (rack->r_ctl.rc_fixed_pacing_rate_ss == 0) rack->r_ctl.rc_fixed_pacing_rate_ss = optval; @@ -23571,6 +25208,41 @@ rack->r_rack_hw_rate_caps = 0; } break; + case TCP_DGP_UPPER_BOUNDS: + { + uint8_t val; + val = optval & 0x0000ff; + rack->r_ctl.rack_per_upper_bound_ca = val; + val = (optval >> 16) & 0x0000ff; + rack->r_ctl.rack_per_upper_bound_ss = val; + break; + } + case TCP_SS_EEXIT: /* URL:eexit */ + if (optval > 0) { + rack->r_ctl.gp_rnd_thresh = optval & 0x0ff; + if (optval & 0x10000) { + rack->r_ctl.gate_to_fs = 1; + } else { + rack->r_ctl.gate_to_fs = 0; + } + if (optval & 0x20000) { + rack->r_ctl.use_gp_not_last = 1; + } else { + rack->r_ctl.use_gp_not_last = 0; + } + if (optval & 0xfffc0000) { + uint32_t v; + + v = (optval >> 18) & 0x00003fff; + if (v >= 1000) + rack->r_ctl.gp_gain_req = v; + } + } else { + /* We do not do ss early exit at all */ + rack->rc_initial_ss_comp = 1; + rack->r_ctl.gp_rnd_thresh = 0; + } + break; case TCP_RACK_SPLIT_LIMIT: RACK_OPTS_INC(tcp_split_limit); rack->r_ctl.rc_split_limit = optval; @@ -23681,6 +25353,50 @@ else rack->r_ctl.rc_rate_sample_method = optval; break; + case TCP_HONOR_HPTS_MIN: + RACK_OPTS_INC(tcp_honor_hpts); + if (optval) { + rack->r_use_hpts_min = 1; + /* + * Must be between 2 - 80% to be a reduction else + * we keep the default (10%). + */ + if ((optval > 1) && (optval <= 80)) { + rack->r_ctl.max_reduction = optval; + } + } else + rack->r_use_hpts_min = 0; + break; + case TCP_REC_IS_DYN: /* URL:dynrec */ + RACK_OPTS_INC(tcp_dyn_rec); + if (optval) + rack->rc_gp_no_rec_chg = 1; + else + rack->rc_gp_no_rec_chg = 0; + break; + case TCP_NO_TIMELY: + RACK_OPTS_INC(tcp_notimely); + if (optval) { + rack->rc_skip_timely = 1; + rack->r_ctl.rack_per_of_gp_rec = 90; + rack->r_ctl.rack_per_of_gp_ca = 100; + rack->r_ctl.rack_per_of_gp_ss = 250; + } else { + rack->rc_skip_timely = 0; + } + break; + case TCP_GP_USE_LTBW: + if (optval == 0) { + rack->use_lesser_lt_bw = 0; + rack->dis_lt_bw = 1; + } else if (optval == 1) { + rack->use_lesser_lt_bw = 1; + rack->dis_lt_bw = 0; + } else if (optval == 2) { + rack->use_lesser_lt_bw = 0; + rack->dis_lt_bw = 0; + } + break; case TCP_DATA_AFTER_CLOSE: RACK_OPTS_INC(tcp_data_after_close); if (optval) @@ -23695,6 +25411,431 @@ return (error); } +static void +rack_inherit(struct tcpcb *tp, struct inpcb *parent) +{ + /* + * A new connection has been created (tp) and + * the parent is the inpcb given. We want to + * apply a read-lock to the parent (we are already + * holding a write lock on the tp) and copy anything + * out of the rack specific data as long as its tfb is + * the same as ours i.e. we are the same stack. Otherwise + * we just return. + */ + struct tcpcb *par; + struct tcp_rack *dest, *src; + int cnt = 0; + + par = intotcpcb(parent); + if (par->t_fb != tp->t_fb) { + /* Not the same stack */ + tcp_log_socket_option(tp, 0, 0, 1); + return; + } + /* Ok if we reach here lets setup the two rack pointers */ + dest = (struct tcp_rack *)tp->t_fb_ptr; + src = (struct tcp_rack *)par->t_fb_ptr; + if ((src == NULL) || (dest == NULL)) { + /* Huh? */ + tcp_log_socket_option(tp, 0, 0, 2); + return; + } + /* Now copy out anything we wish to inherit i.e. 
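TCP_SS_EEXIT above packs the early slow-start-exit controls into one word: bits 0-7 give the round threshold, 0x10000 sets gate_to_fs, 0x20000 sets use_gp_not_last, and bits 18 and up carry the goodput-gain requirement (only stored when it is 1000 or more); an optval of 0 disables early exit entirely. An encode sketch following the setter shown here (worth noting that the getsockopt side later in the patch shifts the gain by 17 and does not report the 0x20000 bit, so get and set are not exact mirrors):

#include <stdint.h>
#include <stdbool.h>

/*
 * Build a TCP_SS_EEXIT optval:
 *   bits  0-7    round threshold (gp_rnd_thresh)
 *   bit  0x10000 gate_to_fs
 *   bit  0x20000 use_gp_not_last
 *   bits 18-31   goodput gain requirement (only honored when >= 1000)
 * Passing 0 instead disables early slow-start exit.
 */
static uint32_t
encode_ss_eexit(uint8_t rnd_thresh, bool gate_to_fs, bool use_gp_not_last,
    uint32_t gain_req)
{
	uint32_t optval;

	optval = rnd_thresh;
	if (gate_to_fs)
		optval |= 0x10000;
	if (use_gp_not_last)
		optval |= 0x20000;
	optval |= (gain_req & 0x3fff) << 18;
	return (optval);
}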
things in socket-options */ + /* TCP_RACK_PROFILE we can't know but we can set DGP if its on */ + if ((src->dgp_on) && (dest->dgp_on == 0)) { + /* Profile 1 had to be set via sock opt */ + rack_set_dgp(dest); + cnt++; + } + /* TCP_RACK_SET_RXT_OPTIONS */ + if (dest->full_size_rxt != src->full_size_rxt) { + dest->full_size_rxt = src->full_size_rxt; + cnt++; + } + if (dest->shape_rxt_to_pacing_min != src->shape_rxt_to_pacing_min) { + dest->shape_rxt_to_pacing_min = src->shape_rxt_to_pacing_min; + cnt++; + } + /* TCP_RACK_DSACK_OPT */ + if (dest->rc_rack_tmr_std_based != src->rc_rack_tmr_std_based) { + dest->rc_rack_tmr_std_based = src->rc_rack_tmr_std_based; + cnt++; + } + if (dest->rc_rack_use_dsack != src->rc_rack_use_dsack) { + dest->rc_rack_use_dsack = src->rc_rack_use_dsack; + cnt++; + } + /* TCP_RACK_PACING_DIVISOR */ + if (dest->r_ctl.pace_len_divisor != src->r_ctl.pace_len_divisor) { + dest->r_ctl.pace_len_divisor = src->r_ctl.pace_len_divisor; + cnt++; + } + /* TCP_RACK_HI_BETA */ + if (src->rack_hibeta != dest->rack_hibeta) { + cnt++; + if (src->rack_hibeta) { + dest->r_ctl.rc_saved_beta.beta = src->r_ctl.rc_saved_beta.beta; + dest->rack_hibeta = 1; + } else { + dest->rack_hibeta = 0; + } + } + /* TCP_RACK_TIMER_SLOP */ + if (dest->r_ctl.timer_slop != src->r_ctl.timer_slop) { + dest->r_ctl.timer_slop = src->r_ctl.timer_slop; + cnt++; + } + /* TCP_RACK_PACING_BETA_ECN */ + if (dest->r_ctl.rc_saved_beta.beta_ecn != src->r_ctl.rc_saved_beta.beta_ecn) { + dest->r_ctl.rc_saved_beta.beta_ecn = src->r_ctl.rc_saved_beta.beta_ecn; + cnt++; + } + if (dest->r_ctl.rc_saved_beta.newreno_flags != src->r_ctl.rc_saved_beta.newreno_flags) { + dest->r_ctl.rc_saved_beta.newreno_flags = src->r_ctl.rc_saved_beta.newreno_flags; + cnt++; + } + /* We do not do TCP_DEFER_OPTIONS */ + /* TCP_RACK_MEASURE_CNT */ + if (dest->r_ctl.req_measurements != src->r_ctl.req_measurements) { + dest->r_ctl.req_measurements = src->r_ctl.req_measurements; + cnt++; + } + /* TCP_HDWR_UP_ONLY */ + if (dest->r_up_only != src->r_up_only) { + dest->r_up_only = src->r_up_only; + cnt++; + } + /* TCP_FILLCW_RATE_CAP */ + if (dest->r_ctl.fillcw_cap != src->r_ctl.fillcw_cap) { + dest->r_ctl.fillcw_cap = src->r_ctl.fillcw_cap; + cnt++; + } + /* TCP_PACING_RATE_CAP */ + if (dest->r_ctl.bw_rate_cap != src->r_ctl.bw_rate_cap) { + dest->r_ctl.bw_rate_cap = src->r_ctl.bw_rate_cap; + cnt++; + } + /* A listener can't set TCP_HYBRID_PACING */ + /* TCP_SIDECHAN_DIS */ + if (dest->r_ctl.side_chan_dis_mask != src->r_ctl.side_chan_dis_mask) { + dest->r_ctl.side_chan_dis_mask = src->r_ctl.side_chan_dis_mask; + cnt++; + } + /* TCP_SHARED_CWND_TIME_LIMIT */ + if (dest->r_limit_scw != src->r_limit_scw) { + dest->r_limit_scw = src->r_limit_scw; + cnt++; + } + /* TCP_POLICER_DETECT */ + if (dest->r_ctl.policer_rxt_threshold != src->r_ctl.policer_rxt_threshold) { + dest->r_ctl.policer_rxt_threshold = src->r_ctl.policer_rxt_threshold; + cnt++; + } + if (dest->r_ctl.policer_avg_threshold != src->r_ctl.policer_avg_threshold) { + dest->r_ctl.policer_avg_threshold = src->r_ctl.policer_avg_threshold; + cnt++; + } + if (dest->r_ctl.policer_med_threshold != src->r_ctl.policer_med_threshold) { + dest->r_ctl.policer_med_threshold = src->r_ctl.policer_med_threshold; + cnt++; + } + if (dest->policer_detect_on != src->policer_detect_on) { + dest->policer_detect_on = src->policer_detect_on; + cnt++; + } + + if (dest->r_ctl.saved_policer_val != src->r_ctl.saved_policer_val) { + dest->r_ctl.saved_policer_val = src->r_ctl.saved_policer_val; + cnt++; + } + /* 
TCP_POLICER_MSS */ + if (dest->r_ctl.policer_del_mss != src->r_ctl.policer_del_mss) { + dest->r_ctl.policer_del_mss = src->r_ctl.policer_del_mss; + cnt++; + } + + if (dest->r_ctl.pol_bw_comp != src->r_ctl.pol_bw_comp) { + dest->r_ctl.pol_bw_comp = src->r_ctl.pol_bw_comp; + cnt++; + } + + if (dest->r_ctl.policer_alt_median != src->r_ctl.policer_alt_median) { + dest->r_ctl.policer_alt_median = src->r_ctl.policer_alt_median; + cnt++; + } + /* TCP_RACK_PACE_TO_FILL */ + if (dest->rc_pace_to_cwnd != src->rc_pace_to_cwnd) { + dest->rc_pace_to_cwnd = src->rc_pace_to_cwnd; + cnt++; + } + if (dest->rc_pace_fill_if_rttin_range != src->rc_pace_fill_if_rttin_range) { + dest->rc_pace_fill_if_rttin_range = src->rc_pace_fill_if_rttin_range; + cnt++; + } + if (dest->rtt_limit_mul != src->rtt_limit_mul) { + dest->rtt_limit_mul = src->rtt_limit_mul; + cnt++; + } + /* TCP_RACK_NO_PUSH_AT_MAX */ + if (dest->r_ctl.rc_no_push_at_mrtt != src->r_ctl.rc_no_push_at_mrtt) { + dest->r_ctl.rc_no_push_at_mrtt = src->r_ctl.rc_no_push_at_mrtt; + cnt++; + } + /* TCP_SHARED_CWND_ENABLE */ + if (dest->rack_enable_scwnd != src->rack_enable_scwnd) { + dest->rack_enable_scwnd = src->rack_enable_scwnd; + cnt++; + } + /* TCP_USE_CMP_ACKS */ + if (dest->r_use_cmp_ack != src->r_use_cmp_ack) { + dest->r_use_cmp_ack = src->r_use_cmp_ack; + cnt++; + } + + if (dest->r_mbuf_queue != src->r_mbuf_queue) { + dest->r_mbuf_queue = src->r_mbuf_queue; + cnt++; + } + /* TCP_RACK_MBUF_QUEUE */ + if (dest->r_mbuf_queue != src->r_mbuf_queue) { + dest->r_mbuf_queue = src->r_mbuf_queue; + cnt++; + } + if (dest->r_mbuf_queue || dest->rc_always_pace || dest->r_use_cmp_ack) { + tp->t_flags2 |= TF2_SUPPORTS_MBUFQ; + } else { + tp->t_flags2 &= ~TF2_SUPPORTS_MBUFQ; + } + if (dest->r_use_cmp_ack && TCPS_HAVEESTABLISHED(tp->t_state)) { + tp->t_flags2 |= TF2_MBUF_ACKCMP; + } + /* TCP_RACK_NONRXT_CFG_RATE */ + if (dest->rack_rec_nonrxt_use_cr != src->rack_rec_nonrxt_use_cr) { + dest->rack_rec_nonrxt_use_cr = src->rack_rec_nonrxt_use_cr; + cnt++; + } + /* TCP_NO_PRR */ + if (dest->rack_no_prr != src->rack_no_prr) { + dest->rack_no_prr = src->rack_no_prr; + cnt++; + } + if (dest->no_prr_addback != src->no_prr_addback) { + dest->no_prr_addback = src->no_prr_addback; + cnt++; + } + /* RACK_CSPR_IS_FCC */ + if (dest->cspr_is_fcc != src->cspr_is_fcc) { + dest->cspr_is_fcc = src->cspr_is_fcc; + cnt++; + } + /* TCP_TIMELY_DYN_ADJ */ + if (dest->rc_gp_dyn_mul != src->rc_gp_dyn_mul) { + dest->rc_gp_dyn_mul = src->rc_gp_dyn_mul; + cnt++; + } + if (dest->r_ctl.rack_per_of_gp_ca != src->r_ctl.rack_per_of_gp_ca) { + dest->r_ctl.rack_per_of_gp_ca = src->r_ctl.rack_per_of_gp_ca; + cnt++; + } + /* TCP_RACK_DO_DETECTION */ + if (dest->do_detection != src->do_detection) { + dest->do_detection = src->do_detection; + cnt++; + } + /* TCP_RACK_TLP_USE */ + if (dest->rack_tlp_threshold_use != src->rack_tlp_threshold_use) { + dest->rack_tlp_threshold_use = src->rack_tlp_threshold_use; + cnt++; + } + /* we don't allow inheritence of TCP_RACK_PACE_ALWAYS */ + /* TCP_BBR_RACK_INIT_RATE */ + if (dest->r_ctl.init_rate != src->r_ctl.init_rate) { + dest->r_ctl.init_rate = src->r_ctl.init_rate; + cnt++; + } + /* TCP_RACK_FORCE_MSEG */ + if (dest->rc_force_max_seg != src->rc_force_max_seg) { + dest->rc_force_max_seg = src->rc_force_max_seg; + cnt++; + } + /* TCP_RACK_PACE_MIN_SEG */ + if (dest->r_ctl.rc_user_set_min_segs != src->r_ctl.rc_user_set_min_segs) { + dest->r_ctl.rc_user_set_min_segs = src->r_ctl.rc_user_set_min_segs; + cnt++; + } + /* we don't allow TCP_RACK_PACE_MAX_SEG */ + /* 
TCP_RACK_PACE_RATE_REC, TCP_RACK_PACE_RATE_SS, TCP_RACK_PACE_RATE_CA */ + if (dest->r_ctl.rc_fixed_pacing_rate_ca != src->r_ctl.rc_fixed_pacing_rate_ca) { + dest->r_ctl.rc_fixed_pacing_rate_ca = src->r_ctl.rc_fixed_pacing_rate_ca; + cnt++; + } + if (dest->r_ctl.rc_fixed_pacing_rate_ss != src->r_ctl.rc_fixed_pacing_rate_ss) { + dest->r_ctl.rc_fixed_pacing_rate_ss = src->r_ctl.rc_fixed_pacing_rate_ss; + cnt++; + } + if (dest->r_ctl.rc_fixed_pacing_rate_rec != src->r_ctl.rc_fixed_pacing_rate_rec) { + dest->r_ctl.rc_fixed_pacing_rate_rec = src->r_ctl.rc_fixed_pacing_rate_rec; + cnt++; + } + /* TCP_RACK_GP_INCREASE_REC, TCP_RACK_GP_INCREASE_CA, TCP_RACK_GP_INCREASE_SS */ + if (dest->r_ctl.rack_per_of_gp_rec != src->r_ctl.rack_per_of_gp_rec) { + dest->r_ctl.rack_per_of_gp_rec = src->r_ctl.rack_per_of_gp_rec; + cnt++; + } + if (dest->r_ctl.rack_per_of_gp_ca != src->r_ctl.rack_per_of_gp_ca) { + dest->r_ctl.rack_per_of_gp_ca = src->r_ctl.rack_per_of_gp_ca; + cnt++; + } + + if (dest->r_ctl.rack_per_of_gp_ss != src->r_ctl.rack_per_of_gp_ss) { + dest->r_ctl.rack_per_of_gp_ss = src->r_ctl.rack_per_of_gp_ss; + cnt++; + } + /* TCP_RACK_RR_CONF */ + if (dest->r_rr_config != src->r_rr_config) { + dest->r_rr_config = src->r_rr_config; + cnt++; + } + /* TCP_PACING_DND */ + if (dest->rc_pace_dnd != src->rc_pace_dnd) { + dest->rc_pace_dnd = src->rc_pace_dnd; + cnt++; + } + /* TCP_HDWR_RATE_CAP */ + if (dest->r_rack_hw_rate_caps != src->r_rack_hw_rate_caps) { + dest->r_rack_hw_rate_caps = src->r_rack_hw_rate_caps; + cnt++; + } + /* TCP_DGP_UPPER_BOUNDS */ + if (dest->r_ctl.rack_per_upper_bound_ca != src->r_ctl.rack_per_upper_bound_ca) { + dest->r_ctl.rack_per_upper_bound_ca = src->r_ctl.rack_per_upper_bound_ca; + cnt++; + } + if (dest->r_ctl.rack_per_upper_bound_ss != src->r_ctl.rack_per_upper_bound_ss) { + dest->r_ctl.rack_per_upper_bound_ss = src->r_ctl.rack_per_upper_bound_ss; + cnt++; + } + /* TCP_SS_EEXIT */ + if (dest->r_ctl.gp_rnd_thresh != src->r_ctl.gp_rnd_thresh) { + dest->r_ctl.gp_rnd_thresh = src->r_ctl.gp_rnd_thresh; + cnt++; + } + if (dest->r_ctl.gate_to_fs != src->r_ctl.gate_to_fs) { + dest->r_ctl.gate_to_fs = src->r_ctl.gate_to_fs; + cnt++; + } + if (dest->r_ctl.use_gp_not_last != src->r_ctl.use_gp_not_last) { + dest->r_ctl.use_gp_not_last = src->r_ctl.use_gp_not_last; + cnt++; + } + if (dest->r_ctl.gp_gain_req != src->r_ctl.gp_gain_req) { + dest->r_ctl.gp_gain_req = src->r_ctl.gp_gain_req; + cnt++; + } + /* TCP_BBR_HDWR_PACE */ + if (dest->rack_hdw_pace_ena != src->rack_hdw_pace_ena) { + dest->rack_hdw_pace_ena = src->rack_hdw_pace_ena; + cnt++; + } + if (dest->rack_attempt_hdwr_pace != src->rack_attempt_hdwr_pace) { + dest->rack_attempt_hdwr_pace = src->rack_attempt_hdwr_pace; + cnt++; + } + /* TCP_RACK_PRR_SENDALOT */ + if (dest->r_ctl.rc_prr_sendalot != src->r_ctl.rc_prr_sendalot) { + dest->r_ctl.rc_prr_sendalot = src->r_ctl.rc_prr_sendalot; + cnt++; + } + /* TCP_RACK_MIN_TO */ + if (dest->r_ctl.rc_min_to != src->r_ctl.rc_min_to) { + dest->r_ctl.rc_min_to = src->r_ctl.rc_min_to; + cnt++; + } + /* TCP_RACK_EARLY_SEG */ + if (dest->r_ctl.rc_early_recovery_segs != src->r_ctl.rc_early_recovery_segs) { + dest->r_ctl.rc_early_recovery_segs = src->r_ctl.rc_early_recovery_segs; + cnt++; + } + /* TCP_RACK_ENABLE_HYSTART */ + if (par->t_ccv.flags != tp->t_ccv.flags) { + cnt++; + if (par->t_ccv.flags & CCF_HYSTART_ALLOWED) { + tp->t_ccv.flags |= CCF_HYSTART_ALLOWED; + if (rack_do_hystart > RACK_HYSTART_ON) + tp->t_ccv.flags |= CCF_HYSTART_CAN_SH_CWND; + if (rack_do_hystart > RACK_HYSTART_ON_W_SC) + 
tp->t_ccv.flags |= CCF_HYSTART_CONS_SSTH; + } else { + tp->t_ccv.flags &= ~(CCF_HYSTART_ALLOWED|CCF_HYSTART_CAN_SH_CWND|CCF_HYSTART_CONS_SSTH); + } + } + /* TCP_RACK_REORD_THRESH */ + if (dest->r_ctl.rc_reorder_shift != src->r_ctl.rc_reorder_shift) { + dest->r_ctl.rc_reorder_shift = src->r_ctl.rc_reorder_shift; + cnt++; + } + /* TCP_RACK_REORD_FADE */ + if (dest->r_ctl.rc_reorder_fade != src->r_ctl.rc_reorder_fade) { + dest->r_ctl.rc_reorder_fade = src->r_ctl.rc_reorder_fade; + cnt++; + } + /* TCP_RACK_TLP_THRESH */ + if (dest->r_ctl.rc_tlp_threshold != src->r_ctl.rc_tlp_threshold) { + dest->r_ctl.rc_tlp_threshold = src->r_ctl.rc_tlp_threshold; + cnt++; + } + /* TCP_BBR_USE_RACK_RR */ + if (dest->use_rack_rr != src->use_rack_rr) { + dest->use_rack_rr = src->use_rack_rr; + cnt++; + } + /* TCP_RACK_PKT_DELAY */ + if (dest->r_ctl.rc_pkt_delay != src->r_ctl.rc_pkt_delay) { + dest->r_ctl.rc_pkt_delay = src->r_ctl.rc_pkt_delay; + cnt++; + } + /* TCP_DELACK will get copied via the main code if applicable */ + /* TCP_BBR_RACK_RTT_USE */ + if (dest->r_ctl.rc_rate_sample_method != src->r_ctl.rc_rate_sample_method) { + dest->r_ctl.rc_rate_sample_method = src->r_ctl.rc_rate_sample_method; + cnt++; + } + /* TCP_HONOR_HPTS_MIN */ + if (dest->r_use_hpts_min != src->r_use_hpts_min) { + dest->r_use_hpts_min = src->r_use_hpts_min; + cnt++; + } + if (dest->r_ctl.max_reduction != src->r_ctl.max_reduction) { + dest->r_ctl.max_reduction = src->r_ctl.max_reduction; + cnt++; + } + /* TCP_REC_IS_DYN */ + if (dest->rc_gp_no_rec_chg != src->rc_gp_no_rec_chg) { + dest->rc_gp_no_rec_chg = src->rc_gp_no_rec_chg; + cnt++; + } + if (dest->rc_skip_timely != src->rc_skip_timely) { + dest->rc_skip_timely = src->rc_skip_timely; + cnt++; + } + /* TCP_DATA_AFTER_CLOSE */ + if (dest->rc_allow_data_af_clo != src->rc_allow_data_af_clo) { + dest->rc_allow_data_af_clo = src->rc_allow_data_af_clo; + cnt++; + } + /* TCP_GP_USE_LTBW */ + if (src->use_lesser_lt_bw != dest->use_lesser_lt_bw) { + dest->use_lesser_lt_bw = src->use_lesser_lt_bw; + cnt++; + } + if (dest->dis_lt_bw != src->dis_lt_bw) { + dest->dis_lt_bw = src->dis_lt_bw; + cnt++; + } + tcp_log_socket_option(tp, 0, cnt, 0); +} + static void rack_apply_deferred_options(struct tcp_rack *rack) @@ -23778,7 +25919,10 @@ .tfb_switch_failed = rack_switch_failed, .tfb_early_wake_check = rack_wake_check, .tfb_compute_pipe = rack_compute_pipe, + .tfb_stack_info = rack_stack_information, + .tfb_inherit = rack_inherit, .tfb_flags = TCP_FUNC_OUTPUT_CANDROP, + }; /* @@ -23846,7 +25990,6 @@ /* Already read in and sanity checked in sosetopt(). 
*/ if (inp->inp_socket) { rack->client_bufferlvl = inp->inp_socket->so_peerprio; - rack_client_buffer_level_set(rack); } break; } @@ -23859,7 +26002,6 @@ /* Pacing related ones */ case TCP_RACK_PACE_ALWAYS: /* URL:pace_always */ case TCP_BBR_RACK_INIT_RATE: /* URL:irate */ - case TCP_BBR_IWINTSO: /* URL:tso_iwin */ case TCP_RACK_PACE_MIN_SEG: /* URL:pace_min_seg */ case TCP_RACK_PACE_MAX_SEG: /* URL:pace_max_seg */ case TCP_RACK_FORCE_MSEG: /* URL:force_max_seg */ @@ -23874,12 +26016,12 @@ case TCP_HDWR_RATE_CAP: /* URL:hdwrcap boolean */ case TCP_PACING_RATE_CAP: /* URL:cap -- used by side-channel */ case TCP_HDWR_UP_ONLY: /* URL:uponly -- hardware pacing boolean */ - case TCP_RACK_PACING_BETA: /* URL:pacing_beta */ + case TCP_FILLCW_RATE_CAP: /* URL:fillcw_cap */ case TCP_RACK_PACING_BETA_ECN: /* URL:pacing_beta_ecn */ case TCP_RACK_PACE_TO_FILL: /* URL:fillcw */ - case TCP_RACK_DGP_IN_REC: /* URL:dgpinrec */ /* End pacing related */ - case TCP_RXT_CLAMP: /* URL:rxtclamp */ + case TCP_POLICER_DETECT: /* URL:pol_det */ + case TCP_POLICER_MSS: /* URL:pol_mss */ case TCP_DELACK: /* URL:delack (in base TCP i.e. tcp_hints along with cc etc ) */ case TCP_RACK_PRR_SENDALOT: /* URL:prr_sendalot */ case TCP_RACK_MIN_TO: /* URL:min_to */ @@ -23901,7 +26043,8 @@ case TCP_RACK_NO_PUSH_AT_MAX: /* URL:npush */ case TCP_SHARED_CWND_TIME_LIMIT: /* URL:lscwnd */ case TCP_RACK_PROFILE: /* URL:profile */ - case TCP_HYBRID_PACING: /* URL:hybrid */ + case TCP_SIDECHAN_DIS: /* URL:scodm */ + case TCP_HYBRID_PACING: /* URL:pacing=hybrid */ case TCP_USE_CMP_ACKS: /* URL:cmpack */ case TCP_RACK_ABC_VAL: /* URL:labc */ case TCP_REC_ABC_VAL: /* URL:reclabc */ @@ -23913,8 +26056,15 @@ case TCP_RACK_SET_RXT_OPTIONS: /* URL:rxtsz */ case TCP_RACK_HI_BETA: /* URL:hibeta */ case TCP_RACK_SPLIT_LIMIT: /* URL:split */ + case TCP_SS_EEXIT: /* URL:eexit */ + case TCP_DGP_UPPER_BOUNDS: /* URL:upper */ case TCP_RACK_PACING_DIVISOR: /* URL:divisor */ case TCP_PACING_DND: /* URL:dnd */ + case TCP_NO_TIMELY: /* URL:notimely */ + case RACK_CSPR_IS_FCC: /* URL:csprisfcc */ + case TCP_HONOR_HPTS_MIN: /* URL:hptsmin */ + case TCP_REC_IS_DYN: /* URL:dynrec */ + case TCP_GP_USE_LTBW: /* URL:useltbw */ goto process_opt; break; default: @@ -23922,14 +26072,14 @@ return (tcp_default_ctloutput(tp, sopt)); break; } - default: INP_WUNLOCK(inp); return (0); } process_opt: INP_WUNLOCK(inp); - if (sopt->sopt_name == TCP_PACING_RATE_CAP) { + if ((sopt->sopt_name == TCP_PACING_RATE_CAP) || + (sopt->sopt_name == TCP_FILLCW_RATE_CAP)) { error = sooptcopyin(sopt, &loptval, sizeof(loptval), sizeof(loptval)); /* * We truncate it down to 32 bits for the socket-option trace this @@ -23953,11 +26103,10 @@ if (rack->defer_options && (rack->gp_ready == 0) && (sopt->sopt_name != TCP_DEFER_OPTIONS) && (sopt->sopt_name != TCP_HYBRID_PACING) && - (sopt->sopt_name != TCP_RACK_PACING_BETA) && (sopt->sopt_name != TCP_RACK_SET_RXT_OPTIONS) && (sopt->sopt_name != TCP_RACK_PACING_BETA_ECN) && (sopt->sopt_name != TCP_RACK_MEASURE_CNT)) { - /* Options are beind deferred */ + /* Options are being deferred */ if (rack_add_deferred_option(rack, sopt->sopt_name, loptval)) { INP_WUNLOCK(inp); return (0); @@ -24016,6 +26165,7 @@ ti->tcpi_snd_zerowin = tp->t_sndzerowin; ti->tcpi_total_tlp = tp->t_sndtlppack; ti->tcpi_total_tlp_bytes = tp->t_sndtlpbyte; + ti->tcpi_rttmin = tp->t_rttlow; #ifdef NETFLIX_STATS memcpy(&ti->tcpi_rxsyninfo, &tp->t_rxsyninfo, sizeof(struct tcpsyninfo)); #endif @@ -24062,21 +26212,6 @@ * when you exit recovery. 
*/ case TCP_RACK_PACING_BETA: - if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0) - error = EINVAL; - else if (rack->rc_pacing_cc_set == 0) - optval = rack->r_ctl.rc_saved_beta.beta; - else { - /* - * Reach out into the CC data and report back what - * I have previously set. Yeah it looks hackish but - * we don't want to report the saved values. - */ - if (tp->t_ccv.cc_data) - optval = ((struct newreno *)tp->t_ccv.cc_data)->beta; - else - error = EINVAL; - } break; /* * Beta_ecn is the congestion control value for NewReno that influences how @@ -24112,7 +26247,7 @@ optval |= 2; } break; - case TCP_RACK_ENABLE_HYSTART: + case TCP_RACK_ENABLE_HYSTART: { if (tp->t_ccv.flags & CCF_HYSTART_ALLOWED) { optval = RACK_HYSTART_ON; @@ -24126,13 +26261,16 @@ } break; case TCP_RACK_DGP_IN_REC: - optval = rack->r_ctl.full_dgp_in_rec; + error = EINVAL; break; case TCP_RACK_HI_BETA: optval = rack->rack_hibeta; break; - case TCP_RXT_CLAMP: - optval = rack->r_ctl.saved_rxt_clamp_val; + case TCP_POLICER_MSS: + optval = rack->r_ctl.policer_del_mss; + break; + case TCP_POLICER_DETECT: + optval = rack->r_ctl.saved_policer_val; break; case TCP_DEFER_OPTIONS: optval = rack->defer_options; @@ -24149,6 +26287,9 @@ case TCP_HDWR_UP_ONLY: optval= rack->r_up_only; break; + case TCP_FILLCW_RATE_CAP: + loptval = rack->r_ctl.fillcw_cap; + break; case TCP_PACING_RATE_CAP: loptval = rack->r_ctl.bw_rate_cap; break; @@ -24156,6 +26297,9 @@ /* You cannot retrieve a profile, its write only */ error = EINVAL; break; + case TCP_SIDECHAN_DIS: + optval = rack->r_ctl.side_chan_dis_mask; + break; case TCP_HYBRID_PACING: /* You cannot retrieve hybrid pacing information, its write only */ error = EINVAL; @@ -24165,8 +26309,6 @@ break; case TCP_RACK_PACE_TO_FILL: optval = rack->rc_pace_to_cwnd; - if (optval && rack->r_fill_less_agg) - optval++; break; case TCP_RACK_NO_PUSH_AT_MAX: optval = rack->r_ctl.rc_no_push_at_mrtt; @@ -24185,6 +26327,18 @@ else optval = 0; break; + case TCP_GP_USE_LTBW: + if (rack->dis_lt_bw) { + /* It is not used */ + optval = 0; + } else if (rack->use_lesser_lt_bw) { + /* we use min() */ + optval = 1; + } else { + /* we use max() */ + optval = 2; + } + break; case TCP_RACK_DO_DETECTION: optval = rack->do_detection; break; @@ -24192,11 +26346,14 @@ /* Now do we use the LRO mbuf-queue feature */ optval = rack->r_mbuf_queue; break; + case RACK_CSPR_IS_FCC: + optval = rack->cspr_is_fcc; + break; case TCP_TIMELY_DYN_ADJ: optval = rack->rc_gp_dyn_mul; break; case TCP_BBR_IWINTSO: - optval = rack->rc_init_win; + error = EINVAL; break; case TCP_RACK_TLP_REDUCE: /* RACK TLP cwnd reduction (bool) */ @@ -24242,6 +26399,18 @@ /* RACK reorder threshold (shift amount) */ optval = rack->r_ctl.rc_reorder_shift; break; + case TCP_SS_EEXIT: + if (rack->r_ctl.gp_rnd_thresh) { + uint32_t v; + + v = rack->r_ctl.gp_gain_req; + v <<= 17; + optval = v | (rack->r_ctl.gp_rnd_thresh & 0xff); + if (rack->r_ctl.gate_to_fs == 1) + optval |= 0x10000; + } else + optval = 0; + break; case TCP_RACK_REORD_FADE: /* Does reordering fade after ms time */ optval = rack->r_ctl.rc_reorder_fade; @@ -24282,6 +26451,11 @@ case TCP_RACK_PACE_RATE_REC: optval = rack->r_ctl.rc_fixed_pacing_rate_rec; break; + case TCP_DGP_UPPER_BOUNDS: + optval = rack->r_ctl.rack_per_upper_bound_ss; + optval <<= 16; + optval |= rack->r_ctl.rack_per_upper_bound_ca; + break; case TCP_RACK_GP_INCREASE_SS: optval = rack->r_ctl.rack_per_of_gp_ca; break; @@ -24303,6 +26477,18 @@ case TCP_SHARED_CWND_TIME_LIMIT: optval = rack->r_limit_scw; break; + case TCP_HONOR_HPTS_MIN: + if 
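The TCP_GP_USE_LTBW getter below and its setter earlier give three behaviors for the long-term bandwidth estimate: 0 ignores it, 1 takes the lesser of lt_bw and the goodput estimate, 2 takes the greater. A hedged sketch of how such a selection would combine the two estimates; the combination itself happens elsewhere in rack.c and is not shown in this hunk:

#include <stdint.h>

#define LTBW_OFF	0	/* lt_bw is not used */
#define LTBW_MIN	1	/* use min(gp_est, lt_bw) */
#define LTBW_MAX	2	/* use max(gp_est, lt_bw) */

static uint64_t
apply_lt_bw(uint64_t gp_est, uint64_t lt_bw, int mode)
{
	if (mode == LTBW_OFF || lt_bw == 0)
		return (gp_est);
	if (mode == LTBW_MIN)
		return (gp_est < lt_bw ? gp_est : lt_bw);
	return (gp_est > lt_bw ? gp_est : lt_bw);
}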
(rack->r_use_hpts_min) + optval = rack->r_ctl.max_reduction; + else + optval = 0; + break; + case TCP_REC_IS_DYN: + optval = rack->rc_gp_no_rec_chg; + break; + case TCP_NO_TIMELY: + optval = rack->rc_skip_timely; + break; case TCP_RACK_TIMER_SLOP: optval = rack->r_ctl.timer_slop; break; @@ -24312,7 +26498,8 @@ } INP_WUNLOCK(inp); if (error == 0) { - if (TCP_PACING_RATE_CAP) + if ((sopt->sopt_name == TCP_PACING_RATE_CAP) || + (sopt->sopt_name == TCP_FILLCW_RATE_CAP)) error = sooptcopyout(sopt, &loptval, sizeof loptval); else error = sooptcopyout(sopt, &optval, sizeof optval); diff --git a/sys/netinet/tcp_stacks/rack_pcm.c b/sys/netinet/tcp_stacks/rack_pcm.c new file mode 100644 diff --git a/sys/netinet/tcp_stacks/sack_filter.h b/sys/netinet/tcp_stacks/sack_filter.h --- a/sys/netinet/tcp_stacks/sack_filter.h +++ b/sys/netinet/tcp_stacks/sack_filter.h @@ -51,5 +51,10 @@ int sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack); void sack_filter_reject(struct sack_filter *sf, struct sackblk *in); +static inline uint8_t sack_filter_blks_used(struct sack_filter *sf) +{ + return (sf->sf_used); +} + #endif #endif diff --git a/sys/netinet/tcp_stacks/tailq_hash.h b/sys/netinet/tcp_stacks/tailq_hash.h --- a/sys/netinet/tcp_stacks/tailq_hash.h +++ b/sys/netinet/tcp_stacks/tailq_hash.h @@ -13,10 +13,12 @@ #define MAX_ALLOWED_SEQ_RANGE (SEQ_BUCKET_SIZE * (MAX_HASH_ENTRIES-1)) struct tailq_hash { - struct rack_head ht[MAX_HASH_ENTRIES]; uint32_t min; uint32_t max; uint32_t count; + struct rack_sendmap *rsm_min; + struct rack_sendmap *rsm_max; + struct rack_head ht[MAX_HASH_ENTRIES]; }; struct rack_sendmap * @@ -53,6 +55,10 @@ int tqhash_trim(struct tailq_hash *hs, uint32_t th_ack); +void +tqhash_update_end(struct tailq_hash *hs, struct rack_sendmap *rsm, + uint32_t th_ack); + #define TQHASH_FOREACH(var, head) \ for ((var) = tqhash_min((head)); \ diff --git a/sys/netinet/tcp_stacks/tailq_hash.c b/sys/netinet/tcp_stacks/tailq_hash.c --- a/sys/netinet/tcp_stacks/tailq_hash.c +++ b/sys/netinet/tcp_stacks/tailq_hash.c @@ -65,7 +65,6 @@ #include #include #include -#include #include #include #include @@ -100,6 +99,7 @@ #include "sack_filter.h" #include "tcp_rack.h" #include "tailq_hash.h" +#include "opt_global.h" struct rack_sendmap * @@ -107,7 +107,7 @@ { struct rack_sendmap *rsm; - rsm = tqhash_find(hs, hs->min); + rsm = hs->rsm_min; return(rsm); } @@ -116,7 +116,7 @@ { struct rack_sendmap *rsm; - rsm = tqhash_find(hs, (hs->max - 1)); + rsm = hs->rsm_max; return (rsm); } @@ -224,13 +224,19 @@ void tqhash_remove(struct tailq_hash *hs, struct rack_sendmap *rsm, int type) { - TAILQ_REMOVE(&hs->ht[rsm->bindex], rsm, next); + hs->count--; if (hs->count == 0) { hs->min = hs->max; + hs->rsm_max = hs->rsm_min = NULL; } else if (type == REMOVE_TYPE_CUMACK) { hs->min = rsm->r_end; + hs->rsm_min = tqhash_next(hs, rsm); + } else if (rsm == hs->rsm_max) { + hs->rsm_max = tqhash_prev(hs, rsm); + hs->max = hs->rsm_max->r_end; } + TAILQ_REMOVE(&hs->ht[rsm->bindex], rsm, next); } int @@ -240,6 +246,7 @@ int inserted = 0; uint32_t ebucket; +#ifdef INVARIANTS if (hs->count > 0) { if ((rsm->r_end - hs->min) > MAX_ALLOWED_SEQ_RANGE) { return (-1); @@ -249,6 +256,7 @@ return (-2); } } +#endif rsm->bindex = rsm->r_start / SEQ_BUCKET_SIZE; rsm->bindex %= MAX_HASH_ENTRIES; ebucket = rsm->r_end / SEQ_BUCKET_SIZE; @@ -263,13 +271,17 @@ /* Special case */ hs->min = rsm->r_start; hs->max = rsm->r_end; + hs->rsm_min = hs->rsm_max = rsm; hs->count = 1; } else { hs->count++; - if (SEQ_GT(rsm->r_end, 
hs->max)) + if (SEQ_GEQ(rsm->r_end, hs->max)) { hs->max = rsm->r_end; - if (SEQ_LT(rsm->r_start, hs->min)) + hs->rsm_max = rsm; + } if (SEQ_LEQ(rsm->r_start, hs->min)) { hs->min = rsm->r_start; + hs->rsm_min = rsm; + } } /* Check the common case of inserting at the end */ l = TAILQ_LAST(&hs->ht[rsm->bindex], rack_head); @@ -299,6 +311,7 @@ TAILQ_INIT(&hs->ht[i]); } hs->min = hs->max = 0; + hs->rsm_min = hs->rsm_max = NULL; hs->count = 0; } @@ -339,3 +352,11 @@ return (0); } +void +tqhash_update_end(struct tailq_hash *hs, struct rack_sendmap *rsm, + uint32_t th_ack) +{ + if (hs->max == rsm->r_end) + hs->max = th_ack; + rsm->r_end = th_ack; +} diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h --- a/sys/netinet/tcp_stacks/tcp_rack.h +++ b/sys/netinet/tcp_stacks/tcp_rack.h @@ -48,6 +48,8 @@ #define RACK_MERGED 0x080000/* The RSM was merged */ #define RACK_PMTU_CHG 0x100000/* The path mtu changed on this guy */ #define RACK_STRADDLE 0x200000/* The seq straddles the bucket line */ +#define RACK_WAS_LOST 0x400000/* Is the rsm considered lost */ +#define RACK_IS_PCM 0x800000/* A PCM measurement is being taken */ #define RACK_NUM_OF_RETRANS 3 #define RACK_INITIAL_RTO 1000000 /* 1 second in microseconds */ @@ -63,6 +65,7 @@ uint32_t r_rtr_bytes; /* How many bytes have been retransmitted */ uint32_t r_flags : 24, /* Flags as defined above */ r_rtr_cnt : 8; /* Retran count, index this -1 to get time */ + uint32_t r_act_rxt_cnt; /* The actual total count of transmits */ struct mbuf *m; uint32_t soff; uint32_t orig_m_len; /* The original mbuf len when we sent (can update) */ @@ -174,6 +177,8 @@ #define RACK_TO_FRM_PERSIST 5 #define RACK_TO_FRM_DELACK 6 +#define RCV_PATH_RTT_MS 10 /* How many ms between recv path RTT's */ + struct rack_opts_stats { uint64_t tcp_rack_tlp_reduce; uint64_t tcp_rack_pace_always; @@ -232,7 +237,7 @@ uint64_t tcp_rack_rtt_use; uint64_t tcp_data_after_close; uint64_t tcp_defer_opt; - uint64_t tcp_rxt_clamp; + uint64_t tcp_pol_detect; uint64_t tcp_rack_beta; uint64_t tcp_rack_beta_ecn; uint64_t tcp_rack_timer_slop; @@ -242,6 +247,11 @@ uint64_t tcp_rack_pacing_divisor; uint64_t tcp_rack_min_seg; uint64_t tcp_dgp_in_rec; + uint64_t tcp_notimely; + uint64_t tcp_honor_hpts; + uint64_t tcp_dyn_rec; + uint64_t tcp_fillcw_rate_cap; + uint64_t tcp_pol_mss; }; /* RTT shrink reasons */ @@ -263,6 +273,9 @@ #define TLP_USE_TWO_TWO 3 /* Use 2.2 behavior */ #define RACK_MIN_BW 8000 /* 64kbps in Bps */ +#define CCSP_DIS_MASK 0x0001 +#define HYBRID_DIS_MASK 0x0002 + /* Rack quality indicators for GPUT measurements */ #define RACK_QUALITY_NONE 0 /* No quality stated */ #define RACK_QUALITY_HIGH 1 /* A normal measurement of a GP RTT */ @@ -319,6 +332,7 @@ * */ #define RACK_GP_HIST 4 /* How much goodput history do we maintain? 
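The tailq_hash changes in this hunk replace the hash lookups in tqhash_min() and tqhash_max() with cached endpoint pointers that every insert and remove keeps up to date, so the common min/max queries become O(1). A container-agnostic sketch of the insert-side bookkeeping, with the SEQ_GEQ/SEQ_LEQ comparisons reduced to plain unsigned compares and the bucket linkage omitted:

#include <stdint.h>
#include <stddef.h>

struct entry {
	uint32_t start, end;
};

struct seq_container {
	uint32_t min, max, count;
	struct entry *ent_min, *ent_max;	/* cached endpoints */
};

static void
container_insert_endpoints(struct seq_container *c, struct entry *e)
{
	if (c->count++ == 0) {
		c->min = e->start;
		c->max = e->end;
		c->ent_min = c->ent_max = e;
		return;
	}
	if (e->end >= c->max) {		/* SEQ_GEQ in the real code */
		c->max = e->end;
		c->ent_max = e;
	}
	if (e->start <= c->min) {	/* SEQ_LEQ in the real code */
		c->min = e->start;
		c->ent_min = e;
	}
}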
*/ +#define RETRAN_CNT_SIZE 16 #define RACK_NUM_FSB_DEBUG 16 #ifdef _KERNEL @@ -342,6 +356,26 @@ struct tailq_hash; +struct rack_pcm_info { + /* Base send time and s/e filled in by rack_log_output */ + uint64_t send_time; + uint32_t sseq; + uint32_t eseq; + /* Ack's fill in the rest of the data */ + uint16_t cnt; + /* Maximum acks present */ + uint16_t cnt_alloc; +}; + +#define RACK_DEFAULT_PCM_ARRAY 16 + +struct rack_pcm_stats { + uint32_t sseq; + uint32_t eseq; + uint64_t ack_time; +}; + + struct rack_control { /* Second cache line 0x40 from tcp_rack */ struct tailq_hash *tqh; /* Tree of all segments Lock(a) */ @@ -402,6 +436,7 @@ uint32_t rc_rcvtime; /* When we last received data */ uint32_t rc_num_split_allocs; /* num split map entries allocated */ uint32_t rc_split_limit; /* Limit from control var can be set by socket opt */ + uint32_t rack_avg_rec_sends; uint32_t rc_last_output_to; uint32_t rc_went_idle_time; @@ -452,19 +487,45 @@ struct tcp_sendfile_track *rc_last_sft; uint32_t lt_seq; /* Seq at start of lt_bw gauge */ int32_t rc_rtt_diff; /* Timely style rtt diff of our gp_srtt */ - uint64_t last_sndbytes; - uint64_t last_snd_rxt_bytes; - uint64_t rxt_threshold; uint64_t last_tmit_time_acked; /* Holds the last cumack point's last send time */ - uint32_t last_rnd_rxt_clamped; - uint32_t num_of_clamps_applied; - uint32_t clamp_options; - uint32_t max_clamps; + /* Recovery stats */ + uint64_t time_entered_recovery; + uint64_t bytes_acked_in_recovery; + /* Policer Detection */ + uint64_t last_policer_sndbytes; + uint64_t last_policer_snd_rxt_bytes; + uint64_t policer_bw; + uint64_t last_sendtime; + + uint64_t last_gpest; + uint64_t last_tm_mark; /* Last tm mark used */ + uint64_t fillcw_cap; /* B/W cap on fill cw */ + struct rack_pcm_info pcm_i; + struct rack_pcm_stats *pcm_s; + uint32_t gp_gain_req; /* Percent off gp gain req */ + uint32_t last_rnd_of_gp_rise; + uint32_t gp_rnd_thresh; + uint32_t ss_hi_fs; + uint32_t gate_to_fs; + uint32_t policer_max_seg; + uint32_t pol_bw_comp; + uint16_t policer_rxt_threshold; + uint8_t policer_avg_threshold; + uint8_t policer_med_threshold; + uint32_t pcm_max_seg; + uint32_t last_pcm_round; + uint32_t pcm_idle_rounds; + uint32_t current_policer_bucket; + uint32_t policer_bucket_size; + uint32_t idle_snd_una; + uint32_t ack_for_idle; + uint32_t last_amount_before_rec; uint32_t rc_gp_srtt; /* Current GP srtt */ uint32_t rc_prev_gp_srtt; /* Previous RTT */ uint32_t rc_entry_gp_rtt; /* Entry to PRTT gp-rtt */ uint32_t rc_loss_at_start; /* At measurement window where was our lost value */ + uint32_t rc_considered_lost; /* Count in recovery of non-retransmitted bytes considered lost */ uint32_t dsack_round_end; /* In a round of seeing a DSACK */ uint32_t current_round; /* Starting at zero */ @@ -491,6 +552,8 @@ uint32_t rc_snd_max_at_rto; /* For non-sack when the RTO occurred what was snd-max */ uint32_t rc_out_at_rto; int32_t rc_scw_index; + uint32_t max_reduction; + uint32_t side_chan_dis_mask; /* Bit mask of socket opt's disabled */ uint32_t rc_tlp_threshold; /* Socket option value Lock(a) */ uint32_t rc_last_timeout_snduna; uint32_t last_tlp_acked_start; @@ -503,7 +566,11 @@ uint32_t ack_during_sd; uint32_t input_pkt; uint32_t saved_input_pkt; - uint32_t saved_rxt_clamp_val; /* The encoded value we used to setup clamping */ + uint32_t saved_policer_val; /* The encoded value we used to setup policer detection */ + uint32_t cleared_app_ack_seq; + uint32_t last_rcv_tstmp_for_rtt; + uint32_t last_time_of_arm_rcv; + uint32_t rto_ssthresh; struct newreno 
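The two new structures above describe how a path-capacity measurement is recorded: rack_pcm_info holds the send time and sequence range of the burst, and each arriving ack appends a rack_pcm_stats sample, up to cnt_alloc of them (RACK_DEFAULT_PCM_ARRAY by default). A hedged sketch of that append step, assuming a sample array of cnt_alloc entries allocated when the measurement starts; the actual sizing and the capacity math over the samples live elsewhere in rack.c:

#include <stdint.h>
#include <stddef.h>

struct pcm_sample {		/* mirrors rack_pcm_stats */
	uint32_t sseq, eseq;
	uint64_t ack_time;
};

struct pcm_record {		/* mirrors rack_pcm_info plus its sample array */
	uint64_t send_time;
	uint32_t sseq, eseq;
	uint16_t cnt, cnt_alloc;
	struct pcm_sample *samples;
};

/* Record one ack that covers [ss, es) of the measured burst. */
static int
pcm_add_ack(struct pcm_record *r, uint32_t ss, uint32_t es, uint64_t now)
{
	if (r->samples == NULL || r->cnt >= r->cnt_alloc)
		return (-1);	/* array full, the measurement ends */
	r->samples[r->cnt].sseq = ss;
	r->samples[r->cnt].eseq = es;
	r->samples[r->cnt].ack_time = now;
	r->cnt++;
	return (0);
}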
rc_saved_beta; /* * For newreno cc: * rc_saved_cc are the values we have had @@ -516,10 +583,13 @@ * we also set the flag (if ecn_beta is set) to make * new_reno do less of a backoff for ecn (think abe). */ + uint16_t rc_cnt_of_retran[RETRAN_CNT_SIZE]; uint16_t rc_early_recovery_segs; /* Socket option value Lock(a) */ uint16_t rc_reorder_shift; /* Socket option value Lock(a) */ + uint8_t policer_del_mss; /* How many mss during recovery for policer detection */ uint8_t rack_per_upper_bound_ss; uint8_t rack_per_upper_bound_ca; + uint8_t cleared_app_ack; uint8_t dsack_persist; uint8_t rc_no_push_at_mrtt; /* No push when we exceed max rtt */ uint8_t num_measurements; /* Number of measurements (up to 0xff, we freeze at 0xff) */ @@ -528,17 +598,19 @@ uint8_t rc_tlp_cwnd_reduce; /* Socket option value Lock(a) */ uint8_t rc_prr_sendalot;/* Socket option value Lock(a) */ uint8_t rc_rate_sample_method; - uint8_t rc_dgp_bl_agg; /* Buffer Level aggression during DGP */ + uint8_t policer_alt_median; /* Alternate median for policer detection */ uint8_t full_dgp_in_rec; /* Flag to say if we do full DGP in recovery */ uint8_t client_suggested_maxseg; /* Not sure what to do with this yet */ - uint8_t pacing_discount_amm; /* - * This is a multipler to the base discount that - * can be used to increase the discount. - */ + uint8_t use_gp_not_last; + uint8_t pacing_method; /* If pace_always, what type of pacing */ uint8_t already_had_a_excess; }; #endif +#define RACK_PACING_NONE 0x00 +#define RACK_DGP_PACING 0x01 +#define RACK_REG_PACING 0x02 + /* DGP with no buffer level mitigations */ #define DGP_LEVEL0 0 @@ -578,6 +650,10 @@ #define HYBRID_LOG_EXTEND 14 /* We extended the end */ #define HYBRID_LOG_SENT_LOST 15 /* A closing sent/lost report */ +#define LOST_ZERO 1 /* Zero it out */ +#define LOST_ADD 2 /* Add to it */ +#define LOST_SUB 3 /* Sub from it */ + #define RACK_TIMELY_CNT_BOOST 5 /* At 5th increase boost */ #define RACK_MINRTT_FILTER_TIM 10 /* Seconds */ @@ -590,6 +666,7 @@ */ #define MAX_USER_SET_SEG 0x3f /* The max we can set is 63 which is probably too many */ +#define RACK_FREE_CNT_MAX 0x2f /* Max our counter can do */ #ifdef _KERNEL @@ -601,8 +678,9 @@ int32_t, int32_t, uint32_t, int, int, uint8_t); /* Lock(a) */ struct tcpcb *rc_tp; /* The tcpcb Lock(a) */ struct inpcb *rc_inp; /* The inpcb Lock(a) */ - uint8_t rc_free_cnt; /* Number of free entries on the rc_free list - * Lock(a) */ + uint8_t rc_free_cnt : 6, + rc_skip_timely : 1, + pcm_enabled : 1; /* Is PCM enabled */ uint8_t client_bufferlvl : 3, /* Expected range [0,5]: 0=unset, 1=low/empty */ rack_deferred_inited : 1, /* ******************************************************************** */ @@ -612,11 +690,11 @@ shape_rxt_to_pacing_min : 1, /* ******************************************************************** */ rc_ack_required: 1, - r_pacing_discount : 1; + r_use_hpts_min : 1; uint8_t no_prr_addback : 1, gp_ready : 1, defer_options: 1, - excess_rxt_on: 1, /* Are actions on for excess retransmissions? */ + dis_lt_bw : 1, rc_ack_can_sendout_data: 1, /* * If set it will override pacing restrictions on not sending * data when the pacing timer is running. I.e. 
you set this @@ -659,7 +737,7 @@ r_rack_hw_rate_caps: 1, r_up_only: 1, r_via_fill_cw : 1, - r_fill_less_agg : 1; + r_rcvpath_rtt_up : 1; uint8_t rc_user_set_max_segs : 7, /* Socket option value Lock(a) */ rc_fillcw_apply_discount; @@ -673,7 +751,7 @@ rc_highly_buffered: 1, /* The path is highly buffered */ rc_dragged_bottom: 1, rc_pace_dnd : 1, /* The pace do not disturb bit */ - rc_avali2 : 1, + rc_initial_ss_comp : 1, rc_gp_filled : 1, rc_hw_nobuf : 1; uint8_t r_state : 4, /* Current rack state Lock(a) */ @@ -696,8 +774,8 @@ uint8_t app_limited_needs_set : 1, use_fixed_rate : 1, rc_has_collapsed : 1, - r_cwnd_was_clamped : 1, - r_clamped_gets_lower : 1, + use_lesser_lt_bw : 1, + cspr_is_fcc : 1, rack_hdrw_pacing : 1, /* We are doing Hardware pacing */ rack_hdw_pace_ena : 1, /* Is hardware pacing enabled? */ rack_attempt_hdwr_pace : 1; /* Did we attempt hdwr pacing (if allowed) */ @@ -722,7 +800,14 @@ r_persist_lt_bw_off : 1, r_collapse_point_valid : 1, dgp_on : 1; - uint16_t rc_init_win : 8, + uint16_t rto_from_rec: 1, + avail_bit: 1, + pcm_in_progress: 1, + pcm_needed: 1, + policer_detect_on: 1, /* Are we detecting policers? */ + rc_policer_detected : 1, /* We are being policed */ + rc_policer_should_pace : 1, /* The sizing algo thinks we should pace */ + rc_sendvars_notset : 1, /* Inside rack_init send variables (snd_max/una etc) were not set */ rc_gp_rtt_set : 1, rc_gp_dyn_mul : 1, rc_gp_saw_rec : 1, @@ -735,5 +820,9 @@ struct rack_control r_ctl; } __aligned(CACHE_LINE_SIZE); + +void rack_update_pcm_ack(struct tcp_rack *rack, int was_cumack, + uint32_t ss, uint32_t es); + #endif #endif diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -287,18 +287,29 @@ static volatile uint32_t number_of_tcp_connections_pacing = 0; static uint32_t shadow_num_connections = 0; static counter_u64_t tcp_pacing_failures; +static counter_u64_t tcp_dgp_failures; +static uint32_t shadow_tcp_pacing_dgp = 0; +static volatile uint32_t number_of_dgp_connections = 0; static int tcp_pacing_limit = 10000; SYSCTL_INT(_net_inet_tcp, OID_AUTO, pacing_limit, CTLFLAG_RW, &tcp_pacing_limit, 1000, "If the TCP stack does pacing, is there a limit (-1 = no, 0 = no pacing N = number of connections)"); +static int tcp_dgp_limit = -1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, dgp_limit, CTLFLAG_RW, + &tcp_dgp_limit, -1, + "If the TCP stack does DGP, is there a limit (-1 = no, 0 = no dgp N = number of connections)"); + SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pacing_count, CTLFLAG_RD, &shadow_num_connections, 0, "Number of TCP connections being paced"); SYSCTL_COUNTER_U64(_net_inet_tcp, OID_AUTO, pacing_failures, CTLFLAG_RD, &tcp_pacing_failures, "Number of times we failed to enable pacing to avoid exceeding the limit"); +SYSCTL_COUNTER_U64(_net_inet_tcp, OID_AUTO, dgp_failures, CTLFLAG_RD, + &tcp_dgp_failures, "Number of times we failed to enable dgp to avoid exceeding the limit"); + static int tcp_log_debug = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW, &tcp_log_debug, 0, "Log errors caused by incoming TCP segments"); @@ -1571,6 +1582,7 @@ tcp_uncomp_total = counter_u64_alloc(M_WAITOK); tcp_bad_csums = counter_u64_alloc(M_WAITOK); tcp_pacing_failures = counter_u64_alloc(M_WAITOK); + tcp_dgp_failures = counter_u64_alloc(M_WAITOK); #ifdef TCPPCAP tcp_pcap_init(); #endif @@ -4022,6 +4034,43 @@ } } +int +tcp_incr_dgp_pacing_cnt(void) +{ + if ((tcp_dgp_limit == -1) || + (tcp_dgp_limit > number_of_dgp_connections)) {
atomic_fetchadd_int(&number_of_dgp_connections, 1); + shadow_tcp_pacing_dgp = number_of_dgp_connections; + return (1); + } else { + counter_u64_add(tcp_dgp_failures, 1); + return (0); + } +} + +static uint8_t tcp_dgp_warning = 0; + +void +tcp_dec_dgp_pacing_cnt(void) +{ + uint32_t ret; + + ret = atomic_fetchadd_int(&number_of_dgp_connections, -1); + shadow_tcp_pacing_dgp = number_of_dgp_connections; + KASSERT(ret != 0, ("number_of_dgp_connections -1 would cause wrap?")); + if (ret == 0) { + if (tcp_dgp_limit != -1) { + printf("Warning all DGP is now disabled, count decrements invalidly!\n"); + tcp_dgp_limit = 0; + tcp_dgp_warning = 1; + } else if (tcp_dgp_warning == 0) { + printf("Warning DGP pacing is invalid, invalid decrement\n"); + tcp_dgp_warning = 1; + } + } + +} + static uint8_t tcp_pacing_warning = 0; void @@ -4541,7 +4590,7 @@ if (tp->t_tcpreq_req) { for(i = 0, allocated = 0; i < MAX_TCP_TRK_REQ; i++) { fil = &tp->t_tcpreq_info[i]; - if (fil->flags != TCP_TRK_TRACK_FLG_USED) + if ((fil->flags & TCP_TRK_TRACK_FLG_USED) == 0) continue; if ((fil->timestamp == req->timestamp) && (fil->start == req->start) && @@ -4573,6 +4622,7 @@ allocated = 1; fil->flags = TCP_TRK_TRACK_FLG_USED; fil->timestamp = req->timestamp; + fil->playout_ms = req->playout_ms; fil->localtime = ts; fil->start = req->start; if (req->flags & TCP_LOG_HTTPD_RANGE_END) { @@ -4589,7 +4639,10 @@ fil->sbcc_at_s = tptosocket(tp)->so_snd.sb_ccc; fil->start_seq = tp->snd_una + tptosocket(tp)->so_snd.sb_ccc; - fil->end_seq = (fil->start_seq + ((uint32_t)(fil->end - fil->start))); + if (req->flags & TCP_LOG_HTTPD_RANGE_END) + fil->end_seq = (fil->start_seq + ((uint32_t)(fil->end - fil->start))); + else + fil->end_seq = 0; if (tptosocket(tp)->so_snd.sb_tls_info) { /* * This session is doing TLS. Take a swag guess diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1032,7 +1032,10 @@ if (!solisten_enqueue(so, SS_ISCONNECTED)) tp->t_flags |= TF_SONOTCONN; - + /* Can we inherit anything from the listener? */ + if (tp->t_fb->tfb_inherit != NULL) { + (*tp->t_fb->tfb_inherit)(tp, sotoinpcb(lso)); + } return (so); allocfail: diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -179,6 +179,12 @@ goto out; } tp->t_state = TCPS_CLOSED; + /* Can we inherit anything from the listener? 
*/ + if ((so->so_listen != NULL) && + (so->so_listen->so_pcb != NULL) && + (tp->t_fb->tfb_inherit != NULL)) { + (*tp->t_fb->tfb_inherit)(tp, sotoinpcb(so->so_listen)); + } tcp_bblog_pru(tp, PRU_ATTACH, error); INP_WUNLOCK(inp); TCPSTATES_INC(TCPS_CLOSED); @@ -1601,6 +1607,7 @@ ti->tcpi_rcv_numsacks = tp->rcv_numsacks; ti->tcpi_rcv_adv = tp->rcv_adv; ti->tcpi_dupacks = tp->t_dupacks; + ti->tcpi_rttmin = tp->t_rttlow; #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) { ti->tcpi_options |= TCPI_OPT_TOE; diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -138,7 +138,8 @@ #define TCP_TRK_TRACK_FLG_OPEN 0x02 /* End is not valid (open range request) */ #define TCP_TRK_TRACK_FLG_SEQV 0x04 /* We had a sendfile that touched it */ #define TCP_TRK_TRACK_FLG_COMP 0x08 /* Sendfile as placed the last bits (range req only) */ -#define TCP_TRK_TRACK_FLG_FSND 0x10 /* First send has been done into the seq space */ +#define TCP_TRK_TRACK_FLG_FSND 0x10 /* First send has been done into the seq space */ +#define TCP_TRK_TRACK_FLG_LSND 0x20 /* We were able to set the Last Sent */ #define MAX_TCP_TRK_REQ 5 /* Max we will have at once */ struct tcp_sendfile_track { @@ -151,11 +152,14 @@ uint64_t cspr; /* Client suggested pace rate */ uint64_t sent_at_fs; /* What was t_sndbytes as we begun sending */ uint64_t rxt_at_fs; /* What was t_snd_rxt_bytes as we begun sending */ + uint64_t sent_at_ls; /* Sent value at the last send */ + uint64_t rxt_at_ls; /* Retransmit value at the last send */ tcp_seq start_seq; /* First TCP Seq assigned */ tcp_seq end_seq; /* If range req last seq */ uint32_t flags; /* Type of request open etc */ uint32_t sbcc_at_s; /* When we allocate what is the sb_cc */ uint32_t hint_maxseg; /* Client hinted maxseg */ + uint32_t playout_ms; /* Client playout ms */ uint32_t hybrid_flags; /* Hybrid flags on this request */ }; @@ -623,6 +627,8 @@ void (*tfb_switch_failed)(struct tcpcb *); bool (*tfb_early_wake_check)(struct tcpcb *); int (*tfb_compute_pipe)(struct tcpcb *tp); + int (*tfb_stack_info)(struct tcpcb *tp, struct stack_specific_info *); + void (*tfb_inherit)(struct tcpcb *tp, struct inpcb *h_inp); volatile uint32_t tfb_refcnt; uint32_t tfb_flags; uint8_t tfb_id; @@ -788,7 +794,7 @@ #define TF_TSO 0x01000000 /* TSO enabled on this connection */ #define TF_TOE 0x02000000 /* this connection is offloaded */ #define TF_CLOSED 0x04000000 /* close(2) called on socket */ -#define TF_UNUSED1 0x08000000 /* unused */ +#define TF_SENTSYN 0x08000000 /* At least one syn has been sent */ #define TF_LRD 0x10000000 /* Lost Retransmission Detection */ #define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */ #define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */ @@ -1501,6 +1507,8 @@ int tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes, size_t seed_len); int tcp_can_enable_pacing(void); +int tcp_incr_dgp_pacing_cnt(void); +void tcp_dec_dgp_pacing_cnt(void); void tcp_decrement_paced_conn(void); void tcp_change_time_units(struct tcpcb *, int); void tcp_handle_orphaned_packets(struct tcpcb *);
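
The new rack_pcm_info / rack_pcm_stats pair in rack_control splits a path-capacity measurement into a send-side record (rack_log_output fills send_time and the sseq/eseq range) and an ack-side sample array of up to cnt_alloc entries that rack_update_pcm_ack() appends to as acknowledgments for that range arrive. The measurement logic itself lives in the new rack_pcm.c, which is not part of these hunks, so the following is only a sketch of how one sample could be recorded using the fields the diff adds; the helper name and the overflow policy are illustrative, not taken from the patch.

/*
 * Illustrative only: append one acked-range sample for the current PCM
 * round.  The field layout comes from the diff; the guard and increment
 * policy are assumptions (the real code is in rack_pcm.c / rack.c).
 */
static void
example_pcm_record_sample(struct tcp_rack *rack, uint32_t ss, uint32_t es,
    uint64_t ack_time)
{
	struct rack_pcm_info *pi = &rack->r_ctl.pcm_i;
	struct rack_pcm_stats *ps;

	if ((rack->r_ctl.pcm_s == NULL) || (pi->cnt >= pi->cnt_alloc))
		return;			/* no sample array, or it is full */
	ps = &rack->r_ctl.pcm_s[pi->cnt];
	ps->sseq = ss;
	ps->eseq = es;
	ps->ack_time = ack_time;
	pi->cnt++;
}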
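The DGP accounting added to tcp_subr.c mirrors the existing pacing-limit machinery: tcp_incr_dgp_pacing_cnt() succeeds only while the connection count is below net.inet.tcp.dgp_limit (or the limit is -1), and every successful increment must be paired with exactly one tcp_dec_dgp_pacing_cnt() when DGP is switched off or the connection goes away, otherwise the underflow check in the decrement path trips. Below is a minimal sketch of a conforming caller, assuming the rack stack's dgp_on bit as the bookkeeping flag; the helper names and the ENOBUFS return are illustrative.

/*
 * Sketch of the expected call pattern around the new DGP counters.  Only
 * the pairing rule comes from the patch; the rest is an assumption for
 * illustration.
 */
static int
example_enable_dgp(struct tcp_rack *rack)
{
	if (rack->dgp_on)
		return (0);		/* already counted */
	if (tcp_incr_dgp_pacing_cnt() == 0)
		return (ENOBUFS);	/* would exceed net.inet.tcp.dgp_limit */
	rack->dgp_on = 1;
	return (0);
}

static void
example_disable_dgp(struct tcp_rack *rack)
{
	if (rack->dgp_on) {
		rack->dgp_on = 0;
		tcp_dec_dgp_pacing_cnt();	/* pairs with the increment above */
	}
}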
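The new tfb_inherit hook is invoked from two places with the listener's inpcb: the syncache completion path after solisten_enqueue(), and tcp_usr_attach() when the socket was created through a listening socket. The intent is to let a stack copy listener-scoped configuration onto the child connection; what gets copied is entirely stack specific. A sketch of the callback's shape follows; the function name, the same-stack check, and the commented field copy are illustrative assumptions, not part of the patch.

/*
 * Illustrative tfb_inherit implementation.  The framework only supplies
 * the listening socket's inpcb; whether and what to copy is up to the
 * stack.
 */
static void
example_inherit(struct tcpcb *tp, struct inpcb *h_inp)
{
	struct tcpcb *ltp;

	if (h_inp == NULL)
		return;
	ltp = intotcpcb(h_inp);
	if ((ltp == NULL) || (ltp->t_fb != tp->t_fb))
		return;		/* only inherit from the same stack */
	/*
	 * Copy whatever listener settings make sense for this stack,
	 * e.g. pacing or detection knobs the application set on the
	 * listen socket before accept().
	 */
}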
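On the read side, tfb_stack_info gives a stack a way to export struct stack_specific_info, with TCP_STACK_SPEC_INFO as the user-visible option number. The socket-option plumbing is not in these hunks, so the userland sketch below assumes the usual pattern of a read-only getsockopt() at the IPPROTO_TCP level that fails on stacks which do not provide the callback.

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Sketch: dump stack-specific counters from a connected TCP socket.
 * Assumes getsockopt(TCP_STACK_SPEC_INFO) fills struct stack_specific_info
 * when the attached stack implements tfb_stack_info.
 */
static void
example_print_stack_info(int fd)
{
	struct stack_specific_info info;
	socklen_t len = sizeof(info);

	memset(&info, 0, sizeof(info));
	if (getsockopt(fd, IPPROTO_TCP, TCP_STACK_SPEC_INFO, &info, &len) == -1) {
		perror("TCP_STACK_SPEC_INFO");
		return;
	}
	printf("%s: sent %ju, retransmitted %ju, round %u, policer %s\n",
	    info.stack_name,
	    (uintmax_t)info.bytes_transmitted,
	    (uintmax_t)info.bytes_retransmitted,
	    info.current_round,
	    info.policer_detected ? "detected" : "not detected");
}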