Index: head/sys/netinet/tcp_log_buf.c
===================================================================
--- head/sys/netinet/tcp_log_buf.c	(revision 356413)
+++ head/sys/netinet/tcp_log_buf.c	(revision 356414)
@@ -1,2438 +1,2639 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2016-2018 Netflix, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/arb.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/qmath.h>
 #include <sys/queue.h>
 #include <sys/refcount.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/tree.h>
-#include <sys/stats.h>
+#include <sys/stats.h> /* Must come after qmath.h and tree.h */
 #include <sys/counter.h>
 
 #include <dev/tcp_log/tcp_log_dev.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_log_buf.h>
 
 /* Default expiry time */
 #define	TCP_LOG_EXPIRE_TIME	((sbintime_t)60 * SBT_1S)
 
 /* Max interval at which to run the expiry timer */
 #define	TCP_LOG_EXPIRE_INTVL	((sbintime_t)5 * SBT_1S)
 
 bool	tcp_log_verbose;
 static uma_zone_t tcp_log_bucket_zone, tcp_log_node_zone, tcp_log_zone;
 static int	tcp_log_session_limit = TCP_LOG_BUF_DEFAULT_SESSION_LIMIT;
 static uint32_t	tcp_log_version = TCP_LOG_BUF_VER;
 RB_HEAD(tcp_log_id_tree, tcp_log_id_bucket);
 static struct tcp_log_id_tree tcp_log_id_head;
 static STAILQ_HEAD(, tcp_log_id_node) tcp_log_expireq_head =
     STAILQ_HEAD_INITIALIZER(tcp_log_expireq_head);
 static struct mtx tcp_log_expireq_mtx;
 static struct callout tcp_log_expireq_callout;
 static u_long tcp_log_auto_ratio = 0;
 static volatile u_long tcp_log_auto_ratio_cur = 0;
 static uint32_t tcp_log_auto_mode = TCP_LOG_STATE_TAIL;
 static bool tcp_log_auto_all = false;
+static uint32_t tcp_disable_all_bb_logs = 0;
 
 RB_PROTOTYPE_STATIC(tcp_log_id_tree, tcp_log_id_bucket, tlb_rb, tcp_log_id_cmp)
 
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, bb, CTLFLAG_RW, 0, "TCP Black Box controls");
 
 SYSCTL_BOOL(_net_inet_tcp_bb, OID_AUTO, log_verbose, CTLFLAG_RW, &tcp_log_verbose,
     0, "Force verbose logging for TCP traces");
 
 SYSCTL_INT(_net_inet_tcp_bb, OID_AUTO, log_session_limit,
     CTLFLAG_RW, &tcp_log_session_limit, 0,
     "Maximum number of events maintained for each TCP session");
 
 SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_global_limit, CTLFLAG_RW,
     &tcp_log_zone, "Maximum number of events maintained for all TCP sessions");
 
 SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_global_entries, CTLFLAG_RD,
     &tcp_log_zone, "Current number of events maintained for all TCP sessions");
 
 SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_limit, CTLFLAG_RW,
     &tcp_log_bucket_zone, "Maximum number of log IDs");
 
 SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_entries, CTLFLAG_RD,
     &tcp_log_bucket_zone, "Current number of log IDs");
 
 SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_limit, CTLFLAG_RW,
     &tcp_log_node_zone, "Maximum number of tcpcbs with log IDs");
 
 SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_entries, CTLFLAG_RD,
     &tcp_log_node_zone, "Current number of tcpcbs with log IDs");
 
 SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_version, CTLFLAG_RD, &tcp_log_version,
     0, "Version of log formats exported");
 
+SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, disable_all, CTLFLAG_RW,
+    &tcp_disable_all_bb_logs, 0, /* val is unused when ptr is non-NULL */
+    "Disable all BB logging for all connections");
+
 SYSCTL_ULONG(_net_inet_tcp_bb, OID_AUTO, log_auto_ratio, CTLFLAG_RW,
     &tcp_log_auto_ratio, 0, "Do auto capturing for 1 out of N sessions");
 
 SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_auto_mode, CTLFLAG_RW,
     &tcp_log_auto_mode, TCP_LOG_STATE_HEAD_AUTO,
     "Logging mode for auto-selected sessions (default is TCP_LOG_STATE_HEAD_AUTO)");
 
 SYSCTL_BOOL(_net_inet_tcp_bb, OID_AUTO, log_auto_all, CTLFLAG_RW,
     &tcp_log_auto_all, false,
     "Auto-select from all sessions (rather than just those with IDs)");
 
 #ifdef TCPLOG_DEBUG_COUNTERS
 counter_u64_t tcp_log_queued;
 counter_u64_t tcp_log_que_fail1;
 counter_u64_t tcp_log_que_fail2;
 counter_u64_t tcp_log_que_fail3;
 counter_u64_t tcp_log_que_fail4;
 counter_u64_t tcp_log_que_fail5;
 counter_u64_t tcp_log_que_copyout;
 counter_u64_t tcp_log_que_read;
 counter_u64_t tcp_log_que_freed;
 
 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, queued, CTLFLAG_RD,
     &tcp_log_queued, "Number of entries queued");
 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail1, CTLFLAG_RD,
     &tcp_log_que_fail1, "Number of entries queued but fail 1");
 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail2, CTLFLAG_RD,
     &tcp_log_que_fail2, "Number of entries queued but fail 2");
 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail3, CTLFLAG_RD,
     &tcp_log_que_fail3, "Number of entries queued but fail 3");
 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail4, CTLFLAG_RD,
     &tcp_log_que_fail4, "Number of entries queued but fail 4");
 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail5, CTLFLAG_RD,
     &tcp_log_que_fail5, "Number of entries queued but fail 4");
 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, copyout, CTLFLAG_RD,
     &tcp_log_que_copyout, "Number of entries copied out");
 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, read, CTLFLAG_RD,
     &tcp_log_que_read, "Number of entries read from the queue");
 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, freed, CTLFLAG_RD,
     &tcp_log_que_freed, "Number of entries freed after reading");
 #endif
 
 #ifdef INVARIANTS
 #define	TCPLOG_DEBUG_RINGBUF
 #endif
+/* Number of requests to consider a PCB ID "active". */
+#define	ACTIVE_REQUEST_COUNT	10
 
+/* Statistic tracking for "active" PCB IDs. */
+static counter_u64_t tcp_log_pcb_ids_cur;
+static counter_u64_t tcp_log_pcb_ids_tot;
+
+SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, pcb_ids_cur, CTLFLAG_RD,
+    &tcp_log_pcb_ids_cur, "Number of pcb IDs allocated in the system");
+SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, pcb_ids_tot, CTLFLAG_RD,
+    &tcp_log_pcb_ids_tot, "Total number of pcb IDs that have been allocated");
+
 struct tcp_log_mem
 {
 	STAILQ_ENTRY(tcp_log_mem) tlm_queue;
 	struct tcp_log_buffer	tlm_buf;
 	struct tcp_log_verbose	tlm_v;
 #ifdef TCPLOG_DEBUG_RINGBUF
 	volatile int		tlm_refcnt;
 #endif
 };
 
 /* 60 bytes for the header, + 16 bytes for padding */
 static uint8_t	zerobuf[76];
 
 /*
  * Lock order:
  * 1. TCPID_TREE
  * 2. TCPID_BUCKET
  * 3. INP
  *
  * Rules:
  * A. You need a lock on the Tree to add/remove buckets.
  * B. You need a lock on the bucket to add/remove nodes from the bucket.
  * C. To change information in a node, you need the INP lock if the tln_closed
  *    field is false. Otherwise, you need the bucket lock. (Note that the
  *    tln_closed field can change at any point, so you need to recheck the
  *    entry after acquiring the INP lock.)
  * D. To remove a node from the bucket, you must have that entry locked,
  *    according to the criteria of Rule C. Also, the node must not be on
  *    the expiry queue.
  * E. The exception to C is the expiry queue fields, which are locked by
  *    the TCPLOG_EXPIREQ lock.
  *
  * Buckets have a reference count. Each node is a reference. Further,
  * other callers may add reference counts to keep a bucket from disappearing.
  * You can add a reference as long as you own a lock sufficient to keep the
  * bucket from disappearing. For example, a common use is:
  *   a. Have a locked INP, but need to lock the TCPID_BUCKET.
  *   b. Add a refcount on the bucket. (Safe because the INP lock prevents
  *      the TCPID_BUCKET from going away.)
  *   c. Drop the INP lock.
  *   d. Acquire a lock on the TCPID_BUCKET.
  *   e. Acquire a lock on the INP.
  *   f. Drop the refcount on the bucket.
  *      (At this point, the bucket may disappear.)
  *
  * Expire queue lock:
  * You can acquire this with either the bucket or INP lock. Don't reverse it.
  * When the expire code has committed to freeing a node, it resets the expiry
  * time to SBT_MAX. That is the signal to everyone else that they should
  * leave that node alone.
  */
 static struct rwlock tcp_id_tree_lock;
 #define	TCPID_TREE_WLOCK()		rw_wlock(&tcp_id_tree_lock)
 #define	TCPID_TREE_RLOCK()		rw_rlock(&tcp_id_tree_lock)
 #define	TCPID_TREE_UPGRADE()		rw_try_upgrade(&tcp_id_tree_lock)
 #define	TCPID_TREE_WUNLOCK()		rw_wunlock(&tcp_id_tree_lock)
 #define	TCPID_TREE_RUNLOCK()		rw_runlock(&tcp_id_tree_lock)
 #define	TCPID_TREE_WLOCK_ASSERT()	rw_assert(&tcp_id_tree_lock, RA_WLOCKED)
 #define	TCPID_TREE_RLOCK_ASSERT()	rw_assert(&tcp_id_tree_lock, RA_RLOCKED)
 #define	TCPID_TREE_UNLOCK_ASSERT()	rw_assert(&tcp_id_tree_lock, RA_UNLOCKED)
 
 #define	TCPID_BUCKET_LOCK_INIT(tlb)	mtx_init(&((tlb)->tlb_mtx), "tcp log id bucket", NULL, MTX_DEF)
 #define	TCPID_BUCKET_LOCK_DESTROY(tlb)	mtx_destroy(&((tlb)->tlb_mtx))
 #define	TCPID_BUCKET_LOCK(tlb)		mtx_lock(&((tlb)->tlb_mtx))
 #define	TCPID_BUCKET_UNLOCK(tlb)	mtx_unlock(&((tlb)->tlb_mtx))
 #define	TCPID_BUCKET_LOCK_ASSERT(tlb)	mtx_assert(&((tlb)->tlb_mtx), MA_OWNED)
 #define	TCPID_BUCKET_UNLOCK_ASSERT(tlb) mtx_assert(&((tlb)->tlb_mtx), MA_NOTOWNED)
 
 #define	TCPID_BUCKET_REF(tlb)		refcount_acquire(&((tlb)->tlb_refcnt))
 #define	TCPID_BUCKET_UNREF(tlb)		refcount_release(&((tlb)->tlb_refcnt))
 
 #define	TCPLOG_EXPIREQ_LOCK()		mtx_lock(&tcp_log_expireq_mtx)
 #define	TCPLOG_EXPIREQ_UNLOCK()		mtx_unlock(&tcp_log_expireq_mtx)
 
 SLIST_HEAD(tcp_log_id_head, tcp_log_id_node);
 
 struct tcp_log_id_bucket
 {
 	/*
 	 * tlb_id must be first. This lets us use strcmp on
 	 * (struct tcp_log_id_bucket *) and (char *) interchangeably.
 	 */
 	char				tlb_id[TCP_LOG_ID_LEN];
+	char				tlb_tag[TCP_LOG_TAG_LEN];
 	RB_ENTRY(tcp_log_id_bucket)	tlb_rb;
 	struct tcp_log_id_head		tlb_head;
 	struct mtx			tlb_mtx;
 	volatile u_int			tlb_refcnt;
+	volatile u_int			tlb_reqcnt;
+	uint32_t			tlb_loglimit;
+	uint8_t				tlb_logstate;
 };
 
 struct tcp_log_id_node
 {
 	SLIST_ENTRY(tcp_log_id_node) tln_list;
 	STAILQ_ENTRY(tcp_log_id_node) tln_expireq; /* Locked by the expireq lock */
 	sbintime_t		tln_expiretime;	/* Locked by the expireq lock */
 
 	/*
 	 * If INP is NULL, that means the connection has closed. We've
 	 * saved the connection endpoint information and the log entries
 	 * in the tln_ie and tln_entries members. We've also saved a pointer
 	 * to the enclosing bucket here. If INP is not NULL, the information is
 	 * in the PCB and not here.
 	 */
 	struct inpcb		*tln_inp;
 	struct tcpcb		*tln_tp;
 	struct tcp_log_id_bucket *tln_bucket;
 	struct in_endpoints	tln_ie;
 	struct tcp_log_stailq	tln_entries;
 	int			tln_count;
 	volatile int		tln_closed;
 	uint8_t			tln_af;
 };
 
 enum tree_lock_state {
 	TREE_UNLOCKED = 0,
 	TREE_RLOCKED,
 	TREE_WLOCKED,
 };
 
 /* Do we want to select this session for auto-logging? */
 static __inline bool
 tcp_log_selectauto(void)
 {
 
 	/*
 	 * If we are doing auto-capturing, figure out whether we will capture
 	 * this session.
 	 */
 	if (tcp_log_auto_ratio &&
+	    (tcp_disable_all_bb_logs == 0) &&
 	    (atomic_fetchadd_long(&tcp_log_auto_ratio_cur, 1) %
 	    tcp_log_auto_ratio) == 0)
 		return (true);
 	return (false);
 }
 
 static __inline int
 tcp_log_id_cmp(struct tcp_log_id_bucket *a, struct tcp_log_id_bucket *b)
 {
 	KASSERT(a != NULL, ("tcp_log_id_cmp: argument a is unexpectedly NULL"));
 	KASSERT(b != NULL, ("tcp_log_id_cmp: argument b is unexpectedly NULL"));
 	return strncmp(a->tlb_id, b->tlb_id, TCP_LOG_ID_LEN);
 }
 
 RB_GENERATE_STATIC(tcp_log_id_tree, tcp_log_id_bucket, tlb_rb, tcp_log_id_cmp)
 
 static __inline void
 tcp_log_id_validate_tree_lock(int tree_locked)
 {
 
 #ifdef INVARIANTS
 	switch (tree_locked) {
 	case TREE_WLOCKED:
 		TCPID_TREE_WLOCK_ASSERT();
 		break;
 	case TREE_RLOCKED:
 		TCPID_TREE_RLOCK_ASSERT();
 		break;
 	case TREE_UNLOCKED:
 		TCPID_TREE_UNLOCK_ASSERT();
 		break;
 	default:
 		kassert_panic("%s:%d: unknown tree lock state", __func__,
 		    __LINE__);
 	}
 #endif
 }
 
 static __inline void
 tcp_log_remove_bucket(struct tcp_log_id_bucket *tlb)
 {
 
 	TCPID_TREE_WLOCK_ASSERT();
 	KASSERT(SLIST_EMPTY(&tlb->tlb_head),
 	    ("%s: Attempt to remove non-empty bucket", __func__));
 	if (RB_REMOVE(tcp_log_id_tree, &tcp_log_id_head, tlb) == NULL) {
 #ifdef INVARIANTS
 		kassert_panic("%s:%d: error removing element from tree",
 			    __func__, __LINE__);
 #endif
 	}
 	TCPID_BUCKET_LOCK_DESTROY(tlb);
+	counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1);
 	uma_zfree(tcp_log_bucket_zone, tlb);
 }
 
 /*
  * Call with a referenced and locked bucket.
  * Will return true if the bucket was freed; otherwise, false.
  * tlb: The bucket to unreference.
  * tree_locked: A pointer to the state of the tree lock. If the tree lock
  *    state changes, the function will update it.
  * inp: If not NULL and the function needs to drop the inp lock to relock the
  *    tree, it will do so. (The caller must ensure inp will not become invalid,
  *    probably by holding a reference to it.)
  */
 static bool
 tcp_log_unref_bucket(struct tcp_log_id_bucket *tlb, int *tree_locked,
     struct inpcb *inp)
 {
 
 	KASSERT(tlb != NULL, ("%s: called with NULL tlb", __func__));
 	KASSERT(tree_locked != NULL, ("%s: called with NULL tree_locked",
 	    __func__));
 
 	tcp_log_id_validate_tree_lock(*tree_locked);
 
 	/*
 	 * Did we hold the last reference on the tlb? If so, we may need
 	 * to free it. (Note that we can realistically only execute the
 	 * loop twice: once without a write lock and once with a write
 	 * lock.)
 	 */
 	while (TCPID_BUCKET_UNREF(tlb)) {
 		/*
 		 * We need a write lock on the tree to free this.
 		 * If we can upgrade the tree lock, this is "easy". If we
 		 * can't upgrade the tree lock, we need to do this the
 		 * "hard" way: unwind all our locks and relock everything.
 		 * In the meantime, anything could have changed. We even
 		 * need to validate that we still need to free the bucket.
 		 */
 		if (*tree_locked == TREE_RLOCKED && TCPID_TREE_UPGRADE())
 			*tree_locked = TREE_WLOCKED;
 		else if (*tree_locked != TREE_WLOCKED) {
 			TCPID_BUCKET_REF(tlb);
 			if (inp != NULL)
 				INP_WUNLOCK(inp);
 			TCPID_BUCKET_UNLOCK(tlb);
 			if (*tree_locked == TREE_RLOCKED)
 				TCPID_TREE_RUNLOCK();
 			TCPID_TREE_WLOCK();
 			*tree_locked = TREE_WLOCKED;
 			TCPID_BUCKET_LOCK(tlb);
 			if (inp != NULL)
 				INP_WLOCK(inp);
 			continue;
 		}
 
 		/*
 		 * We have an empty bucket and a write lock on the tree.
 		 * Remove the empty bucket.
 		 */
 		tcp_log_remove_bucket(tlb);
 		return (true);
 	}
 	return (false);
 }
 
 /*
  * Call with a locked bucket. This function will release the lock on the
  * bucket before returning.
  *
  * The caller is responsible for freeing the tp->t_lin/tln node!
  *
  * Note: one of tp or both tlb and tln must be supplied.
  *
  * inp: A pointer to the inp. If the function needs to drop the inp lock to
  *    acquire the tree write lock, it will do so. (The caller must ensure inp
  *    will not become invalid, probably by holding a reference to it.)
  * tp: A pointer to the tcpcb. (optional; if specified, tlb and tln are ignored)
  * tlb: A pointer to the bucket. (optional; ignored if tp is specified)
  * tln: A pointer to the node. (optional; ignored if tp is specified)
  * tree_locked: A pointer to the state of the tree lock. If the tree lock
  *    state changes, the function will update it.
  *
  * Will return true if the INP lock was reacquired; otherwise, false.
  */
 static bool
 tcp_log_remove_id_node(struct inpcb *inp, struct tcpcb *tp,
     struct tcp_log_id_bucket *tlb, struct tcp_log_id_node *tln,
     int *tree_locked)
 {
 	int orig_tree_locked;
 
 	KASSERT(tp != NULL || (tlb != NULL && tln != NULL),
 	    ("%s: called with tp=%p, tlb=%p, tln=%p", __func__,
 	    tp, tlb, tln));
 	KASSERT(tree_locked != NULL, ("%s: called with NULL tree_locked",
 	    __func__));
 
 	if (tp != NULL) {
 		tlb = tp->t_lib;
 		tln = tp->t_lin;
 		KASSERT(tlb != NULL, ("%s: unexpectedly NULL tlb", __func__));
 		KASSERT(tln != NULL, ("%s: unexpectedly NULL tln", __func__));
 	}
 
 	tcp_log_id_validate_tree_lock(*tree_locked);
 	TCPID_BUCKET_LOCK_ASSERT(tlb);
 
 	/*
 	 * Remove the node, clear the log bucket and node from the TCPCB, and
 	 * decrement the bucket refcount. In the process, if this is the
 	 * last reference, the bucket will be freed.
 	 */
 	SLIST_REMOVE(&tlb->tlb_head, tln, tcp_log_id_node, tln_list);
 	if (tp != NULL) {
 		tp->t_lib = NULL;
 		tp->t_lin = NULL;
 	}
 	orig_tree_locked = *tree_locked;
 	if (!tcp_log_unref_bucket(tlb, tree_locked, inp))
 		TCPID_BUCKET_UNLOCK(tlb);
 	return (*tree_locked != orig_tree_locked);
 }
 
 #define	RECHECK_INP_CLEAN(cleanup)	do {			\
 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {	\
 		rv = ECONNRESET;				\
 		cleanup;					\
 		goto done;					\
 	}							\
 	tp = intotcpcb(inp);					\
 } while (0)
 
 #define	RECHECK_INP()	RECHECK_INP_CLEAN(/* noop */)
 
 static void
 tcp_log_grow_tlb(char *tlb_id, struct tcpcb *tp)
 {
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 #ifdef STATS
 	if (V_tcp_perconn_stats_enable == 2 && tp->t_stats == NULL)
 		(void)tcp_stats_sample_rollthedice(tp, tlb_id, strlen(tlb_id));
 #endif
 }
 
+static void
+tcp_log_increment_reqcnt(struct tcp_log_id_bucket *tlb)
+{
+
+	atomic_fetchadd_int(&tlb->tlb_reqcnt, 1); /* Atomic: callers may not hold the bucket lock. */
+}
+
 /*
+ * Associate the specified tag with a particular TCP log ID.
+ * Called with INPCB locked. Returns with it unlocked.
+ * Returns 0 on success or EOPNOTSUPP if the connection has no TCP log ID.
+ */
+int
+tcp_log_set_tag(struct tcpcb *tp, char *tag)
+{
+	struct tcp_log_id_bucket *tlb;
+	int tree_locked;
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	tree_locked = TREE_UNLOCKED;
+	tlb = tp->t_lib;
+	if (tlb == NULL) {
+		INP_WUNLOCK(tp->t_inpcb);
+		return (EOPNOTSUPP);
+	}
+
+	TCPID_BUCKET_REF(tlb);	/* Pin the bucket across the INP unlock. */
+	INP_WUNLOCK(tp->t_inpcb);	/* Lock order: bucket before INP. */
+	TCPID_BUCKET_LOCK(tlb);
+	strlcpy(tlb->tlb_tag, tag, TCP_LOG_TAG_LEN); /* Truncates if too long. */
+	if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL))
+		TCPID_BUCKET_UNLOCK(tlb);	/* Not freed; just unlock. */
+
+	if (tree_locked == TREE_WLOCKED) { /* Unref may have locked the tree. */
+		TCPID_TREE_WLOCK_ASSERT();
+		TCPID_TREE_WUNLOCK();
+	} else if (tree_locked == TREE_RLOCKED) {
+		TCPID_TREE_RLOCK_ASSERT();
+		TCPID_TREE_RUNLOCK();
+	} else
+		TCPID_TREE_UNLOCK_ASSERT();
+
+	return (0);
+}
+
+/*
  * Set the TCP log ID for a TCPCB.
  * Called with INPCB locked. Returns with it unlocked.
  */
 int
 tcp_log_set_id(struct tcpcb *tp, char *id)
 {
 	struct tcp_log_id_bucket *tlb, *tmp_tlb;
 	struct tcp_log_id_node *tln;
 	struct inpcb *inp;
 	int tree_locked, rv;
 	bool bucket_locked;
 
 	tlb = NULL;
 	tln = NULL;
 	inp = tp->t_inpcb;
 	tree_locked = TREE_UNLOCKED;
 	bucket_locked = false;
 
 restart:
 	INP_WLOCK_ASSERT(inp);
 
 	/* See if the ID is unchanged. */
 	if ((tp->t_lib != NULL && !strcmp(tp->t_lib->tlb_id, id)) ||
 	    (tp->t_lib == NULL && *id == 0)) {
+		if (tp->t_lib != NULL) {
+			tcp_log_increment_reqcnt(tp->t_lib);
+			if ((tp->t_lib->tlb_logstate) &&
+			    (tp->t_log_state_set == 0)) {
+				/* Clone in any logging */
+
+				tp->t_logstate = tp->t_lib->tlb_logstate;
+			}
+			if ((tp->t_lib->tlb_loglimit) &&
+			    (tp->t_log_state_set == 0)) {
+				/* We also have a limit set */
+
+				tp->t_loglimit = tp->t_lib->tlb_loglimit;
+			}
+		}
 		rv = 0;
 		goto done;
 	}
 
 	/*
 	 * If the TCPCB had a previous ID, we need to extricate it from
 	 * the previous list.
 	 *
 	 * Drop the TCPCB lock and lock the tree and the bucket.
 	 * Because this is called in the socket context, we (theoretically)
 	 * don't need to worry about the INPCB completely going away
 	 * while we are gone.
 	 */
 	if (tp->t_lib != NULL) {
 		tlb = tp->t_lib;
 		TCPID_BUCKET_REF(tlb);
 		INP_WUNLOCK(inp);
 
 		if (tree_locked == TREE_UNLOCKED) {
 			TCPID_TREE_RLOCK();
 			tree_locked = TREE_RLOCKED;
 		}
 		TCPID_BUCKET_LOCK(tlb);
 		bucket_locked = true;
 		INP_WLOCK(inp);
 
 		/*
 		 * Unreference the bucket. If our bucket went away, it is no
 		 * longer locked or valid.
 		 */
 		if (tcp_log_unref_bucket(tlb, &tree_locked, inp)) {
 			bucket_locked = false;
 			tlb = NULL;
 		}
 
 		/* Validate the INP. */
 		RECHECK_INP();
 
 		/*
 		 * Evaluate whether the bucket changed while we were unlocked.
 		 *
 		 * Possible scenarios here:
 		 * 1. Bucket is unchanged and the same one we started with.
 		 * 2. The TCPCB no longer has a bucket and our bucket was
 		 *    freed.
 		 * 3. The TCPCB has a new bucket, whether ours was freed.
 		 * 4. The TCPCB no longer has a bucket and our bucket was
 		 *    not freed.
 		 *
 		 * In cases 2-4, we will start over. In case 1, we will
 		 * proceed here to remove the bucket.
 		 */
 		if (tlb == NULL || tp->t_lib != tlb) {
 			KASSERT(bucket_locked || tlb == NULL,
 			    ("%s: bucket_locked (%d) and tlb (%p) are "
 			    "inconsistent", __func__, bucket_locked, tlb));
 			
 			if (bucket_locked) {
 				TCPID_BUCKET_UNLOCK(tlb);
 				bucket_locked = false;
 				tlb = NULL;
 			}
 			goto restart;
 		}
 
 		/*
 		 * Store the (struct tcp_log_id_node) for reuse. Then, remove
 		 * it from the bucket. In the process, we may end up relocking.
 		 * If so, we need to validate that the INP is still valid, and
 		 * the TCPCB entries match we expect.
 		 *
 		 * We will clear tlb and change the bucket_locked state just
 		 * before calling tcp_log_remove_id_node(), since that function
 		 * will unlock the bucket.
 		 */
 		if (tln != NULL)
 			uma_zfree(tcp_log_node_zone, tln);
 		tln = tp->t_lin;
 		tlb = NULL;
 		bucket_locked = false;
 		if (tcp_log_remove_id_node(inp, tp, NULL, NULL, &tree_locked)) {
 			RECHECK_INP();
 
 			/*
 			 * If the TCPCB moved to a new bucket while we had
 			 * dropped the lock, restart.
 			 */
 			if (tp->t_lib != NULL || tp->t_lin != NULL)
 				goto restart;
 		}
 
 		/*
 		 * Yay! We successfully removed the TCPCB from its old
 		 * bucket. Phew!
 		 *
 		 * On to bigger and better things...
 		 */
 	}
 
 	/* At this point, the TCPCB should not be in any bucket. */
 	KASSERT(tp->t_lib == NULL, ("%s: tp->t_lib is not NULL", __func__));
 
 	/*
 	 * If the new ID is not empty, we need to now assign this TCPCB to a
 	 * new bucket.
 	 */
 	if (*id) {
 		/* Get a new tln, if we don't already have one to reuse. */
 		if (tln == NULL) {
 			tln = uma_zalloc(tcp_log_node_zone, M_NOWAIT | M_ZERO);
 			if (tln == NULL) {
 				rv = ENOBUFS;
 				goto done;
 			}
 			tln->tln_inp = inp;
 			tln->tln_tp = tp;
 		}
 
 		/*
 		 * Drop the INP lock for a bit. We don't need it, and dropping
 		 * it prevents lock order reversals.
 		 */
 		INP_WUNLOCK(inp);
 
 		/* Make sure we have at least a read lock on the tree. */
 		tcp_log_id_validate_tree_lock(tree_locked);
 		if (tree_locked == TREE_UNLOCKED) {
 			TCPID_TREE_RLOCK();
 			tree_locked = TREE_RLOCKED;
 		}
 
 refind:
 		/*
 		 * Remember that we constructed (struct tcp_log_id_node) so
 		 * we can safely cast the id to it for the purposes of finding.
 		 */
 		KASSERT(tlb == NULL, ("%s:%d tlb unexpectedly non-NULL", 
 		    __func__, __LINE__));
 		tmp_tlb = RB_FIND(tcp_log_id_tree, &tcp_log_id_head,
 		    (struct tcp_log_id_bucket *) id);
 
 		/*
 		 * If we didn't find a matching bucket, we need to add a new
 		 * one. This requires a write lock. But, of course, we will
 		 * need to recheck some things when we re-acquire the lock.
 		 */
 		if (tmp_tlb == NULL && tree_locked != TREE_WLOCKED) {
 			tree_locked = TREE_WLOCKED;
 			if (!TCPID_TREE_UPGRADE()) {
 				TCPID_TREE_RUNLOCK();
 				TCPID_TREE_WLOCK();
 
 				/*
 				 * The tree may have changed while we were
 				 * unlocked.
 				 */
 				goto refind;
 			}
 		}
 
 		/* If we need to add a new bucket, do it now. */
 		if (tmp_tlb == NULL) {
 			/* Allocate new bucket. */
 			tlb = uma_zalloc(tcp_log_bucket_zone, M_NOWAIT);
 			if (tlb == NULL) {
 				rv = ENOBUFS;
 				goto done_noinp;
 			}
+			counter_u64_add(tcp_log_pcb_ids_cur, 1);
+			counter_u64_add(tcp_log_pcb_ids_tot, 1);
 
+			if ((tcp_log_auto_all == false) &&
+			    tcp_log_auto_mode &&
+			    tcp_log_selectauto()) {
+				/* Save off the log state */
+				tlb->tlb_logstate = tcp_log_auto_mode;
+			} else
+				tlb->tlb_logstate = TCP_LOG_STATE_OFF;
+			tlb->tlb_loglimit = 0;
+			tlb->tlb_tag[0] = '\0'; /* Default to an empty tag. */
+
 			/*
 			 * Copy the ID to the bucket.
 			 * NB: Don't use strlcpy() unless you are sure
 			 * we've always validated NULL termination.
 			 *
 			 * TODO: When I'm done writing this, see if we
 			 * we have correctly validated NULL termination and
 			 * can use strlcpy(). :-)
 			 */
 			strncpy(tlb->tlb_id, id, TCP_LOG_ID_LEN - 1);
 			tlb->tlb_id[TCP_LOG_ID_LEN - 1] = '\0';
 
 			/*
 			 * Take the refcount for the first node and go ahead
 			 * and lock this. Note that we zero the tlb_mtx
 			 * structure, since 0xdeadc0de flips the right bits
 			 * for the code to think that this mutex has already
 			 * been initialized. :-(
 			 */
 			SLIST_INIT(&tlb->tlb_head);
 			refcount_init(&tlb->tlb_refcnt, 1);
+			tlb->tlb_reqcnt = 1;
 			memset(&tlb->tlb_mtx, 0, sizeof(struct mtx));
 			TCPID_BUCKET_LOCK_INIT(tlb);
 			TCPID_BUCKET_LOCK(tlb);
 			bucket_locked = true;
 
 #define	FREE_NEW_TLB()	do {				\
 	TCPID_BUCKET_LOCK_DESTROY(tlb);			\
 	uma_zfree(tcp_log_bucket_zone, tlb);		\
+	counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1);	\
+	counter_u64_add(tcp_log_pcb_ids_tot, (int64_t)-1);	\
 	bucket_locked = false;				\
 	tlb = NULL;					\
 } while (0)
 			/*
 			 * Relock the INP and make sure we are still
 			 * unassigned.
 			 */
 			INP_WLOCK(inp);
 			RECHECK_INP_CLEAN(FREE_NEW_TLB());
 			if (tp->t_lib != NULL) {
 				FREE_NEW_TLB();
 				goto restart;
 			}
 
 			/* Add the new bucket to the tree. */
 			tmp_tlb = RB_INSERT(tcp_log_id_tree, &tcp_log_id_head,
 			    tlb);
 			KASSERT(tmp_tlb == NULL,
 			    ("%s: Unexpected conflicting bucket (%p) while "
 			    "adding new bucket (%p)", __func__, tmp_tlb, tlb));
 
 			/*
 			 * If we found a conflicting bucket, free the new
 			 * one we made and fall through to use the existing
 			 * bucket.
 			 */
 			if (tmp_tlb != NULL) {
 				FREE_NEW_TLB();
 				INP_WUNLOCK(inp);
 			}
 #undef	FREE_NEW_TLB
 		}
 
 		/* If we found an existing bucket, use it. */
 		if (tmp_tlb != NULL) {
 			tlb = tmp_tlb;
 			TCPID_BUCKET_LOCK(tlb);
 			bucket_locked = true;
 
 			/*
 			 * Relock the INP and make sure we are still
 			 * unassigned.
 			 */
 			INP_UNLOCK_ASSERT(inp);
 			INP_WLOCK(inp);
 			RECHECK_INP();
 			if (tp->t_lib != NULL) {
 				TCPID_BUCKET_UNLOCK(tlb);
 				bucket_locked = false;
 				tlb = NULL;
 				goto restart;
 			}
 
 			/* Take a reference on the bucket. */
 			TCPID_BUCKET_REF(tlb);
+
+			/* Record the request. */
+			tcp_log_increment_reqcnt(tlb);
 		}
 
 		tcp_log_grow_tlb(tlb->tlb_id, tp);
 
 		/* Add the new node to the list. */
 		SLIST_INSERT_HEAD(&tlb->tlb_head, tln, tln_list);
 		tp->t_lib = tlb;
 		tp->t_lin = tln;
+		if (tp->t_lib->tlb_logstate) {
+			/* Clone in any logging */
+
+			tp->t_logstate = tp->t_lib->tlb_logstate;
+		}
+		if (tp->t_lib->tlb_loglimit) {
+			/* The loglimit too */
+
+			tp->t_loglimit = tp->t_lib->tlb_loglimit;
+		}
 		tln = NULL;
 	}
 
 	rv = 0;
 
 done:
 	/* Unlock things, as needed, and return. */
 	INP_WUNLOCK(inp);
 done_noinp:
 	INP_UNLOCK_ASSERT(inp);
 	if (bucket_locked) {
 		TCPID_BUCKET_LOCK_ASSERT(tlb);
 		TCPID_BUCKET_UNLOCK(tlb);
 	} else if (tlb != NULL)
 		TCPID_BUCKET_UNLOCK_ASSERT(tlb);
 	if (tree_locked == TREE_WLOCKED) {
 		TCPID_TREE_WLOCK_ASSERT();
 		TCPID_TREE_WUNLOCK();
 	} else if (tree_locked == TREE_RLOCKED) {
 		TCPID_TREE_RLOCK_ASSERT();
 		TCPID_TREE_RUNLOCK();
 	} else
 		TCPID_TREE_UNLOCK_ASSERT();
 	if (tln != NULL)
 		uma_zfree(tcp_log_node_zone, tln);
 	return (rv);
 }
 
 /*
  * Get the TCP log ID for a TCPCB.
  * Called with INPCB locked.
  * 'buf' must point to a buffer that is at least TCP_LOG_ID_LEN bytes long.
  * Returns number of bytes copied.
  */
 size_t
 tcp_log_get_id(struct tcpcb *tp, char *buf)
 {
 	size_t len;
 
 	INP_LOCK_ASSERT(tp->t_inpcb);
 	if (tp->t_lib != NULL) {
 		len = strlcpy(buf, tp->t_lib->tlb_id, TCP_LOG_ID_LEN);
 		KASSERT(len < TCP_LOG_ID_LEN,
 		    ("%s:%d: tp->t_lib->tlb_id too long (%zu)",
 		    __func__, __LINE__, len));
 	} else {
 		*buf = '\0';
 		len = 0;
 	}
 	return (len);
 }
 
 /*
+ * Get the tag associated with the TCPCB's log ID.
+ * Called with INPCB locked. Returns with it unlocked.
+ * 'buf' must point to a buffer that is at least TCP_LOG_TAG_LEN bytes long.
+ * Returns number of bytes copied.
+ */
+size_t
+tcp_log_get_tag(struct tcpcb *tp, char *buf)
+{
+	struct tcp_log_id_bucket *tlb;
+	size_t len;
+	int tree_locked;
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	tree_locked = TREE_UNLOCKED;
+	tlb = tp->t_lib;
+
+	if (tlb != NULL) {
+		TCPID_BUCKET_REF(tlb);	/* Pin the bucket across the INP unlock. */
+		INP_WUNLOCK(tp->t_inpcb);	/* Lock order: bucket before INP. */
+		TCPID_BUCKET_LOCK(tlb);
+		len = strlcpy(buf, tlb->tlb_tag, TCP_LOG_TAG_LEN);
+		KASSERT(len < TCP_LOG_TAG_LEN,
+		    ("%s:%d: tp->t_lib->tlb_tag too long (%zu)",
+		    __func__, __LINE__, len));
+		if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL))
+			TCPID_BUCKET_UNLOCK(tlb);	/* Not freed; just unlock. */
+
+		if (tree_locked == TREE_WLOCKED) { /* Unref may lock the tree. */
+			TCPID_TREE_WLOCK_ASSERT();
+			TCPID_TREE_WUNLOCK();
+		} else if (tree_locked == TREE_RLOCKED) {
+			TCPID_TREE_RLOCK_ASSERT();
+			TCPID_TREE_RUNLOCK();
+		} else
+			TCPID_TREE_UNLOCK_ASSERT();
+	} else {	/* No log ID on this TCPCB: return an empty string. */
+		INP_WUNLOCK(tp->t_inpcb);
+		*buf = '\0';
+		len = 0;
+	}
+
+	return (len);
+}
+
+/*
  * Get number of connections with the same log ID.
  * Log ID is taken from given TCPCB.
  * Called with INPCB locked.
  */
 u_int
 tcp_log_get_id_cnt(struct tcpcb *tp)
 {
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	return ((tp->t_lib == NULL) ? 0 : tp->t_lib->tlb_refcnt);
 }
 
 #ifdef TCPLOG_DEBUG_RINGBUF
 /*
  * Debug-only reference counting for log entries. The "add" helper
  * expects the previous count to be 0 and the "rem" helper expects it to
  * be 1; anything else panics. This is meant to catch a double-add or a
  * double-free/double-remove of the same entry.
  */
 static inline void
 _tcp_log_entry_refcnt_add(struct tcp_log_mem *log_entry, const char *func,
     int line)
 {
 	int prev;
 
 	/* atomic_fetchadd_int() hands back the value before the add. */
 	prev = atomic_fetchadd_int(&log_entry->tlm_refcnt, 1);
 	if (prev == 0)
 		return;
 	panic("%s:%d: log_entry(%p)->tlm_refcnt is %d (expected 0)",
 	    func, line, log_entry, prev);
 }
 #define	tcp_log_entry_refcnt_add(l)	\
     _tcp_log_entry_refcnt_add((l), __func__, __LINE__)
 
 static inline void
 _tcp_log_entry_refcnt_rem(struct tcp_log_mem *log_entry, const char *func,
     int line)
 {
 	int prev;
 
 	/* Same as above, but the entry must have held exactly one ref. */
 	prev = atomic_fetchadd_int(&log_entry->tlm_refcnt, -1);
 	if (prev == 1)
 		return;
 	panic("%s:%d: log_entry(%p)->tlm_refcnt is %d (expected 1)",
 	    func, line, log_entry, prev);
 }
 #define	tcp_log_entry_refcnt_rem(l)	\
     _tcp_log_entry_refcnt_rem((l), __func__, __LINE__)
 
 #else /* !TCPLOG_DEBUG_RINGBUF */
 
 /* Without the debug option the bookkeeping compiles away entirely. */
 #define	tcp_log_entry_refcnt_add(l)
 #define	tcp_log_entry_refcnt_rem(l)
 
 #endif
 
 /*
  * Give a log entry back to tcp_log_zone. The caller's entry counter is
  * decremented (and checked for underflow) only on INVARIANTS kernels;
  * otherwise 'count' is left untouched.
  */
 static inline void
 tcp_log_free_log_common(struct tcp_log_mem *log_entry, int *count __unused)
 {
 
 	uma_zfree(tcp_log_zone, log_entry);
 #ifdef INVARIANTS
 	*count -= 1;
 	KASSERT(*count >= 0,
 	    ("%s: count unexpectedly negative", __func__));
 #endif
 }
 
 static void
 tcp_log_free_entries(struct tcp_log_stailq *head, int *count)
 {
 	struct tcp_log_mem *entry;
 
 	/* Pop entries off the front of the queue until it is empty. */
 	for (;;) {
 		entry = STAILQ_FIRST(head);
 		if (entry == NULL)
 			break;
 		STAILQ_REMOVE_HEAD(head, tlm_queue);
 		tcp_log_entry_refcnt_rem(entry);
 		tcp_log_free_log_common(entry, count);
 	}
 }
 
 /*
  * Second half of removing a log entry from a TCPCB: free the entry and
  * account for it in tp->t_lognum. Unlinking the entry from the queue is
  * left to the caller.
  */
 static inline void
 tcp_log_remove_log_cleanup(struct tcpcb *tp, struct tcp_log_mem *log_entry)
 {
 	uma_zfree(tcp_log_zone, log_entry);
 	tp->t_lognum -= 1;
 	KASSERT(tp->t_lognum >= 0,
 	    ("%s: tp->t_lognum unexpectedly negative", __func__));
 }
 
 /*
  * Unlink and free the first entry on tp->t_logs. 'log_entry' must be
  * that first entry; this is asserted.
  */
 static inline void
 tcp_log_remove_log_head(struct tcpcb *tp, struct tcp_log_mem *log_entry)
 {
 
 	KASSERT(STAILQ_FIRST(&tp->t_logs) == log_entry,
 	    ("%s: attempt to remove non-HEAD log entry", __func__));
 
 	/* Unlink first, then drop the debug ref and release the memory. */
 	STAILQ_REMOVE_HEAD(&tp->t_logs, tlm_queue);
 	tcp_log_entry_refcnt_rem(log_entry);
 	tcp_log_remove_log_cleanup(tp, log_entry);
 }
 
 #ifdef TCPLOG_DEBUG_RINGBUF
 /*
  * Zone "init" hook handed to uma_zcreate(): start the debug reference
  * count at zero. The count is meant to survive later allocations.
  */
 static int
 tcp_log_zone_init(void *mem, int size, int flags __unused)
 {
 	struct tcp_log_mem *entry;
 
 	KASSERT(size >= sizeof(struct tcp_log_mem),
 	    ("%s: unexpectedly short (%d) allocation", __func__, size));
 	entry = mem;
 	entry->tlm_refcnt = 0;
 	return (0);
 }
 
 /*
  * Zone constructor: an entry being handed out must not be referenced.
  */
 static int
 tcp_log_zone_ctor(void *mem, int size, void *args __unused, int flags __unused)
 {
 	struct tcp_log_mem *entry;
 
 	KASSERT(size >= sizeof(struct tcp_log_mem),
 	    ("%s: unexpectedly short (%d) allocation", __func__, size));
 	entry = mem;
 	if (entry->tlm_refcnt != 0)
 		panic("%s:%d: tlm(%p)->tlm_refcnt is %d (expected 0)",
 		    __func__, __LINE__, entry, entry->tlm_refcnt);
 	return (0);
 }
 
 /*
  * Zone destructor: an entry being returned must not be referenced.
  */
 static void
 tcp_log_zone_dtor(void *mem, int size, void *args __unused)
 {
 	struct tcp_log_mem *entry;
 
 	KASSERT(size >= sizeof(struct tcp_log_mem),
 	    ("%s: unexpectedly short (%d) allocation", __func__, size));
 	entry = mem;
 	if (entry->tlm_refcnt == 0)
 		return;
 	panic("%s:%d: tlm(%p)->tlm_refcnt is %d (expected 0)",
 	    __func__, __LINE__, entry, entry->tlm_refcnt);
 }
 #endif /* TCPLOG_DEBUG_RINGBUF */
 
 /*
  * Do global initialization.
  *
  * Creates the UMA zones for log entries, ID buckets and ID nodes, caps
  * the log entry zone at TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT, allocates the
  * counters, and sets up the ID tree lock plus the expiry queue mutex and
  * callout. The debug counters exist only with TCPLOG_DEBUG_COUNTERS; the
  * two PCB ID counters are always allocated.
  */
 void
 tcp_log_init(void)
 {
 
 	/*
 	 * The log entry zone only gets ctor/dtor/init hooks when the
 	 * ring-buffer debugging option is compiled in.
 	 */
 	tcp_log_zone = uma_zcreate("tcp_log", sizeof(struct tcp_log_mem),
 #ifdef TCPLOG_DEBUG_RINGBUF
 	    tcp_log_zone_ctor, tcp_log_zone_dtor, tcp_log_zone_init,
 #else
 	    NULL, NULL, NULL,
 #endif
 	    NULL, UMA_ALIGN_PTR, 0);
 	/* The return value of uma_zone_set_max() is deliberately ignored. */
 	(void)uma_zone_set_max(tcp_log_zone, TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT);
 	tcp_log_bucket_zone = uma_zcreate("tcp_log_bucket",
 	    sizeof(struct tcp_log_id_bucket), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 	tcp_log_node_zone = uma_zcreate("tcp_log_node",
 	    sizeof(struct tcp_log_id_node), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 #ifdef TCPLOG_DEBUG_COUNTERS
 	tcp_log_queued = counter_u64_alloc(M_WAITOK);
 	tcp_log_que_fail1 = counter_u64_alloc(M_WAITOK);
 	tcp_log_que_fail2 = counter_u64_alloc(M_WAITOK);
 	tcp_log_que_fail3 = counter_u64_alloc(M_WAITOK);
 	tcp_log_que_fail4 = counter_u64_alloc(M_WAITOK);
 	tcp_log_que_fail5 = counter_u64_alloc(M_WAITOK);
 	tcp_log_que_copyout = counter_u64_alloc(M_WAITOK);
 	tcp_log_que_read = counter_u64_alloc(M_WAITOK);
 	tcp_log_que_freed = counter_u64_alloc(M_WAITOK);
 #endif
+	tcp_log_pcb_ids_cur = counter_u64_alloc(M_WAITOK);
+	tcp_log_pcb_ids_tot = counter_u64_alloc(M_WAITOK);
 
 	/* Locks and callout used by the ID tree and the expiry queue. */
 	rw_init_flags(&tcp_id_tree_lock, "TCP ID tree", RW_NEW);
 	mtx_init(&tcp_log_expireq_mtx, "TCP log expireq", NULL, MTX_DEF);
 	callout_init(&tcp_log_expireq_callout, 1);
 }
 
 /*
  * Do per-TCPCB initialization.
  *
  * Sets up the (empty) log queue, copies the current global
  * tcp_log_session_limit into the TCPCB's own t_loglimit, and decides
  * whether this session is selected for automatic capture.
  */
 void
 tcp_log_tcpcbinit(struct tcpcb *tp)
 {
 
 	/* A new TCPCB should start out zero-initialized. */
 	STAILQ_INIT(&tp->t_logs);
 
 	/*
 	 * If we are doing auto-capturing, figure out whether we will capture
 	 * this session.
 	 */
-	if (tcp_log_selectauto()) {
+	tp->t_loglimit = tcp_log_session_limit;
+	if ((tcp_log_auto_all == true) &&
+	    tcp_log_auto_mode &&
+	    tcp_log_selectauto()) {
 		/* Selected: start in the auto mode and remember why. */
 		tp->t_logstate = tcp_log_auto_mode;
 		tp->t_flags2 |= TF2_LOG_AUTO;
 	}
 }
 
 
 /*
  * Remove entries
  *
  * Callout handler for the expiry queue. Walks tcp_log_expireq_head from
  * the front and, for every node whose expiry time is no later than one
  * second from now, removes the node from its bucket, drops the INP
  * reference held for the node, and frees the node and its log records.
  * If nodes remain afterwards, the callout is rescheduled.
  *
  * The expiry queue mutex is dropped while each node is torn down and
  * retaken before looking at the next one.
  */
 static void
 tcp_log_expire(void *unused __unused)
 {
 	struct tcp_log_id_bucket *tlb;
 	struct tcp_log_id_node *tln;
 	sbintime_t expiry_limit;
 	int tree_locked;
 
 	TCPLOG_EXPIREQ_LOCK();
 	if (callout_pending(&tcp_log_expireq_callout)) {
 		/* Callout was reset. */
 		TCPLOG_EXPIREQ_UNLOCK();
 		return;
 	}
 
 	/*
 	 * Process entries until we reach one that expires too far in the
 	 * future. Look one second in the future.
 	 */
 	expiry_limit = getsbinuptime() + SBT_1S;
 	tree_locked = TREE_UNLOCKED;
 
 	while ((tln = STAILQ_FIRST(&tcp_log_expireq_head)) != NULL &&
 	    tln->tln_expiretime <= expiry_limit) {
 		if (!callout_active(&tcp_log_expireq_callout)) {
 			/*
 			 * Callout was stopped. I guess we should
 			 * just quit at this point.
 			 */
 			TCPLOG_EXPIREQ_UNLOCK();
 			return;
 		}
 
 		/*
 		 * Remove the node from the head of the list and unlock
 		 * the list. Change the expiry time to SBT_MAX as a signal
 		 * to other threads that we now own this.
 		 */
 		STAILQ_REMOVE_HEAD(&tcp_log_expireq_head, tln_expireq);
 		tln->tln_expiretime = SBT_MAX;
 		TCPLOG_EXPIREQ_UNLOCK();
 
 		/*
 		 * Remove the node from the bucket.
 		 *
 		 * When tcp_log_remove_id_node() returns true we release the
 		 * tree lock it reports through tree_locked. (Presumably the
 		 * bucket lock is dealt with inside that function; it is not
 		 * released here. Verify against its definition.)
 		 */
 		tlb = tln->tln_bucket;
 		TCPID_BUCKET_LOCK(tlb);
 		if (tcp_log_remove_id_node(NULL, NULL, tlb, tln, &tree_locked)) {
 			tcp_log_id_validate_tree_lock(tree_locked);
 			if (tree_locked == TREE_WLOCKED)
 				TCPID_TREE_WUNLOCK();
 			else
 				TCPID_TREE_RUNLOCK();
 			tree_locked = TREE_UNLOCKED;
 		}
 
 		/*
 		 * Drop the INP reference. The INP is only unlocked when
 		 * in_pcbrele_wlocked() returns false.
 		 */
 		INP_WLOCK(tln->tln_inp);
 		if (!in_pcbrele_wlocked(tln->tln_inp))
 			INP_WUNLOCK(tln->tln_inp);
 
 		/* Free the log records. */
 		tcp_log_free_entries(&tln->tln_entries, &tln->tln_count);
 
 		/* Free the node. */
 		uma_zfree(tcp_log_node_zone, tln);
 
 		/* Relock the expiry queue. */
 		TCPLOG_EXPIREQ_LOCK();
 	}
 
 	/*
 	 * We've expired all the entries we can. Do we need to reschedule
 	 * ourselves?
 	 */
 	callout_deactivate(&tcp_log_expireq_callout);
 	if (tln != NULL) {
 		/*
 		 * Get max(now + TCP_LOG_EXPIRE_INTVL, tln->tln_expiretime) and
 		 * set the next callout to that. (This helps ensure we generally
 		 * run the callout no more often than desired.)
 		 */
 		expiry_limit = getsbinuptime() + TCP_LOG_EXPIRE_INTVL;
 		if (expiry_limit < tln->tln_expiretime)
 			expiry_limit = tln->tln_expiretime;
 		callout_reset_sbt(&tcp_log_expireq_callout, expiry_limit,
 		    SBT_1S, tcp_log_expire, NULL, C_ABSOLUTE);
 	}
 
 	/* We're done. */
 	TCPLOG_EXPIREQ_UNLOCK();
 	return;
 }
 
 /*
  * Move log data from the TCPCB to a new node. This will reset the TCPCB log
  * entries and log count; however, it will not touch other things from the
  * TCPCB (e.g. t_lin, t_lib).
  *
  * Besides the entries and their count, the node records the connection's
  * endpoint information, its address family and the TCPCB's current bucket.
  *
  * NOTE: Must hold a lock on the INP.
  */
 static void
 tcp_log_move_tp_to_node(struct tcpcb *tp, struct tcp_log_id_node *tln)
 {
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	tln->tln_ie = tp->t_inpcb->inp_inc.inc_ie;
 	if (tp->t_inpcb->inp_inc.inc_flags & INC_ISIPV6)
 		tln->tln_af = AF_INET6;
 	else
 		tln->tln_af = AF_INET;
 
 	/*
 	 * Transfer the queue with the queue macros rather than by copying
 	 * the head structure. A copied head of an empty STAILQ keeps a
 	 * "last" pointer that refers back into the TCPCB, which would be
 	 * left dangling once the TCPCB goes away.
 	 */
 	STAILQ_INIT(&tln->tln_entries);
 	STAILQ_CONCAT(&tln->tln_entries, &tp->t_logs);
 	tln->tln_count = tp->t_lognum;
 	tln->tln_bucket = tp->t_lib;
 
 	/* Clear information from the PCB. */
 	STAILQ_INIT(&tp->t_logs);
 	tp->t_lognum = 0;
 }
 
 /*
  * Do per-TCPCB cleanup
  *
  * Logs a final TCP_LOG_CONNEND event, tries to dump the buffer for the
  * auto-dump and continual states, and then disposes of whatever log
  * entries remain: they are either handed to the TCPCB's ID node (which is
  * queued for later expiry) or freed on the spot. Logging is switched off
  * on the way out. Must be called with the INP write-locked.
  */
 void
 tcp_log_tcpcbfini(struct tcpcb *tp)
 {
 	struct tcp_log_id_node *tln, *tln_first;
 	struct tcp_log_mem *log_entry;
 	sbintime_t callouttime;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
+	TCP_LOG_EVENT(tp, NULL, NULL, NULL, TCP_LOG_CONNEND, 0, 0, NULL, false);
+
 	/*
 	 * If we were gathering packets to be automatically dumped, try to do
 	 * it now. If this succeeds, the log information in the TCPCB will be
 	 * cleared. Otherwise, we'll handle the log information as we do
 	 * for other states.
 	 */
 	switch(tp->t_logstate) {
 	case TCP_LOG_STATE_HEAD_AUTO:
 		(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head",
 		    M_NOWAIT, false);
 		break;
 	case TCP_LOG_STATE_TAIL_AUTO:
 		(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail",
 		    M_NOWAIT, false);
 		break;
 	case TCP_LOG_STATE_CONTINUAL:
 		(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual",
 		    M_NOWAIT, false);
 		break;
 	}
 
 	/*
 	 * There are two ways we could keep logs: per-socket or per-ID. If
 	 * we are tracking logs with an ID, then the logs survive the
 	 * destruction of the TCPCB.
 	 *
 	 * If the TCPCB is associated with an ID node, move the logs from the
 	 * TCPCB to the ID node. In theory, this is safe, for reasons which I
 	 * will now explain for my own benefit when I next need to figure out
 	 * this code. :-)
 	 *
 	 * We own the INP lock. Therefore, no one else can change the contents
 	 * of this node (Rule C). Further, no one can remove this node from
 	 * the bucket while we hold the lock (Rule D). Basically, no one can
 	 * mess with this node. That leaves two states in which we could be:
 	 *
 	 * 1. Another thread is currently waiting to acquire the INP lock, with
 	 *    plans to do something with this node. When we drop the INP lock,
 	 *    they will have a chance to do that. They will recheck the
 	 *    tln_closed field (see note to Rule C) and then acquire the
 	 *    bucket lock before proceeding further.
 	 *
 	 * 2. Another thread will try to acquire a lock at some point in the
 	 *    future. If they try to acquire a lock before we set the
 	 *    tln_closed field, they will follow state #1. If they try to
 	 *    acquire a lock after we set the tln_closed field, they will be
 	 *    able to make changes to the node, at will, following Rule C.
 	 *
 	 * Therefore, we currently own this node and can make any changes
 	 * we want. But, as soon as we set the tln_closed field to true, we
 	 * have effectively dropped our lock on the node. (For this reason, we
 	 * also need to make sure our writes are ordered correctly. An atomic
 	 * operation with "release" semantics should be sufficient.)
 	 */
 
 	if (tp->t_lin != NULL) {
 		/* Copy the relevant information to the log entry. */
 		tln = tp->t_lin;
 		KASSERT(tln->tln_inp == tp->t_inpcb,
 		    ("%s: Mismatched inp (tln->tln_inp=%p, tp->t_inpcb=%p)",
 		    __func__, tln->tln_inp, tp->t_inpcb));
 		tcp_log_move_tp_to_node(tp, tln);
 
 		/* Clear information from the PCB. */
 		tp->t_lin = NULL;
 		tp->t_lib = NULL;
 
 		/*
 		 * Take a reference on the INP. This ensures that the INP
 		 * remains valid while the node is on the expiry queue. This
 		 * ensures the INP is valid for other threads that may be
 		 * racing to lock this node when we move it to the expire
 		 * queue.
 		 */
 		in_pcbref(tp->t_inpcb);
 
 		/*
 		 * Store the entry on the expiry list. The exact behavior
 		 * depends on whether we have entries to keep. If so, we
 		 * put the entry at the tail of the list and expire in
 		 * TCP_LOG_EXPIRE_TIME. Otherwise, we expire "now" and put
 		 * the entry at the head of the list. (Handling the cleanup
 		 * via the expiry timer lets us avoid locking messy-ness here.)
 		 */
 		tln->tln_expiretime = getsbinuptime();
 		TCPLOG_EXPIREQ_LOCK();
 		if (tln->tln_count) {
 			tln->tln_expiretime += TCP_LOG_EXPIRE_TIME;
 			if (STAILQ_EMPTY(&tcp_log_expireq_head) &&
 			    !callout_active(&tcp_log_expireq_callout)) {
 				/*
 				 * We are adding the first entry and a callout
 				 * is not currently scheduled; therefore, we
 				 * need to schedule one.
 				 */
 				callout_reset_sbt(&tcp_log_expireq_callout,
 				    tln->tln_expiretime, SBT_1S, tcp_log_expire,
 				    NULL, C_ABSOLUTE);
 			}
 			STAILQ_INSERT_TAIL(&tcp_log_expireq_head, tln,
 			    tln_expireq);
 		} else {
 			/*
 			 * Nothing to keep: the node itself expires "now",
 			 * but the callout is asked to run one expiry
 			 * interval from now.
 			 */
 			callouttime = tln->tln_expiretime +
 			    TCP_LOG_EXPIRE_INTVL;
 			tln_first = STAILQ_FIRST(&tcp_log_expireq_head);
 
 			if ((tln_first == NULL ||
 			    callouttime < tln_first->tln_expiretime) &&
 			    (callout_pending(&tcp_log_expireq_callout) ||
 			    !callout_active(&tcp_log_expireq_callout))) {
 				/*
 				 * The list is empty, or we want to run the
 				 * expire code before the first entry's timer
 				 * fires. Also, we are in a case where a callout
 				 * is not actively running. We want to reset
 				 * the callout to occur sooner.
 				 */
 				callout_reset_sbt(&tcp_log_expireq_callout,
 				    callouttime, SBT_1S, tcp_log_expire, NULL,
 				    C_ABSOLUTE);
 			}
 
 			/*
 			 * Insert to the head, or just after the head, as
 			 * appropriate. (This might result in small
 			 * mis-orderings as a bunch of "expire now" entries
 			 * gather at the start of the list, but that should
 			 * not produce big problems, since the expire timer
 			 * will walk through all of them.)
 			 */
 			if (tln_first == NULL ||
 			    tln->tln_expiretime < tln_first->tln_expiretime)
 				STAILQ_INSERT_HEAD(&tcp_log_expireq_head, tln,
 				    tln_expireq);
 			else
 				STAILQ_INSERT_AFTER(&tcp_log_expireq_head,
 				    tln_first, tln, tln_expireq);
 		}
 		TCPLOG_EXPIREQ_UNLOCK();
 
 		/*
 		 * We are done messing with the tln. After this point, we
 		 * can't touch it. (Note that the "release" semantics should
 		 * be included with the TCPLOG_EXPIREQ_UNLOCK() call above.
 		 * Therefore, they should be unnecessary here. However, it
 		 * seems like a good idea to include them anyway, since we
 		 * really are releasing a lock here.)
 		 */
 		atomic_store_rel_int(&tln->tln_closed, 1);
 	} else {
 		/* Remove log entries. */
 		while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL)
 			tcp_log_remove_log_head(tp, log_entry);
 		KASSERT(tp->t_lognum == 0,
 		    ("%s: After freeing entries, tp->t_lognum=%d (expected 0)",
 			__func__, tp->t_lognum));
 	}
 
 	/*
 	 * Change the log state to off (just in case anything tries to sneak
 	 * in a last-minute log).
 	 */
 	tp->t_logstate = TCP_LOG_STATE_OFF;
 }
 
+/*
+ * Free every log entry queued on the TCPCB and turn logging off for the
+ * connection. Must be called with the INP write-locked.
+ */
+static void
+tcp_log_purge_tp_logbuf(struct tcpcb *tp)
+{
+	struct tcp_log_mem *log_entry;
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	/* With an empty queue this loop is simply a no-op. */
+	while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL)
+		tcp_log_remove_log_head(tp, log_entry);
+	KASSERT(tp->t_lognum == 0,
+		("%s: After freeing entries, tp->t_lognum=%d (expected 0)",
+		 __func__, tp->t_lognum));
+
+	/*
+	 * Turn logging off even when there was nothing to free, so that a
+	 * connection with an empty buffer stops logging as well.
+	 */
+	tp->t_logstate = TCP_LOG_STATE_OFF;
+}
+
 /*
  * This logs an event for a TCP socket. Normally, this is called via
  * TCP_LOG_EVENT or TCP_LOG_EVENT_VERBOSE. See the documentation for
  * TCP_LOG_EVENT().
  *
  * Arguments, as used below:
  *   tp            - the connection; its INP must be write-locked.
  *   th            - optional TCP header. When non-NULL, the header and
  *                   any options following it are copied into the record.
  *   rxbuf, txbuf  - optional socket buffers whose sb_acc/sb_ccc values
  *                   are recorded.
  *   eventid, errornum, len - stored in the record as given.
  *   stackinfo     - optional stack-specific data, copied verbatim.
  *   th_hostorder  - when set, the copied header is run through
  *                   tcp_fields_to_net().
  *   output_caller, func, line - verbose information. Either both func
  *                   and line are supplied or neither is (asserted).
  *   itv           - optional timestamp; getmicrouptime() is used when
  *                   it is NULL.
  *
  * Returns a pointer to the log buffer inside the queued entry, or NULL
  * when nothing was logged.
  */
 
 struct tcp_log_buffer *
 tcp_log_event_(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf,
     struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len,
     union tcp_log_stackspecific *stackinfo, int th_hostorder,
     const char *output_caller, const char *func, int line, const struct timeval *itv)
 {
 	struct tcp_log_mem *log_entry;
 	struct tcp_log_buffer *log_buf;
 	int attempt_count = 0;
 	struct tcp_log_verbose *log_verbose;
 	uint32_t logsn;
 
 	KASSERT((func == NULL && line == 0) || (func != NULL && line > 0),
 	    ("%s called with inconsistent func (%p) and line (%d) arguments",
 		__func__, func, line));
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
-
+	if (tcp_disable_all_bb_logs) {
+		/*
+		 * The global shutdown logging
+		 * switch has been thrown. Call
+		 * the purge function to free the
+		 * queued log entries and turn
+		 * logging off.
+		 */
+		tcp_log_purge_tp_logbuf(tp);
+		return (NULL);
+	}
 	KASSERT(tp->t_logstate == TCP_LOG_STATE_HEAD ||
 	    tp->t_logstate == TCP_LOG_STATE_TAIL ||
 	    tp->t_logstate == TCP_LOG_STATE_CONTINUAL ||
 	    tp->t_logstate == TCP_LOG_STATE_HEAD_AUTO ||
 	    tp->t_logstate == TCP_LOG_STATE_TAIL_AUTO,
 	    ("%s called with unexpected tp->t_logstate (%d)", __func__,
 		tp->t_logstate));
 
 	/*
 	 * Get the serial number. We do this early so it will
 	 * increment even if we end up skipping the log entry for some
 	 * reason.
 	 */
 	logsn = tp->t_logsn++;
 
 	/*
 	 * Can we get a new log entry? If so, increment the lognum counter
 	 * here.
 	 */
 retry:
-	if (tp->t_lognum < tcp_log_session_limit) {
+	if (tp->t_lognum < tp->t_loglimit) {
 		if ((log_entry = uma_zalloc(tcp_log_zone, M_NOWAIT)) != NULL)
 			tp->t_lognum++;
 	} else
 		log_entry = NULL;
 
 	/* Do we need to try to reuse? */
 	if (log_entry == NULL) {
 		/*
 		 * Sacrifice auto-logged sessions without a log ID if
 		 * tcp_log_auto_all is false. (If they don't have a log
 		 * ID by now, it is probable that either they won't get one
 		 * or we are resource-constrained.)
 		 */
 		if (tp->t_lib == NULL && (tp->t_flags2 & TF2_LOG_AUTO) &&
 		    !tcp_log_auto_all) {
 			if (tcp_log_state_change(tp, TCP_LOG_STATE_CLEAR)) {
 #ifdef INVARIANTS
 				panic("%s:%d: tcp_log_state_change() failed "
 				    "to set tp %p to TCP_LOG_STATE_CLEAR",
 				    __func__, __LINE__, tp);
 #endif
 				tp->t_logstate = TCP_LOG_STATE_OFF;
 			}
 			return (NULL);
 		}
 		/*
 		 * If we are in TCP_LOG_STATE_HEAD_AUTO state, try to dump
 		 * the buffers. If successful, deactivate tracing. Otherwise,
 		 * leave it active so we will retry.
 		 */
 		if (tp->t_logstate == TCP_LOG_STATE_HEAD_AUTO &&
 		    !tcp_log_dump_tp_logbuf(tp, "auto-dumped from head",
 		    M_NOWAIT, false)) {
 			tp->t_logstate = TCP_LOG_STATE_OFF;
 			return(NULL);
 		} else if ((tp->t_logstate == TCP_LOG_STATE_CONTINUAL) &&
 		    !tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual",
 		    M_NOWAIT, false)) {
 			/*
 			 * The dump went through. Go back and try the
 			 * allocation again, but only once per call.
 			 */
 			if (attempt_count == 0) {
 				attempt_count++;
 				goto retry;
 			}
 #ifdef TCPLOG_DEBUG_COUNTERS
 			counter_u64_add(tcp_log_que_fail4, 1);
 #endif
 			return(NULL);
 		} else if (tp->t_logstate == TCP_LOG_STATE_HEAD_AUTO)
 			return(NULL);
 
 		/* If in HEAD state, just deactivate the tracing and return. */
 		if (tp->t_logstate == TCP_LOG_STATE_HEAD) {
 			tp->t_logstate = TCP_LOG_STATE_OFF;
 			return(NULL);
 		}
 
 		/*
 		 * Get a buffer to reuse. If that fails, just give up.
 		 * (We can't log anything without a buffer in which to
 		 * put it.)
 		 *
 		 * Note that we don't change the t_lognum counter
 		 * here. Because we are re-using the buffer, the total
 		 * number won't change.
 		 */
 		if ((log_entry = STAILQ_FIRST(&tp->t_logs)) == NULL)
 			return(NULL);
 		STAILQ_REMOVE_HEAD(&tp->t_logs, tlm_queue);
 		tcp_log_entry_refcnt_rem(log_entry);
 	}
 
 	KASSERT(log_entry != NULL,
 	    ("%s: log_entry unexpectedly NULL", __func__));
 
 	/* Extract the log buffer and verbose buffer pointers. */
 	log_buf = &log_entry->tlm_buf;
 	log_verbose = &log_entry->tlm_v;
 
 	/* Basic entries. */
 	if (itv == NULL)
 		getmicrouptime(&log_buf->tlb_tv);
 	else
 		memcpy(&log_buf->tlb_tv, itv, sizeof(struct timeval));
 	log_buf->tlb_ticks = ticks;
 	log_buf->tlb_sn = logsn;
 	log_buf->tlb_stackid = tp->t_fb->tfb_id;
 	log_buf->tlb_eventid = eventid;
 	log_buf->tlb_eventflags = 0;
 	log_buf->tlb_errno = errornum;
 
 	/* Socket buffers */
 	if (rxbuf != NULL) {
 		log_buf->tlb_eventflags |= TLB_FLAG_RXBUF;
 		log_buf->tlb_rxbuf.tls_sb_acc = rxbuf->sb_acc;
 		log_buf->tlb_rxbuf.tls_sb_ccc = rxbuf->sb_ccc;
 		log_buf->tlb_rxbuf.tls_sb_spare = 0;
 	}
 	if (txbuf != NULL) {
 		log_buf->tlb_eventflags |= TLB_FLAG_TXBUF;
 		log_buf->tlb_txbuf.tls_sb_acc = txbuf->sb_acc;
 		log_buf->tlb_txbuf.tls_sb_ccc = txbuf->sb_ccc;
 		log_buf->tlb_txbuf.tls_sb_spare = 0;
 	}
 	/*
 	 * Copy values from tp to the log entry. COPY_STAT(f) copies tp->f
 	 * to tlb_f; COPY_STAT_T(f) copies tp->t_f to tlb_f.
 	 */
 #define	COPY_STAT(f)	log_buf->tlb_ ## f = tp->f
 #define	COPY_STAT_T(f)	log_buf->tlb_ ## f = tp->t_ ## f
 	COPY_STAT_T(state);
 	COPY_STAT_T(starttime);
 	COPY_STAT(iss);
 	COPY_STAT_T(flags);
 	COPY_STAT(snd_una);
 	COPY_STAT(snd_max);
 	COPY_STAT(snd_cwnd);
 	COPY_STAT(snd_nxt);
 	COPY_STAT(snd_recover);
 	COPY_STAT(snd_wnd);
 	COPY_STAT(snd_ssthresh);
 	COPY_STAT_T(srtt);
 	COPY_STAT_T(rttvar);
 	COPY_STAT(rcv_up);
 	COPY_STAT(rcv_adv);
 	COPY_STAT(rcv_nxt);
 	COPY_STAT(sack_newdata);
 	COPY_STAT(rcv_wnd);
 	COPY_STAT_T(dupacks);
 	COPY_STAT_T(segqlen);
 	COPY_STAT(snd_numholes);
 	COPY_STAT(snd_scale);
 	COPY_STAT(rcv_scale);
 #undef COPY_STAT
 #undef COPY_STAT_T
 	log_buf->tlb_flex1 = 0;
 	log_buf->tlb_flex2 = 0;
 	/* Copy stack-specific info. */
 	if (stackinfo != NULL) {
 		memcpy(&log_buf->tlb_stackinfo, stackinfo,
 		    sizeof(log_buf->tlb_stackinfo));
 		log_buf->tlb_eventflags |= TLB_FLAG_STACKINFO;
 	}
 
 	/* The packet */
 	log_buf->tlb_len = len;
 	if (th) {
 		int optlen;
 
 		log_buf->tlb_eventflags |= TLB_FLAG_HDR;
 		log_buf->tlb_th = *th;
 		if (th_hostorder)
 			tcp_fields_to_net(&log_buf->tlb_th);
 		/* Anything in the header beyond struct tcphdr is options. */
 		optlen = (th->th_off << 2) - sizeof (struct tcphdr);
 		if (optlen > 0)
 			memcpy(log_buf->tlb_opts, th + 1, optlen);
 	}
 
 	/* Verbose information */
 	if (func != NULL) {
 		log_buf->tlb_eventflags |= TLB_FLAG_VERBOSE;
 		if (output_caller != NULL)
 			strlcpy(log_verbose->tlv_snd_frm, output_caller,
 			    TCP_FUNC_LEN);
 		else
 			*log_verbose->tlv_snd_frm = 0;
 		strlcpy(log_verbose->tlv_trace_func, func, TCP_FUNC_LEN);
 		log_verbose->tlv_trace_line = line;
 	}
 
 	/* Insert the new log at the tail. */
 	STAILQ_INSERT_TAIL(&tp->t_logs, log_entry, tlm_queue);
 	tcp_log_entry_refcnt_add(log_entry);
 	return (log_buf);
 }
 
 /*
  * Change the logging state for a TCPCB. Returns 0 on success or an
  * error code on failure.
  *
  * TCP_LOG_STATE_CLEAR frees all queued entries and then behaves like
  * TCP_LOG_STATE_OFF. An unrecognized state yields EINVAL and leaves the
  * TCPCB untouched. On success the TF2_LOG_AUTO flag is always cleared.
  * Must be called with the INP write-locked.
  */
 int
 tcp_log_state_change(struct tcpcb *tp, int state)
 {
 	struct tcp_log_mem *log_entry;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	switch(state) {
 	case TCP_LOG_STATE_CLEAR:
 		while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL)
 			tcp_log_remove_log_head(tp, log_entry);
 		/* Fall through */
 
 	case TCP_LOG_STATE_OFF:
 		tp->t_logstate = TCP_LOG_STATE_OFF;
 		break;
 
 	case TCP_LOG_STATE_TAIL:
 	case TCP_LOG_STATE_HEAD:
 	case TCP_LOG_STATE_CONTINUAL:
 	case TCP_LOG_STATE_HEAD_AUTO:
 	case TCP_LOG_STATE_TAIL_AUTO:
 		tp->t_logstate = state;
 		break;
 
 	default:
 		return (EINVAL);
 	}
-
+	if (tcp_disable_all_bb_logs) {
+		/* We are prohibited from doing any logs */
+		tp->t_logstate = TCP_LOG_STATE_OFF;
+	}	
 	tp->t_flags2 &= ~(TF2_LOG_AUTO);
 
 	return (0);
 }
 
 /*
  * If tcp_drain() is called, flush half the log entries.
  *
  * Which half goes depends on the logging state: the "head" states drop
  * the newest entries, the continual state attempts a dump instead, and
  * every other state drops the oldest entries. Nothing happens when fewer
  * than two entries are queued. Must be called with the INP write-locked.
  */
 void
 tcp_log_drain(struct tcpcb *tp)
 {
 	struct tcp_log_mem *log_entry, *next;
 	int target, skip;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	if ((target = tp->t_lognum / 2) == 0)
 		return;
 
 	/*
 	 * If we are logging the "head" packets, we want to discard
 	 * from the tail of the queue. Otherwise, we want to discard
 	 * from the head.
 	 */
 	if (tp->t_logstate == TCP_LOG_STATE_HEAD ||
 	    tp->t_logstate == TCP_LOG_STATE_HEAD_AUTO) {
 		/* Walk to the last entry we intend to keep. */
 		skip = tp->t_lognum - target;
 		STAILQ_FOREACH(log_entry, &tp->t_logs, tlm_queue)
 			if (!--skip)
 				break;
 		KASSERT(log_entry != NULL,
 		    ("%s: skipped through all entries!", __func__));
 		if (log_entry == NULL)
 			return;
 		/* Free everything queued after it. */
 		while ((next = STAILQ_NEXT(log_entry, tlm_queue)) != NULL) {
 			STAILQ_REMOVE_AFTER(&tp->t_logs, log_entry, tlm_queue);
 			tcp_log_entry_refcnt_rem(next);
 			tcp_log_remove_log_cleanup(tp, next);
 #ifdef INVARIANTS
 			target--;
 #endif
 		}
 		KASSERT(target == 0,
 		    ("%s: After removing from tail, target was %d", __func__,
 			target));
 	} else if (tp->t_logstate == TCP_LOG_STATE_CONTINUAL) {
 		(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual",
 		    M_NOWAIT, false);
 	} else {
 		while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL &&
 		    target--)
 			tcp_log_remove_log_head(tp, log_entry);
 		KASSERT(target <= 0,
 		    ("%s: After removing from head, target was %d", __func__,
 			target));
 		/* Report the value the message names, not 'target'. */
 		KASSERT(tp->t_lognum > 0,
 		    ("%s: After removing from head, tp->t_lognum was %d",
 			__func__, tp->t_lognum));
 		KASSERT(log_entry != NULL,
 		    ("%s: After removing from head, the tailq was empty",
 			__func__));
 	}
 }
 
 static inline int
 tcp_log_copyout(struct sockopt *sopt, void *src, void *dst, size_t len)
 {
 
 	/*
 	 * Without a thread attached to the sockopt the destination is
 	 * written with a plain bcopy(), which cannot fail; otherwise the
 	 * data goes through copyout() and its result is returned.
 	 */
 	if (sopt->sopt_td == NULL) {
 		bcopy(src, dst, len);
 		return (0);
 	}
 	return (copyout(src, dst, len));
 }
 
 /*
  * Copy every entry on 'log_tailqp' to the buffer described by 'sopt',
  * starting at sopt->sopt_val. Each record is written as a full
  * struct tcp_log_buffer, followed by a struct tcp_log_verbose when the
  * entry carries verbose information.
  *
  * 'count' is the number of entries the caller expects on the list; it is
  * only used for assertions. On return '*end' points just past the last
  * record that was completely written. Returns 0 on success or the first
  * error reported by the copy routine, in which case copying stops.
  */
 static int
 tcp_log_logs_to_buf(struct sockopt *sopt, struct tcp_log_stailq *log_tailqp,
     struct tcp_log_buffer **end, int count)
 {
 	struct tcp_log_buffer *out_entry;
 	struct tcp_log_mem *log_entry;
 	size_t entrysize;
 	int error;
 #ifdef INVARIANTS
 	int orig_count = count;
 #endif
 
 	/* Copy the data out. */
 	error = 0;
 	out_entry = (struct tcp_log_buffer *) sopt->sopt_val;
 	STAILQ_FOREACH(log_entry, log_tailqp, tlm_queue) {
 		count--;
 		KASSERT(count >= 0,
 		    ("%s:%d: Exceeded expected count (%d) processing list %p",
 		    __func__, __LINE__, orig_count, log_tailqp));
 
 #ifdef TCPLOG_DEBUG_COUNTERS
 		counter_u64_add(tcp_log_que_copyout, 1);
 #endif
 
 		/*
 		 * Skip copying out the header if it isn't present.
 		 * Instead, copy out zeros (to ensure we don't leak info).
 		 * TODO: Make sure we truly do zero everything we don't
 		 * explicitly set.
 		 */
 		if (log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_HDR)
 			entrysize = sizeof(struct tcp_log_buffer);
 		else
 			entrysize = offsetof(struct tcp_log_buffer, tlb_th);
 		error = tcp_log_copyout(sopt, &log_entry->tlm_buf, out_entry,
 		    entrysize);
 		if (error)
 			break;
 		if (!(log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_HDR)) {
 			error = tcp_log_copyout(sopt, zerobuf,
 			    ((uint8_t *)out_entry) + entrysize,
 			    sizeof(struct tcp_log_buffer) - entrysize);
 			/*
 			 * Stop here on failure. Otherwise the error would
 			 * be overwritten by the next copy and lost.
 			 */
 			if (error)
 				break;
 		}
 
 		/*
 		 * Copy out the verbose bit, if needed. Either way,
 		 * increment the output pointer the correct amount.
 		 */
 		if (log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_VERBOSE) {
 			error = tcp_log_copyout(sopt, &log_entry->tlm_v,
 			    out_entry->tlb_verbose,
 			    sizeof(struct tcp_log_verbose));
 			if (error)
 				break;
 			out_entry = (struct tcp_log_buffer *)
 			    (((uint8_t *) (out_entry + 1)) +
 			    sizeof(struct tcp_log_verbose));
 		} else
 			out_entry++;
 	}
 	*end = out_entry;
 	KASSERT(error || count == 0,
 	    ("%s:%d: Less than expected count (%d) processing list %p"
 	    " (%d remain)", __func__, __LINE__, orig_count,
 	    log_tailqp, count));
 
 	return (error);
 }
 
 /*
  * Copy out the buffer. Note that we do incremental copying, so
  * sooptcopyout() won't work. However, the goal is to produce the same
  * end result as if we copied in the entire user buffer, updated it,
  * and then used sooptcopyout() to copy it out.
  *
  * NOTE: This should be called with a write lock on the PCB; however,
  * the function will drop it after it extracts the data from the TCPCB.
  */
 int
 tcp_log_getlogbuf(struct sockopt *sopt, struct tcpcb *tp)
 {
 	struct tcp_log_stailq log_tailq;
 	struct tcp_log_mem *log_entry, *log_next;
 	struct tcp_log_buffer *out_entry;
 	struct inpcb *inp;
 	size_t outsize, entrysize;
 	int error, outnum;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	inp = tp->t_inpcb;
 
 	/*
 	 * Determine which log entries will fit in the buffer. As an
 	 * optimization, skip this if all the entries will clearly fit
 	 * in the buffer. (However, get an exact size if we are using
 	 * INVARIANTS.)
 	 */
 #ifndef INVARIANTS
 	if (sopt->sopt_valsize / (sizeof(struct tcp_log_buffer) +
 	    sizeof(struct tcp_log_verbose)) >= tp->t_lognum) {
 		log_entry = STAILQ_LAST(&tp->t_logs, tcp_log_mem, tlm_queue);
 		log_next = NULL;
 		outsize = 0;
 		outnum = tp->t_lognum;
 	} else {
 #endif
 		outsize = outnum = 0;
 		log_entry = NULL;
 		STAILQ_FOREACH(log_next, &tp->t_logs, tlm_queue) {
 			entrysize = sizeof(struct tcp_log_buffer);
 			if (log_next->tlm_buf.tlb_eventflags &
 			    TLB_FLAG_VERBOSE)
 				entrysize += sizeof(struct tcp_log_verbose);
 			if ((sopt->sopt_valsize - outsize) < entrysize)
 				break;
 			outsize += entrysize;
 			outnum++;
 			log_entry = log_next;
 		}
 		KASSERT(outsize <= sopt->sopt_valsize,
 		    ("%s: calculated output size (%zu) greater than available"
 			"space (%zu)", __func__, outsize, sopt->sopt_valsize));
 #ifndef INVARIANTS
 	}
 #endif
 
 	/*
 	 * Copy traditional sooptcopyout() behavior: if sopt->sopt_val
 	 * is NULL, silently skip the copy. However, in this case, we
 	 * will leave the list alone and return. Functionally, this
 	 * gives userspace a way to poll for an approximate buffer
 	 * size they will need to get the log entries.
 	 */
 	if (sopt->sopt_val == NULL) {
 		INP_WUNLOCK(inp);
 		if (outsize == 0) {
 			outsize = outnum * (sizeof(struct tcp_log_buffer) +
 			    sizeof(struct tcp_log_verbose));
 		}
 		if (sopt->sopt_valsize > outsize)
 			sopt->sopt_valsize = outsize;
 		return (0);
 	}
 
 	/*
 	 * Break apart the list. We'll save the ones we want to copy
 	 * out locally and remove them from the TCPCB list. We can
 	 * then drop the INPCB lock while we do the copyout.
 	 *
 	 * There are roughly three cases:
 	 * 1. There was nothing to copy out. That's easy: drop the
 	 * lock and return.
 	 * 2. We are copying out the entire list. Again, that's easy:
 	 * move the whole list.
 	 * 3. We are copying out a partial list. That's harder. We
 	 * need to update the list book-keeping entries.
 	 */
 	if (log_entry != NULL && log_next == NULL) {
 		/* Move entire list. */
 		KASSERT(outnum == tp->t_lognum,
 		    ("%s:%d: outnum (%d) should match tp->t_lognum (%d)",
 			__func__, __LINE__, outnum, tp->t_lognum));
 		log_tailq = tp->t_logs;
 		tp->t_lognum = 0;
 		STAILQ_INIT(&tp->t_logs);
 	} else if (log_entry != NULL) {
 		/* Move partial list. */
 		KASSERT(outnum < tp->t_lognum,
 		    ("%s:%d: outnum (%d) not less than tp->t_lognum (%d)",
 			__func__, __LINE__, outnum, tp->t_lognum));
 		STAILQ_FIRST(&log_tailq) = STAILQ_FIRST(&tp->t_logs);
 		STAILQ_FIRST(&tp->t_logs) = STAILQ_NEXT(log_entry, tlm_queue);
 		KASSERT(STAILQ_NEXT(log_entry, tlm_queue) != NULL,
 		    ("%s:%d: tp->t_logs is unexpectedly shorter than expected"
 		    "(tp: %p, log_tailq: %p, outnum: %d, tp->t_lognum: %d)",
 		    __func__, __LINE__, tp, &log_tailq, outnum, tp->t_lognum));
 		STAILQ_NEXT(log_entry, tlm_queue) = NULL;
 		log_tailq.stqh_last = &STAILQ_NEXT(log_entry, tlm_queue);
 		tp->t_lognum -= outnum;
 	} else
 		STAILQ_INIT(&log_tailq);
 
 	/* Drop the PCB lock. */
 	INP_WUNLOCK(inp);
 
 	/* Copy the data out. */
 	error = tcp_log_logs_to_buf(sopt, &log_tailq, &out_entry, outnum);
 
 	if (error) {
 		/* Restore list */
 		INP_WLOCK(inp);
 		if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0) {
 			tp = intotcpcb(inp);
 
 			/* Merge the two lists. */
 			STAILQ_CONCAT(&log_tailq, &tp->t_logs);
 			tp->t_logs = log_tailq;
 			tp->t_lognum += outnum;
 		}
 		INP_WUNLOCK(inp);
 	} else {
 		/* Sanity check entries */
 		KASSERT(((caddr_t)out_entry - (caddr_t)sopt->sopt_val)  ==
 		    outsize, ("%s: Actual output size (%zu) != "
 			"calculated output size (%zu)", __func__,
 			(size_t)((caddr_t)out_entry - (caddr_t)sopt->sopt_val),
 			outsize));
 
 		/* Free the entries we just copied out. */
 		STAILQ_FOREACH_SAFE(log_entry, &log_tailq, tlm_queue, log_next) {
 			tcp_log_entry_refcnt_rem(log_entry);
 			uma_zfree(tcp_log_zone, log_entry);
 		}
 	}
 
 	sopt->sopt_valsize = (size_t)((caddr_t)out_entry -
 	    (caddr_t)sopt->sopt_val);
 	return (error);
 }
 
 /*
  * Destructor (tldq_dtor) for a queued log dump.
  *
  * Releases, in order, the log entries still attached to the queue entry,
  * the tldq_buf buffer (if one was allocated), and the queue entry itself.
  * param must point at the tldl_common member of a
  * struct tcp_log_dev_log_queue. A NULL param trips the KASSERT and is
  * otherwise ignored.
  */
 static void
 tcp_log_free_queue(struct tcp_log_dev_queue *param)
 {
 	struct tcp_log_dev_log_queue *entry;
 
 	KASSERT(param != NULL, ("%s: called with NULL param", __func__));
 	/* Runtime guard for kernels where KASSERT() is compiled out. */
 	if (param == NULL)
 		return;
 
 	/* tldl_common is the first member of the containing structure. */
 	entry = (struct tcp_log_dev_log_queue *)param;
 
 	/* Free the entries. */
 	tcp_log_free_entries(&entry->tldl_entries, &entry->tldl_count);
 
 	/* Free the buffer, if it is allocated. */
 	if (entry->tldl_common.tldq_buf != NULL)
 		free(entry->tldl_common.tldq_buf, M_TCPLOGDEV);
 
 	/* Free the queue entry. */
 	free(entry, M_TCPLOGDEV);
 }
 
 /*
  * Transform callback (tldq_xform) for a queued log dump.
  *
  * Allocates one buffer that holds a struct tcp_log_header immediately
  * followed by the serialized log entries, fills in the header from the
  * queue entry, and returns the buffer as a tcp_log_common_header.
  *
  * Returns NULL if the M_NOWAIT allocation fails or if
  * tcp_log_logs_to_buf() reports an error. In both cases the log entries
  * are left attached to the queue entry. On success, the entries are freed
  * and tldl_count is reset to 0.
  */
 static struct tcp_log_common_header *
 tcp_log_expandlogbuf(struct tcp_log_dev_queue *param)
 {
 	struct tcp_log_dev_log_queue *entry;
 	struct tcp_log_header *hdr;
 	uint8_t *end;
 	struct sockopt sopt;
 	int error;
 
 	entry = (struct tcp_log_dev_log_queue *)param;
 
 	/*
 	 * Take a worst-case guess at space needs: one header plus, for
 	 * every entry, a full record with verbose information.
 	 */
 	sopt.sopt_valsize = sizeof(struct tcp_log_header) +
 	    entry->tldl_count * (sizeof(struct tcp_log_buffer) +
 	    sizeof(struct tcp_log_verbose));
 	hdr = malloc(sopt.sopt_valsize, M_TCPLOGDEV, M_NOWAIT);
 	if (hdr == NULL) {
 #ifdef TCPLOG_DEBUG_COUNTERS
 		counter_u64_add(tcp_log_que_fail5, entry->tldl_count);
 #endif
 		return (NULL);
 	}
 	/*
 	 * Build a sockopt that describes the space after the header so
 	 * tcp_log_logs_to_buf() can be reused to serialize the entries.
 	 * NOTE(review): sopt_td == NULL presumably marks the destination
 	 * as kernel memory -- confirm against tcp_log_logs_to_buf().
 	 */
 	sopt.sopt_val = hdr + 1;
 	sopt.sopt_valsize -= sizeof(struct tcp_log_header);
 	sopt.sopt_td = NULL;
 	
 	error = tcp_log_logs_to_buf(&sopt, &entry->tldl_entries,
 	    (struct tcp_log_buffer **)&end, entry->tldl_count);
 	if (error) {
 		free(hdr, M_TCPLOGDEV);
 		return (NULL);
 	}
 
 	/* Free the entries. */
 	tcp_log_free_entries(&entry->tldl_entries, &entry->tldl_count);
 	entry->tldl_count = 0;
 
 	/*
 	 * Fill in the header. tlh_length is measured from the start of the
 	 * header to the position tcp_log_logs_to_buf() reported in end.
 	 */
 	memset(hdr, 0, sizeof(struct tcp_log_header));
 	hdr->tlh_version = TCP_LOG_BUF_VER;
 	hdr->tlh_type = TCP_LOG_DEV_TYPE_BBR;
 	hdr->tlh_length = end - (uint8_t *)hdr;
 	hdr->tlh_ie = entry->tldl_ie;
 	hdr->tlh_af = entry->tldl_af;
 	getboottime(&hdr->tlh_offset);
 	strlcpy(hdr->tlh_id, entry->tldl_id, TCP_LOG_ID_LEN);
+	strlcpy(hdr->tlh_tag, entry->tldl_tag, TCP_LOG_TAG_LEN);
 	strlcpy(hdr->tlh_reason, entry->tldl_reason, TCP_LOG_REASON_LEN);
 	return ((struct tcp_log_common_header *)hdr);
 }
 
 /*
  * Queue the tcpcb's log buffer for transmission via the log buffer facility.
  *
  * NOTE: This should be called with a write lock on the PCB.
  *
  * how should be M_WAITOK or M_NOWAIT. If M_WAITOK, the function will drop
  * and reacquire the INP lock if it needs to do so.
  *
  * If force is false, this will only dump auto-logged sessions if
  * tcp_log_auto_all is true or if there is a log ID defined for the session.
  *
  * reason is copied into the queue entry; NULL is recorded as "UNKNOWN".
  *
  * Returns 0 when the entries were handed to tcp_log_dev_add_log() (even if
  * it refused them and they were freed), when there was nothing to dump, or
  * when the entries were discarded because the session has no log ID.
  * Returns ENOBUFS if how includes M_NOWAIT and the queue entry could not be
  * allocated, and ECONNRESET if the INP was found in INP_TIMEWAIT or
  * INP_DROPPED state after the lock was reacquired.
  */
 int
 tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force)
 {
 	struct tcp_log_dev_log_queue *entry;
 	struct inpcb *inp;
 #ifdef TCPLOG_DEBUG_COUNTERS
 	int num_entries;
 #endif
 
 	inp = tp->t_inpcb;
 	INP_WLOCK_ASSERT(inp);
 
 	/* If there are no log entries, there is nothing to do. */
 	if (tp->t_lognum == 0)
 		return (0);
 
 	/* Check for a log ID. */
 	if (tp->t_lib == NULL && (tp->t_flags2 & TF2_LOG_AUTO) &&
 	    !tcp_log_auto_all && !force) {
 		struct tcp_log_mem *log_entry;
 
 		/*
 		 * We needed a log ID and none was found. Free the log entries
 		 * and return success. Also, cancel further logging. If the
 		 * session doesn't have a log ID by now, we'll assume it isn't
 		 * going to get one.
 		 */
 		while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL)
 			tcp_log_remove_log_head(tp, log_entry);
 		KASSERT(tp->t_lognum == 0,
 		    ("%s: After freeing entries, tp->t_lognum=%d (expected 0)",
 			__func__, tp->t_lognum));
 		tp->t_logstate = TCP_LOG_STATE_OFF;
 		return (0);
 	}
 
 	/*
 	 * Allocate memory. If we must wait, we'll need to drop the locks
 	 * and reacquire them (and do all the related business that goes
 	 * along with that).
 	 */
 	entry = malloc(sizeof(struct tcp_log_dev_log_queue), M_TCPLOGDEV,
 	    M_NOWAIT);
 	if (entry == NULL && (how & M_NOWAIT)) {
 #ifdef TCPLOG_DEBUG_COUNTERS
 		counter_u64_add(tcp_log_que_fail3, 1);
 #endif
 		return (ENOBUFS);
 	}
 	if (entry == NULL) {
 		INP_WUNLOCK(inp);
 		entry = malloc(sizeof(struct tcp_log_dev_log_queue),
 		    M_TCPLOGDEV, M_WAITOK);
 		INP_WLOCK(inp);
 		/*
 		 * Note that this check is slightly overly-restrictive in
 		 * that the TCB can survive either of these events.
 		 * However, there is currently not a good way to ensure
 		 * that is the case. So, if we hit this M_WAIT path, we
 		 * may end up dropping some entries. That seems like a
 		 * small price to pay for safety.
 		 */
 		if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 			free(entry, M_TCPLOGDEV);
 #ifdef TCPLOG_DEBUG_COUNTERS
 			counter_u64_add(tcp_log_que_fail2, 1);
 #endif
 			return (ECONNRESET);
 		}
 		/* Re-read the state that may have changed while unlocked. */
 		tp = intotcpcb(inp);
 		if (tp->t_lognum == 0) {
 			free(entry, M_TCPLOGDEV);
 			return (0);
 		}
 	}
 
 	/* Fill in the unique parts of the queue entry. */
-	if (tp->t_lib != NULL)
+	if (tp->t_lib != NULL) {
 		strlcpy(entry->tldl_id, tp->t_lib->tlb_id, TCP_LOG_ID_LEN);
-	else
+		strlcpy(entry->tldl_tag, tp->t_lib->tlb_tag, TCP_LOG_TAG_LEN);
+	} else {
 		strlcpy(entry->tldl_id, "UNKNOWN", TCP_LOG_ID_LEN);
+		strlcpy(entry->tldl_tag, "UNKNOWN", TCP_LOG_TAG_LEN);
+	}
 	/* Every copy is bounded by the size of its destination field. */
 	if (reason != NULL)
 		strlcpy(entry->tldl_reason, reason, TCP_LOG_REASON_LEN);
 	else
 		strlcpy(entry->tldl_reason, "UNKNOWN", TCP_LOG_REASON_LEN);
 	entry->tldl_ie = inp->inp_inc.inc_ie;
 	if (inp->inp_inc.inc_flags & INC_ISIPV6)
 		entry->tldl_af = AF_INET6;
 	else
 		entry->tldl_af = AF_INET;
 	/* Hand the whole list over to the queue entry. */
 	entry->tldl_entries = tp->t_logs;
 	entry->tldl_count = tp->t_lognum;
 
 	/* Fill in the common parts of the queue entry. */
 	entry->tldl_common.tldq_buf = NULL;
 	entry->tldl_common.tldq_xform = tcp_log_expandlogbuf;
 	entry->tldl_common.tldq_dtor = tcp_log_free_queue;
 
 	/* Clear the log data from the TCPCB. */
 #ifdef TCPLOG_DEBUG_COUNTERS
 	num_entries = tp->t_lognum;
 #endif
 	tp->t_lognum = 0;
 	STAILQ_INIT(&tp->t_logs);
 
 	/* Add the entry. If no one is listening, free the entry. */
 	if (tcp_log_dev_add_log((struct tcp_log_dev_queue *)entry)) {
 		tcp_log_free_queue((struct tcp_log_dev_queue *)entry);
 #ifdef TCPLOG_DEBUG_COUNTERS
 		counter_u64_add(tcp_log_que_fail1, num_entries);
 	} else {
 		counter_u64_add(tcp_log_queued, num_entries);
 #endif
 	}
 	return (0);
 }
 
 /*
  * Queue the log_id_node's log buffers for transmission via the log buffer
  * facility.
  *
  * NOTE: This should be called with the bucket locked and referenced.
  *
  * how should be M_WAITOK or M_NOWAIT. If M_WAITOK, the function will drop
  * and reacquire the bucket lock if it needs to do so. (The caller must
  * ensure that the tln is no longer on any lists so no one else will mess
  * with this while the lock is dropped!)
  *
  * reason is copied into the queue entry; NULL is recorded as "UNKNOWN".
  *
  * Returns 0 when the node had no entries or when its entries were handed
  * to tcp_log_dev_add_log() (even if it refused them and they were freed).
  * Returns ENOBUFS, leaving the node untouched, if how includes M_NOWAIT and
  * the queue entry could not be allocated.
  */
 static int
 tcp_log_dump_node_logbuf(struct tcp_log_id_node *tln, char *reason, int how)
 {
 	struct tcp_log_dev_log_queue *entry;
 	struct tcp_log_id_bucket *tlb;
 
 	tlb = tln->tln_bucket;
 	TCPID_BUCKET_LOCK_ASSERT(tlb);
 	KASSERT(tlb->tlb_refcnt > 0,
 	    ("%s:%d: Called with unreferenced bucket (tln=%p, tlb=%p)",
 	    __func__, __LINE__, tln, tlb));
 	KASSERT(tln->tln_closed,
 	    ("%s:%d: Called for node with tln_closed==false (tln=%p)",
 	    __func__, __LINE__, tln));
 
 	/* If there are no log entries, there is nothing to do. */
 	if (tln->tln_count == 0)
 		return (0);
 
 	/*
 	 * Allocate memory. If we must wait, we'll need to drop the locks
 	 * and reacquire them (and do all the related business that goes
 	 * along with that).
 	 */
 	entry = malloc(sizeof(struct tcp_log_dev_log_queue), M_TCPLOGDEV,
 	    M_NOWAIT);
 	if (entry == NULL && (how & M_NOWAIT))
 		return (ENOBUFS);
 	if (entry == NULL) {
 		TCPID_BUCKET_UNLOCK(tlb);
 		entry = malloc(sizeof(struct tcp_log_dev_log_queue),
 		    M_TCPLOGDEV, M_WAITOK);
 		TCPID_BUCKET_LOCK(tlb);
 	}
 
 	/* Fill in the common parts of the queue entry. */
 	entry->tldl_common.tldq_buf = NULL;
 	entry->tldl_common.tldq_xform = tcp_log_expandlogbuf;
 	entry->tldl_common.tldq_dtor = tcp_log_free_queue;
 
 	/*
 	 * Fill in the unique parts of the queue entry. Every copy is
 	 * bounded by the size of its destination field.
 	 */
 	strlcpy(entry->tldl_id, tlb->tlb_id, TCP_LOG_ID_LEN);
+	strlcpy(entry->tldl_tag, tlb->tlb_tag, TCP_LOG_TAG_LEN);
 	if (reason != NULL)
 		strlcpy(entry->tldl_reason, reason, TCP_LOG_REASON_LEN);
 	else
 		strlcpy(entry->tldl_reason, "UNKNOWN", TCP_LOG_REASON_LEN);
 	entry->tldl_ie = tln->tln_ie;
 	entry->tldl_entries = tln->tln_entries;
 	entry->tldl_count = tln->tln_count;
 	entry->tldl_af = tln->tln_af;
 
 	/* Add the entry. If no one is listening, free the entry. */
 	if (tcp_log_dev_add_log((struct tcp_log_dev_queue *)entry))
 		tcp_log_free_queue((struct tcp_log_dev_queue *)entry);
 
 	return (0);
 }
 
 
 /*
  * Queue the log buffers for all sessions in a bucket for transmissions via
  * the log buffer facility.
  *
  * NOTE: This should be called with a locked bucket; however, the function
  * will drop the lock.
  *
  * Nodes that are already closed are removed from the bucket and the expire
  * queue, dumped, and freed. Nodes still backed by a TCPCB stay on the
  * bucket list and have their TCPCB's entries dumped. Any dump that fails
  * with M_NOWAIT is parked in a stack-local node (at most LOCAL_SAVE of
  * them) and retried with M_WAITOK once the list walk is finished.
  */
 #define	LOCAL_SAVE	10
 static void
 tcp_log_dumpbucketlogs(struct tcp_log_id_bucket *tlb, char *reason)
 {
 	struct tcp_log_id_node local_entries[LOCAL_SAVE];
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct tcp_log_id_node *cur_tln, *prev_tln, *tmp_tln;
 	int i, num_local_entries, tree_locked;
 	bool expireq_locked;
 
 	TCPID_BUCKET_LOCK_ASSERT(tlb);
 
 	/*
 	 * Take a reference on the bucket to keep it from disappearing until
 	 * we are done.
 	 */
 	TCPID_BUCKET_REF(tlb);
 
 	/*
 	 * We'll try to create these without dropping locks. However, we
 	 * might very well need to drop locks to get memory. If that's the
 	 * case, we'll save up to LOCAL_SAVE on the stack, and sacrifice the
 	 * rest. (Otherwise, we need to worry about finding our place again
 	 * in a potentially changed list. It just doesn't seem worth the
 	 * trouble to do that.)
 	 */
 	expireq_locked = false;
 	num_local_entries = 0;
 	prev_tln = NULL;
 	tree_locked = TREE_UNLOCKED;
 	SLIST_FOREACH_SAFE(cur_tln, &tlb->tlb_head, tln_list, tmp_tln) {
 		/*
 		 * If this isn't associated with a TCPCB, we can pull it off
 		 * the list now. We need to be careful that the expire timer
 		 * hasn't already taken ownership (tln_expiretime == SBT_MAX).
 		 * If so, we let the expire timer code free the data.
 		 */
 		if (cur_tln->tln_closed) {
 no_inp:
 			/*
 			 * Get the expireq lock so we can get a consistent
 			 * read of tln_expiretime and so we can remove this
 			 * from the expireq.
 			 */
 			if (!expireq_locked) {
 				TCPLOG_EXPIREQ_LOCK();
 				expireq_locked = true;
 			}
 
 			/*
 			 * We ignore entries with tln_expiretime == SBT_MAX.
 			 * The expire timer code already owns those.
 			 */
 			KASSERT(cur_tln->tln_expiretime > (sbintime_t) 0,
 			    ("%s:%d: node on the expire queue without positive "
 			    "expire time", __func__, __LINE__));
 			if (cur_tln->tln_expiretime == SBT_MAX) {
 				/* The node stays on the bucket list. */
 				prev_tln = cur_tln;
 				continue;
 			}
 
 			/* Remove the entry from the expireq. */
 			STAILQ_REMOVE(&tcp_log_expireq_head, cur_tln,
 			    tcp_log_id_node, tln_expireq);
 
 			/* Remove the entry from the bucket. */
 			if (prev_tln != NULL)
 				SLIST_REMOVE_AFTER(prev_tln, tln_list);
 			else
 				SLIST_REMOVE_HEAD(&tlb->tlb_head, tln_list);
 
 			/*
 			 * Drop the INP and bucket reference counts. Due to
 			 * lock-ordering rules, we need to drop the expire
 			 * queue lock.
 			 */
 			TCPLOG_EXPIREQ_UNLOCK();
 			expireq_locked = false;
 
 			/* Drop the INP reference. */
 			INP_WLOCK(cur_tln->tln_inp);
 			if (!in_pcbrele_wlocked(cur_tln->tln_inp))
 				INP_WUNLOCK(cur_tln->tln_inp);
 
 			/*
 			 * Drop the node's bucket reference. Our own
 			 * reference, taken at the top of the function, is
 			 * expected to keep the bucket alive.
 			 */
 			if (tcp_log_unref_bucket(tlb, &tree_locked, NULL)) {
 #ifdef INVARIANTS
 				panic("%s: Bucket refcount unexpectedly 0.",
 				    __func__);
 #endif
 				/*
 				 * Recover as best we can: free the entry we
 				 * own.
 				 *
 				 * NOTE(review): jumping to done skips the
 				 * retry loop below, so anything already
 				 * parked in local_entries[] is not dumped
 				 * or freed on this path -- confirm whether
 				 * that is acceptable.
 				 */
 				tcp_log_free_entries(&cur_tln->tln_entries,
 				    &cur_tln->tln_count);
 				uma_zfree(tcp_log_node_zone, cur_tln);
 				goto done;
 			}
 
 			if (tcp_log_dump_node_logbuf(cur_tln, reason,
 			    M_NOWAIT)) {
 				/*
 				 * If we have space, save the entries locally.
 				 * Otherwise, free them.
 				 */
 				if (num_local_entries < LOCAL_SAVE) {
 					local_entries[num_local_entries] =
 					    *cur_tln;
 					num_local_entries++;
 				} else {
 					tcp_log_free_entries(
 					    &cur_tln->tln_entries,
 					    &cur_tln->tln_count);
 				}
 			}
 
 			/* No matter what, we are done with the node now. */
 			uma_zfree(tcp_log_node_zone, cur_tln);
 
 			/*
 			 * Because we removed this entry from the list, prev_tln
 			 * (which tracks the previous entry still on the tlb
 			 * list) remains unchanged.
 			 */
 			continue;
 		}
 
 		/*
 		 * If we get to this point, the session data is still held in
 		 * the TCPCB. So, we need to pull the data out of that.
 		 *
 		 * We will need to drop the expireq lock so we can lock the INP.
 		 * We can then try to extract the data the "easy" way. If that
 		 * fails, we'll save the log entries for later.
 		 */
 		if (expireq_locked) {
 			TCPLOG_EXPIREQ_UNLOCK();
 			expireq_locked = false;
 		}
 
 		/* Lock the INP and then re-check the state. */
 		inp = cur_tln->tln_inp;
 		INP_WLOCK(inp);
 		/*
 		 * If we caught this while it was transitioning, the data
 		 * might have moved from the TCPCB to the tln (signified by
 		 * setting tln_closed to true). If so, treat this like an
 		 * inactive connection.
 		 */
 		if (cur_tln->tln_closed) {
 			/*
 			 * It looks like we may have caught this connection
 			 * while it was transitioning from active to inactive.
 			 * Treat this like an inactive connection.
 			 */
 			INP_WUNLOCK(inp);
 			goto no_inp;
 		}
 
 		/*
 		 * Try to dump the data from the tp without dropping the lock.
 		 * If this fails, try to save off the data locally. If there
 		 * is no local slot left, the entries simply stay in the tp.
 		 */
 		tp = cur_tln->tln_tp;
 		if (tcp_log_dump_tp_logbuf(tp, reason, M_NOWAIT, true) &&
 		    num_local_entries < LOCAL_SAVE) {
 			tcp_log_move_tp_to_node(tp,
 			    &local_entries[num_local_entries]);
 			/*
 			 * Mark the local copy closed, as required by
 			 * tcp_log_dump_node_logbuf().
 			 */
 			local_entries[num_local_entries].tln_closed = 1;
 			KASSERT(local_entries[num_local_entries].tln_bucket ==
 			    tlb, ("%s: %d: bucket mismatch for node %p",
 			    __func__, __LINE__, cur_tln));
 			num_local_entries++;
 		}
 
 		INP_WUNLOCK(inp);
 
 		/*
 		 * We are going to leave the current tln on the list. It will
 		 * become the previous tln.
 		 */
 		prev_tln = cur_tln;
 	}
 
 	/* Drop our locks, if any. */
 	KASSERT(tree_locked == TREE_UNLOCKED,
 	    ("%s: %d: tree unexpectedly locked", __func__, __LINE__));
 	switch (tree_locked) {
 	case TREE_WLOCKED:
 		TCPID_TREE_WUNLOCK();
 		tree_locked = TREE_UNLOCKED;
 		break;
 	case TREE_RLOCKED:
 		TCPID_TREE_RUNLOCK();
 		tree_locked = TREE_UNLOCKED;
 		break;
 	}
 	if (expireq_locked) {
 		TCPLOG_EXPIREQ_UNLOCK();
 		expireq_locked = false;
 	}
 
 	/*
 	 * Try again for any saved entries. With M_WAITOK,
 	 * tcp_log_dump_node_logbuf() is guaranteed to free the log entries
 	 * within the node. And, since the node itself is on our stack, we
 	 * don't need to free it.
 	 */
 	for (i = 0; i < num_local_entries; i++)
 		tcp_log_dump_node_logbuf(&local_entries[i], reason, M_WAITOK);
 
 	/*
 	 * Drop our reference. If the bucket survives, we still hold its
 	 * lock and must release it ourselves.
 	 */
 	if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL))
 		TCPID_BUCKET_UNLOCK(tlb);
 
 done:
 	/* Drop our locks, if any. */
 	switch (tree_locked) {
 	case TREE_WLOCKED:
 		TCPID_TREE_WUNLOCK();
 		break;
 	case TREE_RLOCKED:
 		TCPID_TREE_RUNLOCK();
 		break;
 	}
 	if (expireq_locked)
 		TCPLOG_EXPIREQ_UNLOCK();
 }
 #undef	LOCAL_SAVE
 
 
 /*
  * Queue the log buffers for all sessions that share tp's log ID bucket for
  * transmission via the log buffer facility. If tp has no bucket, only tp's
  * own log buffer is dumped (with force set and M_WAITOK).
  *
  * NOTE: This should be called with a locked INP; however, the function
  * will drop the lock.
  */
 void
 tcp_log_dump_tp_bucket_logbufs(struct tcpcb *tp, char *reason)
 {
 	struct tcp_log_id_bucket *tlb;
 	int tree_locked;
 
 	/* Figure out our bucket and lock it. */
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	tlb = tp->t_lib;
 	if (tlb == NULL) {
 		/*
 		 * No bucket; treat this like a request to dump a single
 		 * session's traces.
 		 */
 		(void)tcp_log_dump_tp_logbuf(tp, reason, M_WAITOK, true);
 		INP_WUNLOCK(tp->t_inpcb);
 		return;
 	}
 	/*
 	 * Take a temporary bucket reference before dropping the INP lock,
 	 * then take the bucket lock.
 	 */
 	TCPID_BUCKET_REF(tlb);
 	INP_WUNLOCK(tp->t_inpcb);
 	TCPID_BUCKET_LOCK(tlb);
 
 	/* If we are the last reference, we have nothing more to do here. */
 	tree_locked = TREE_UNLOCKED;
 	if (tcp_log_unref_bucket(tlb, &tree_locked, NULL)) {
 		/* Release whichever tree lock the unref left held. */
 		switch (tree_locked) {
 		case TREE_WLOCKED:
 			TCPID_TREE_WUNLOCK();
 			break;
 		case TREE_RLOCKED:
 			TCPID_TREE_RUNLOCK();
 			break;
 		}
 		return;
 	}
 
 	/*
 	 * Turn this over to tcp_log_dumpbucketlogs() to finish the work. It
 	 * is entered with the bucket locked and drops that lock itself.
 	 */
 	tcp_log_dumpbucketlogs(tlb, reason);
 }
 
 /*
  * Mark the end of a flow with the current stack. A stack can add
  * stack-specific info to this trace event by overriding this
  * function (see bbr_log_flowend() for example).
  *
  * Nothing is logged while the session's log state is TCP_LOG_STATE_OFF.
  * Otherwise a TCP_LOG_FLOWEND event is recorded with both socket buffers,
  * no TCP header, no stack-specific info, and errno/len of 0.
  */
 void
 tcp_log_flowend(struct tcpcb *tp)
 {
 	if (tp->t_logstate != TCP_LOG_STATE_OFF) {
 		struct socket *so = tp->t_inpcb->inp_socket;
 		TCP_LOG_EVENT(tp, NULL, &so->so_rcv, &so->so_snd,
 				TCP_LOG_FLOWEND, 0, 0, NULL, false);
 	}
 }
 
Index: head/sys/netinet/tcp_log_buf.h
===================================================================
--- head/sys/netinet/tcp_log_buf.h	(revision 356413)
+++ head/sys/netinet/tcp_log_buf.h	(revision 356414)
@@ -1,376 +1,381 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2016-2018 Netflix, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef __tcp_log_buf_h__
 #define __tcp_log_buf_h__
 
 #define	TCP_LOG_REASON_LEN	32
-#define	TCP_LOG_BUF_VER		(6)
+#define	TCP_LOG_TAG_LEN		32
+#define	TCP_LOG_BUF_VER		(7)
 
 /*
  * Because the (struct tcp_log_buffer) includes 8-byte uint64_t's, it requires
  * 8-byte alignment to work properly on all platforms. Therefore, we will
  * enforce 8-byte alignment for all the structures that may appear by
  * themselves (instead of being embedded in another structure) in a data
  * stream.
  */
 #define	ALIGN_TCP_LOG		__aligned(8)
 
 /*
  * Information about the socketbuffer state. Embedded twice in
  * struct tcp_log_buffer: once for the receive buffer, once for the send
  * buffer.
  */
 struct tcp_log_sockbuf
 {
 	uint32_t	tls_sb_acc;	/* available chars (sb->sb_acc) */
 	uint32_t	tls_sb_ccc;	/* claimed chars (sb->sb_ccc) */
 	uint32_t	tls_sb_spare;	/* spare */
 };
 
 /*
  * Optional, verbose information that may be appended to an event log.
  * This is the element type of the trailing tlb_verbose[] array in
  * struct tcp_log_buffer. The explicit padding keeps the structure a
  * multiple of 8 bytes.
  */
 struct tcp_log_verbose
 {
 #define	TCP_FUNC_LEN	32
 	char		tlv_snd_frm[TCP_FUNC_LEN]; /* tcp_output() caller */
 	char		tlv_trace_func[TCP_FUNC_LEN]; /* Function that
 							 generated trace */
 	uint32_t	tlv_trace_line;	/* Line number that generated trace */
 	uint8_t		_pad[4];	/* Padding */
 } ALIGN_TCP_LOG;
 
 /*
  * Internal RACK state variables. This is the u_rack member of
  * union tcp_log_stackspecific.
  */
 struct tcp_log_rack
 {
 	uint32_t	tlr_rack_rtt;		/* rc_rack_rtt */
 	uint8_t		tlr_state;		/* Internal RACK state */
 	uint8_t		_pad[3];		/* Padding */
 };
 
 /*
  * BBR stack-specific state. This is the u_bbr member of
  * union tcp_log_stackspecific.
  *
  * NOTE(review): the flex1..flex8 members are presumably event-specific
  * scratch fields, by analogy with tlb_flex1/tlb_flex2 in
  * struct tcp_log_buffer -- confirm against the BBR stack.
  */
 struct tcp_log_bbr {
 	uint64_t cur_del_rate;
 	uint64_t delRate;
 	uint64_t rttProp;
 	uint64_t bw_inuse;
 	uint32_t inflight;
 	uint32_t applimited;
 	uint32_t delivered;
 	uint32_t timeStamp;
 	uint32_t epoch;
 	uint32_t lt_epoch;
 	uint32_t pkts_out;
 	uint32_t flex1;
 	uint32_t flex2;
 	uint32_t flex3;
 	uint32_t flex4;
 	uint32_t flex5;
 	uint32_t flex6;
 	uint32_t lost;
 	uint16_t pacing_gain;
 	uint16_t cwnd_gain;
 	uint16_t flex7;
 	uint8_t bbr_state;
 	uint8_t bbr_substate;
 	uint8_t inhpts;
 	uint8_t ininput;
 	uint8_t use_lt_bw;
 	uint8_t flex8;
 	uint32_t pkt_epoch;
 };
 
 /*
  * Per-stack stack-specific info. Embedded in struct tcp_log_buffer as
  * tlb_stackinfo; exactly one member is meaningful for a given record.
  */
 union tcp_log_stackspecific
 {
 	struct tcp_log_rack u_rack;	/* RACK stack */
 	struct tcp_log_bbr u_bbr;	/* BBR stack */
 };
 
 /*
  * One logged event record. tlb_eventflags (TLB_FLAG_*) says which of the
  * optional parts are included. The structure ends in a zero-length array
  * of struct tcp_log_verbose, so verbose data, when present, follows the
  * fixed part directly and is not counted in sizeof(struct tcp_log_buffer).
  */
 struct tcp_log_buffer
 {
 	/* Event basics */
 	struct timeval	tlb_tv;		/* Timestamp of trace */
 	uint32_t	tlb_ticks;	/* Timestamp of trace */
 	uint32_t	tlb_sn;		/* Serial number */
 	uint8_t		tlb_stackid;	/* Stack ID */
 	uint8_t		tlb_eventid;	/* Event ID */
 	uint16_t	tlb_eventflags;	/* Flags for the record */
 #define	TLB_FLAG_RXBUF		0x0001	/* Includes receive buffer info */
 #define	TLB_FLAG_TXBUF		0x0002	/* Includes send buffer info */
 #define	TLB_FLAG_HDR		0x0004	/* Includes a TCP header */
 #define	TLB_FLAG_VERBOSE	0x0008	/* Includes function/line numbers */
 #define	TLB_FLAG_STACKINFO	0x0010	/* Includes stack-specific info */
 	int		tlb_errno;	/* Event error (if any) */
 
 	/* Internal session state */
 	struct tcp_log_sockbuf tlb_rxbuf; /* Receive buffer */
 	struct tcp_log_sockbuf tlb_txbuf; /* Send buffer */
 
 	int		tlb_state;	/* TCPCB t_state */
 	uint32_t	tlb_starttime;	/* TCPCB t_starttime */
 	uint32_t	tlb_iss;		/* TCPCB iss */
 	uint32_t	tlb_flags;	/* TCPCB flags */
 	uint32_t	tlb_snd_una;	/* TCPCB snd_una */
 	uint32_t	tlb_snd_max;	/* TCPCB snd_max */
 	uint32_t	tlb_snd_cwnd;	/* TCPCB snd_cwnd */
 	uint32_t	tlb_snd_nxt;	/* TCPCB snd_nxt */
 	uint32_t	tlb_snd_recover;/* TCPCB snd_recover */
 	uint32_t	tlb_snd_wnd;	/* TCPCB snd_wnd */
 	uint32_t	tlb_snd_ssthresh; /* TCPCB snd_ssthresh */
 	uint32_t	tlb_srtt;	/* TCPCB t_srtt */
 	uint32_t	tlb_rttvar;	/* TCPCB t_rttvar */
 	uint32_t	tlb_rcv_up;	/* TCPCB rcv_up */
 	uint32_t	tlb_rcv_adv;	/* TCPCB rcv_adv */
 	uint32_t	tlb_rcv_nxt;	/* TCPCB rcv_nxt */
 	tcp_seq		tlb_sack_newdata; /* TCPCB sack_newdata */
 	uint32_t       	tlb_rcv_wnd;	/* TCPCB rcv_wnd */
 	uint32_t	tlb_dupacks;	/* TCPCB t_dupacks */
 	int		tlb_segqlen;	/* TCPCB segqlen */
 	int		tlb_snd_numholes; /* TCPCB snd_numholes */
 	uint32_t 	tlb_flex1; /* Event specific information */
 	uint32_t 	tlb_flex2; /* Event specific information */
 	uint8_t		tlb_snd_scale:4, /* TCPCB snd_scale */
 			tlb_rcv_scale:4; /* TCPCB rcv_scale */
 	uint8_t		_pad[3];	/* Padding */
 
 	/* Per-stack info */
 	union tcp_log_stackspecific tlb_stackinfo;
 #define	tlb_rack	tlb_stackinfo.u_rack
 
 	/* The packet */
 	uint32_t	tlb_len;	/* The packet's data length */
 	struct tcphdr	tlb_th;		/* The TCP header */
 	uint8_t		tlb_opts[TCP_MAXOLEN]; /* The TCP options */
 
 	/* Verbose information (optional) */
 	struct tcp_log_verbose tlb_verbose[0];
 } ALIGN_TCP_LOG;
 
 /*
  * Event IDs. Values are assigned sequentially starting at 1; the number at
  * the end of each comment is the resulting value. TCP_LOG_END must remain
  * the last enumerator.
  */
 enum tcp_log_events {
 	TCP_LOG_IN = 1,	/* Incoming packet                 1 */
 	TCP_LOG_OUT,	/* Transmit (without other event)  2 */
 	TCP_LOG_RTO,	/* Retransmit timeout              3 */
 	TCP_LOG_TF_ACK,	/* Transmit due to TF_ACK          4 */
 	TCP_LOG_BAD_RETRAN, /* Detected bad retransmission 5 */
 	TCP_LOG_PRR,	/* Doing PRR                       6 */
 	TCP_LOG_REORDER,/* Detected reorder                7 */
 	TCP_LOG_HPTS,	/* Hpts sending a packet          8 */
 	BBR_LOG_BBRUPD,		/* We updated BBR info     9 */
 	BBR_LOG_BBRSND,		/* We did a slot calculation and sending is done 10 */
 	BBR_LOG_ACKCLEAR,	/* An ack clears all outstanding    11 */
 	BBR_LOG_INQUEUE,	/* The tcb had a packet input to it 12 */
 	BBR_LOG_TIMERSTAR,	/* Start a timer                    13 */
 	BBR_LOG_TIMERCANC,	/* Cancel a timer                   14 */
 	BBR_LOG_ENTREC,		/* Entered recovery                 15 */
 	BBR_LOG_EXITREC,	/* Exited recovery                  16 */
 	BBR_LOG_CWND,		/* Cwnd change                      17 */
 	BBR_LOG_BWSAMP,		/* LT B/W sample has been made      18 */
 	BBR_LOG_MSGSIZE,	/* We received an EMSGSIZE error    19 */
 	BBR_LOG_BBRRTT,		/* BBR RTT is updated               20 */
 	BBR_LOG_JUSTRET,	/* We just returned out of output   21 */
 	BBR_LOG_STATE,		/* A BBR state change occurred      22 */
 	BBR_LOG_PKT_EPOCH,      /* A BBR packet epoch occurred      23 */
 	BBR_LOG_PERSIST,        /* BBR changed to/from a persists   24 */
 	TCP_LOG_FLOWEND,        /* End of a flow                    25 */
 	BBR_LOG_RTO,            /* BBR's timeout includes BBR info  26 */
 	BBR_LOG_DOSEG_DONE,     /* hpts do_segment completes       27 */
 	/* NOTE(review): next comment repeats entry 27's text -- confirm. */
 	BBR_LOG_EXIT_GAIN,      /* hpts do_segment completes       28 */
 	BBR_LOG_THRESH_CALC,    /* Doing threshold calculation      29 */
 	BBR_LOG_EXTRACWNDGAIN,	/* Removed                          30 */
 	TCP_LOG_USERSEND, 	/* User level sends data            31 */
 	BBR_RSM_CLEARED,	/* RSM cleared of ACK flags         32 */
 	BBR_LOG_STATE_TARGET, 	/* Log of target at state           33 */
 	BBR_LOG_TIME_EPOCH, 	/* A time-based epoch occurred      34 */
 	BBR_LOG_TO_PROCESS,	/* A to was processed               35 */
 	BBR_LOG_BBRTSO, 	/* TSO update	                    36 */
 	BBR_LOG_HPTSDIAG,	/* Hpts diag insert                37 */
 	BBR_LOG_LOWGAIN,	/* Low gain accounting              38 */
 	BBR_LOG_PROGRESS,	/* Progress timer event             39 */
 	TCP_LOG_SOCKET_OPT,	/* A socket option is set	    40 */
 	BBR_LOG_TIMERPREP,	/* A BBR var to debug out TLP issues  41 */
 	BBR_LOG_ENOBUF_JMP,	/* We had an enobuf jump 42 */
 	BBR_LOG_HPTSI_CALC,	/* calc the hptsi time 43 */
 	BBR_LOG_RTT_SHRINKS,	/* We had a log reduction of rttProp 44 */
 	BBR_LOG_BW_RED_EV,	/* B/W reduction events 45 */
 	BBR_LOG_REDUCE,		/* old bbr log reduce for 4.1 and earlier 46 */
 	TCP_LOG_RTT,		/* An rtt (in useconds) is being sampled and applied to the srtt algo 47 */
 	BBR_LOG_SETTINGS_CHG,   /* Settings changed for loss response 48 */
 	BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining -- now not used 49 */
 	TCP_LOG_REASS,		/* Reassembly buffer logging 50 */
 	TCP_HDWR_TLS,		/* TCP Hardware TLS logs 51 */
 	BBR_LOG_HDWR_PACE,	/* TCP Hardware pacing log 52 */
 	BBR_LOG_TSTMP_VAL,	/* Temp debug timestamp validation 53 */
 	TCP_LOG_CONNEND,	/* End of connection 54 */
 	TCP_LOG_LRO,		/* LRO entry 55 */
 	TCP_SACK_FILTER_RES,	/* Results of SACK Filter 56 */
 	TCP_SAD_DETECTION,	/* Sack Attack Detection 57 */
 	TCP_LOG_END		/* End (keep at end)	   58 */
 };
 
 /*
  * Per-session logging states. TCP_LOG_STATE_OFF is the value the logging
  * macros test against to decide whether to record an event.
  */
 enum tcp_log_states {
 	TCP_LOG_STATE_CLEAR = -1,	/* Deactivate and clear tracing */
 	TCP_LOG_STATE_OFF = 0,		/* Pause */
 	TCP_LOG_STATE_TAIL=1,		/* Keep the trailing events */
 	TCP_LOG_STATE_HEAD=2,		/* Keep the leading events */
 	TCP_LOG_STATE_HEAD_AUTO=3,	/* Keep the leading events, and
 					   automatically dump them to the
 					   device  */
 	TCP_LOG_STATE_CONTINUAL=4,	/* Continually dump the data when full */
 	TCP_LOG_STATE_TAIL_AUTO=5,	/* Keep the trailing events, and
 					   automatically dump them when the
 					   session ends */
 };
 
 /* Use this if we don't know whether the operation succeeded. */
 #define	ERRNO_UNK	(-1)
 
 /*
  * If the user included dev/tcp_log/tcp_log_dev.h, then include our private
  * headers. Otherwise, there is no reason to pollute all the files with an
  * additional include.
  *
  * This structure is aligned to an 8-byte boundary to match the alignment
  * requirements of (struct tcp_log_buffer).
  */
 #ifdef __tcp_log_dev_h__
 /*
  * Header placed in front of a dumped run of struct tcp_log_buffer records.
  * tcp_log_expandlogbuf() fills it in from the matching
  * struct tcp_log_dev_log_queue.
  */
 struct tcp_log_header {
 	struct tcp_log_common_header tlh_common;
 #define	tlh_version	tlh_common.tlch_version
 #define	tlh_type	tlh_common.tlch_type
 #define	tlh_length	tlh_common.tlch_length
 	struct in_endpoints	tlh_ie;		/* Connection endpoints */
 	struct timeval		tlh_offset;	/* Uptime -> UTC offset */
 	char			tlh_id[TCP_LOG_ID_LEN];	/* Log ID */
 	char			tlh_reason[TCP_LOG_REASON_LEN]; /* Dump reason */
+	char			tlh_tag[TCP_LOG_TAG_LEN];
 	uint8_t		tlh_af;			/* Address family */
 	uint8_t		_pad[7];		/* Padding */
 } ALIGN_TCP_LOG;
 
 #ifdef _KERNEL
 /*
  * A batch of log entries queued for the log device. tldl_common must stay
  * the first member: the queue callbacks receive a pointer to it and cast
  * that pointer back to this structure.
  */
 struct tcp_log_dev_log_queue {
 	struct tcp_log_dev_queue tldl_common;
 	char			tldl_id[TCP_LOG_ID_LEN];	/* Log ID */
 	char			tldl_reason[TCP_LOG_REASON_LEN]; /* Dump reason */
+	char			tldl_tag[TCP_LOG_TAG_LEN];
 	struct in_endpoints	tldl_ie;	/* Connection endpoints */
 	struct tcp_log_stailq	tldl_entries;	/* The log entries */
 	int			tldl_count;	/* Number of entries */
 	uint8_t			tldl_af;	/* Address family */
 };
 #endif /* _KERNEL */
 #endif /* __tcp_log_dev_h__ */
 
 #ifdef _KERNEL
 
 #define	TCP_LOG_BUF_DEFAULT_SESSION_LIMIT	5000
 #define	TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT	5000000
 
 /*
  * TCP_LOG_EVENT_VERBOSE: The same as TCP_LOG_EVENT, except it always
  * tries to record verbose information (the tcp_output() caller saved in
  * tp->t_output_caller plus the invoking function and line) and takes an
  * additional tv argument that is passed through to tcp_log_event_().
  *
  * Nothing is logged while tp->t_logstate is TCP_LOG_STATE_OFF. tp is
  * expanded more than once and without parentheses, so pass a plain
  * pointer expression with no side effects.
  */
 #define	TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \
 	do {								\
 		if (tp->t_logstate != TCP_LOG_STATE_OFF)		\
 			tcp_log_event_(tp, th, rxbuf, txbuf, eventid,	\
 	 	        errornum, len, stackinfo, th_hostorder,		\
 		        tp->t_output_caller, __func__, __LINE__, tv);	\
 	} while (0)
 
 /*
  * TCP_LOG_EVENT: This is a macro so we can capture function/line
  * information when needed.
  *
  * Prototype:
  * TCP_LOG_EVENT(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf,
  *     struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len,
  *     union tcp_log_stackspecific *stackinfo, int th_hostorder)
  *
  * tp is mandatory and must be write locked.
  * th is optional; if present, it will appear in the record.
  * rxbuf and txbuf are optional; if present, they will appear in the record.
  * eventid is mandatory.
  * errornum is mandatory (it indicates the success or failure of the
  *     operation associated with the event).
  * len indicates the length of the packet. If no packet, use 0.
  * stackinfo is optional; if present, it will appear in the record.
  * th_hostorder is passed through unchanged to tcp_log_event_().
  *
  * Verbose information is recorded when TCP_LOG_FORCEVERBOSE is defined at
  * build time or when tcp_log_verbose is true at run time. No timestamp is
  * supplied in either case (tv is NULL).
  */
 #ifdef TCP_LOG_FORCEVERBOSE
 /*
  * This must be function-like: TCP_LOG_EVENT_VERBOSE takes one more
  * argument (tv) than TCP_LOG_EVENT, so a plain alias would not expand for
  * the nine-argument calls.
  */
 #define	TCP_LOG_EVENT(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder) \
 	TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf, eventid, errornum,	\
 	    len, stackinfo, th_hostorder, NULL)
 #else
 #define	TCP_LOG_EVENT(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder) \
 	do {								\
 		if (tcp_log_verbose)					\
 			TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf,	\
 			    eventid, errornum, len, stackinfo,		\
 			    th_hostorder, NULL);				\
 		else if (tp->t_logstate != TCP_LOG_STATE_OFF)		\
 			tcp_log_event_(tp, th, rxbuf, txbuf, eventid,	\
 			    errornum, len, stackinfo, th_hostorder,	\
 			    NULL, NULL, 0, NULL);				\
 	} while (0)
 #endif /* TCP_LOG_FORCEVERBOSE */
 /*
  * TCP_LOG_EVENTP: Like the non-verbose form of TCP_LOG_EVENT, but with a
  * caller-supplied tv that is passed through to tcp_log_event_(). Verbose
  * information is never recorded, regardless of tcp_log_verbose. Nothing is
  * logged while tp->t_logstate is TCP_LOG_STATE_OFF.
  */
 #define	TCP_LOG_EVENTP(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \
 	do {								\
 		if (tp->t_logstate != TCP_LOG_STATE_OFF)		\
 			tcp_log_event_(tp, th, rxbuf, txbuf, eventid,	\
 			    errornum, len, stackinfo, th_hostorder,	\
 			    NULL, NULL, 0, tv);				\
 	} while (0)
 
 
 /*
  * Kernel interface of the TCP log buffer code. With TCP_BLACKBOX the real
  * implementation is declared. Without it, tcp_log_verbose is the constant
  * false and tcp_log_event_() is an inline stub that records nothing, so
  * the TCP_LOG_EVENT* macros still compile.
  */
 #ifdef TCP_BLACKBOX
 extern bool tcp_log_verbose;
 void tcp_log_drain(struct tcpcb *tp);
 int tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force);
 void tcp_log_dump_tp_bucket_logbufs(struct tcpcb *tp, char *reason);
 struct tcp_log_buffer *tcp_log_event_(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf,
     struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len,
     union tcp_log_stackspecific *stackinfo, int th_hostorder,
     const char *output_caller, const char *func, int line, const struct timeval *tv);
 size_t tcp_log_get_id(struct tcpcb *tp, char *buf);
+size_t tcp_log_get_tag(struct tcpcb *tp, char *buf);
 u_int tcp_log_get_id_cnt(struct tcpcb *tp);
 int tcp_log_getlogbuf(struct sockopt *sopt, struct tcpcb *tp);
 void tcp_log_init(void);
 int tcp_log_set_id(struct tcpcb *tp, char *id);
+int tcp_log_set_tag(struct tcpcb *tp, char *tag);
 int tcp_log_state_change(struct tcpcb *tp, int state);
 void tcp_log_tcpcbinit(struct tcpcb *tp);
 void tcp_log_tcpcbfini(struct tcpcb *tp);
 void tcp_log_flowend(struct tcpcb *tp);
 #else /* !TCP_BLACKBOX */
 #define tcp_log_verbose	(false)
 
 /* Stub used when logging is compiled out: ignores its arguments. */
 static inline struct tcp_log_buffer *
 tcp_log_event_(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf,
     struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len,
     union tcp_log_stackspecific *stackinfo, int th_hostorder,
     const char *output_caller, const char *func, int line,
     const struct timeval *tv)
 {
 
 	return (NULL);
 }
 #endif /* TCP_BLACKBOX */
 
 #endif	/* _KERNEL */
 #endif	/* __tcp_log_buf_h__ */
Index: head/sys/netinet/tcp_var.h
===================================================================
--- head/sys/netinet/tcp_var.h	(revision 356413)
+++ head/sys/netinet/tcp_var.h	(revision 356414)
@@ -1,1030 +1,1032 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1993, 1994, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_var.h	8.4 (Berkeley) 5/24/95
  * $FreeBSD$
  */
 
 #ifndef _NETINET_TCP_VAR_H_
 #define _NETINET_TCP_VAR_H_
 
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 
 #ifdef _KERNEL
 #include <net/vnet.h>
 #include <sys/mbuf.h>
 #endif
 
 #if defined(_KERNEL) || defined(_WANT_TCPCB)
 /* TCP segment queue entry */
 struct tseg_qent {
 	TAILQ_ENTRY(tseg_qent) tqe_q;
 	struct	mbuf   *tqe_m;		/* mbuf contains packet */
 	struct  mbuf   *tqe_last;	/* last mbuf in chain */
 	tcp_seq tqe_start;		/* TCP Sequence number start */
 	int	tqe_len;		/* TCP segment data length */
 	uint32_t tqe_flags;		/* The flags from the th->th_flags */
 	uint32_t tqe_mbuf_cnt;		/* Count of mbuf overhead */
 };
 TAILQ_HEAD(tsegqe_head, tseg_qent);
 
 struct sackblk {
 	tcp_seq start;		/* start seq no. of sack block */
 	tcp_seq end;		/* end seq no. */
 };
 
 struct sackhole {
 	tcp_seq start;		/* start seq no. of hole */
 	tcp_seq end;		/* end seq no. */
 	tcp_seq rxmit;		/* next seq. no in hole to be retransmitted */
 	TAILQ_ENTRY(sackhole) scblink;	/* scoreboard linkage */
 };
 
 struct sackhint {
 	struct sackhole	*nexthole;
 	int		sack_bytes_rexmit;
 	tcp_seq		last_sack_ack;	/* Most recent/largest sacked ack */
 
 	int		ispare;		/* explicit pad for 64bit alignment */
 	int             sacked_bytes;	/*
 					 * Total sacked bytes reported by the
 					 * receiver via sack option
 					 */
 	uint32_t	_pad1[1];	/* TBD */
 	uint64_t	_pad[1];	/* TBD */
 };
 
 #define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)
 
 STAILQ_HEAD(tcp_log_stailq, tcp_log_mem);
 
 /*
  * Tcp control block, one per tcp; fields:
  * Organized for 64 byte cacheline efficiency based
  * on common tcp_input/tcp_output processing.
  */
 struct tcpcb {
 	/* Cache line 1 */
 	struct	inpcb *t_inpcb;		/* back pointer to internet pcb */
 	struct tcp_function_block *t_fb;/* TCP function call block */
 	void	*t_fb_ptr;		/* Pointer to t_fb specific data */
 	uint32_t t_maxseg:24,		/* maximum segment size */
 		t_logstate:8;		/* State of "black box" logging */
 	uint32_t t_port:16,		/* Tunneling (over udp) port */
 		t_state:4,		/* state of this connection */
 		t_idle_reduce : 1,
 		t_delayed_ack: 7,	/* Delayed ack variable */
 		t_fin_is_rst: 1,	/* Are fin's treated as resets */
-		bits_spare : 3;
+		t_log_state_set: 1,
+		bits_spare : 2;
 	u_int	t_flags;
 	tcp_seq	snd_una;		/* sent but unacknowledged */
 	tcp_seq	snd_max;		/* highest sequence number sent;
 					 * used to recognize retransmits
 					 */
 	tcp_seq	snd_nxt;		/* send next */
 	tcp_seq	snd_up;			/* send urgent pointer */
 	uint32_t  snd_wnd;		/* send window */
 	uint32_t  snd_cwnd;		/* congestion-controlled window */
 	uint32_t t_peakrate_thr; 	/* pre-calculated peak rate threshold */
 	/* Cache line 2 */
 	u_int32_t  ts_offset;		/* our timestamp offset */
 	u_int32_t	rfbuf_ts;	/* recv buffer autoscaling timestamp */
 	int	rcv_numsacks;		/* # distinct sack blks present */
 	u_int	t_tsomax;		/* TSO total burst length limit in bytes */
 	u_int	t_tsomaxsegcount;	/* TSO maximum segment count */
 	u_int	t_tsomaxsegsize;	/* TSO maximum segment size in bytes */
 	tcp_seq	rcv_nxt;		/* receive next */
 	tcp_seq	rcv_adv;		/* advertised window */
 	uint32_t  rcv_wnd;		/* receive window */
 	u_int	t_flags2;		/* More tcpcb flags storage */
 	int	t_srtt;			/* smoothed round-trip time */
 	int	t_rttvar;		/* variance in round-trip time */
 	u_int32_t  ts_recent;		/* timestamp echo data */
 	u_char	snd_scale;		/* window scaling for send window */
 	u_char	rcv_scale;		/* window scaling for recv window */
 	u_char	snd_limited;		/* segments limited transmitted */
 	u_char	request_r_scale;	/* pending window scaling */
 	tcp_seq	last_ack_sent;
 	u_int	t_rcvtime;		/* inactivity time */
 	/* Cache line 3 */
 	tcp_seq	rcv_up;			/* receive urgent pointer */
 	int	t_segqlen;		/* segment reassembly queue length */
 	uint32_t t_segqmbuflen;		/* Count of bytes mbufs on all entries */
 	struct	tsegqe_head t_segq;	/* segment reassembly queue */
 	struct mbuf      *t_in_pkt;
 	struct mbuf	 *t_tail_pkt;
 	struct tcp_timer *t_timers;	/* All the TCP timers in one struct */
 	struct	vnet *t_vnet;		/* back pointer to parent vnet */
 	uint32_t  snd_ssthresh;		/* snd_cwnd size threshold for
 					 * slow start exponential to
 					 * linear switch
 					 */
 	tcp_seq	snd_wl1;		/* window update seg seq number */
 	/* Cache line 4 */
 	tcp_seq	snd_wl2;		/* window update seg ack number */
 
 	tcp_seq	irs;			/* initial receive sequence number */
 	tcp_seq	iss;		        /* initial send sequence number */
 	u_int   t_acktime;
 	u_int	ts_recent_age;		/* when last updated */
 	tcp_seq	snd_recover;		/* for use in NewReno Fast Recovery */
 	uint16_t cl4_spare;		/* Spare to adjust CL 4 */
 	char	t_oobflags;		/* have some */
 	char	t_iobc;			/* input character */
 	int	t_rxtcur;		/* current retransmit value (ticks) */
 
 	int	t_rxtshift;		/* log(2) of rexmt exp. backoff */
 	u_int	t_rtttime;		/* RTT measurement start time */
 
 	tcp_seq	t_rtseq;		/* sequence number being timed */
 	u_int	t_starttime;		/* time connection was established */
 
 	u_int	t_pmtud_saved_maxseg;	/* pre-blackhole MSS */
 	u_int	t_rttmin;		/* minimum rtt allowed */
 
 	u_int	t_rttbest;		/* best rtt we've seen */
 
 	int	t_softerror;		/* possible error not yet reported */
 	uint32_t  max_sndwnd;		/* largest window peer has offered */
 	/* Cache line 5 */
 	uint32_t  snd_cwnd_prev;	/* cwnd prior to retransmit */
 	uint32_t  snd_ssthresh_prev;	/* ssthresh prior to retransmit */
 	tcp_seq	snd_recover_prev;	/* snd_recover prior to retransmit */
 	int	t_sndzerowin;		/* zero-window updates sent */
 	u_long	t_rttupdated;		/* number of times rtt sampled */
 	int	snd_numholes;		/* number of holes seen by sender */
 	u_int	t_badrxtwin;		/* window for retransmit recovery */
 	TAILQ_HEAD(sackhole_head, sackhole) snd_holes;
 					/* SACK scoreboard (sorted) */
 	tcp_seq	snd_fack;		/* last seq number(+1) sack'd by rcv'r*/
 	tcp_seq sack_newdata;		/* New data xmitted in this recovery
 					   episode starts at this seq number */
 	struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
 	struct sackhint	sackhint;	/* SACK scoreboard hint */
 	int	t_rttlow;		/* smallest observed RTT */
 	int	rfbuf_cnt;		/* recv buffer autoscaling byte count */
 	struct toedev	*tod;		/* toedev handling this connection */
 	int	t_sndrexmitpack;	/* retransmit packets sent */
 	int	t_rcvoopack;		/* out-of-order packets received */
 	void	*t_toe;			/* TOE pcb pointer */
 	struct cc_algo	*cc_algo;	/* congestion control algorithm */
 	struct cc_var	*ccv;		/* congestion control specific vars */
 	struct osd	*osd;		/* storage for Khelp module data */
 	int	t_bytes_acked;		/* # bytes acked during current RTT */
 	u_int   t_maxunacktime;
 	u_int	t_keepinit;		/* time to establish connection */
 	u_int	t_keepidle;		/* time before keepalive probes begin */
 	u_int	t_keepintvl;		/* interval between keepalives */
 	u_int	t_keepcnt;		/* number of keepalives before close */
 	int	t_dupacks;		/* consecutive dup acks recd */
 	int	t_lognum;		/* Number of log entries */
+	int	t_loglimit;		/* Maximum number of log entries */
 	struct tcp_log_stailq t_logs;	/* Log buffer */
 	struct tcp_log_id_node *t_lin;
 	struct tcp_log_id_bucket *t_lib;
 	const char *t_output_caller;	/* Function that called tcp_output */
 	struct statsblob *t_stats;	/* Per-connection stats */
 	uint32_t t_logsn;		/* Log "serial number" */
 	uint32_t gput_ts;		/* Time goodput measurement started */
 	tcp_seq gput_seq;		/* Outbound measurement seq */
 	tcp_seq gput_ack;		/* Inbound measurement ack */
 	int32_t t_stats_gput_prev;	/* XXXLAS: Prev gput measurement */
 	uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */
 	unsigned int *t_tfo_pending;	/* TCP Fast Open server pending counter */
 	union {
 		uint8_t client[TCP_FASTOPEN_MAX_COOKIE_LEN];
 		uint64_t server;
 	} t_tfo_cookie;			/* TCP Fast Open cookie to send */
 #ifdef TCPPCAP
 	struct mbufq t_inpkts;		/* List of saved input packets. */
 	struct mbufq t_outpkts;		/* List of saved output packets. */
 #endif
 };
 #endif	/* _KERNEL || _WANT_TCPCB */
 
 #ifdef _KERNEL
 struct tcptemp {
 	u_char	tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */
 	struct	tcphdr tt_t;
 };
 
 /* Minimum map entries limit value, if set */
 #define TCP_MIN_MAP_ENTRIES_LIMIT	128
 
 /* 
  * TODO: We still need to brave plowing in
  * to tcp_input() and the pru_usrreq() block.
  * Right now these go to the old standards which
  * are somewhat ok, but in the long term may
  * need to be changed. If we do tackle tcp_input()
  * then we need to get rid of the tcp_do_segment()
  * function below.
  */
 /* Flags for tcp functions */
 #define TCP_FUNC_BEING_REMOVED 0x01   	/* Can no longer be referenced */
 
 /*
  * If defining the optional tcp_timers, in the
  * tfb_tcp_timer_stop call you must use the
  * callout_async_drain() function with the
  * tcp_timer_discard callback. You should check
  * the return of callout_async_drain() and if 0
  * increment tt_draincnt. Since the timer sub-system
  * does not know your callbacks you must provide a
  * stop_all function that loops through and calls
  * tcp_timer_stop() with each of your defined timers.
  * Adding a tfb_tcp_handoff_ok function allows the socket
  * option to change stacks to query you even if the
  * connection is in a later stage. You return 0 to
  * say you can take over and run your stack, you return
  * non-zero (an error number) to say no you can't.
  * If the function is undefined you can only change
  * in the early states (before connect or listen).
  * tfb_tcp_fb_fini is changed to add a flag to tell
  * the old stack if the tcb is being destroyed or
  * not. A one in the flag means the TCB is being
  * destroyed, a zero indicates it is transitioning to
  * another stack (via socket option).
  */
 struct tcp_function_block {
 	char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX];
 	int	(*tfb_tcp_output)(struct tcpcb *);
 	int	(*tfb_tcp_output_wtime)(struct tcpcb *, const struct timeval *);
 	void	(*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *,
 			    struct socket *, struct tcpcb *,
 		        int, int, uint8_t);
 	int     (*tfb_do_queued_segments)(struct socket *, struct tcpcb *, int);
 	int      (*tfb_do_segment_nounlock)(struct mbuf *, struct tcphdr *,
 			    struct socket *, struct tcpcb *,
 			    int, int, uint8_t,
 			    int, struct timeval *);
 	void	(*tfb_tcp_hpts_do_segment)(struct mbuf *, struct tcphdr *,
 			    struct socket *, struct tcpcb *,
 			    int, int, uint8_t,
 			    int, struct timeval *);
 	int     (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt,
 			    struct inpcb *inp, struct tcpcb *tp);
 	/* Optional memory allocation/free routine */
 	int	(*tfb_tcp_fb_init)(struct tcpcb *);
 	void	(*tfb_tcp_fb_fini)(struct tcpcb *, int);
 	/* Optional timers, must define all if you define one */
 	int	(*tfb_tcp_timer_stop_all)(struct tcpcb *);
 	void	(*tfb_tcp_timer_activate)(struct tcpcb *,
 			    uint32_t, u_int);
 	int	(*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
 	void	(*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
 	void	(*tfb_tcp_rexmit_tmr)(struct tcpcb *);
 	int	(*tfb_tcp_handoff_ok)(struct tcpcb *);
 	void	(*tfb_tcp_mtu_chg)(struct tcpcb *);
 	volatile uint32_t tfb_refcnt;
 	uint32_t  tfb_flags;
 	uint8_t	tfb_id;
 };
 
 struct tcp_function {
 	TAILQ_ENTRY(tcp_function)	tf_next;
 	char				tf_name[TCP_FUNCTION_NAME_LEN_MAX];
 	struct tcp_function_block	*tf_fb;
 };
 
 TAILQ_HEAD(tcp_funchead, tcp_function);
 #endif	/* _KERNEL */
 
 /*
  * Flags and utility macros for the t_flags field.
  */
 #define	TF_ACKNOW	0x00000001	/* ack peer immediately */
 #define	TF_DELACK	0x00000002	/* ack, but try to delay it */
 #define	TF_NODELAY	0x00000004	/* don't delay packets to coalesce */
 #define	TF_NOOPT	0x00000008	/* don't use tcp options */
 #define	TF_SENTFIN	0x00000010	/* have sent FIN */
 #define	TF_REQ_SCALE	0x00000020	/* have/will request window scaling */
 #define	TF_RCVD_SCALE	0x00000040	/* other side has requested scaling */
 #define	TF_REQ_TSTMP	0x00000080	/* have/will request timestamps */
 #define	TF_RCVD_TSTMP	0x00000100	/* a timestamp was received in SYN */
 #define	TF_SACK_PERMIT	0x00000200	/* other side said I could SACK */
 #define	TF_NEEDSYN	0x00000400	/* send SYN (implicit state) */
 #define	TF_NEEDFIN	0x00000800	/* send FIN (implicit state) */
 #define	TF_NOPUSH	0x00001000	/* don't push */
 #define	TF_PREVVALID	0x00002000	/* saved values for bad rxmit valid */
 #define	TF_UNUSED1	0x00004000	/* unused */
 #define	TF_GPUTINPROG	0x00008000	/* Goodput measurement in progress */
 #define	TF_MORETOCOME	0x00010000	/* More data to be appended to sock */
 #define	TF_LQ_OVERFLOW	0x00020000	/* listen queue overflow */
 #define	TF_LASTIDLE	0x00040000	/* connection was previously idle */
 #define	TF_RXWIN0SENT	0x00080000	/* sent a receiver win 0 in response */
 #define	TF_FASTRECOVERY	0x00100000	/* in NewReno Fast Recovery */
 #define	TF_WASFRECOVERY	0x00200000	/* was in NewReno Fast Recovery */
 #define	TF_SIGNATURE	0x00400000	/* require MD5 digests (RFC2385) */
 #define	TF_FORCEDATA	0x00800000	/* force out a byte */
 #define	TF_TSO		0x01000000	/* TSO enabled on this connection */
 #define	TF_TOE		0x02000000	/* this connection is offloaded */
 #define	TF_UNUSED3	0x04000000	/* unused */
 #define	TF_UNUSED4	0x08000000	/* unused */
 #define	TF_UNUSED5	0x10000000	/* unused */
 #define	TF_CONGRECOVERY	0x20000000	/* congestion recovery mode */
 #define	TF_WASCRECOVERY	0x40000000	/* was in congestion recovery */
 #define	TF_FASTOPEN	0x80000000	/* TCP Fast Open indication */
 
 #define	IN_FASTRECOVERY(t_flags)	(t_flags & TF_FASTRECOVERY)
 #define	ENTER_FASTRECOVERY(t_flags)	t_flags |= TF_FASTRECOVERY
 #define	EXIT_FASTRECOVERY(t_flags)	t_flags &= ~TF_FASTRECOVERY
 
 #define	IN_CONGRECOVERY(t_flags)	(t_flags & TF_CONGRECOVERY)
 #define	ENTER_CONGRECOVERY(t_flags)	t_flags |= TF_CONGRECOVERY
 #define	EXIT_CONGRECOVERY(t_flags)	t_flags &= ~TF_CONGRECOVERY
 
 #define	IN_RECOVERY(t_flags) (t_flags & (TF_CONGRECOVERY | TF_FASTRECOVERY))
 #define	ENTER_RECOVERY(t_flags) t_flags |= (TF_CONGRECOVERY | TF_FASTRECOVERY)
 #define	EXIT_RECOVERY(t_flags) t_flags &= ~(TF_CONGRECOVERY | TF_FASTRECOVERY)
 
 #if defined(_KERNEL) && !defined(TCP_RFC7413)
 #define	IS_FASTOPEN(t_flags)		(false)
 #else
 #define	IS_FASTOPEN(t_flags)		(t_flags & TF_FASTOPEN)
 #endif
 
 #define	BYTES_THIS_ACK(tp, th)	(th->th_ack - tp->snd_una)
 
 /*
  * Flags for the t_oobflags field.
  */
 #define	TCPOOB_HAVEDATA	0x01
 #define	TCPOOB_HADDATA	0x02
 
 /*
  * Flags for the extended TCP flags field, t_flags2
  */
 #define	TF2_PLPMTU_BLACKHOLE	0x00000001 /* Possible PLPMTUD Black Hole. */
 #define	TF2_PLPMTU_PMTUD	0x00000002 /* Allowed to attempt PLPMTUD. */
 #define	TF2_PLPMTU_MAXSEGSNT	0x00000004 /* Last seg sent was full seg. */
 #define	TF2_LOG_AUTO		0x00000008 /* Session is auto-logging. */
 #define TF2_DROP_AF_DATA 	0x00000010 /* Drop after all data ack'd */
 #define	TF2_ECN_PERMIT		0x00000020 /* connection ECN-ready */
 #define	TF2_ECN_SND_CWR		0x00000040 /* ECN CWR in queue */
 #define	TF2_ECN_SND_ECE		0x00000080 /* ECN ECE in queue */
 #define	TF2_ACE_PERMIT		0x00000100 /* Accurate ECN mode */
 
 /*
  * Structure to hold TCP options that are only used during segment
  * processing (in tcp_input), but not held in the tcpcb.
  * It's basically used to reduce the number of parameters
  * to tcp_dooptions and tcp_addoptions.
  * The binary order of the to_flags is relevant for packing of the
  * options in tcp_addoptions.
  */
 struct tcpopt {
 	u_int32_t	to_flags;	/* which options are present */
 #define	TOF_MSS		0x0001		/* maximum segment size */
 #define	TOF_SCALE	0x0002		/* window scaling */
 #define	TOF_SACKPERM	0x0004		/* SACK permitted */
 #define	TOF_TS		0x0010		/* timestamp */
 #define	TOF_SIGNATURE	0x0040		/* TCP-MD5 signature option (RFC2385) */
 #define	TOF_SACK	0x0080		/* Peer sent SACK option */
 #define	TOF_FASTOPEN	0x0100		/* TCP Fast Open (TFO) cookie */
 #define	TOF_MAXOPT	0x0200
 	u_int32_t	to_tsval;	/* new timestamp */
 	u_int32_t	to_tsecr;	/* reflected timestamp */
 	u_char		*to_sacks;	/* pointer to the first SACK blocks */
 	u_char		*to_signature;	/* pointer to the TCP-MD5 signature */
 	u_int8_t	*to_tfo_cookie; /* pointer to the TFO cookie */
 	u_int16_t	to_mss;		/* maximum segment size */
 	u_int8_t	to_wscale;	/* window scaling */
 	u_int8_t	to_nsacks;	/* number of SACK blocks */
 	u_int8_t	to_tfo_len;	/* TFO cookie length */
 	u_int32_t	to_spare;	/* UTO */
 };
 
 /*
  * Flags for tcp_dooptions.
  */
 #define	TO_SYN		0x01		/* parse SYN-only options */
 
 struct hc_metrics_lite {	/* must stay in sync with hc_metrics */
 	uint32_t	rmx_mtu;	/* MTU for this path */
 	uint32_t	rmx_ssthresh;	/* outbound gateway buffer limit */
 	uint32_t	rmx_rtt;	/* estimated round trip time */
 	uint32_t	rmx_rttvar;	/* estimated rtt variance */
 	uint32_t	rmx_cwnd;	/* congestion window */
 	uint32_t	rmx_sendpipe;   /* outbound delay-bandwidth product */
 	uint32_t	rmx_recvpipe;   /* inbound delay-bandwidth product */
 };
 
 /*
  * Used by tcp_maxmtu() to communicate interface specific features
  * and limits at the time of connection setup.
  */
 struct tcp_ifcap {
 	int	ifcap;
 	u_int	tsomax;
 	u_int	tsomaxsegcount;
 	u_int	tsomaxsegsize;
 };
 
 #ifndef _NETINET_IN_PCB_H_
 struct in_conninfo;
 #endif /* _NETINET_IN_PCB_H_ */
 
 struct tcptw {
 	struct inpcb	*tw_inpcb;	/* XXX back pointer to internet pcb */
 	tcp_seq		snd_nxt;
 	tcp_seq		rcv_nxt;
 	tcp_seq		iss;
 	tcp_seq		irs;
 	u_short		last_win;	/* cached window value */
 	short		tw_so_options;	/* copy of so_options */
 	struct ucred	*tw_cred;	/* user credentials */
 	u_int32_t	t_recent;
 	u_int32_t	ts_offset;	/* our timestamp offset */
 	u_int		t_starttime;
 	int		tw_time;
 	TAILQ_ENTRY(tcptw) tw_2msl;
 	void		*tw_pspare;	/* TCP_SIGNATURE */
 	u_int		*tw_spare;	/* TCP_SIGNATURE */
 };
 
 #define	intotcpcb(ip)	((struct tcpcb *)(ip)->inp_ppcb)
 #define	intotw(ip)	((struct tcptw *)(ip)->inp_ppcb)
 #define	sototcpcb(so)	(intotcpcb(sotoinpcb(so)))
 
 /*
  * The smoothed round-trip time and estimated variance
  * are stored as fixed point numbers scaled by the values below.
  * For convenience, these scales are also used in smoothing the average
  * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed).
  * With these scales, srtt has 3 bits to the right of the binary point,
  * and thus an "ALPHA" of 0.875.  rttvar has 2 bits to the right of the
  * binary point, and is smoothed with an ALPHA of 0.75.
  */
 #define	TCP_RTT_SCALE		32	/* multiplier for srtt; 3 bits frac. */
 #define	TCP_RTT_SHIFT		5	/* shift for srtt; 3 bits frac. */
 #define	TCP_RTTVAR_SCALE	16	/* multiplier for rttvar; 2 bits */
 #define	TCP_RTTVAR_SHIFT	4	/* shift for rttvar; 2 bits */
 #define	TCP_DELTA_SHIFT		2	/* see tcp_input.c */
 
 /*
  * The initial retransmission should happen at rtt + 4 * rttvar.
  * Because of the way we do the smoothing, srtt and rttvar
  * will each average +1/2 tick of bias.  When we compute
  * the retransmit timer, we want 1/2 tick of rounding and
  * 1 extra tick because of +-1/2 tick uncertainty in the
  * firing of the timer.  The bias will give us exactly the
  * 1.5 tick we need.  But, because the bias is
  * statistical, we have to test that we don't drop below
  * the minimum feasible timer (which is 2 ticks).
  * This version of the macro adapted from a paper by Lawrence
  * Brakmo and Larry Peterson which outlines a problem caused
  * by insufficient precision in the original implementation,
  * which results in inappropriately large RTO values for very
  * fast networks.
  */
 #define	TCP_REXMTVAL(tp) \
 	max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT))  \
 	  + (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
 
 /*
  * TCP statistics.
  * Many of these should be kept per connection,
  * but that's inconvenient at the moment.
  */
 struct	tcpstat {
 	uint64_t tcps_connattempt;	/* connections initiated */
 	uint64_t tcps_accepts;		/* connections accepted */
 	uint64_t tcps_connects;		/* connections established */
 	uint64_t tcps_drops;		/* connections dropped */
 	uint64_t tcps_conndrops;	/* embryonic connections dropped */
 	uint64_t tcps_minmssdrops;	/* average minmss too low drops */
 	uint64_t tcps_closed;		/* conn. closed (includes drops) */
 	uint64_t tcps_segstimed;	/* segs where we tried to get rtt */
 	uint64_t tcps_rttupdated;	/* times we succeeded */
 	uint64_t tcps_delack;		/* delayed acks sent */
 	uint64_t tcps_timeoutdrop;	/* conn. dropped in rxmt timeout */
 	uint64_t tcps_rexmttimeo;	/* retransmit timeouts */
 	uint64_t tcps_persisttimeo;	/* persist timeouts */
 	uint64_t tcps_keeptimeo;	/* keepalive timeouts */
 	uint64_t tcps_keepprobe;	/* keepalive probes sent */
 	uint64_t tcps_keepdrops;	/* connections dropped in keepalive */
 
 	uint64_t tcps_sndtotal;		/* total packets sent */
 	uint64_t tcps_sndpack;		/* data packets sent */
 	uint64_t tcps_sndbyte;		/* data bytes sent */
 	uint64_t tcps_sndrexmitpack;	/* data packets retransmitted */
 	uint64_t tcps_sndrexmitbyte;	/* data bytes retransmitted */
 	uint64_t tcps_sndrexmitbad;	/* unnecessary packet retransmissions */
 	uint64_t tcps_sndacks;		/* ack-only packets sent */
 	uint64_t tcps_sndprobe;		/* window probes sent */
 	uint64_t tcps_sndurg;		/* packets sent with URG only */
 	uint64_t tcps_sndwinup;		/* window update-only packets sent */
 	uint64_t tcps_sndctrl;		/* control (SYN|FIN|RST) packets sent */
 
 	uint64_t tcps_rcvtotal;		/* total packets received */
 	uint64_t tcps_rcvpack;		/* packets received in sequence */
 	uint64_t tcps_rcvbyte;		/* bytes received in sequence */
 	uint64_t tcps_rcvbadsum;	/* packets received with cksum errs */
 	uint64_t tcps_rcvbadoff;	/* packets received with bad offset */
 	uint64_t tcps_rcvreassfull;	/* packets dropped for no reass space */
 	uint64_t tcps_rcvshort;		/* packets received too short */
 	uint64_t tcps_rcvduppack;	/* duplicate-only packets received */
 	uint64_t tcps_rcvdupbyte;	/* duplicate-only bytes received */
 	uint64_t tcps_rcvpartduppack;	/* packets with some duplicate data */
 	uint64_t tcps_rcvpartdupbyte;	/* dup. bytes in part-dup. packets */
 	uint64_t tcps_rcvoopack;	/* out-of-order packets received */
 	uint64_t tcps_rcvoobyte;	/* out-of-order bytes received */
 	uint64_t tcps_rcvpackafterwin;	/* packets with data after window */
 	uint64_t tcps_rcvbyteafterwin;	/* bytes rcvd after window */
 	uint64_t tcps_rcvafterclose;	/* packets rcvd after "close" */
 	uint64_t tcps_rcvwinprobe;	/* rcvd window probe packets */
 	uint64_t tcps_rcvdupack;	/* rcvd duplicate acks */
 	uint64_t tcps_rcvacktoomuch;	/* rcvd acks for unsent data */
 	uint64_t tcps_rcvackpack;	/* rcvd ack packets */
 	uint64_t tcps_rcvackbyte;	/* bytes acked by rcvd acks */
 	uint64_t tcps_rcvwinupd;	/* rcvd window update packets */
 	uint64_t tcps_pawsdrop;		/* segments dropped due to PAWS */
 	uint64_t tcps_predack;		/* times hdr predict ok for acks */
 	uint64_t tcps_preddat;		/* times hdr predict ok for data pkts */
 	uint64_t tcps_pcbcachemiss;
 	uint64_t tcps_cachedrtt;	/* times cached RTT in route updated */
 	uint64_t tcps_cachedrttvar;	/* times cached rttvar updated */
 	uint64_t tcps_cachedssthresh;	/* times cached ssthresh updated */
 	uint64_t tcps_usedrtt;		/* times RTT initialized from route */
 	uint64_t tcps_usedrttvar;	/* times RTTVAR initialized from rt */
 	uint64_t tcps_usedssthresh;	/* times ssthresh initialized from rt*/
 	uint64_t tcps_persistdrop;	/* timeout in persist state */
 	uint64_t tcps_badsyn;		/* bogus SYN, e.g. premature ACK */
 	uint64_t tcps_mturesent;	/* resends due to MTU discovery */
 	uint64_t tcps_listendrop;	/* listen queue overflows */
 	uint64_t tcps_badrst;		/* ignored RSTs in the window */
 
 	uint64_t tcps_sc_added;		/* entry added to syncache */
 	uint64_t tcps_sc_retransmitted;	/* syncache entry was retransmitted */
 	uint64_t tcps_sc_dupsyn;	/* duplicate SYN packet */
 	uint64_t tcps_sc_dropped;	/* could not reply to packet */
 	uint64_t tcps_sc_completed;	/* successful extraction of entry */
 	uint64_t tcps_sc_bucketoverflow;/* syncache per-bucket limit hit */
 	uint64_t tcps_sc_cacheoverflow;	/* syncache cache limit hit */
 	uint64_t tcps_sc_reset;		/* RST removed entry from syncache */
 	uint64_t tcps_sc_stale;		/* timed out or listen socket gone */
 	uint64_t tcps_sc_aborted;	/* syncache entry aborted */
 	uint64_t tcps_sc_badack;	/* removed due to bad ACK */
 	uint64_t tcps_sc_unreach;	/* ICMP unreachable received */
 	uint64_t tcps_sc_zonefail;	/* zalloc() failed */
 	uint64_t tcps_sc_sendcookie;	/* SYN cookie sent */
 	uint64_t tcps_sc_recvcookie;	/* SYN cookie received */
 
 	uint64_t tcps_hc_added;		/* entry added to hostcache */
 	uint64_t tcps_hc_bucketoverflow;/* hostcache per bucket limit hit */
 
 	uint64_t tcps_finwait2_drops;    /* Drop FIN_WAIT_2 connection after time limit */
 
 	/* SACK related stats */
 	uint64_t tcps_sack_recovery_episode; /* SACK recovery episodes */
 	uint64_t tcps_sack_rexmits;	    /* SACK rexmit segments   */
 	uint64_t tcps_sack_rexmit_bytes;    /* SACK rexmit bytes      */
 	uint64_t tcps_sack_rcv_blocks;	    /* SACK blocks (options) received */
 	uint64_t tcps_sack_send_blocks;	    /* SACK blocks (options) sent     */
 	uint64_t tcps_sack_sboverflow;	    /* times scoreboard overflowed */
 	
 	/* ECN related stats */
 	uint64_t tcps_ecn_ce;		/* ECN Congestion Experienced */
 	uint64_t tcps_ecn_ect0;		/* ECN Capable Transport */
 	uint64_t tcps_ecn_ect1;		/* ECN Capable Transport */
 	uint64_t tcps_ecn_shs;		/* ECN successful handshakes */
 	uint64_t tcps_ecn_rcwnd;	/* # times ECN reduced the cwnd */
 
 	/* TCP_SIGNATURE related stats */
 	uint64_t tcps_sig_rcvgoodsig;	/* Total matching signature received */
 	uint64_t tcps_sig_rcvbadsig;	/* Total bad signature received */
 	uint64_t tcps_sig_err_buildsig;	/* Failed to make signature */
 	uint64_t tcps_sig_err_sigopt;	/* No signature expected by socket */
 	uint64_t tcps_sig_err_nosigopt;	/* No signature provided by segment */
 
 	/* Path MTU Discovery Black Hole Detection related stats */
 	uint64_t tcps_pmtud_blackhole_activated;	 /* Black Hole Count */
 	uint64_t tcps_pmtud_blackhole_activated_min_mss; /* BH at min MSS Count */
 	uint64_t tcps_pmtud_blackhole_failed;		 /* Black Hole Failure Count */
 
 	uint64_t _pad[12];		/* 6 UTO, 6 TBD */
 };
 
 #define	tcps_rcvmemdrop	tcps_rcvreassfull	/* compat */
 
 #ifdef _KERNEL
 #define	TI_UNLOCKED	1
 #define	TI_RLOCKED	2
 #include <sys/counter.h>
 
 VNET_PCPUSTAT_DECLARE(struct tcpstat, tcpstat);	/* tcp statistics */
 /*
  * In-kernel consumers can use these accessor macros directly to update
  * stats.
  */
 #define	TCPSTAT_ADD(name, val)	\
     VNET_PCPUSTAT_ADD(struct tcpstat, tcpstat, name, (val))
 #define	TCPSTAT_INC(name)	TCPSTAT_ADD(name, 1)
 
 /*
  * Kernel module consumers must use this accessor macro.
  */
 void	kmod_tcpstat_inc(int statnum);
 #define	KMOD_TCPSTAT_INC(name)						\
     kmod_tcpstat_inc(offsetof(struct tcpstat, name) / sizeof(uint64_t))
 
 /*
  * Running TCP connection count by state.
  */
 VNET_DECLARE(counter_u64_t, tcps_states[TCP_NSTATES]);
 #define	V_tcps_states	VNET(tcps_states)
 #define	TCPSTATES_INC(state)	counter_u64_add(V_tcps_states[state], 1)
 #define	TCPSTATES_DEC(state)	counter_u64_add(V_tcps_states[state], -1)
 
 /*
  * TCP specific helper hook point identifiers.
  */
 #define	HHOOK_TCP_EST_IN		0
 #define	HHOOK_TCP_EST_OUT		1
 #define	HHOOK_TCP_LAST			HHOOK_TCP_EST_OUT
 
 struct tcp_hhook_data {
 	struct tcpcb	*tp;
 	struct tcphdr	*th;
 	struct tcpopt	*to;
 	uint32_t	len;
 	int		tso;
 	tcp_seq		curack;
 };
 #ifdef TCP_HHOOK
 void hhook_run_tcp_est_out(struct tcpcb *tp,
 	struct tcphdr *th, struct tcpopt *to,
 	uint32_t len, int tso);
 #endif
 #endif
 
 /*
  * TCB structure exported to user-land via sysctl(3).
  *
  * Fields prefixed with "xt_" are unique to the export structure, and fields
  * with "t_" or other prefixes match corresponding fields of 'struct tcpcb'.
  *
  * Legend:
  * (s) - used by userland utilities in src
  * (p) - used by utilities in ports
  * (3) - is known to be used by third party software not in ports
  * (n) - no known usage
  *
  * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been
  * included.  Not all of our clients do.
  */
 #if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_)
 struct xtcpcb {
 	ksize_t	xt_len;		/* length of this structure */
 	struct xinpcb	xt_inp;
 	char		xt_stack[TCP_FUNCTION_NAME_LEN_MAX];	/* (s) */
 	char		xt_logid[TCP_LOG_ID_LEN];	/* (s) */
 	int64_t		spare64[8];
 	int32_t		t_state;		/* (s,p) */
 	uint32_t	t_flags;		/* (s,p) */
 	int32_t		t_sndzerowin;		/* (s) */
 	int32_t		t_sndrexmitpack;	/* (s) */
 	int32_t		t_rcvoopack;		/* (s) */
 	int32_t		t_rcvtime;		/* (s) */
 	int32_t		tt_rexmt;		/* (s) */
 	int32_t		tt_persist;		/* (s) */
 	int32_t		tt_keep;		/* (s) */
 	int32_t		tt_2msl;		/* (s) */
 	int32_t		tt_delack;		/* (s) */
 	int32_t		t_logstate;		/* (3) */
 	int32_t		spare32[32];
 } __aligned(8);
 
 #ifdef _KERNEL
 void	tcp_inptoxtp(const struct inpcb *, struct xtcpcb *);
 #endif
 #endif
 
 /*
  * TCP function information (name-to-id mapping, aliases, and refcnt)
  * exported to user-land via sysctl(3).
  */
 struct tcp_function_info {
 	uint32_t	tfi_refcnt;
 	uint8_t		tfi_id;
 	char		tfi_name[TCP_FUNCTION_NAME_LEN_MAX];
 	char		tfi_alias[TCP_FUNCTION_NAME_LEN_MAX];
 };
 
 /*
  * Numeric identifiers of the TCP sysctl nodes.
  *
  * The values are fixed; identifier 2 is not assigned in this list.
  */
 #define	TCPCTL_DO_RFC1323	1	/* enable the RFC 1323 extensions */
 #define	TCPCTL_MSSDFLT		3	/* default MSS */
 #define	TCPCTL_STATS		4	/* TCP statistics */
 #define	TCPCTL_RTTDFLT		5	/* default estimate of the RTT */
 #define	TCPCTL_KEEPIDLE		6	/* idle timer for keepalives */
 #define	TCPCTL_KEEPINTVL	7	/* interval at which keepalives are sent */
 #define	TCPCTL_SENDSPACE	8	/* space in the send buffer */
 #define	TCPCTL_RECVSPACE	9	/* space in the receive buffer */
 #define	TCPCTL_KEEPINIT		10	/* SYN establishment timeout */
 #define	TCPCTL_PCBLIST		11	/* every outstanding PCB, as a list */
 #define	TCPCTL_DELACKTIME	12	/* delay before a delayed ACK is sent */
 #define	TCPCTL_V6MSSDFLT	13	/* default MSS when using IPv6 */
 #define	TCPCTL_SACK		14	/* selective acknowledgement (RFC 2018) */
 #define	TCPCTL_DROP		15	/* drop a TCP connection */
 #define	TCPCTL_STATES		16	/* per-TCP-state connection counts */
 
 #ifdef _KERNEL
 #ifdef SYSCTL_DECL
 /*
  * Emitted only when SYSCTL_DECL is already defined at this point, i.e.
  * when the including file pulled in the sysctl declarations first.
  * NOTE(review): MALLOC_DECLARE(M_TCPLOG) is gated on SYSCTL_DECL as well,
  * not on MALLOC_DECLARE itself -- confirm this is intentional.
  */
 SYSCTL_DECL(_net_inet_tcp);
 SYSCTL_DECL(_net_inet_tcp_sack);
 MALLOC_DECLARE(M_TCPLOG);
 #endif
 
 /* Plain global int: declared with extern, not through VNET_DECLARE. */
 extern	int tcp_log_in_vain;
 
 /*
  * Global TCP tunables shared between different stacks.
  * Please keep the list sorted.
  *
  * Every name declared here has a matching V_<name> accessor macro in the
  * #define list that follows this one.  All entries are int except
  * tcp_map_entries_limit and tcp_map_split_limit (uint32_t), tcb
  * (struct inpcbhead) and tcbinfo (struct inpcbinfo).  The two
  * tcp_perconn_stats_* entries are only declared when STATS is defined.
  */
 VNET_DECLARE(int, drop_synfin);
 VNET_DECLARE(int, path_mtu_discovery);
 VNET_DECLARE(int, tcp_abc_l_var);
 VNET_DECLARE(int, tcp_autorcvbuf_max);
 VNET_DECLARE(int, tcp_autosndbuf_inc);
 VNET_DECLARE(int, tcp_autosndbuf_max);
 VNET_DECLARE(int, tcp_delack_enabled);
 VNET_DECLARE(int, tcp_do_autorcvbuf);
 VNET_DECLARE(int, tcp_do_autosndbuf);
 VNET_DECLARE(int, tcp_do_ecn);
 VNET_DECLARE(int, tcp_do_newcwv);
 VNET_DECLARE(int, tcp_do_rfc1323);
 VNET_DECLARE(int, tcp_do_rfc3042);
 VNET_DECLARE(int, tcp_do_rfc3390);
 VNET_DECLARE(int, tcp_do_rfc3465);
 VNET_DECLARE(int, tcp_do_rfc6675_pipe);
 VNET_DECLARE(int, tcp_do_sack);
 VNET_DECLARE(int, tcp_do_tso);
 VNET_DECLARE(int, tcp_ecn_maxretries);
 VNET_DECLARE(int, tcp_initcwnd_segments);
 VNET_DECLARE(int, tcp_insecure_rst);
 VNET_DECLARE(int, tcp_insecure_syn);
 VNET_DECLARE(uint32_t, tcp_map_entries_limit);
 VNET_DECLARE(uint32_t, tcp_map_split_limit);
 VNET_DECLARE(int, tcp_minmss);
 VNET_DECLARE(int, tcp_mssdflt);
 #ifdef STATS
 VNET_DECLARE(int, tcp_perconn_stats_dflt_tpl);
 VNET_DECLARE(int, tcp_perconn_stats_enable);
 #endif /* STATS */
 VNET_DECLARE(int, tcp_recvspace);
 VNET_DECLARE(int, tcp_sack_globalholes);
 VNET_DECLARE(int, tcp_sack_globalmaxholes);
 VNET_DECLARE(int, tcp_sack_maxholes);
 VNET_DECLARE(int, tcp_sc_rst_sock_fail);
 VNET_DECLARE(int, tcp_sendspace);
 /* Not scalar tunables: the two PCB-related objects close the list. */
 VNET_DECLARE(struct inpcbhead, tcb);
 VNET_DECLARE(struct inpcbinfo, tcbinfo);
 
 /*
  * V_<name> accessors: each one expands to VNET(<name>).  Kept in sorted
  * order.  NOTE(review): tcp_ts_offset_per_conn and the two
  * tcp_udp_tunneling_* names have no VNET_DECLARE in the tunables list
  * above; presumably they are declared elsewhere -- confirm.
  */
 #define	V_drop_synfin			VNET(drop_synfin)
 #define	V_path_mtu_discovery		VNET(path_mtu_discovery)
 #define	V_tcb				VNET(tcb)
 #define	V_tcbinfo			VNET(tcbinfo)
 #define	V_tcp_abc_l_var			VNET(tcp_abc_l_var)
 #define	V_tcp_autorcvbuf_max		VNET(tcp_autorcvbuf_max)
 #define	V_tcp_autosndbuf_inc		VNET(tcp_autosndbuf_inc)
 #define	V_tcp_autosndbuf_max		VNET(tcp_autosndbuf_max)
 #define	V_tcp_delack_enabled		VNET(tcp_delack_enabled)
 #define	V_tcp_do_autorcvbuf		VNET(tcp_do_autorcvbuf)
 #define	V_tcp_do_autosndbuf		VNET(tcp_do_autosndbuf)
 #define	V_tcp_do_ecn			VNET(tcp_do_ecn)
 #define	V_tcp_do_newcwv			VNET(tcp_do_newcwv)
 #define	V_tcp_do_rfc1323		VNET(tcp_do_rfc1323)
 #define	V_tcp_do_rfc3042		VNET(tcp_do_rfc3042)
 #define	V_tcp_do_rfc3390		VNET(tcp_do_rfc3390)
 #define	V_tcp_do_rfc3465		VNET(tcp_do_rfc3465)
 #define	V_tcp_do_rfc6675_pipe		VNET(tcp_do_rfc6675_pipe)
 #define	V_tcp_do_sack			VNET(tcp_do_sack)
 #define	V_tcp_do_tso			VNET(tcp_do_tso)
 #define	V_tcp_ecn_maxretries		VNET(tcp_ecn_maxretries)
 #define	V_tcp_initcwnd_segments		VNET(tcp_initcwnd_segments)
 #define	V_tcp_insecure_rst		VNET(tcp_insecure_rst)
 #define	V_tcp_insecure_syn		VNET(tcp_insecure_syn)
 #define	V_tcp_map_entries_limit		VNET(tcp_map_entries_limit)
 #define	V_tcp_map_split_limit		VNET(tcp_map_split_limit)
 #define	V_tcp_minmss			VNET(tcp_minmss)
 #define	V_tcp_mssdflt			VNET(tcp_mssdflt)
 #ifdef STATS
 #define	V_tcp_perconn_stats_dflt_tpl	VNET(tcp_perconn_stats_dflt_tpl)
 #define	V_tcp_perconn_stats_enable	VNET(tcp_perconn_stats_enable)
 #endif /* STATS */
 #define	V_tcp_recvspace			VNET(tcp_recvspace)
 #define	V_tcp_sack_globalholes		VNET(tcp_sack_globalholes)
 #define	V_tcp_sack_globalmaxholes	VNET(tcp_sack_globalmaxholes)
 #define	V_tcp_sack_maxholes		VNET(tcp_sack_maxholes)
 #define	V_tcp_sc_rst_sock_fail		VNET(tcp_sc_rst_sock_fail)
 #define	V_tcp_sendspace			VNET(tcp_sendspace)
 #define	V_tcp_ts_offset_per_conn	VNET(tcp_ts_offset_per_conn)
 #define	V_tcp_udp_tunneling_overhead	VNET(tcp_udp_tunneling_overhead)
 #define	V_tcp_udp_tunneling_port	VNET(tcp_udp_tunneling_port)
 
 #ifdef TCP_HHOOK
 /*
  * Only with TCP_HHOOK: an array of HHOOK_TCP_LAST + 1 pointers to
  * struct hhook_head (valid indices 0..HHOOK_TCP_LAST), plus its V_ accessor.
  */
 VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
 #define	V_tcp_hhh		VNET(tcp_hhh)
 #endif
 
 /*
  * Prototypes for TCP routines.  This part of the header sits inside the
  * _KERNEL conditional, so none of it is visible to user-land.  Most
  * parameters are unnamed; consult the definitions for their meaning.
  */
 int	 tcp_addoptions(struct tcpopt *, u_char *);
 int	 tcp_ccalgounload(struct cc_algo *unload_algo);
 /* tcp_close() and tcp_drop() both hand back a struct tcpcb pointer. */
 struct tcpcb *
 	 tcp_close(struct tcpcb *);
 void	 tcp_discardcb(struct tcpcb *);
 void	 tcp_twstart(struct tcpcb *);
 void	 tcp_twclose(struct tcptw *, int);
 void	 tcp_ctlinput(int, struct sockaddr *, void *);
 int	 tcp_ctloutput(struct socket *, struct sockopt *);
 struct tcpcb *
 	 tcp_drop(struct tcpcb *, int);
 void	 tcp_drain(void);
 void	 tcp_init(void);
 void	 tcp_fini(void *);
 /* The two tcp_log_* helpers share one signature and return a char *. */
 char	*tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
 	    const void *);
 char	*tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
 	    const void *);
 int	 tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *, struct mbuf *);
 void	 tcp_reass_global_init(void);
 void	 tcp_reass_flush(struct tcpcb *);
 void	 tcp_dooptions(struct tcpopt *, u_char *, int, int);
 void	tcp_dropwithreset(struct mbuf *, struct tcphdr *,
 		     struct tcpcb *, int, int);
 void	tcp_pulloutofband(struct socket *,
 		     struct tcphdr *, struct mbuf *, int);
 void	tcp_xmit_timer(struct tcpcb *, int);
 void	tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
 /* cc_* prototypes: all take the tcpcb; all but cc_conn_init() also take th. */
 void	cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
 			    uint16_t nsegs, uint16_t type);
 void 	cc_conn_init(struct tcpcb *tp);
 void 	cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
 void	cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
 #ifdef TCP_HHOOK
 /* Declared only when TCP_HHOOK is defined. */
 void	hhook_run_tcp_est_in(struct tcpcb *tp,
 			    struct tcphdr *th, struct tcpopt *to);
 #endif
 
 /*
  * Input-side prototypes.  tcp_do_segment() takes the mbuf, TCP header,
  * socket and tcpcb followed by two ints and a uint8_t whose meaning is
  * not visible here -- see the definition.
  */
 int	 tcp_input(struct mbuf **, int *, int);
 int	 tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
 	    struct tcpcb *, int);
 void	 tcp_do_segment(struct mbuf *, struct tcphdr *,
 			struct socket *, struct tcpcb *, int, int, uint8_t);
 
 /*
  * Registration, deregistration and lookup of struct tcp_function_block
  * instances.  The register_* variants differ in how names are supplied:
  * none, an array of names with a count, or a single name.
  * NOTE(review): "wait" presumably selects sleeping vs non-sleeping
  * behaviour -- confirm against the definitions.
  */
 int register_tcp_functions(struct tcp_function_block *blk, int wait);
 int register_tcp_functions_as_names(struct tcp_function_block *blk,
     int wait, const char *names[], int *num_names);
 int register_tcp_functions_as_name(struct tcp_function_block *blk,
     const char *name, int wait);
 int deregister_tcp_functions(struct tcp_function_block *blk, bool quiesce,
     bool force);
 struct tcp_function_block *find_and_ref_tcp_functions(struct tcp_function_set *fs);
 void tcp_switch_back_to_default(struct tcpcb *tp);
 /*
  * NOTE(review): the parameter is named "fs" although its type is
  * struct tcp_function_block, not struct tcp_function_set.
  */
 struct tcp_function_block *
 find_and_ref_tcp_fb(struct tcp_function_block *fs);
 int tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp);
 
 /* counter_u64_t counters sharing the tcp_inp_lro_ prefix; defined elsewhere. */
 extern counter_u64_t tcp_inp_lro_direct_queue;
 extern counter_u64_t tcp_inp_lro_wokeup_queue;
 extern counter_u64_t tcp_inp_lro_compressed;
 extern counter_u64_t tcp_inp_lro_single_push;
 extern counter_u64_t tcp_inp_lro_locks_taken;
 extern counter_u64_t tcp_inp_lro_sack_wake;
 
 #ifdef NETFLIX_EXP_DETECTION
 /*
  * Various SACK attack thresholds.
  * All are plain int32_t globals, declared only when NETFLIX_EXP_DETECTION
  * is defined.
  */
 extern int32_t tcp_force_detection;
 extern int32_t tcp_sack_to_ack_thresh;
 extern int32_t tcp_sack_to_move_thresh;
 extern int32_t tcp_restoral_thresh;
 extern int32_t tcp_sad_decay_val;
 extern int32_t tcp_sad_pacing_interval;
 extern int32_t tcp_sad_low_pps;
 extern int32_t tcp_map_minimum;
 extern int32_t tcp_attack_on_turns_on_logging;
 #endif
 
 /*
  * Further prototypes, grouped here by name prefix only: tcp_maxmtu* and
  * tcp_mss*, tcpcb creation and output, tcp_tw*, tcpip_* and tcp_timer*.
  * tcp_tw_destroy() is declared only when VIMAGE is defined.
  */
 uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
 uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
 u_int	 tcp_maxseg(const struct tcpcb *);
 void	 tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
 	    struct tcp_ifcap *);
 void	 tcp_mss(struct tcpcb *, int);
 int	 tcp_mssopt(struct in_conninfo *);
 struct inpcb *
 	 tcp_drop_syn_sent(struct inpcb *, int);
 struct tcpcb *
 	 tcp_newtcpcb(struct inpcb *);
 int	 tcp_output(struct tcpcb *);
 void	 tcp_state_change(struct tcpcb *, int);
 void	 tcp_respond(struct tcpcb *, void *,
 	    struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
 void	 tcp_tw_init(void);
 #ifdef VIMAGE
 void	 tcp_tw_destroy(void);
 #endif
 void	 tcp_tw_zone_change(void);
 int	 tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *,
 	    struct mbuf *, int);
 void	 tcp_setpersist(struct tcpcb *);
 void	 tcp_slowtimo(void);
 struct tcptemp *
 	 tcpip_maketemplate(struct inpcb *);
 void	 tcpip_fillheaders(struct inpcb *, void *, void *);
 /* The tcp_timer* prototypes all take the tcpcb plus a uint32_t selector. */
 void	 tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
 int	 tcp_timer_suspend(struct tcpcb *, uint32_t);
 void	 tcp_timers_unsuspend(struct tcpcb *, uint32_t);
 int	 tcp_timer_active(struct tcpcb *, uint32_t);
 void	 tcp_timer_stop(struct tcpcb *, uint32_t);
 void	 tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
 int	 inp_to_cpuid(struct inpcb *inp);
 /*
  * All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo).
  * The get/update routines are keyed by a struct in_conninfo pointer and
  * exchange either a struct hc_metrics_lite or a uint32_t MTU value.
  * tcp_hc_destroy() is declared only when VIMAGE is defined.
  */
 void	 tcp_hc_init(void);
 #ifdef VIMAGE
 void	 tcp_hc_destroy(void);
 #endif
 void	 tcp_hc_get(struct in_conninfo *, struct hc_metrics_lite *);
 uint32_t tcp_hc_getmtu(struct in_conninfo *);
 void	 tcp_hc_updatemtu(struct in_conninfo *, uint32_t);
 void	 tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *);
 
 /* struct pr_usrreqs instance for TCP; defined elsewhere. */
 extern	struct pr_usrreqs tcp_usrreqs;
 
 /* Both take the connection info and return a fresh 32-bit / tcp_seq value. */
 uint32_t tcp_new_ts_offset(struct in_conninfo *);
 tcp_seq	 tcp_new_isn(struct in_conninfo *);
 
 /*
  * SACK-related prototypes (tcp_sack_*, tcp_*_sack*, tcp_*dsack*) followed
  * by a few miscellaneous helpers.  tcp_sack_output() returns a
  * struct sackhole pointer and has an int out-parameter, sack_bytes_rexmt.
  */
 int	 tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
 void	 tcp_update_dsack_list(struct tcpcb *, tcp_seq, tcp_seq);
 void	 tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
 void	 tcp_clean_dsack_blocks(struct tcpcb *tp);
 void	 tcp_clean_sackreport(struct tcpcb *tp);
 void	 tcp_sack_adjust(struct tcpcb *tp);
 struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
 void	 tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
 void	 tcp_free_sackholes(struct tcpcb *tp);
 int	 tcp_newreno(struct tcpcb *, struct tcphdr *);
 int	 tcp_compute_pipe(struct tcpcb *);
 uint32_t tcp_compute_initwnd(uint32_t);
 void	 tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t);
 /* Takes a seed buffer and its length in bytes alongside the tcpcb. */
 int	 tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
     size_t seed_len);
 /* plen is passed by pointer, so it may be updated by the callee -- confirm. */
 struct mbuf *
 	 tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
 	   int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls);
 
 /* Takes no arguments and returns an int; presumably an error code -- confirm. */
 int	tcp_stats_init(void);
 
 /*
  * Convert the multi-byte fields of a TCP header from network to host byte
  * order, in place.  Only th_seq, th_ack, th_win and th_urp are touched;
  * every other field of *th is left as it is.
  */
 static inline void
 tcp_fields_to_host(struct tcphdr *th)
 {
 	/* 16-bit fields. */
 	th->th_urp = ntohs(th->th_urp);
 	th->th_win = ntohs(th->th_win);
 	/* 32-bit fields. */
 	th->th_ack = ntohl(th->th_ack);
 	th->th_seq = ntohl(th->th_seq);
 }
 
 /*
  * Convert the multi-byte fields of a TCP header from host to network byte
  * order, in place.  Only th_seq, th_ack, th_win and th_urp are touched;
  * every other field of *th is left as it is.
  */
 static inline void
 tcp_fields_to_net(struct tcphdr *th)
 {
 	/* 16-bit fields. */
 	th->th_urp = htons(th->th_urp);
 	th->th_win = htons(th->th_win);
 	/* 32-bit fields. */
 	th->th_ack = htonl(th->th_ack);
 	th->th_seq = htonl(th->th_seq);
 }
 #endif /* _KERNEL */
 
 #endif /* _NETINET_TCP_VAR_H_ */