diff --git a/share/man/man4/ddb.4 b/share/man/man4/ddb.4 index 9a9af553b29d..3c4894c03d62 100644 --- a/share/man/man4/ddb.4 +++ b/share/man/man4/ddb.4 @@ -1,1701 +1,1707 @@ .\" .\" Mach Operating System .\" Copyright (c) 1991,1990 Carnegie Mellon University .\" Copyright (c) 2007 Robert N. M. Watson .\" All Rights Reserved. .\" .\" Permission to use, copy, modify and distribute this software and its .\" documentation is hereby granted, provided that both the copyright .\" notice and this permission notice appear in all copies of the .\" software, derivative works or modified versions, and any portions .\" thereof, and that both notices appear in supporting documentation. .\" .\" CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" .\" CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR .\" ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. .\" .\" Carnegie Mellon requests users of this software to return to .\" .\" Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU .\" School of Computer Science .\" Carnegie Mellon University .\" Pittsburgh PA 15213-3890 .\" .\" any improvements or extensions that they make and grant Carnegie Mellon .\" the rights to redistribute these changes. .\" -.Dd May 28, 2025 +.Dd June 10, 2025 .Dt DDB 4 .Os .Sh NAME .Nm ddb .Nd interactive kernel debugger .Sh SYNOPSIS In order to enable kernel debugging facilities include: .Bd -ragged -offset indent .Cd options KDB .Cd options DDB .Ed .Pp To prevent activation of the debugger on kernel .Xr panic 9 : .Bd -ragged -offset indent .Cd options KDB_UNATTENDED .Ed .Pp In order to print a stack trace of the current thread on the console for a panic: .Bd -ragged -offset indent .Cd options KDB_TRACE .Ed .Pp To print the numerical value of symbols in addition to the symbolic representation, define: .Bd -ragged -offset indent .Cd options DDB_NUMSYM .Ed .Pp To enable the .Xr gdb 4 backend, so that remote debugging with .Xr kgdb 1 Pq Pa ports/devel/gdb is possible, include: .Bd -ragged -offset indent .Cd options GDB .Ed .Sh DESCRIPTION The .Nm kernel debugger is an interactive debugger with a syntax inspired by .Xr gdb 1 Pq Pa ports/devel/gdb . If linked into the running kernel, it can be invoked locally with the .Ql debug .Xr keymap 5 action, usually mapped to Ctrl+Alt+Esc, or by setting the .Va debug.kdb.enter sysctl to 1. The debugger is also invoked on kernel .Xr panic 9 if the .Va debug.debugger_on_panic .Xr sysctl 8 MIB variable is set non-zero, which is the default unless the .Dv KDB_UNATTENDED option is specified. Similarly, if the .Va debug.debugger_on_recursive_panic variable is set to .Dv 1 , then the debugger will be invoked on a recursive kernel panic. This variable has a default value of .Dv 0 , and has no effect if .Va debug.debugger_on_panic is already set non-zero. .Pp The current location is called .Va dot . The .Va dot is displayed with a hexadecimal format at a prompt. The commands .Ic examine and .Ic write update .Va dot to the address of the last line examined or the last location modified, and set .Va next to the address of the next location to be examined or changed. Other commands do not change .Va dot , and set .Va next to be the same as .Va dot . .Pp The general command syntax is: .Ar command Ns Op Li / Ns Ar modifier .Oo Ar addr Oc Ns Op , Ns Ar count .Pp A blank line repeats the previous command from the address .Va next with count 1 and no modifiers. Specifying .Ar addr sets .Va dot to the address. Omitting .Ar addr uses .Va dot . 
A missing .Ar count is taken to be 1 for printing commands or infinity for stack traces. A .Ar count of -1 is equivalent to a missing .Ar count . Options that are supplied but not supported by the given .Ar command are usually ignored. .Pp The .Nm debugger has a pager feature (like the .Xr more 1 command) for the output. If an output line exceeds the number set in the .Va lines variable, it displays .Dq Li --More-- and waits for a response. The valid responses for it are: .Pp .Bl -tag -compact -width ".Li SPC" .It Li SPC one more page .It Li RET one more line .It Li q abort the current command, and return to the command input mode .El .Pp Finally, .Nm provides a small (currently 10 items) command history, and offers simple .Nm emacs Ns -style command line editing capabilities. In addition to the .Nm emacs control keys, the usual ANSI arrow keys may be used to browse through the history buffer, and move the cursor within the current line. .Sh COMMANDS .Ss COMMON DEBUGGER COMMANDS .Bl -tag -width indent -compact .It Ic help Print a short summary of the available commands and command abbreviations. .Pp .It Xo .Ic examine Ns Op Li / Ns Cm AISabcdghilmorsuxz ... .Oo Ar addr Oc Ns Op , Ns Ar count .Xc .It Xo .Ic x Ns Op Li / Ns Cm AISabcdghilmorsuxz ... .Oo Ar addr Oc Ns Op , Ns Ar count .Xc Display the addressed locations according to the formats in the modifier. Multiple modifier formats display multiple locations. If no format is specified, the last format specified for this command is used. .Pp The format characters are: .Bl -tag -compact -width indent .It Cm b look at by bytes (8 bits) .It Cm h look at by half words (16 bits) .It Cm l look at by long words (32 bits) .It Cm g look at by quad words (64 bits) .It Cm a print the location being displayed .It Cm A print the location with a line number if possible .It Cm x display in unsigned hex .It Cm z display in signed hex .It Cm o display in unsigned octal .It Cm d display in signed decimal .It Cm u display in unsigned decimal .It Cm r display in current radix, signed .It Cm c display low 8 bits as a character. Non-printing characters are displayed as an octal escape code (e.g., .Ql \e000 ) . .It Cm s display the null-terminated string at the location. Non-printing characters are displayed as octal escapes. .It Cm m display in unsigned hex with character dump at the end of each line. The location is also displayed in hex at the beginning of each line. .It Cm i display as a disassembled instruction .It Cm I display as a disassembled instruction with possible alternate formats depending on the machine. On i386, this selects the alternate format for the instruction decoding (16 bits in a 32-bit code segment and vice versa). .It Cm S display a symbol name for the pointer stored at the address .El .Pp .It Ic xf Examine forward: execute an .Ic examine command with the last specified parameters to it except that the next address displayed by it is used as the start address. .Pp .It Ic xb Examine backward: execute an .Ic examine command with the last specified parameters to it except that the last start address subtracted by the size displayed by it is used as the start address. .Pp .It Ic print Ns Op Li / Ns Cm acdoruxz .It Ic p Ns Op Li / Ns Cm acdoruxz Print .Ar addr Ns s according to the modifier character (as described above for .Cm examine ) . Valid formats are: .Cm a , x , z , o , d , u , r , and .Cm c . If no modifier is specified, the last one specified to it is used. 
The argument .Ar addr can be a string, in which case it is printed as it is. For example: .Bd -literal -offset indent print/x "eax = " $eax "\enecx = " $ecx "\en" .Ed .Pp will print like: .Bd -literal -offset indent eax = xxxxxx ecx = yyyyyy .Ed .Pp .It Ic pprint Ns Oo Li / Ns Cm d depth Oc Oo Ar name Oc Pretty-print symbol specified by .Ar name using CTF debugging data. Works for all symbols exported by the kernel and loaded kernel modules. .Pp If the .Cm d modifier has been specified, contents of structs nested up to .Ar depth levels deep will also be included in the output. .Pp .It Ic pprint struct Ns Oo Li / Ns Cm d depth Ic Oc Oo Ar name Oc Ns Op Ns Ar addr Print memory at .Ar addr as struct .Ar name Ns . Works for all structs defined by the kernel and loaded kernel modules. .Pp If the .Cm d modifier has been specified, contents of structs nested up to .Ar depth levels deep will also be included in the output. .Pp .It Xo .Ic write Ns Op Li / Ns Cm bhl .Ar addr expr1 Op Ar expr2 ... .Xc .It Xo .Ic w Ns Op Li / Ns Cm bhl .Ar addr expr1 Op Ar expr2 ... .Xc Write the expressions specified after .Ar addr on the command line at succeeding locations starting with .Ar addr . The write unit size can be specified in the modifier with a letter .Cm b (byte), .Cm h (half word) or .Cm l (long word) respectively. If omitted, long word is assumed. .Pp .Sy Warning : since there is no delimiter between expressions, strange things may happen. It is best to enclose each expression in parentheses. .Pp .It Ic set Li $ Ns Ar variable Oo Li = Oc Ar expr Set the named variable or register with the value of .Ar expr . Valid variable names are described below. .Pp .It Ic break Ns Oo Li / Ns Cm u Oc Oo Ar addr Oc Ns Op , Ns Ar count .It Ic b Ns Oo Li / Ns Cm u Oc Oo Ar addr Oc Ns Op , Ns Ar count Set a break point at .Ar addr . If .Ar count is supplied, the .Ic continue command will not stop at this break point on the first .Ar count \- 1 times that it is hit. If the break point is set, a break point number is printed with .Ql # . This number can be used in deleting the break point or adding conditions to it. .Pp If the .Cm u modifier is specified, this command sets a break point in user address space. Without the .Cm u option, the address is considered to be in the kernel space, and a wrong space address is rejected with an error message. This modifier can be used only if it is supported by machine dependent routines. .Pp .Sy Warning : If a user text is shadowed by a normal user space debugger, user space break points may not work correctly. Setting a break point at the low-level code paths may also cause strange behavior. .Pp .It Ic delete Op Ar addr .It Ic d Op Ar addr .It Ic delete Li # Ns Ar number .It Ic d Li # Ns Ar number Delete the specified break point. The break point can be specified by a break point number with .Ql # , or by using the same .Ar addr specified in the original .Ic break command, or by omitting .Ar addr to get the default address of .Va dot . .Pp .It Ic halt Halt the system. .Pp .It Ic watch Oo Ar addr Oc Ns Op , Ns Ar size Set a watchpoint for a region. Execution stops when an attempt to modify the region occurs. The .Ar size argument defaults to 4. If you specify a wrong space address, the request is rejected with an error message. .Pp .Sy Warning : Attempts to watch wired kernel memory may cause unrecoverable error in some systems such as i386. Watchpoints on user addresses work best. 
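.Pp
As a minimal illustration (the address below is hypothetical), the following
sets a watchpoint on an 8-byte region and stops when any part of it is
modified:
.Bd -literal -offset indent
watch 0x1000,8
.Ed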
.Pp .It Ic hwatch Oo Ar addr Oc Ns Op , Ns Ar size Set a hardware watchpoint for a region if supported by the architecture. Execution stops when an attempt to modify the region occurs. The .Ar size argument defaults to 4. .Pp .Sy Warning : The hardware debug facilities do not have a concept of separate address spaces like the watch command does. Use .Ic hwatch for setting watchpoints on kernel address locations only, and avoid its use on user mode address spaces. .Pp .It Ic dhwatch Oo Ar addr Oc Ns Op , Ns Ar size Delete specified hardware watchpoint. .Pp .It Ic kill Ar sig pid Send signal .Ar sig to process .Ar pid . The signal is acted on upon returning from the debugger. This command can be used to kill a process causing resource contention in the case of a hung system. See .Xr signal 3 for a list of signals. Note that the arguments are reversed relative to .Xr kill 2 . .Pp .It Ic step Ns Oo Li / Ns Cm p Oc Ns Op , Ns Ar count .It Ic s Ns Oo Li / Ns Cm p Oc Ns Op , Ns Ar count Single step .Ar count times. If the .Cm p modifier is specified, print each instruction at each step. Otherwise, only print the last instruction. .Pp .Sy Warning : depending on machine type, it may not be possible to single-step through some low-level code paths or user space code. On machines with software-emulated single-stepping (e.g., pmax), stepping through code executed by interrupt handlers will probably do the wrong thing. .Pp .It Ic continue Ns Op Li / Ns Cm c .It Ic c Ns Op Li / Ns Cm c Continue execution until a breakpoint or watchpoint. If the .Cm c modifier is specified, count instructions while executing. Some machines (e.g., pmax) also count loads and stores. .Pp .Sy Warning : when counting, the debugger is really silently single-stepping. This means that single-stepping on low-level code may cause strange behavior. .Pp .It Ic until Ns Op Li / Ns Cm p Stop at the next call or return instruction. If the .Cm p modifier is specified, print the call nesting depth and the cumulative instruction count at each call or return. Otherwise, only print when the matching return is hit. .Pp .It Ic next Ns Op Li / Ns Cm p .It Ic match Ns Op Li / Ns Cm p Stop at the matching return instruction. If the .Cm p modifier is specified, print the call nesting depth and the cumulative instruction count at each call or return. Otherwise, only print when the matching return is hit. .Pp .It Xo .Ic trace Ns Op Li / Ns Cm u .Op Ar pid | tid Ns .Op , Ns Ar count .Xc .It Xo .Ic t Ns Op Li / Ns Cm u .Op Ar pid | tid Ns .Op , Ns Ar count .Xc .It Xo .Ic where Ns Op Li / Ns Cm u .Op Ar pid | tid Ns .Op , Ns Ar count .Xc .It Xo .Ic bt Ns Op Li / Ns Cm u .Op Ar pid | tid Ns .Op , Ns Ar count .Xc Stack trace. The .Cm u option traces user space; if omitted, .Ic trace only traces kernel space. The optional argument .Ar count is the number of frames to be traced. If .Ar count is omitted, all frames are printed. .Pp .Sy Warning : User space stack trace is valid only if the machine dependent code supports it. .Pp .It Xo .Ic search Ns Op Li / Ns Cm bhl .Ar addr .Ar value .Op Ar mask Ns .Op , Ns Ar count .Xc Search memory for .Ar value . The optional .Ar count argument limits the search. .\" .Pp .It Xo .Ic Ic reboot Ns Op Li / Ns Cm s .Op Ar seconds .Xc .It Xo .Ic Ic reset Ns Op Li / Ns Cm s .Op Ar seconds .Xc Hard reset the system. If the optional argument .Ar seconds is given, the debugger will wait for this long, at most a week, before rebooting. 
When the .Cm s modifier is given, the command will skip running any registered shutdown handlers and attempt the most basic reset. .Pp .It Ic thread Ar addr | tid Switch the debugger to the thread with ID .Ar tid , if the argument is a decimal number, or address .Ar addr , otherwise. .Pp .It Ic watchdog Op Ar exp Program the .Xr watchdog 4 timer to fire in .Pf 2^ Ar exp seconds. If no argument is provided, the watchdog timer is disabled. .El .Ss SPECIALIZED HELPER COMMANDS .Bl -tag -width indent -compact .It Xo .Ic findstack .Ar addr .Xc Prints the address of the thread whose kernel-mode stack contains .Ar addr , if any. .Pp .It Ic show Cm active trace .It Ic acttrace Show a stack trace for every thread running on a CPU. .Pp .It Ic show Cm all procs Ns Op Li / Ns Cm a .It Ic ps Ns Op Li / Ns Cm a Display all process information. The process information may not be shown if it is not supported in the machine, or the bottom of the stack of the target process is not in the main memory at that time. The .Cm a modifier will print command line arguments for each process. .\" .Pp -.It Ic show Cm all tcpcbs Ns Op Li / Ns Cm l +.It Ic show Cm all tcpcbs Ns Op Li / Ns Cm b Ns Cm l Show the same output as "show tcpcb" does, but for all TCP control blocks within the system. +The +.Cm b +modifier will request BBLog entries to be printed. Using the .Cm l modifier will limit the output to TCP control blocks, which are locked. .\" .Pp .It Ic show Cm all trace .It Ic alltrace Show a stack trace for every thread in the system. .Pp .It Ic show Cm all ttys Show all TTY's within the system. Output is similar to .Xr pstat 8 , but also includes the address of the TTY structure. .\" .Pp .It Ic show Cm all vnets Show the same output as "show vnet" does, but lists all virtualized network stacks within the system. .\" .Pp .It Ic show Cm allchains Show the same information like "show lockchain" does, but for every thread in the system. .\" .Pp .It Ic show Cm alllocks Show all locks that are currently held. This command is only available if .Xr witness 4 is included in the kernel. .\" .Pp .It Ic show Cm allpcpu The same as "show pcpu", but for every CPU present in the system. .\" .Pp .It Ic show Cm allrman Show information related with resource management, including interrupt request lines, DMA request lines, I/O ports, I/O memory addresses, and Resource IDs. .\" .Pp .It Ic show Cm apic Dump data about APIC IDT vector mappings. .\" .Pp .It Ic show Cm badstacks Walk the .Xr witness 4 graph and print any lock-order violations. This command is only available if .Xr witness 4 is included in the kernel. .\" .Pp .It Ic show Cm breaks Show breakpoints set with the "break" command. .\" .Pp .It Ic show Cm bio Ar addr Show information about the bio structure .Vt struct bio present at .Ar addr . See the .Pa sys/bio.h header file and .Xr g_bio 9 for more details on the exact meaning of the structure fields. .\" .Pp .It Ic show Cm buffer Ar addr Show information about the buf structure .Vt struct buf present at .Ar addr . See the .Pa sys/buf.h header file for more details on the exact meaning of the structure fields. .\" .Pp .It Ic show Cm callout Ar addr Show information about the callout structure .Vt struct callout present at .Ar addr . .\" .Pp .It Ic show Cm cdev Op Ar addr Show the internal devfs state of the cdev structure located at .Ar addr . If no argument is provided, show the list of all created cdevs, consisting of the devfs node name and the .Vt struct cdev address. 
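.Pp
As an illustrative sequence, the list of cdevs can be printed first and the
.Vt struct cdev
address of one entry (the address below is hypothetical) then examined:
.Bd -literal -offset indent
show cdev
show cdev 0xfffff80003bb3200
.Ed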
.\" .Pp .It Ic show Cm conifhk Lists hooks currently waiting for completion in .Fn run_interrupt_driven_config_hooks . .\" .Pp .It Ic show Cm cpusets Print numbered root and assigned CPU affinity sets. See .Xr cpuset 2 for more details. .\" .Pp .It Ic show Cm cyrixreg Show registers specific to the Cyrix processor. .\" .Pp .It Ic show Cm devmap Prints the contents of the static device mapping table. Currently only available on the ARM architecture. .\" .Pp .It Ic show Cm domain Ar addr Print protocol domain structure .Vt struct domain at address .Ar addr . See the .Pa sys/domain.h header file for more details on the exact meaning of the structure fields. .\" .Pp .It Ic show Cm ffs Op Ar addr Show brief information about ffs mount at the address .Ar addr , if argument is given. Otherwise, provides the summary about each ffs mount. .\" .Pp .It Ic show Cm file Ar addr Show information about the file structure .Vt struct file present at address .Ar addr . .\" .Pp .It Ic show Cm files Show information about every file structure in the system. .\" .Pp .It Ic show Cm freepages Show the number of physical pages in each of the free lists. .\" .Pp .It Ic show Cm geom Op Ar addr If the .Ar addr argument is not given, displays the entire GEOM topology. If .Ar addr is given, displays details about the given GEOM object (class, geom, provider or consumer). .\" .Pp .It Ic show Cm idt Show IDT layout. The first column specifies the IDT vector. The second one is the name of the interrupt/trap handler. Those functions are machine dependent. .\" .Pp .It Ic show Cm igi_list Ar addr Show information about the IGMP structure .Vt struct igmp_ifsoftc present at .Ar addr . .\" .Pp .It Ic show Cm iosched Ar addr Show information about the I/O scheduler .Vt struct cam_iosched_softc located at .Ar addr . .\" .Pp .It Ic show Cm inodedeps Op Ar addr Show brief information about each inodedep structure. If .Ar addr is given, only inodedeps belonging to the fs located at the supplied address are shown. .\" .Pp .It Ic show Cm inpcb Ar addr Show information on IP Control Block .Vt struct in_pcb present at .Ar addr . .\" .Pp .It Ic show Cm intr Dump information about interrupt handlers. .\" .Pp .It Ic show Cm intrcnt Dump the interrupt statistics. .\" .Pp .It Ic show Cm irqs Show interrupt lines and their respective kernel threads. .\" .Pp .It Ic show Cm ktr Ns Op Li / Ns Cm a Ns Cm v Ns Cm V Print the contents of the .Xr ktr 4 trace buffer. The .Cm v modifier will request fully verbose output, causing the file, line number, and timestamp to be printed for each trace entry. The .Cm V modifier will request only the timestamps to be printed. The .Cm a modifier will request that the output be unpaginated. .\" .Pp .It Ic show Cm lapic Show information from the local APIC registers for this CPU. .\" .Pp .It Ic show Cm lock Ar addr Show lock structure. The output format is as follows: .Bl -tag -width "flags" .It Ic class : Class of the lock. Possible types include .Xr mutex 9 , .Xr rmlock 9 , .Xr rwlock 9 , .Xr sx 9 . .It Ic name : Name of the lock. .It Ic flags : Flags passed to the lock initialization function. .Em flags values are lock class specific. .It Ic state : Current state of a lock. .Em state values are lock class specific. .It Ic owner : Lock owner. .El .\" .Pp .It Ic show Cm lockchain Ar addr Show all threads a particular thread at address .Ar addr is waiting on based on non-spin locks. .\" .Pp .It Ic show Cm lockedbufs Show the same information as "show buf", but for every locked .Vt struct buf object. 
.\" .Pp .It Ic show Cm lockedvnods List all locked vnodes in the system. .\" .Pp .It Ic show Cm locks Prints all locks that are currently acquired. This command is only available if .Xr witness 4 is included in the kernel. .\" .Pp .It Ic show Cm locktree .\" .Pp .It Ic show Cm malloc Ns Op Li / Ns Cm i Prints .Xr malloc 9 memory allocator statistics. If the .Cm i modifier is specified, format output as machine-parseable comma-separated values ("CSV"). The output columns are as follows: .Pp .Bl -tag -compact -offset indent -width "Requests" .It Ic Type Specifies a type of memory. It is the same as a description string used while defining the given memory type with .Xr MALLOC_DECLARE 9 . .It Ic InUse Number of memory allocations of the given type, for which .Xr free 9 has not been called yet. .It Ic MemUse Total memory consumed by the given allocation type. .It Ic Requests Number of memory allocation requests for the given memory type. .El .Pp The same information can be gathered in userspace with .Dq Nm vmstat Fl m . .\" .Pp .It Ic show Cm map Ns Oo Li / Ns Cm f Oc Ar addr Prints the VM map at .Ar addr . If the .Cm f modifier is specified the complete map is printed. .\" .Pp .It Ic show Cm msgbuf Print the system's message buffer. It is the same output as in the .Dq Nm dmesg case. It is useful if you got a kernel panic, attached a serial cable to the machine and want to get the boot messages from before the system hang. .\" .Pp .It Ic show Cm mount Op Ar addr Displays details about the mount point located at .Ar addr . If no .Ar addr is specified, displays short info about all currently mounted file systems. .\" .Pp .It Ic show Cm object Ns Oo Li / Ns Cm f Oc Ar addr Prints the VM object at .Ar addr . If the .Cm f option is specified the complete object is printed. .\" .Pp .It Ic show Cm panic Print the panic message if set. .\" .Pp .It Ic show Cm page Show statistics on VM pages. .\" .Pp .It Ic show Cm pageq Show statistics on VM page queues. .\" .Pp .It Ic show Cm pciregs Print PCI bus registers. The same information can be gathered in userspace by running .Dq Nm pciconf Fl lv . .\" .Pp .It Ic show Cm pcpu Print current processor state. The output format is as follows: .Pp .Bl -tag -compact -offset indent -width "spin locks held:" .It Ic cpuid Processor identifier. .It Ic curthread Thread pointer, process identifier and the name of the process. .It Ic curpcb Control block pointer. .It Ic fpcurthread FPU thread pointer. .It Ic idlethread Idle thread pointer. .It Ic APIC ID CPU identifier coming from APIC. .It Ic currentldt LDT pointer. .It Ic spin locks held Names of spin locks held. .El .\" .Pp .It Ic show Cm pgrpdump Dump process groups present within the system. .\" .Pp .It Ic show Cm prison Op Ar addr Show the prison structure located at .Ar addr . If no .Ar addr argument is specified, show information about all prisons in the system. .\" .Pp .It Ic show Cm proc Op Ar addr Show information about the process structure located at address .Ar addr , or the current process if no argument is specified. .\" .Pp .It Ic show Cm procvm Op Ar addr Show process virtual memory layout for the process located at .Ar addr , or the current process if no argument is specified. .\" .Pp .It Ic show Cm protosw Ar addr Print protocol switch structure .Vt struct protosw at address .Ar addr . .\" .Pp .It Ic show Cm registers Ns Op Li / Ns Cm u Display the register set. If the .Cm u modifier is specified, the register contents of the thread's previous trapframe are displayed instead. 
Usually, this corresponds to the saved state from userspace. .\" .Pp .It Ic show Cm rman Ar addr Show resource manager object .Vt struct rman at address .Ar addr . Addresses of particular pointers can be gathered with "show allrman" command. .\" .Pp .It Ic show Cm route Ar addr Show route table result for destination .Ar addr . At this time, INET and INET6 formatted addresses are supported. .\" .Pp .It Ic show Cm routetable Oo Ar af Oc Show full route table or tables. If .Ar af is specified, show only routes for the given numeric address family. If no argument is specified, dump the route table for all address families. .\" .Pp .It Ic show Cm rtc Show real time clock value. Useful for long debugging sessions. .\" .Pp .It Ic show Cm sleepchain Deprecated. Now an alias for .Ic show Cm lockchain . .\" .Pp .It Ic show Cm sleepq Ar addr .It Ic show Cm sleepqueue Ar addr Show the .Xr sleepqueue 9 structure located at .Ar addr . .\" .Pp .It Ic show Cm sockbuf Ar addr Show the socket buffer .Va struct sockbuf located at .Ar addr . .\" .Pp .It Ic show Cm socket Ar addr Show the socket object .Vt struct socket located at .Ar addr . .\" .Pp .It Ic show Cm sysregs Show system registers (e.g., .Li cr0-4 on i386.) Not present on some platforms. .\" .Pp -.It Ic show Cm tcpcb Ar addr +.It Ic show Cm tcpcb Ns Oo Li / Ns Cm b Oc Ar addr Print TCP control block .Vt struct tcpcb lying at address .Ar addr . For exact interpretation of output, visit .Pa netinet/tcp.h header file. +The +.Cm b +modifier will request BBLog entries to be printed. .\" .Pp .It Ic show Cm thread Op Ar addr | tid If no .Ar addr or .Ar tid is specified, show detailed information about current thread. Otherwise, print information about the thread with ID .Ar tid or kernel address .Ar addr . (If the argument is a decimal number, it is assumed to be a tid.) .\" .Pp .It Ic show Cm threads Show all threads within the system. Output format is as follows: .Pp .Bl -tag -compact -offset indent -width "Second column" .It Ic First column Thread identifier (TID) .It Ic Second column Thread structure address .It Ic Third column Backtrace. .El .\" .Pp .It Ic show Cm tty Ar addr Display the contents of a TTY structure in a readable form. .\" .Pp .It Ic show Cm turnstile Ar addr Show turnstile .Vt struct turnstile structure at address .Ar addr . Turnstiles are structures used within the .Fx kernel to implement synchronization primitives which, while holding a specific type of lock, cannot sleep or context switch to another thread. Currently, those are: .Xr mutex 9 , .Xr rwlock 9 , .Xr rmlock 9 . .\" .Pp .It Ic show Cm uma Ns Op Li / Ns Cm i Show UMA allocator statistics. If the .Cm i modifier is specified, format output as machine-parseable comma-separated values ("CSV"). The output contains the following columns: .Pp .Bl -tag -compact -offset indent -width "Total Mem" .It Cm "Zone" Name of the UMA zone. The same string that was passed to .Xr uma_zcreate 9 as a first argument. .It Cm "Size" Size of a given memory object (slab). .It Cm "Used" Number of slabs being currently used. .It Cm "Free" Number of free slabs within the UMA zone. .It Cm "Requests" Number of allocations requests to the given zone. .It Cm "Total Mem" Total memory in use (either allocated or free) by a zone, in bytes. .It Cm "XFree" Number of free slabs within the UMA zone that were freed on a different NUMA domain than allocated. (The count in the .Cm "Free" column is inclusive of .Cm "XFree" . 
) .El .Pp The same information might be gathered in the userspace with the help of .Dq Nm vmstat Fl z . .\" .Pp .It Ic show Cm unpcb Ar addr Shows UNIX domain socket private control block .Vt struct unpcb present at the address .Ar addr . .\" .Pp .It Ic show Cm vmochk Prints, whether the internal VM objects are in a map somewhere and none have zero ref counts. .\" .Pp .It Ic show Cm vmopag Walk the list of VM objects in the system, printing the indices and physical addresses of the VM pages belonging to each object. .\" .Pp .It Ic show Cm vnet Ar addr Prints virtualized network stack .Vt struct vnet structure present at the address .Ar addr . .\" .Pp .It Ic show Cm vnode Ar addr Prints vnode .Vt struct vnode structure lying at .Ar addr . For the exact interpretation of the output, look at the .Pa sys/vnode.h header file. .\" .Pp .It Ic show Cm vnodebufs Ar addr Shows clean/dirty buffer lists of the vnode located at .Ar addr . .\" .Pp .It Ic show Cm vpath Ar addr Walk the namecache to lookup the pathname of the vnode located at .Ar addr . .\" .Pp .It Ic show Cm watches Displays all watchpoints. Shows watchpoints set with "watch" command. .\" .Pp .It Ic show Cm witness Shows information about lock acquisition coming from the .Xr witness 4 subsystem. .El .Ss OFFLINE DEBUGGING COMMANDS .Bl -tag -width indent -compact .It Ic dump Initiate a kernel core dump to the device(s) configured by .Xr dumpon 8 . .Pp .It Ic gdb Switches to remote GDB mode. In remote GDB mode, another machine is required that runs .Xr gdb 1 Pq Pa ports/devel/gdb using the remote debug feature, with a connection to the serial console port on the target machine. .Pp .It Ic netdump Fl s Ar server Oo Fl g Ar gateway Fl c Ar client Fl i Ar iface Oc Configure .Xr netdump 4 with the provided parameters, and immediately perform a netdump. .Pp There are some known limitations. Principally, .Xr netdump 4 only supports IPv4 at this time. The address arguments to the .Ic netdump command must be dotted decimal IPv4 addresses. (Hostnames are not supported.) At present, the command only works if the machine is in a panic state. Finally, the .Nm .Ic netdump command does not provide any way to configure compression or encryption. .Pp .It Ic netgdb Fl s Ar server Oo Fl g Ar gateway Fl c Ar client Fl i Ar iface Oc Initiate a .Xr netgdb 4 session with the provided parameters. .Pp .Ic netgdb has identical limitations to .Ic netdump . .Pp .It Ic capture on .It Ic capture off .It Ic capture reset .It Ic capture status .Nm supports a basic output capture facility, which can be used to retrieve the results of debugging commands from userspace using .Xr sysctl 3 . .Ic capture on enables output capture; .Ic capture off disables capture. .Ic capture reset will clear the capture buffer and disable capture. .Ic capture status will report current buffer use, buffer size, and disposition of output capture. .Pp Userspace processes may inspect and manage .Nm capture state using .Xr sysctl 8 : .Pp .Va debug.ddb.capture.bufsize may be used to query or set the current capture buffer size. .Pp .Va debug.ddb.capture.maxbufsize may be used to query the compile-time limit on the capture buffer size. .Pp .Va debug.ddb.capture.bytes may be used to query the number of bytes of output currently in the capture buffer. .Pp .Va debug.ddb.capture.data returns the contents of the buffer as a string to an appropriately privileged process. 
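.Pp
As a sketch of a typical session, capture might be enabled before running the
commands of interest, with the buffer later read back from userspace through
the
.Va debug.ddb.capture.data
sysctl described above:
.Bd -literal -offset indent
capture on
show pcpu
capture status
.Ed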
.Pp This facility is particularly useful in concert with the scripting and .Xr textdump 4 facilities, allowing scripted debugging output to be captured and committed to disk as part of a textdump for later analysis. The contents of the capture buffer may also be inspected in a kernel core dump using .Xr kgdb 1 Pq Pa ports/devel/gdb . .Pp .It Ic run .It Ic script .It Ic scripts .It Ic unscript Run, define, list, and delete scripts. See the .Sx SCRIPTING section for more information on the scripting facility. .Pp .It Ic textdump dump .It Ic textdump set .It Ic textdump status .It Ic textdump unset Use the .Ic textdump dump command to immediately perform a textdump. More information may be found in .Xr textdump 4 . The .Ic textdump set command may be used to force the next kernel core dump to be a textdump rather than a traditional memory dump or minidump. .Ic textdump status reports whether a textdump has been scheduled. .Ic textdump unset cancels a request to perform a textdump as the next kernel core dump. .El .Sh VARIABLES The debugger accesses registers and variables as .Li $ Ns Ar name . Register names are as in the .Dq Ic show Cm registers command. Some variables are suffixed with numbers, and may have some modifier following a colon immediately after the variable name. For example, register variables can have a .Cm u modifier to indicate user register (e.g., .Dq Li $eax:u ) . .Pp Built-in variables currently supported are: .Pp .Bl -tag -width ".Va tabstops" -compact .It Va radix Input and output radix. .It Va maxoff Addresses are printed as .Dq Ar symbol Ns Li + Ns Ar offset unless .Ar offset is greater than .Va maxoff . .It Va maxwidth The width of the displayed line. .It Va lines The number of lines. It is used by the built-in pager. Setting it to 0 disables paging. .It Va tabstops Tab stop width. .It Va work Ns Ar xx Work variable; .Ar xx can take values from 0 to 31. .El .Sh EXPRESSIONS Most expression operators in C are supported except .Ql ~ , .Ql ^ , and unary .Ql & . Special rules in .Nm are: .Bl -tag -width ".No Identifiers" .It Identifiers The name of a symbol is translated to the value of the symbol, which is the address of the corresponding object. .Ql \&. and .Ql \&: can be used in the identifier. If supported by an object format dependent routine, .Sm off .Oo Ar filename : Oc Ar func : lineno , .Sm on .Oo Ar filename : Oc Ns Ar variable , and .Oo Ar filename : Oc Ns Ar lineno can be accepted as a symbol. .It Numbers Radix is determined by the first two letters: .Ql 0x : hex, .Ql 0o : octal, .Ql 0t : decimal; otherwise, follow current radix. .It Li \&. .Va dot .It Li + .Va next .It Li .. address of the start of the last line examined. Unlike .Va dot or .Va next , this is only changed by .Ic examine or .Ic write command. .It Li ' last address explicitly specified. .It Li $ Ns Ar variable Translated to the value of the specified variable. It may be followed by a .Ql \&: and modifiers as described above. .It Ar a Ns Li # Ns Ar b A binary operator which rounds up the left hand side to the next multiple of right hand side. .It Li * Ns Ar expr Indirection. It may be followed by a .Ql \&: and modifiers as described above. .El .Sh SCRIPTING .Nm supports a basic scripting facility to allow automating tasks or responses to specific events. Each script consists of a list of DDB commands to be executed sequentially, and is assigned a unique name. Certain script names have special meaning, and will be automatically run on various .Nm events if scripts by those names have been defined. 
.Pp The .Ic script command may be used to define a script by name. Scripts consist of a series of .Nm commands separated with the .Ql \&; character. For example: .Bd -literal -offset indent script kdb.enter.panic=bt; show pcpu script lockinfo=show alllocks; show lockedvnods .Ed .Pp The .Ic scripts command lists currently defined scripts. .Pp The .Ic run command execute a script by name. For example: .Bd -literal -offset indent run lockinfo .Ed .Pp The .Ic unscript command may be used to delete a script by name. For example: .Bd -literal -offset indent unscript kdb.enter.panic .Ed .Pp These functions may also be performed from userspace using the .Xr ddb 8 command. .Pp Certain scripts are run automatically, if defined, for specific .Nm events. The follow scripts are run when various events occur: .Bl -tag -width kdb.enter.powerfail .It Va kdb.enter.acpi The kernel debugger was entered as a result of an .Xr acpi 4 event. .It Va kdb.enter.bootflags The kernel debugger was entered at boot as a result of the debugger boot flag being set. .It Va kdb.enter.break The kernel debugger was entered as a result of a serial or console break. .It Va kdb.enter.cam The kernel debugger was entered as a result of a .Xr CAM 4 event. .It Va kdb.enter.mac The kernel debugger was entered as a result of an assertion failure in the .Xr mac_test 4 module of the TrustedBSD MAC Framework. .It Va kdb.enter.netgraph The kernel debugger was entered as a result of a .Xr netgraph 4 event. .It Va kdb.enter.panic .Xr panic 9 was called. .It Va kdb.enter.powerpc The kernel debugger was entered as a result of an unimplemented interrupt type on the powerpc platform. .It Va kdb.enter.sysctl The kernel debugger was entered as a result of the .Va debug.kdb.enter sysctl being set. .It Va kdb.enter.unionfs The kernel debugger was entered as a result of an assertion failure in the union file system. .It Va kdb.enter.unknown The kernel debugger was entered, but no reason has been set. .It Va kdb.enter.vfslock The kernel debugger was entered as a result of a VFS lock violation. .It Va kdb.enter.watchdog The kernel debugger was entered as a result of a watchdog firing. .It Va kdb.enter.witness The kernel debugger was entered as a result of a .Xr witness 4 violation. .El .Pp In the event that none of these scripts is found, .Nm will attempt to execute a default script: .Bl -tag -width kdb.enter.powerfail .It Va kdb.enter.default The kernel debugger was entered, but a script exactly matching the reason for entering was not defined. This can be used as a catch-all to handle cases not specifically of interest; for example, .Va kdb.enter.witness might be defined to have special handling, and .Va kdb.enter.default might be defined to simply panic and reboot. .El .Sh HINTS On machines with an ISA expansion bus, a simple NMI generation card can be constructed by connecting a push button between the A01 and B01 (CHCHK# and GND) card fingers. Momentarily shorting these two fingers together may cause the bridge chipset to generate an NMI, which causes the kernel to pass control to .Nm . Some bridge chipsets do not generate a NMI on CHCHK#, so your mileage may vary. The NMI allows one to break into the debugger on a wedged machine to diagnose problems. Other bus' bridge chipsets may be able to generate NMI using bus specific methods. There are many PCI and PCIe add-in cards which can generate NMI for debugging. Modern server systems typically use IPMI to generate signals to enter the debugger. 
The .Va devel/ipmitool port can be used to send the .Cd chassis power diag command which delivers an NMI to the processor. Embedded systems often use JTAG for debugging, but rarely use it in combination with .Nm . .Pp Serial consoles can break to the debugger by sending a BREAK condition on the serial line. This requires a kernel built with .Cd options BREAK_TO_DEBUGGER is specified in the kernel. Most terminal emulation programs can send a break sequence with a special key sequence or menu selection. Sending the break can be difficult or even happen spuriously in some setups. An alternative method is to build a kernel with .Cd options ALT_BREAK_TO_DEBUGGER then the sequence of CR TILDE CTRL-B enters the debugger; CR TILDE CTRL-P causes a panic; and CR TILDE CTRL-R causes an immediate reboot. In all these sequences, CR represents Carriage Return and is usually sent by pressing the Enter or Return key. TILDE is the ASCII tilde character (~). CTRL-x is Control x, sent by pressing the Control key, then x, then releasing both. .Pp The break-to-debugger behavior can be enabled by setting .Xr sysctl 8 .Va debug.kdb.break_to_debugger to 1. The alt-break-to-debugger behavior can be enabled by setting .Xr sysctl 8 .Va debug.kdb.alt_break_to_debugger to 1. The debugger can be entered by setting .Xr sysctl 8 .Va debug.kdb.enter to 1. .Pp Output can be interrupted, paused, and resumed with the control characters CTRL-C, CTRL-S, and CTRL-Q. Because these control characters are received as in-band data from the console, there is an input buffer, and once that buffer fills .Nm must either stop responding to control characters or drop additional input while continuing to search for control characters. This behavior is controlled by the tunable .Xr sysctl 8 .Va debug.ddb.prioritize_control_input , which defaults to 1. The input buffer size is 512 bytes. .Sh FILES Header files mentioned in this manual page can be found below .Pa /usr/include directory. .Pp .Bl -dash -compact .It .Pa sys/buf.h .It .Pa sys/domain.h .It .Pa netinet/in_pcb.h .It .Pa sys/socket.h .It .Pa sys/vnode.h .El .Sh SEE ALSO .Xr gdb 1 Pq Pa ports/devel/gdb , .Xr kgdb 1 Pq Pa ports/devel/gdb , .Xr acpi 4 , .Xr CAM 4 , .Xr gdb 4 , .Xr mac_ddb 4 , .Xr mac_test 4 , .Xr netgraph 4 , .Xr textdump 4 , .Xr witness 4 , .Xr ddb 8 , .Xr sysctl 8 , .Xr panic 9 .Sh HISTORY The .Nm debugger was developed for Mach, and ported to .Bx 386 0.1 . This manual page translated from .Xr man 7 macros by .An Garrett Wollman . .Pp .An Robert N. M. Watson added support for .Nm output capture, .Xr textdump 4 and scripting in .Fx 7.1 . diff --git a/sys/netinet/tcp_log_buf.c b/sys/netinet/tcp_log_buf.c index f540c7d3e70e..3c62d3b07f99 100644 --- a/sys/netinet/tcp_log_buf.c +++ b/sys/netinet/tcp_log_buf.c @@ -1,2974 +1,3355 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2016-2018 Netflix, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include #include "opt_inet.h" +#include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#ifdef DDB +#include +#endif #include #include /* Must come after qmath.h and tree.h */ #include #include +#ifdef DDB +#include +#endif + #include #include #include #include #include #include #include #include #include #include /* Default expiry time */ #define TCP_LOG_EXPIRE_TIME ((sbintime_t)60 * SBT_1S) /* Max interval at which to run the expiry timer */ #define TCP_LOG_EXPIRE_INTVL ((sbintime_t)5 * SBT_1S) bool tcp_log_verbose; static uma_zone_t tcp_log_id_bucket_zone, tcp_log_id_node_zone, tcp_log_zone; static int tcp_log_session_limit = TCP_LOG_BUF_DEFAULT_SESSION_LIMIT; static uint32_t tcp_log_version = TCP_LOG_BUF_VER; RB_HEAD(tcp_log_id_tree, tcp_log_id_bucket); static struct tcp_log_id_tree tcp_log_id_head; static STAILQ_HEAD(, tcp_log_id_node) tcp_log_expireq_head = STAILQ_HEAD_INITIALIZER(tcp_log_expireq_head); static struct mtx tcp_log_expireq_mtx; static struct callout tcp_log_expireq_callout; static u_long tcp_log_auto_ratio = 0; static volatile u_long tcp_log_auto_ratio_cur = 0; static uint32_t tcp_log_auto_mode = TCP_LOG_STATE_TAIL; static bool tcp_log_auto_all = false; static uint32_t tcp_disable_all_bb_logs = 0; RB_PROTOTYPE_STATIC(tcp_log_id_tree, tcp_log_id_bucket, tlb_rb, tcp_log_id_cmp) SYSCTL_NODE(_net_inet_tcp, OID_AUTO, bb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "TCP Black Box controls"); SYSCTL_NODE(_net_inet_tcp_bb, OID_AUTO, tp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "TCP Black Box Trace Point controls"); SYSCTL_BOOL(_net_inet_tcp_bb, OID_AUTO, log_verbose, CTLFLAG_RW, &tcp_log_verbose, 0, "Force verbose logging for TCP traces"); SYSCTL_INT(_net_inet_tcp_bb, OID_AUTO, log_session_limit, CTLFLAG_RW, &tcp_log_session_limit, 0, "Maximum number of events maintained for each TCP session"); uint32_t tcp_trace_point_config = 0; SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, number, CTLFLAG_RW, &tcp_trace_point_config, TCP_LOG_STATE_HEAD_AUTO, "What is the trace point number to activate (0=none, 0xffffffff = all)?"); uint32_t tcp_trace_point_bb_mode = TCP_LOG_STATE_CONTINUAL; SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, bbmode, CTLFLAG_RW, &tcp_trace_point_bb_mode, TCP_LOG_STATE_HEAD_AUTO, "What is BB logging mode that is activated?"); int32_t tcp_trace_point_count = 0; SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, count, CTLFLAG_RW, &tcp_trace_point_count, TCP_LOG_STATE_HEAD_AUTO, "How many connections will have BB logging turned on that hit the tracepoint?"); SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_global_limit, CTLFLAG_RW, &tcp_log_zone, "Maximum number of events maintained for all TCP sessions"); SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, 
log_global_entries, CTLFLAG_RD, &tcp_log_zone, "Current number of events maintained for all TCP sessions"); SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_limit, CTLFLAG_RW, &tcp_log_id_bucket_zone, "Maximum number of log IDs"); SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_entries, CTLFLAG_RD, &tcp_log_id_bucket_zone, "Current number of log IDs"); SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_limit, CTLFLAG_RW, &tcp_log_id_node_zone, "Maximum number of tcpcbs with log IDs"); SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_entries, CTLFLAG_RD, &tcp_log_id_node_zone, "Current number of tcpcbs with log IDs"); SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_version, CTLFLAG_RD, &tcp_log_version, 0, "Version of log formats exported"); SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, disable_all, CTLFLAG_RW, &tcp_disable_all_bb_logs, 0, "Disable all BB logging for all connections"); SYSCTL_ULONG(_net_inet_tcp_bb, OID_AUTO, log_auto_ratio, CTLFLAG_RW, &tcp_log_auto_ratio, 0, "Do auto capturing for 1 out of N sessions"); SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_auto_mode, CTLFLAG_RW, &tcp_log_auto_mode, 0, "Logging mode for auto-selected sessions (default is TCP_LOG_STATE_TAIL)"); SYSCTL_BOOL(_net_inet_tcp_bb, OID_AUTO, log_auto_all, CTLFLAG_RW, &tcp_log_auto_all, 0, "Auto-select from all sessions (rather than just those with IDs)"); #ifdef TCPLOG_DEBUG_COUNTERS counter_u64_t tcp_log_queued; counter_u64_t tcp_log_que_fail1; counter_u64_t tcp_log_que_fail2; counter_u64_t tcp_log_que_fail3; counter_u64_t tcp_log_que_fail4; counter_u64_t tcp_log_que_fail5; counter_u64_t tcp_log_que_copyout; counter_u64_t tcp_log_que_read; counter_u64_t tcp_log_que_freed; SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, queued, CTLFLAG_RD, &tcp_log_queued, "Number of entries queued"); SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail1, CTLFLAG_RD, &tcp_log_que_fail1, "Number of entries queued but fail 1"); SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail2, CTLFLAG_RD, &tcp_log_que_fail2, "Number of entries queued but fail 2"); SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail3, CTLFLAG_RD, &tcp_log_que_fail3, "Number of entries queued but fail 3"); SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail4, CTLFLAG_RD, &tcp_log_que_fail4, "Number of entries queued but fail 4"); SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail5, CTLFLAG_RD, &tcp_log_que_fail5, "Number of entries queued but fail 4"); SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, copyout, CTLFLAG_RD, &tcp_log_que_copyout, "Number of entries copied out"); SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, read, CTLFLAG_RD, &tcp_log_que_read, "Number of entries read from the queue"); SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, freed, CTLFLAG_RD, &tcp_log_que_freed, "Number of entries freed after reading"); #endif #ifdef INVARIANTS #define TCPLOG_DEBUG_RINGBUF #endif /* Number of requests to consider a PBCID "active". */ #define ACTIVE_REQUEST_COUNT 10 /* Statistic tracking for "active" PBCIDs. 
*/ static counter_u64_t tcp_log_pcb_ids_cur; static counter_u64_t tcp_log_pcb_ids_tot; SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, pcb_ids_cur, CTLFLAG_RD, &tcp_log_pcb_ids_cur, "Number of pcb IDs allocated in the system"); SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, pcb_ids_tot, CTLFLAG_RD, &tcp_log_pcb_ids_tot, "Total number of pcb IDs that have been allocated"); struct tcp_log_mem { STAILQ_ENTRY(tcp_log_mem) tlm_queue; struct tcp_log_buffer tlm_buf; struct tcp_log_verbose tlm_v; #ifdef TCPLOG_DEBUG_RINGBUF volatile int tlm_refcnt; #endif }; /* 60 bytes for the header, + 16 bytes for padding */ static uint8_t zerobuf[76]; /* * Lock order: * 1. TCPID_TREE * 2. TCPID_BUCKET * 3. INP * * Rules: * A. You need a lock on the Tree to add/remove buckets. * B. You need a lock on the bucket to add/remove nodes from the bucket. * C. To change information in a node, you need the INP lock if the tln_closed * field is false. Otherwise, you need the bucket lock. (Note that the * tln_closed field can change at any point, so you need to recheck the * entry after acquiring the INP lock.) * D. To remove a node from the bucket, you must have that entry locked, * according to the criteria of Rule C. Also, the node must not be on * the expiry queue. * E. The exception to C is the expiry queue fields, which are locked by * the TCPLOG_EXPIREQ lock. * * Buckets have a reference count. Each node is a reference. Further, * other callers may add reference counts to keep a bucket from disappearing. * You can add a reference as long as you own a lock sufficient to keep the * bucket from disappearing. For example, a common use is: * a. Have a locked INP, but need to lock the TCPID_BUCKET. * b. Add a refcount on the bucket. (Safe because the INP lock prevents * the TCPID_BUCKET from going away.) * c. Drop the INP lock. * d. Acquire a lock on the TCPID_BUCKET. * e. Acquire a lock on the INP. * f. Drop the refcount on the bucket. * (At this point, the bucket may disappear.) * * Expire queue lock: * You can acquire this with either the bucket or INP lock. Don't reverse it. * When the expire code has committed to freeing a node, it resets the expiry * time to SBT_MAX. That is the signal to everyone else that they should * leave that node alone. 
*/ static struct rwlock tcp_id_tree_lock; #define TCPID_TREE_WLOCK() rw_wlock(&tcp_id_tree_lock) #define TCPID_TREE_RLOCK() rw_rlock(&tcp_id_tree_lock) #define TCPID_TREE_UPGRADE() rw_try_upgrade(&tcp_id_tree_lock) #define TCPID_TREE_WUNLOCK() rw_wunlock(&tcp_id_tree_lock) #define TCPID_TREE_RUNLOCK() rw_runlock(&tcp_id_tree_lock) #define TCPID_TREE_WLOCK_ASSERT() rw_assert(&tcp_id_tree_lock, RA_WLOCKED) #define TCPID_TREE_RLOCK_ASSERT() rw_assert(&tcp_id_tree_lock, RA_RLOCKED) #define TCPID_TREE_UNLOCK_ASSERT() rw_assert(&tcp_id_tree_lock, RA_UNLOCKED) #define TCPID_BUCKET_LOCK_INIT(tlb) mtx_init(&((tlb)->tlb_mtx), "tcp log id bucket", NULL, MTX_DEF) #define TCPID_BUCKET_LOCK_DESTROY(tlb) mtx_destroy(&((tlb)->tlb_mtx)) #define TCPID_BUCKET_LOCK(tlb) mtx_lock(&((tlb)->tlb_mtx)) #define TCPID_BUCKET_UNLOCK(tlb) mtx_unlock(&((tlb)->tlb_mtx)) #define TCPID_BUCKET_LOCK_ASSERT(tlb) mtx_assert(&((tlb)->tlb_mtx), MA_OWNED) #define TCPID_BUCKET_UNLOCK_ASSERT(tlb) mtx_assert(&((tlb)->tlb_mtx), MA_NOTOWNED) #define TCPID_BUCKET_REF(tlb) refcount_acquire(&((tlb)->tlb_refcnt)) #define TCPID_BUCKET_UNREF(tlb) refcount_release(&((tlb)->tlb_refcnt)) #define TCPLOG_EXPIREQ_LOCK() mtx_lock(&tcp_log_expireq_mtx) #define TCPLOG_EXPIREQ_UNLOCK() mtx_unlock(&tcp_log_expireq_mtx) SLIST_HEAD(tcp_log_id_head, tcp_log_id_node); struct tcp_log_id_bucket { /* * tlb_id must be first. This lets us use strcmp on * (struct tcp_log_id_bucket *) and (char *) interchangeably. */ char tlb_id[TCP_LOG_ID_LEN]; char tlb_tag[TCP_LOG_TAG_LEN]; RB_ENTRY(tcp_log_id_bucket) tlb_rb; struct tcp_log_id_head tlb_head; struct mtx tlb_mtx; volatile u_int tlb_refcnt; volatile u_int tlb_reqcnt; uint32_t tlb_loglimit; int8_t tlb_logstate; }; struct tcp_log_id_node { SLIST_ENTRY(tcp_log_id_node) tln_list; STAILQ_ENTRY(tcp_log_id_node) tln_expireq; /* Locked by the expireq lock */ sbintime_t tln_expiretime; /* Locked by the expireq lock */ /* * If INP is NULL, that means the connection has closed. We've * saved the connection endpoint information and the log entries * in the tln_ie and tln_entries members. We've also saved a pointer * to the enclosing bucket here. If INP is not NULL, the information is * in the PCB and not here. */ struct inpcb *tln_inp; struct tcpcb *tln_tp; struct tcp_log_id_bucket *tln_bucket; struct in_endpoints tln_ie; struct tcp_log_stailq tln_entries; int tln_count; volatile int tln_closed; uint8_t tln_af; }; enum tree_lock_state { TREE_UNLOCKED = 0, TREE_RLOCKED, TREE_WLOCKED, }; /* Do we want to select this session for auto-logging? */ static __inline bool tcp_log_selectauto(void) { /* * If we are doing auto-capturing, figure out whether we will capture * this session. 
*/ if (tcp_log_auto_ratio && (tcp_disable_all_bb_logs == 0) && (atomic_fetchadd_long(&tcp_log_auto_ratio_cur, 1) % tcp_log_auto_ratio) == 0) return (true); return (false); } static __inline int tcp_log_id_cmp(struct tcp_log_id_bucket *a, struct tcp_log_id_bucket *b) { KASSERT(a != NULL, ("tcp_log_id_cmp: argument a is unexpectedly NULL")); KASSERT(b != NULL, ("tcp_log_id_cmp: argument b is unexpectedly NULL")); return strncmp(a->tlb_id, b->tlb_id, TCP_LOG_ID_LEN); } RB_GENERATE_STATIC(tcp_log_id_tree, tcp_log_id_bucket, tlb_rb, tcp_log_id_cmp) static __inline void tcp_log_id_validate_tree_lock(int tree_locked) { #ifdef INVARIANTS switch (tree_locked) { case TREE_WLOCKED: TCPID_TREE_WLOCK_ASSERT(); break; case TREE_RLOCKED: TCPID_TREE_RLOCK_ASSERT(); break; case TREE_UNLOCKED: TCPID_TREE_UNLOCK_ASSERT(); break; default: kassert_panic("%s:%d: unknown tree lock state", __func__, __LINE__); } #endif } static __inline void tcp_log_remove_bucket(struct tcp_log_id_bucket *tlb) { TCPID_TREE_WLOCK_ASSERT(); KASSERT(SLIST_EMPTY(&tlb->tlb_head), ("%s: Attempt to remove non-empty bucket", __func__)); if (RB_REMOVE(tcp_log_id_tree, &tcp_log_id_head, tlb) == NULL) { #ifdef INVARIANTS kassert_panic("%s:%d: error removing element from tree", __func__, __LINE__); #endif } TCPID_BUCKET_LOCK_DESTROY(tlb); counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1); uma_zfree(tcp_log_id_bucket_zone, tlb); } /* * Call with a referenced and locked bucket. * Will return true if the bucket was freed; otherwise, false. * tlb: The bucket to unreference. * tree_locked: A pointer to the state of the tree lock. If the tree lock * state changes, the function will update it. * inp: If not NULL and the function needs to drop the inp lock to relock the * tree, it will do so. (The caller must ensure inp will not become invalid, * probably by holding a reference to it.) */ static bool tcp_log_unref_bucket(struct tcp_log_id_bucket *tlb, int *tree_locked, struct inpcb *inp) { KASSERT(tlb != NULL, ("%s: called with NULL tlb", __func__)); KASSERT(tree_locked != NULL, ("%s: called with NULL tree_locked", __func__)); tcp_log_id_validate_tree_lock(*tree_locked); /* * Did we hold the last reference on the tlb? If so, we may need * to free it. (Note that we can realistically only execute the * loop twice: once without a write lock and once with a write * lock.) */ while (TCPID_BUCKET_UNREF(tlb)) { /* * We need a write lock on the tree to free this. * If we can upgrade the tree lock, this is "easy". If we * can't upgrade the tree lock, we need to do this the * "hard" way: unwind all our locks and relock everything. * In the meantime, anything could have changed. We even * need to validate that we still need to free the bucket. */ if (*tree_locked == TREE_RLOCKED && TCPID_TREE_UPGRADE()) *tree_locked = TREE_WLOCKED; else if (*tree_locked != TREE_WLOCKED) { TCPID_BUCKET_REF(tlb); if (inp != NULL) INP_WUNLOCK(inp); TCPID_BUCKET_UNLOCK(tlb); if (*tree_locked == TREE_RLOCKED) TCPID_TREE_RUNLOCK(); TCPID_TREE_WLOCK(); *tree_locked = TREE_WLOCKED; TCPID_BUCKET_LOCK(tlb); if (inp != NULL) INP_WLOCK(inp); continue; } /* * We have an empty bucket and a write lock on the tree. * Remove the empty bucket. */ tcp_log_remove_bucket(tlb); return (true); } return (false); } /* * Call with a locked bucket. This function will release the lock on the * bucket before returning. * * The caller is responsible for freeing the tp->t_lin/tln node! * * Note: one of tp or both tlb and tln must be supplied. * * inp: A pointer to the inp. 
If the function needs to drop the inp lock to * acquire the tree write lock, it will do so. (The caller must ensure inp * will not become invalid, probably by holding a reference to it.) * tp: A pointer to the tcpcb. (optional; if specified, tlb and tln are ignored) * tlb: A pointer to the bucket. (optional; ignored if tp is specified) * tln: A pointer to the node. (optional; ignored if tp is specified) * tree_locked: A pointer to the state of the tree lock. If the tree lock * state changes, the function will update it. * * Will return true if the INP lock was reacquired; otherwise, false. */ static bool tcp_log_remove_id_node(struct inpcb *inp, struct tcpcb *tp, struct tcp_log_id_bucket *tlb, struct tcp_log_id_node *tln, int *tree_locked) { int orig_tree_locked; KASSERT(tp != NULL || (tlb != NULL && tln != NULL), ("%s: called with tp=%p, tlb=%p, tln=%p", __func__, tp, tlb, tln)); KASSERT(tree_locked != NULL, ("%s: called with NULL tree_locked", __func__)); if (tp != NULL) { tlb = tp->t_lib; tln = tp->t_lin; KASSERT(tlb != NULL, ("%s: unexpectedly NULL tlb", __func__)); KASSERT(tln != NULL, ("%s: unexpectedly NULL tln", __func__)); } tcp_log_id_validate_tree_lock(*tree_locked); TCPID_BUCKET_LOCK_ASSERT(tlb); /* * Remove the node, clear the log bucket and node from the TCPCB, and * decrement the bucket refcount. In the process, if this is the * last reference, the bucket will be freed. */ SLIST_REMOVE(&tlb->tlb_head, tln, tcp_log_id_node, tln_list); if (tp != NULL) { tp->t_lib = NULL; tp->t_lin = NULL; } orig_tree_locked = *tree_locked; if (!tcp_log_unref_bucket(tlb, tree_locked, inp)) TCPID_BUCKET_UNLOCK(tlb); return (*tree_locked != orig_tree_locked); } #define RECHECK_INP_CLEAN(cleanup) do { \ if (inp->inp_flags & INP_DROPPED) { \ rv = ECONNRESET; \ cleanup; \ goto done; \ } \ tp = intotcpcb(inp); \ } while (0) #define RECHECK_INP() RECHECK_INP_CLEAN(/* noop */) static void tcp_log_grow_tlb(char *tlb_id, struct tcpcb *tp) { INP_WLOCK_ASSERT(tptoinpcb(tp)); #ifdef STATS if (V_tcp_perconn_stats_enable == 2 && tp->t_stats == NULL) (void)tcp_stats_sample_rollthedice(tp, tlb_id, strlen(tlb_id)); #endif } static void tcp_log_increment_reqcnt(struct tcp_log_id_bucket *tlb) { atomic_fetchadd_int(&tlb->tlb_reqcnt, 1); } int tcp_log_apply_ratio(struct tcpcb *tp, int ratio) { struct tcp_log_id_bucket *tlb; struct inpcb *inp = tptoinpcb(tp); uint32_t hash, ratio_hash_thresh; int rv, tree_locked; rv = 0; tree_locked = TREE_UNLOCKED; tlb = tp->t_lib; INP_WLOCK_ASSERT(inp); if (tlb == NULL) { INP_WUNLOCK(inp); return (EOPNOTSUPP); } if (ratio) ratio_hash_thresh = max(1, UINT32_MAX / ratio); else ratio_hash_thresh = 0; TCPID_BUCKET_REF(tlb); INP_WUNLOCK(inp); TCPID_BUCKET_LOCK(tlb); hash = hash32_buf(tlb->tlb_id, strlen(tlb->tlb_id), 0); if (hash > ratio_hash_thresh && tp->_t_logstate == TCP_LOG_STATE_OFF && tlb->tlb_logstate == TCP_LOG_STATE_OFF) { /* * Ratio decision not to log this log ID (and this connection by * way of association). We only apply a log ratio log disable * decision if it would not interfere with a log enable decision * made elsewhere e.g. tcp_log_selectauto() or setsockopt(). 
*/ tlb->tlb_logstate = TCP_LOG_STATE_RATIO_OFF; INP_WLOCK(inp); RECHECK_INP(); (void)tcp_log_state_change(tp, TCP_LOG_STATE_OFF); done: INP_WUNLOCK(inp); } INP_UNLOCK_ASSERT(inp); if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL)) TCPID_BUCKET_UNLOCK(tlb); if (tree_locked == TREE_WLOCKED) { TCPID_TREE_WLOCK_ASSERT(); TCPID_TREE_WUNLOCK(); } else if (tree_locked == TREE_RLOCKED) { TCPID_TREE_RLOCK_ASSERT(); TCPID_TREE_RUNLOCK(); } else TCPID_TREE_UNLOCK_ASSERT(); return (rv); } /* * Associate the specified tag with a particular TCP log ID. * Called with INPCB locked. Returns with it unlocked. * Returns 0 on success or EOPNOTSUPP if the connection has no TCP log ID. */ int tcp_log_set_tag(struct tcpcb *tp, char *tag) { struct inpcb *inp = tptoinpcb(tp); struct tcp_log_id_bucket *tlb; int tree_locked; INP_WLOCK_ASSERT(inp); tree_locked = TREE_UNLOCKED; tlb = tp->t_lib; if (tlb == NULL) { INP_WUNLOCK(inp); return (EOPNOTSUPP); } TCPID_BUCKET_REF(tlb); INP_WUNLOCK(inp); TCPID_BUCKET_LOCK(tlb); strlcpy(tlb->tlb_tag, tag, TCP_LOG_TAG_LEN); if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL)) TCPID_BUCKET_UNLOCK(tlb); if (tree_locked == TREE_WLOCKED) { TCPID_TREE_WLOCK_ASSERT(); TCPID_TREE_WUNLOCK(); } else if (tree_locked == TREE_RLOCKED) { TCPID_TREE_RLOCK_ASSERT(); TCPID_TREE_RUNLOCK(); } else TCPID_TREE_UNLOCK_ASSERT(); return (0); } /* * Set the TCP log ID for a TCPCB. * Called with INPCB locked. Returns with it unlocked. */ int tcp_log_set_id(struct tcpcb *tp, char *id) { struct tcp_log_id_bucket *tlb, *tmp_tlb; struct tcp_log_id_node *tln; struct inpcb *inp = tptoinpcb(tp); int tree_locked, rv; bool bucket_locked, same; tlb = NULL; tln = NULL; tree_locked = TREE_UNLOCKED; bucket_locked = false; restart: INP_WLOCK_ASSERT(inp); /* See if the ID is unchanged. */ same = ((tp->t_lib != NULL && !strcmp(tp->t_lib->tlb_id, id)) || (tp->t_lib == NULL && *id == 0)); if (tp->_t_logstate && STAILQ_FIRST(&tp->t_logs) && !same) { /* * There are residual logs left we may * be changing id's so dump what we can. */ switch(tp->_t_logstate) { case TCP_LOG_STATE_HEAD_AUTO: (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head at id switch", M_NOWAIT, false); break; case TCP_LOG_STATE_TAIL_AUTO: (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail at id switch", M_NOWAIT, false); break; case TCP_LOG_STATE_CONTINUAL: (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual at id switch", M_NOWAIT, false); break; case TCP_LOG_VIA_BBPOINTS: (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints at id switch", M_NOWAIT, false); break; } } if (same) { if (tp->t_lib != NULL) { tcp_log_increment_reqcnt(tp->t_lib); if ((tp->t_lib->tlb_logstate > TCP_LOG_STATE_OFF) && (tp->t_log_state_set == 0)) { /* Clone in any logging */ tp->_t_logstate = tp->t_lib->tlb_logstate; } if ((tp->t_lib->tlb_loglimit) && (tp->t_log_state_set == 0)) { /* We also have a limit set */ tp->t_loglimit = tp->t_lib->tlb_loglimit; } } rv = 0; goto done; } /* * If the TCPCB had a previous ID, we need to extricate it from * the previous list. * * Drop the TCPCB lock and lock the tree and the bucket. * Because this is called in the socket context, we (theoretically) * don't need to worry about the INPCB completely going away * while we are gone. */ if (tp->t_lib != NULL) { tlb = tp->t_lib; TCPID_BUCKET_REF(tlb); INP_WUNLOCK(inp); if (tree_locked == TREE_UNLOCKED) { TCPID_TREE_RLOCK(); tree_locked = TREE_RLOCKED; } TCPID_BUCKET_LOCK(tlb); bucket_locked = true; INP_WLOCK(inp); /* * Unreference the bucket. 
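 * (The sequence just above, which takes a bucket reference, drops the
 * INP lock, acquires the tree and bucket locks, and only then re-takes
 * the INP lock, appears to keep the lock order used throughout this
 * file: tree lock, then bucket lock, then INP lock.)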
If our bucket went away, it is no * longer locked or valid. */ if (tcp_log_unref_bucket(tlb, &tree_locked, inp)) { bucket_locked = false; tlb = NULL; } /* Validate the INP. */ RECHECK_INP(); /* * Evaluate whether the bucket changed while we were unlocked. * * Possible scenarios here: * 1. Bucket is unchanged and the same one we started with. * 2. The TCPCB no longer has a bucket and our bucket was * freed. * 3. The TCPCB has a new bucket, whether ours was freed. * 4. The TCPCB no longer has a bucket and our bucket was * not freed. * * In cases 2-4, we will start over. In case 1, we will * proceed here to remove the bucket. */ if (tlb == NULL || tp->t_lib != tlb) { KASSERT(bucket_locked || tlb == NULL, ("%s: bucket_locked (%d) and tlb (%p) are " "inconsistent", __func__, bucket_locked, tlb)); if (bucket_locked) { TCPID_BUCKET_UNLOCK(tlb); bucket_locked = false; tlb = NULL; } goto restart; } /* * Store the (struct tcp_log_id_node) for reuse. Then, remove * it from the bucket. In the process, we may end up relocking. * If so, we need to validate that the INP is still valid, and * the TCPCB entries match we expect. * * We will clear tlb and change the bucket_locked state just * before calling tcp_log_remove_id_node(), since that function * will unlock the bucket. */ if (tln != NULL) uma_zfree(tcp_log_id_node_zone, tln); tln = tp->t_lin; tlb = NULL; bucket_locked = false; if (tcp_log_remove_id_node(inp, tp, NULL, NULL, &tree_locked)) { RECHECK_INP(); /* * If the TCPCB moved to a new bucket while we had * dropped the lock, restart. */ if (tp->t_lib != NULL || tp->t_lin != NULL) goto restart; } /* * Yay! We successfully removed the TCPCB from its old * bucket. Phew! * * On to bigger and better things... */ } /* At this point, the TCPCB should not be in any bucket. */ KASSERT(tp->t_lib == NULL, ("%s: tp->t_lib is not NULL", __func__)); /* * If the new ID is not empty, we need to now assign this TCPCB to a * new bucket. */ if (*id) { /* Get a new tln, if we don't already have one to reuse. */ if (tln == NULL) { tln = uma_zalloc(tcp_log_id_node_zone, M_NOWAIT | M_ZERO); if (tln == NULL) { rv = ENOBUFS; goto done; } tln->tln_inp = inp; tln->tln_tp = tp; } /* * Drop the INP lock for a bit. We don't need it, and dropping * it prevents lock order reversals. */ INP_WUNLOCK(inp); /* Make sure we have at least a read lock on the tree. */ tcp_log_id_validate_tree_lock(tree_locked); if (tree_locked == TREE_UNLOCKED) { TCPID_TREE_RLOCK(); tree_locked = TREE_RLOCKED; } refind: /* * Remember that we constructed (struct tcp_log_id_node) so * we can safely cast the id to it for the purposes of finding. */ KASSERT(tlb == NULL, ("%s:%d tlb unexpectedly non-NULL", __func__, __LINE__)); tmp_tlb = RB_FIND(tcp_log_id_tree, &tcp_log_id_head, (struct tcp_log_id_bucket *) id); /* * If we didn't find a matching bucket, we need to add a new * one. This requires a write lock. But, of course, we will * need to recheck some things when we re-acquire the lock. */ if (tmp_tlb == NULL && tree_locked != TREE_WLOCKED) { tree_locked = TREE_WLOCKED; if (!TCPID_TREE_UPGRADE()) { TCPID_TREE_RUNLOCK(); TCPID_TREE_WLOCK(); /* * The tree may have changed while we were * unlocked. */ goto refind; } } /* If we need to add a new bucket, do it now. */ if (tmp_tlb == NULL) { /* Allocate new bucket. 
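 * The new bucket is created below with a reference count of 1 (held on
 * behalf of the node we are about to insert) and a request count of 1
 * for this request.  If, after re-taking the INP lock, the connection
 * turns out to have gained a bucket in the meantime, the fresh bucket
 * is torn down again via FREE_NEW_TLB() and we start over.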
*/ tlb = uma_zalloc(tcp_log_id_bucket_zone, M_NOWAIT); if (tlb == NULL) { rv = ENOBUFS; goto done_noinp; } counter_u64_add(tcp_log_pcb_ids_cur, 1); counter_u64_add(tcp_log_pcb_ids_tot, 1); if ((tcp_log_auto_all == false) && tcp_log_auto_mode && tcp_log_selectauto()) { /* Save off the log state */ tlb->tlb_logstate = tcp_log_auto_mode; } else tlb->tlb_logstate = TCP_LOG_STATE_OFF; tlb->tlb_loglimit = 0; tlb->tlb_tag[0] = '\0'; /* Default to an empty tag. */ /* * Copy the ID to the bucket. * NB: Don't use strlcpy() unless you are sure * we've always validated NULL termination. * * TODO: When I'm done writing this, see if we * we have correctly validated NULL termination and * can use strlcpy(). :-) */ strncpy(tlb->tlb_id, id, TCP_LOG_ID_LEN - 1); tlb->tlb_id[TCP_LOG_ID_LEN - 1] = '\0'; /* * Take the refcount for the first node and go ahead * and lock this. Note that we zero the tlb_mtx * structure, since 0xdeadc0de flips the right bits * for the code to think that this mutex has already * been initialized. :-( */ SLIST_INIT(&tlb->tlb_head); refcount_init(&tlb->tlb_refcnt, 1); tlb->tlb_reqcnt = 1; memset(&tlb->tlb_mtx, 0, sizeof(struct mtx)); TCPID_BUCKET_LOCK_INIT(tlb); TCPID_BUCKET_LOCK(tlb); bucket_locked = true; #define FREE_NEW_TLB() do { \ TCPID_BUCKET_LOCK_DESTROY(tlb); \ uma_zfree(tcp_log_id_bucket_zone, tlb); \ counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1); \ counter_u64_add(tcp_log_pcb_ids_tot, (int64_t)-1); \ bucket_locked = false; \ tlb = NULL; \ } while (0) /* * Relock the INP and make sure we are still * unassigned. */ INP_WLOCK(inp); RECHECK_INP_CLEAN(FREE_NEW_TLB()); if (tp->t_lib != NULL) { FREE_NEW_TLB(); goto restart; } /* Add the new bucket to the tree. */ tmp_tlb = RB_INSERT(tcp_log_id_tree, &tcp_log_id_head, tlb); KASSERT(tmp_tlb == NULL, ("%s: Unexpected conflicting bucket (%p) while " "adding new bucket (%p)", __func__, tmp_tlb, tlb)); /* * If we found a conflicting bucket, free the new * one we made and fall through to use the existing * bucket. */ if (tmp_tlb != NULL) { FREE_NEW_TLB(); INP_WUNLOCK(inp); } #undef FREE_NEW_TLB } /* If we found an existing bucket, use it. */ if (tmp_tlb != NULL) { tlb = tmp_tlb; TCPID_BUCKET_LOCK(tlb); bucket_locked = true; /* * Relock the INP and make sure we are still * unassigned. */ INP_UNLOCK_ASSERT(inp); INP_WLOCK(inp); RECHECK_INP(); if (tp->t_lib != NULL) { TCPID_BUCKET_UNLOCK(tlb); bucket_locked = false; tlb = NULL; goto restart; } /* Take a reference on the bucket. */ TCPID_BUCKET_REF(tlb); /* Record the request. */ tcp_log_increment_reqcnt(tlb); } tcp_log_grow_tlb(tlb->tlb_id, tp); /* Add the new node to the list. */ SLIST_INSERT_HEAD(&tlb->tlb_head, tln, tln_list); tp->t_lib = tlb; tp->t_lin = tln; if (tp->t_lib->tlb_logstate > TCP_LOG_STATE_OFF) { /* Clone in any logging */ tp->_t_logstate = tp->t_lib->tlb_logstate; } if (tp->t_lib->tlb_loglimit) { /* The loglimit too */ tp->t_loglimit = tp->t_lib->tlb_loglimit; } tln = NULL; } rv = 0; done: /* Unlock things, as needed, and return. */ INP_WUNLOCK(inp); done_noinp: INP_UNLOCK_ASSERT(inp); if (bucket_locked) { TCPID_BUCKET_LOCK_ASSERT(tlb); TCPID_BUCKET_UNLOCK(tlb); } else if (tlb != NULL) TCPID_BUCKET_UNLOCK_ASSERT(tlb); if (tree_locked == TREE_WLOCKED) { TCPID_TREE_WLOCK_ASSERT(); TCPID_TREE_WUNLOCK(); } else if (tree_locked == TREE_RLOCKED) { TCPID_TREE_RLOCK_ASSERT(); TCPID_TREE_RUNLOCK(); } else TCPID_TREE_UNLOCK_ASSERT(); if (tln != NULL) uma_zfree(tcp_log_id_node_zone, tln); return (rv); } /* * Get the TCP log ID for a TCPCB. * Called with INPCB locked. 
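 * A minimal caller sketch (hypothetical, for illustration only):
 *
 *	char id[TCP_LOG_ID_LEN];
 *
 *	INP_WLOCK(inp);
 *	(void)tcp_log_get_id(tp, id);
 *	INP_WUNLOCK(inp);
 *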
* 'buf' must point to a buffer that is at least TCP_LOG_ID_LEN bytes long. * Returns number of bytes copied. */ size_t tcp_log_get_id(struct tcpcb *tp, char *buf) { size_t len; INP_LOCK_ASSERT(tptoinpcb(tp)); if (tp->t_lib != NULL) { len = strlcpy(buf, tp->t_lib->tlb_id, TCP_LOG_ID_LEN); KASSERT(len < TCP_LOG_ID_LEN, ("%s:%d: tp->t_lib->tlb_id too long (%zu)", __func__, __LINE__, len)); } else { *buf = '\0'; len = 0; } return (len); } /* * Get the tag associated with the TCPCB's log ID. * Called with INPCB locked. Returns with it unlocked. * 'buf' must point to a buffer that is at least TCP_LOG_TAG_LEN bytes long. * Returns number of bytes copied. */ size_t tcp_log_get_tag(struct tcpcb *tp, char *buf) { struct inpcb *inp = tptoinpcb(tp); struct tcp_log_id_bucket *tlb; size_t len; int tree_locked; INP_WLOCK_ASSERT(inp); tree_locked = TREE_UNLOCKED; tlb = tp->t_lib; if (tlb != NULL) { TCPID_BUCKET_REF(tlb); INP_WUNLOCK(inp); TCPID_BUCKET_LOCK(tlb); len = strlcpy(buf, tlb->tlb_tag, TCP_LOG_TAG_LEN); KASSERT(len < TCP_LOG_TAG_LEN, ("%s:%d: tp->t_lib->tlb_tag too long (%zu)", __func__, __LINE__, len)); if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL)) TCPID_BUCKET_UNLOCK(tlb); if (tree_locked == TREE_WLOCKED) { TCPID_TREE_WLOCK_ASSERT(); TCPID_TREE_WUNLOCK(); } else if (tree_locked == TREE_RLOCKED) { TCPID_TREE_RLOCK_ASSERT(); TCPID_TREE_RUNLOCK(); } else TCPID_TREE_UNLOCK_ASSERT(); } else { INP_WUNLOCK(inp); *buf = '\0'; len = 0; } return (len); } /* * Get number of connections with the same log ID. * Log ID is taken from given TCPCB. * Called with INPCB locked. */ u_int tcp_log_get_id_cnt(struct tcpcb *tp) { INP_WLOCK_ASSERT(tptoinpcb(tp)); return ((tp->t_lib == NULL) ? 0 : tp->t_lib->tlb_refcnt); } #ifdef TCPLOG_DEBUG_RINGBUF /* * Functions/macros to increment/decrement reference count for a log * entry. This should catch when we do a double-free/double-remove or * a double-add. */ static inline void _tcp_log_entry_refcnt_add(struct tcp_log_mem *log_entry, const char *func, int line) { int refcnt; refcnt = atomic_fetchadd_int(&log_entry->tlm_refcnt, 1); if (refcnt != 0) panic("%s:%d: log_entry(%p)->tlm_refcnt is %d (expected 0)", func, line, log_entry, refcnt); } #define tcp_log_entry_refcnt_add(l) \ _tcp_log_entry_refcnt_add((l), __func__, __LINE__) static inline void _tcp_log_entry_refcnt_rem(struct tcp_log_mem *log_entry, const char *func, int line) { int refcnt; refcnt = atomic_fetchadd_int(&log_entry->tlm_refcnt, -1); if (refcnt != 1) panic("%s:%d: log_entry(%p)->tlm_refcnt is %d (expected 1)", func, line, log_entry, refcnt); } #define tcp_log_entry_refcnt_rem(l) \ _tcp_log_entry_refcnt_rem((l), __func__, __LINE__) #else /* !TCPLOG_DEBUG_RINGBUF */ #define tcp_log_entry_refcnt_add(l) #define tcp_log_entry_refcnt_rem(l) #endif /* * Cleanup after removing a log entry, but only decrement the count if we * are running INVARIANTS. */ static inline void tcp_log_free_log_common(struct tcp_log_mem *log_entry, int *count __unused) { uma_zfree(tcp_log_zone, log_entry); #ifdef INVARIANTS (*count)--; KASSERT(*count >= 0, ("%s: count unexpectedly negative", __func__)); #endif } static void tcp_log_free_entries(struct tcp_log_stailq *head, int *count) { struct tcp_log_mem *log_entry; /* Free the entries. */ while ((log_entry = STAILQ_FIRST(head)) != NULL) { STAILQ_REMOVE_HEAD(head, tlm_queue); tcp_log_entry_refcnt_rem(log_entry); tcp_log_free_log_common(log_entry, count); } } /* Cleanup after removing a log entry. 
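 * Unlike tcp_log_free_log_common() above, which only adjusts its
 * external count when INVARIANTS is enabled, this helper always
 * decrements tp->t_lognum, since that counter is what gates new
 * allocations in tcp_log_event().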
*/ static inline void tcp_log_remove_log_cleanup(struct tcpcb *tp, struct tcp_log_mem *log_entry) { uma_zfree(tcp_log_zone, log_entry); tp->t_lognum--; KASSERT(tp->t_lognum >= 0, ("%s: tp->t_lognum unexpectedly negative", __func__)); } /* Remove a log entry from the head of a list. */ static inline void tcp_log_remove_log_head(struct tcpcb *tp, struct tcp_log_mem *log_entry) { KASSERT(log_entry == STAILQ_FIRST(&tp->t_logs), ("%s: attempt to remove non-HEAD log entry", __func__)); STAILQ_REMOVE_HEAD(&tp->t_logs, tlm_queue); tcp_log_entry_refcnt_rem(log_entry); tcp_log_remove_log_cleanup(tp, log_entry); } #ifdef TCPLOG_DEBUG_RINGBUF /* * Initialize the log entry's reference count, which we want to * survive allocations. */ static int tcp_log_zone_init(void *mem, int size, int flags __unused) { struct tcp_log_mem *tlm; KASSERT(size >= sizeof(struct tcp_log_mem), ("%s: unexpectedly short (%d) allocation", __func__, size)); tlm = (struct tcp_log_mem *)mem; tlm->tlm_refcnt = 0; return (0); } /* * Double check that the refcnt is zero on allocation and return. */ static int tcp_log_zone_ctor(void *mem, int size, void *args __unused, int flags __unused) { struct tcp_log_mem *tlm; KASSERT(size >= sizeof(struct tcp_log_mem), ("%s: unexpectedly short (%d) allocation", __func__, size)); tlm = (struct tcp_log_mem *)mem; if (tlm->tlm_refcnt != 0) panic("%s:%d: tlm(%p)->tlm_refcnt is %d (expected 0)", __func__, __LINE__, tlm, tlm->tlm_refcnt); return (0); } static void tcp_log_zone_dtor(void *mem, int size, void *args __unused) { struct tcp_log_mem *tlm; KASSERT(size >= sizeof(struct tcp_log_mem), ("%s: unexpectedly short (%d) allocation", __func__, size)); tlm = (struct tcp_log_mem *)mem; if (tlm->tlm_refcnt != 0) panic("%s:%d: tlm(%p)->tlm_refcnt is %d (expected 0)", __func__, __LINE__, tlm, tlm->tlm_refcnt); } #endif /* TCPLOG_DEBUG_RINGBUF */ /* Do global initialization. */ void tcp_log_init(void) { tcp_log_zone = uma_zcreate("tcp_log", sizeof(struct tcp_log_mem), #ifdef TCPLOG_DEBUG_RINGBUF tcp_log_zone_ctor, tcp_log_zone_dtor, tcp_log_zone_init, #else NULL, NULL, NULL, #endif NULL, UMA_ALIGN_PTR, 0); (void)uma_zone_set_max(tcp_log_zone, TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT); tcp_log_id_bucket_zone = uma_zcreate("tcp_log_id_bucket", sizeof(struct tcp_log_id_bucket), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); tcp_log_id_node_zone = uma_zcreate("tcp_log_id_node", sizeof(struct tcp_log_id_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); #ifdef TCPLOG_DEBUG_COUNTERS tcp_log_queued = counter_u64_alloc(M_WAITOK); tcp_log_que_fail1 = counter_u64_alloc(M_WAITOK); tcp_log_que_fail2 = counter_u64_alloc(M_WAITOK); tcp_log_que_fail3 = counter_u64_alloc(M_WAITOK); tcp_log_que_fail4 = counter_u64_alloc(M_WAITOK); tcp_log_que_fail5 = counter_u64_alloc(M_WAITOK); tcp_log_que_copyout = counter_u64_alloc(M_WAITOK); tcp_log_que_read = counter_u64_alloc(M_WAITOK); tcp_log_que_freed = counter_u64_alloc(M_WAITOK); #endif tcp_log_pcb_ids_cur = counter_u64_alloc(M_WAITOK); tcp_log_pcb_ids_tot = counter_u64_alloc(M_WAITOK); rw_init_flags(&tcp_id_tree_lock, "TCP ID tree", RW_NEW); mtx_init(&tcp_log_expireq_mtx, "TCP log expireq", NULL, MTX_DEF); callout_init(&tcp_log_expireq_callout, 1); } /* Do per-TCPCB initialization. */ void tcp_log_tcpcbinit(struct tcpcb *tp) { /* A new TCPCB should start out zero-initialized. */ STAILQ_INIT(&tp->t_logs); /* * If we are doing auto-capturing, figure out whether we will capture * this session. 
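 * Loosely speaking, and assuming the usual sysctl spellings under
 * net.inet.tcp.bb, tcp_log_auto_mode selects which capture mode an
 * auto-selected session gets, tcp_log_auto_all controls whether
 * sessions without a log ID are eligible, and tcp_log_auto_ratio makes
 * tcp_log_selectauto() accept roughly one call in every N; with a
 * ratio of 1000, about 0.1% of new sessions are auto-captured here.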
*/ tp->t_loglimit = tcp_log_session_limit; if ((tcp_log_auto_all == true) && tcp_log_auto_mode && tcp_log_selectauto()) { tp->_t_logstate = tcp_log_auto_mode; tp->t_flags2 |= TF2_LOG_AUTO; } } /* Remove entries */ static void tcp_log_expire(void *unused __unused) { struct tcp_log_id_bucket *tlb; struct tcp_log_id_node *tln; sbintime_t expiry_limit; int tree_locked; TCPLOG_EXPIREQ_LOCK(); if (callout_pending(&tcp_log_expireq_callout)) { /* Callout was reset. */ TCPLOG_EXPIREQ_UNLOCK(); return; } /* * Process entries until we reach one that expires too far in the * future. Look one second in the future. */ expiry_limit = getsbinuptime() + SBT_1S; tree_locked = TREE_UNLOCKED; while ((tln = STAILQ_FIRST(&tcp_log_expireq_head)) != NULL && tln->tln_expiretime <= expiry_limit) { if (!callout_active(&tcp_log_expireq_callout)) { /* * Callout was stopped. I guess we should * just quit at this point. */ TCPLOG_EXPIREQ_UNLOCK(); return; } /* * Remove the node from the head of the list and unlock * the list. Change the expiry time to SBT_MAX as a signal * to other threads that we now own this. */ STAILQ_REMOVE_HEAD(&tcp_log_expireq_head, tln_expireq); tln->tln_expiretime = SBT_MAX; TCPLOG_EXPIREQ_UNLOCK(); /* * Remove the node from the bucket. */ tlb = tln->tln_bucket; TCPID_BUCKET_LOCK(tlb); if (tcp_log_remove_id_node(NULL, NULL, tlb, tln, &tree_locked)) { tcp_log_id_validate_tree_lock(tree_locked); if (tree_locked == TREE_WLOCKED) TCPID_TREE_WUNLOCK(); else TCPID_TREE_RUNLOCK(); tree_locked = TREE_UNLOCKED; } /* Drop the INP reference. */ INP_WLOCK(tln->tln_inp); if (!in_pcbrele_wlocked(tln->tln_inp)) INP_WUNLOCK(tln->tln_inp); /* Free the log records. */ tcp_log_free_entries(&tln->tln_entries, &tln->tln_count); /* Free the node. */ uma_zfree(tcp_log_id_node_zone, tln); /* Relock the expiry queue. */ TCPLOG_EXPIREQ_LOCK(); } /* * We've expired all the entries we can. Do we need to reschedule * ourselves? */ callout_deactivate(&tcp_log_expireq_callout); if (tln != NULL) { /* * Get max(now + TCP_LOG_EXPIRE_INTVL, tln->tln_expiretime) and * set the next callout to that. (This helps ensure we generally * run the callout no more often than desired.) */ expiry_limit = getsbinuptime() + TCP_LOG_EXPIRE_INTVL; if (expiry_limit < tln->tln_expiretime) expiry_limit = tln->tln_expiretime; callout_reset_sbt(&tcp_log_expireq_callout, expiry_limit, SBT_1S, tcp_log_expire, NULL, C_ABSOLUTE); } /* We're done. */ TCPLOG_EXPIREQ_UNLOCK(); return; } /* * Move log data from the TCPCB to a new node. This will reset the TCPCB log * entries and log count; however, it will not touch other things from the * TCPCB (e.g. t_lin, t_lib). * * NOTE: Must hold a lock on the INP. */ static void tcp_log_move_tp_to_node(struct tcpcb *tp, struct tcp_log_id_node *tln) { struct inpcb *inp = tptoinpcb(tp); INP_WLOCK_ASSERT(inp); tln->tln_ie = inp->inp_inc.inc_ie; if (inp->inp_inc.inc_flags & INC_ISIPV6) tln->tln_af = AF_INET6; else tln->tln_af = AF_INET; tln->tln_entries = tp->t_logs; tln->tln_count = tp->t_lognum; tln->tln_bucket = tp->t_lib; /* Clear information from the PCB. 
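 * Ownership of the log entries has just moved to the node, so the
 * TCPCB must forget about them; otherwise the same entries could be
 * freed twice when both the node and the TCPCB are eventually torn
 * down.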
*/ STAILQ_INIT(&tp->t_logs); tp->t_lognum = 0; } /* Do per-TCPCB cleanup */ void tcp_log_tcpcbfini(struct tcpcb *tp) { struct tcp_log_id_node *tln, *tln_first; struct tcp_log_mem *log_entry; sbintime_t callouttime; INP_WLOCK_ASSERT(tptoinpcb(tp)); if (tp->_t_logstate) { union tcp_log_stackspecific log; struct timeval tv; #ifdef TCP_ACCOUNTING struct tcp_log_buffer *lgb; int i; memset(&log, 0, sizeof(log)); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { for (i = 0; i < TCP_NUM_CNT_COUNTERS; i++) { log.u_raw.u64_flex[i] = tp->tcp_cnt_counters[i]; } lgb = tcp_log_event(tp, NULL, NULL, NULL, TCP_LOG_ACCOUNTING, 0, 0, &log, false, NULL, NULL, 0, &tv); if (lgb != NULL) { lgb->tlb_flex1 = TCP_NUM_CNT_COUNTERS; lgb->tlb_flex2 = 1; } else goto skip_out; for (i = 0; i < TCP_NUM_CNT_COUNTERS; i++) { log.u_raw.u64_flex[i] = tp->tcp_proc_time[i]; } lgb = tcp_log_event(tp, NULL, NULL, NULL, TCP_LOG_ACCOUNTING, 0, 0, &log, false, NULL, NULL, 0, &tv); if (lgb != NULL) { lgb->tlb_flex1 = TCP_NUM_CNT_COUNTERS; lgb->tlb_flex2 = 2; } } skip_out: #endif log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.cur_del_rate = tp->t_end_info; (void)tcp_log_event(tp, NULL, NULL, NULL, TCP_LOG_CONNEND, 0, 0, &log, false, NULL, NULL, 0, &tv); } /* * If we were gathering packets to be automatically dumped, try to do * it now. If this succeeds, the log information in the TCPCB will be * cleared. Otherwise, we'll handle the log information as we do * for other states. */ switch(tp->_t_logstate) { case TCP_LOG_STATE_HEAD_AUTO: (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head", M_NOWAIT, false); break; case TCP_LOG_STATE_TAIL_AUTO: (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail", M_NOWAIT, false); break; case TCP_LOG_VIA_BBPOINTS: (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints", M_NOWAIT, false); break; case TCP_LOG_STATE_CONTINUAL: (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", M_NOWAIT, false); break; } /* * There are two ways we could keep logs: per-socket or per-ID. If * we are tracking logs with an ID, then the logs survive the * destruction of the TCPCB. * * If the TCPCB is associated with an ID node, move the logs from the * TCPCB to the ID node. In theory, this is safe, for reasons which I * will now explain for my own benefit when I next need to figure out * this code. :-) * * We own the INP lock. Therefore, no one else can change the contents * of this node (Rule C). Further, no one can remove this node from * the bucket while we hold the lock (Rule D). Basically, no one can * mess with this node. That leaves two states in which we could be: * * 1. Another thread is currently waiting to acquire the INP lock, with * plans to do something with this node. When we drop the INP lock, * they will have a chance to do that. They will recheck the * tln_closed field (see note to Rule C) and then acquire the * bucket lock before proceeding further. * * 2. Another thread will try to acquire a lock at some point in the * future. If they try to acquire a lock before we set the * tln_closed field, they will follow state #1. If they try to * acquire a lock after we set the tln_closed field, they will be * able to make changes to the node, at will, following Rule C. * * Therefore, we currently own this node and can make any changes * we want. But, as soon as we set the tln_closed field to true, we * have effectively dropped our lock on the node. (For this reason, we * also need to make sure our writes are ordered correctly. An atomic * operation with "release" semantics should be sufficient.)
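 *
 * (In the code below, that release is the atomic_store_rel_int() on
 * tln_closed at the end of the t_lin != NULL branch.)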
*/ if (tp->t_lin != NULL) { struct inpcb *inp = tptoinpcb(tp); /* Copy the relevant information to the log entry. */ tln = tp->t_lin; KASSERT(tln->tln_inp == inp, ("%s: Mismatched inp (tln->tln_inp=%p, tp inpcb=%p)", __func__, tln->tln_inp, inp)); tcp_log_move_tp_to_node(tp, tln); /* Clear information from the PCB. */ tp->t_lin = NULL; tp->t_lib = NULL; /* * Take a reference on the INP. This ensures that the INP * remains valid while the node is on the expiry queue. This * ensures the INP is valid for other threads that may be * racing to lock this node when we move it to the expire * queue. */ in_pcbref(inp); /* * Store the entry on the expiry list. The exact behavior * depends on whether we have entries to keep. If so, we * put the entry at the tail of the list and expire in * TCP_LOG_EXPIRE_TIME. Otherwise, we expire "now" and put * the entry at the head of the list. (Handling the cleanup * via the expiry timer lets us avoid locking messy-ness here.) */ tln->tln_expiretime = getsbinuptime(); TCPLOG_EXPIREQ_LOCK(); if (tln->tln_count) { tln->tln_expiretime += TCP_LOG_EXPIRE_TIME; if (STAILQ_EMPTY(&tcp_log_expireq_head) && !callout_active(&tcp_log_expireq_callout)) { /* * We are adding the first entry and a callout * is not currently scheduled; therefore, we * need to schedule one. */ callout_reset_sbt(&tcp_log_expireq_callout, tln->tln_expiretime, SBT_1S, tcp_log_expire, NULL, C_ABSOLUTE); } STAILQ_INSERT_TAIL(&tcp_log_expireq_head, tln, tln_expireq); } else { callouttime = tln->tln_expiretime + TCP_LOG_EXPIRE_INTVL; tln_first = STAILQ_FIRST(&tcp_log_expireq_head); if ((tln_first == NULL || callouttime < tln_first->tln_expiretime) && (callout_pending(&tcp_log_expireq_callout) || !callout_active(&tcp_log_expireq_callout))) { /* * The list is empty, or we want to run the * expire code before the first entry's timer * fires. Also, we are in a case where a callout * is not actively running. We want to reset * the callout to occur sooner. */ callout_reset_sbt(&tcp_log_expireq_callout, callouttime, SBT_1S, tcp_log_expire, NULL, C_ABSOLUTE); } /* * Insert to the head, or just after the head, as * appropriate. (This might result in small * mis-orderings as a bunch of "expire now" entries * gather at the start of the list, but that should * not produce big problems, since the expire timer * will walk through all of them.) */ if (tln_first == NULL || tln->tln_expiretime < tln_first->tln_expiretime) STAILQ_INSERT_HEAD(&tcp_log_expireq_head, tln, tln_expireq); else STAILQ_INSERT_AFTER(&tcp_log_expireq_head, tln_first, tln, tln_expireq); } TCPLOG_EXPIREQ_UNLOCK(); /* * We are done messing with the tln. After this point, we * can't touch it. (Note that the "release" semantics should * be included with the TCPLOG_EXPIREQ_UNLOCK() call above. * Therefore, they should be unnecessary here. However, it * seems like a good idea to include them anyway, since we * really are releasing a lock here.) */ atomic_store_rel_int(&tln->tln_closed, 1); } else { /* Remove log entries. */ while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL) tcp_log_remove_log_head(tp, log_entry); KASSERT(tp->t_lognum == 0, ("%s: After freeing entries, tp->t_lognum=%d (expected 0)", __func__, tp->t_lognum)); } /* * Change the log state to off (just in case anything tries to sneak * in a last-minute log). 
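 * With _t_logstate forced to TCP_LOG_STATE_OFF, any straggling
 * TCP_LOG_EVENT() calls should turn into no-ops instead of appending
 * to a list this TCPCB no longer owns.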
*/ tp->_t_logstate = TCP_LOG_STATE_OFF; } static void tcp_log_purge_tp_logbuf(struct tcpcb *tp) { struct tcp_log_mem *log_entry; INP_WLOCK_ASSERT(tptoinpcb(tp)); if (tp->t_lognum == 0) return; while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL) tcp_log_remove_log_head(tp, log_entry); KASSERT(tp->t_lognum == 0, ("%s: After freeing entries, tp->t_lognum=%d (expected 0)", __func__, tp->t_lognum)); tp->_t_logstate = TCP_LOG_STATE_OFF; } /* * This logs an event for a TCP socket. Normally, this is called via * TCP_LOG_EVENT or TCP_LOG_EVENT_VERBOSE. See the documentation for * TCP_LOG_EVENT(). */ struct tcp_log_buffer * tcp_log_event(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len, union tcp_log_stackspecific *stackinfo, int th_hostorder, const char *output_caller, const char *func, int line, const struct timeval *itv) { struct tcp_log_mem *log_entry; struct tcp_log_buffer *log_buf; int attempt_count = 0; struct tcp_log_verbose *log_verbose; uint32_t logsn; KASSERT((func == NULL && line == 0) || (func != NULL && line > 0), ("%s called with inconsistent func (%p) and line (%d) arguments", __func__, func, line)); INP_WLOCK_ASSERT(tptoinpcb(tp)); if (tcp_disable_all_bb_logs) { /* * The global shutdown logging * switch has been thrown. Call * the purge function that frees * purges out the logs and * turns off logging. */ tcp_log_purge_tp_logbuf(tp); return (NULL); } KASSERT(tp->_t_logstate == TCP_LOG_STATE_HEAD || tp->_t_logstate == TCP_LOG_STATE_TAIL || tp->_t_logstate == TCP_LOG_STATE_CONTINUAL || tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO || tp->_t_logstate == TCP_LOG_VIA_BBPOINTS || tp->_t_logstate == TCP_LOG_STATE_TAIL_AUTO, ("%s called with unexpected tp->_t_logstate (%d)", __func__, tp->_t_logstate)); /* * Get the serial number. We do this early so it will * increment even if we end up skipping the log entry for some * reason. */ logsn = tp->t_logsn++; /* * Can we get a new log entry? If so, increment the lognum counter * here. */ retry: if (tp->t_lognum < tp->t_loglimit) { if ((log_entry = uma_zalloc(tcp_log_zone, M_NOWAIT)) != NULL) tp->t_lognum++; } else log_entry = NULL; /* Do we need to try to reuse? */ if (log_entry == NULL) { /* * Sacrifice auto-logged sessions without a log ID if * tcp_log_auto_all is false. (If they don't have a log * ID by now, it is probable that either they won't get one * or we are resource-constrained.) */ if (tp->t_lib == NULL && (tp->t_flags2 & TF2_LOG_AUTO) && !tcp_log_auto_all) { if (tcp_log_state_change(tp, TCP_LOG_STATE_CLEAR)) { #ifdef INVARIANTS panic("%s:%d: tcp_log_state_change() failed " "to set tp %p to TCP_LOG_STATE_CLEAR", __func__, __LINE__, tp); #endif tp->_t_logstate = TCP_LOG_STATE_OFF; } return (NULL); } /* * If we are in TCP_LOG_STATE_HEAD_AUTO state, try to dump * the buffers. If successful, deactivate tracing. Otherwise, * leave it active so we will retry. 
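 * The CONTINUAL and BBPOINTS cases below behave similarly, except that
 * a successful dump there frees up buffers, so the allocation is
 * retried once (via attempt_count) rather than logging being switched
 * off.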
*/ if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO && !tcp_log_dump_tp_logbuf(tp, "auto-dumped from head", M_NOWAIT, false)) { tp->_t_logstate = TCP_LOG_STATE_OFF; return(NULL); } else if ((tp->_t_logstate == TCP_LOG_STATE_CONTINUAL) && !tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", M_NOWAIT, false)) { if (attempt_count == 0) { attempt_count++; goto retry; } #ifdef TCPLOG_DEBUG_COUNTERS counter_u64_add(tcp_log_que_fail4, 1); #endif return(NULL); } else if ((tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) && !tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints", M_NOWAIT, false)) { if (attempt_count == 0) { attempt_count++; goto retry; } #ifdef TCPLOG_DEBUG_COUNTERS counter_u64_add(tcp_log_que_fail4, 1); #endif return(NULL); } else if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO) return(NULL); /* If in HEAD state, just deactivate the tracing and return. */ if (tp->_t_logstate == TCP_LOG_STATE_HEAD) { tp->_t_logstate = TCP_LOG_STATE_OFF; return(NULL); } /* * Get a buffer to reuse. If that fails, just give up. * (We can't log anything without a buffer in which to * put it.) * * Note that we don't change the t_lognum counter * here. Because we are re-using the buffer, the total * number won't change. */ if ((log_entry = STAILQ_FIRST(&tp->t_logs)) == NULL) return(NULL); STAILQ_REMOVE_HEAD(&tp->t_logs, tlm_queue); tcp_log_entry_refcnt_rem(log_entry); } KASSERT(log_entry != NULL, ("%s: log_entry unexpectedly NULL", __func__)); /* Extract the log buffer and verbose buffer pointers. */ log_buf = &log_entry->tlm_buf; log_verbose = &log_entry->tlm_v; /* Basic entries. */ if (itv == NULL) microuptime(&log_buf->tlb_tv); else memcpy(&log_buf->tlb_tv, itv, sizeof(struct timeval)); log_buf->tlb_ticks = ticks; log_buf->tlb_sn = logsn; log_buf->tlb_stackid = tp->t_fb->tfb_id; log_buf->tlb_eventid = eventid; log_buf->tlb_eventflags = 0; log_buf->tlb_errno = errornum; /* Socket buffers */ if (rxbuf != NULL) { log_buf->tlb_eventflags |= TLB_FLAG_RXBUF; log_buf->tlb_rxbuf.tls_sb_acc = rxbuf->sb_acc; log_buf->tlb_rxbuf.tls_sb_ccc = rxbuf->sb_ccc; log_buf->tlb_rxbuf.tls_sb_spare = 0; } else { log_buf->tlb_rxbuf.tls_sb_acc = 0; log_buf->tlb_rxbuf.tls_sb_ccc = 0; } if (txbuf != NULL) { log_buf->tlb_eventflags |= TLB_FLAG_TXBUF; log_buf->tlb_txbuf.tls_sb_acc = txbuf->sb_acc; log_buf->tlb_txbuf.tls_sb_ccc = txbuf->sb_ccc; log_buf->tlb_txbuf.tls_sb_spare = 0; } else { log_buf->tlb_txbuf.tls_sb_acc = 0; log_buf->tlb_txbuf.tls_sb_ccc = 0; } /* Copy values from tp to the log entry. 
*/ log_buf->tlb_state = tp->t_state; log_buf->tlb_starttime = tp->t_starttime; log_buf->tlb_iss = tp->iss; log_buf->tlb_flags = tp->t_flags; log_buf->tlb_snd_una = tp->snd_una; log_buf->tlb_snd_max = tp->snd_max; log_buf->tlb_snd_cwnd = tp->snd_cwnd; log_buf->tlb_snd_nxt = tp->snd_nxt; log_buf->tlb_snd_recover = tp->snd_recover; log_buf->tlb_snd_wnd = tp->snd_wnd; log_buf->tlb_snd_ssthresh = tp->snd_ssthresh; log_buf->tlb_srtt = tp->t_srtt; log_buf->tlb_rttvar = tp->t_rttvar; log_buf->tlb_rcv_up = tp->rcv_up; log_buf->tlb_rcv_adv = tp->rcv_adv; log_buf->tlb_flags2 = tp->t_flags2; log_buf->tlb_rcv_nxt = tp->rcv_nxt; log_buf->tlb_rcv_wnd = tp->rcv_wnd; log_buf->tlb_dupacks = tp->t_dupacks; log_buf->tlb_segqlen = tp->t_segqlen; log_buf->tlb_snd_numholes = tp->snd_numholes; log_buf->tlb_flex1 = 0; log_buf->tlb_flex2 = 0; log_buf->tlb_fbyte_in = tp->t_fbyte_in; log_buf->tlb_fbyte_out = tp->t_fbyte_out; log_buf->tlb_snd_scale = tp->snd_scale; log_buf->tlb_rcv_scale = tp->rcv_scale; log_buf->_pad[0] = 0; log_buf->_pad[1] = 0; log_buf->_pad[2] = 0; /* Copy stack-specific info. */ if (stackinfo != NULL) { memcpy(&log_buf->tlb_stackinfo, stackinfo, sizeof(log_buf->tlb_stackinfo)); log_buf->tlb_eventflags |= TLB_FLAG_STACKINFO; } /* The packet */ log_buf->tlb_len = len; if (th) { int optlen; log_buf->tlb_eventflags |= TLB_FLAG_HDR; log_buf->tlb_th = *th; if (th_hostorder) tcp_fields_to_net(&log_buf->tlb_th); optlen = (th->th_off << 2) - sizeof (struct tcphdr); if (optlen > 0) memcpy(log_buf->tlb_opts, th + 1, optlen); } else { memset(&log_buf->tlb_th, 0, sizeof(*th)); } /* Verbose information */ if (func != NULL) { log_buf->tlb_eventflags |= TLB_FLAG_VERBOSE; if (output_caller != NULL) strlcpy(log_verbose->tlv_snd_frm, output_caller, TCP_FUNC_LEN); else *log_verbose->tlv_snd_frm = 0; strlcpy(log_verbose->tlv_trace_func, func, TCP_FUNC_LEN); log_verbose->tlv_trace_line = line; } /* Insert the new log at the tail. */ STAILQ_INSERT_TAIL(&tp->t_logs, log_entry, tlm_queue); tcp_log_entry_refcnt_add(log_entry); return (log_buf); } /* * Change the logging state for a TCPCB. Returns 0 on success or an * error code on failure. */ int tcp_log_state_change(struct tcpcb *tp, int state) { struct tcp_log_mem *log_entry; int rv; INP_WLOCK_ASSERT(tptoinpcb(tp)); rv = 0; switch(state) { case TCP_LOG_STATE_CLEAR: while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL) tcp_log_remove_log_head(tp, log_entry); /* FALLTHROUGH */ case TCP_LOG_STATE_OFF: tp->_t_logstate = TCP_LOG_STATE_OFF; break; case TCP_LOG_STATE_TAIL: case TCP_LOG_STATE_HEAD: case TCP_LOG_STATE_CONTINUAL: case TCP_LOG_VIA_BBPOINTS: case TCP_LOG_STATE_HEAD_AUTO: case TCP_LOG_STATE_TAIL_AUTO: /* * When the RATIO_OFF state is set for the bucket, the log ID * this tp is associated with has been probabilistically opted * out of logging per tcp_log_apply_ratio(). */ if (tp->t_lib == NULL || tp->t_lib->tlb_logstate != TCP_LOG_STATE_RATIO_OFF) { tp->_t_logstate = state; } else { rv = ECANCELED; tp->_t_logstate = TCP_LOG_STATE_OFF; } break; default: return (EINVAL); } if (tcp_disable_all_bb_logs) { /* We are prohibited from doing any logs */ tp->_t_logstate = TCP_LOG_STATE_OFF; rv = EBUSY; } tp->t_flags2 &= ~(TF2_LOG_AUTO); return (rv); } /* If tcp_drain() is called, flush half the log entries. */ void tcp_log_drain(struct tcpcb *tp) { struct tcp_log_mem *log_entry, *next; int target, skip; INP_WLOCK_ASSERT(tptoinpcb(tp)); if ((target = tp->t_lognum / 2) == 0) return; /* * XXXRRS: At this I don't think this is wise that * we do this. 
All that a drain call means is that * we are hitting one of the system mbuf limits. BB * logging, or freeing of them, will not create any * more mbufs and really has nothing to do with * the system running out of mbufs. For now I * am changing this to free any "AUTO" by dumping * them out. But this should either be changed * so that it gets called when we hit the BB limit * or it should just not get called (one of the two) * since I don't think the mbuf <-> BB log cleanup * is the right thing to do here. */ /* * If we are logging the "head" packets, we want to discard * from the tail of the queue. Otherwise, we want to discard * from the head. */ if (tp->_t_logstate == TCP_LOG_STATE_HEAD) { skip = tp->t_lognum - target; STAILQ_FOREACH(log_entry, &tp->t_logs, tlm_queue) if (!--skip) break; KASSERT(log_entry != NULL, ("%s: skipped through all entries!", __func__)); if (log_entry == NULL) return; while ((next = STAILQ_NEXT(log_entry, tlm_queue)) != NULL) { STAILQ_REMOVE_AFTER(&tp->t_logs, log_entry, tlm_queue); tcp_log_entry_refcnt_rem(next); tcp_log_remove_log_cleanup(tp, next); #ifdef INVARIANTS target--; #endif } KASSERT(target == 0, ("%s: After removing from tail, target was %d", __func__, target)); } else if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO) { (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head at drain", M_NOWAIT, false); } else if (tp->_t_logstate == TCP_LOG_STATE_TAIL_AUTO) { (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail at drain", M_NOWAIT, false); } else if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) { (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints", M_NOWAIT, false); } else if (tp->_t_logstate == TCP_LOG_STATE_CONTINUAL) { (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", M_NOWAIT, false); } else { while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL && target--) tcp_log_remove_log_head(tp, log_entry); KASSERT(target <= 0, ("%s: After removing from head, target was %d", __func__, target)); KASSERT(tp->t_lognum > 0, ("%s: After removing from head, tp->t_lognum was %d", __func__, target)); KASSERT(log_entry != NULL, ("%s: After removing from head, the tailq was empty", __func__)); } } static inline int tcp_log_copyout(struct sockopt *sopt, void *src, void *dst, size_t len) { if (sopt->sopt_td != NULL) return (copyout(src, dst, len)); bcopy(src, dst, len); return (0); } static int tcp_log_logs_to_buf(struct sockopt *sopt, struct tcp_log_stailq *log_tailqp, struct tcp_log_buffer **end, int count) { struct tcp_log_buffer *out_entry; struct tcp_log_mem *log_entry; size_t entrysize; int error; #ifdef INVARIANTS int orig_count = count; #endif /* Copy the data out. */ error = 0; out_entry = (struct tcp_log_buffer *) sopt->sopt_val; STAILQ_FOREACH(log_entry, log_tailqp, tlm_queue) { count--; KASSERT(count >= 0, ("%s:%d: Exceeded expected count (%d) processing list %p", __func__, __LINE__, orig_count, log_tailqp)); #ifdef TCPLOG_DEBUG_COUNTERS counter_u64_add(tcp_log_que_copyout, 1); #endif /* * Skip copying out the header if it isn't present. * Instead, copy out zeros (to ensure we don't leak info). * TODO: Make sure we truly do zero everything we don't * explicitly set. 
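 * Concretely, when TLB_FLAG_HDR is clear only the first
 * offsetof(struct tcp_log_buffer, tlb_th) bytes come from the entry;
 * the rest of the record is filled from zerobuf so userland never sees
 * stale header bytes.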
*/ if (log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_HDR) entrysize = sizeof(struct tcp_log_buffer); else entrysize = offsetof(struct tcp_log_buffer, tlb_th); error = tcp_log_copyout(sopt, &log_entry->tlm_buf, out_entry, entrysize); if (error) break; if (!(log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_HDR)) { error = tcp_log_copyout(sopt, zerobuf, ((uint8_t *)out_entry) + entrysize, sizeof(struct tcp_log_buffer) - entrysize); } /* * Copy out the verbose bit, if needed. Either way, * increment the output pointer the correct amount. */ if (log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_VERBOSE) { error = tcp_log_copyout(sopt, &log_entry->tlm_v, out_entry->tlb_verbose, sizeof(struct tcp_log_verbose)); if (error) break; out_entry = (struct tcp_log_buffer *) (((uint8_t *) (out_entry + 1)) + sizeof(struct tcp_log_verbose)); } else out_entry++; } *end = out_entry; KASSERT(error || count == 0, ("%s:%d: Less than expected count (%d) processing list %p" " (%d remain)", __func__, __LINE__, orig_count, log_tailqp, count)); return (error); } /* * Copy out the buffer. Note that we do incremental copying, so * sooptcopyout() won't work. However, the goal is to produce the same * end result as if we copied in the entire user buffer, updated it, * and then used sooptcopyout() to copy it out. * * NOTE: This should be called with a write lock on the PCB; however, * the function will drop it after it extracts the data from the TCPCB. */ int tcp_log_getlogbuf(struct sockopt *sopt, struct tcpcb *tp) { struct tcp_log_stailq log_tailq; struct tcp_log_mem *log_entry, *log_next; struct tcp_log_buffer *out_entry; struct inpcb *inp = tptoinpcb(tp); size_t outsize, entrysize; int error, outnum; INP_WLOCK_ASSERT(inp); /* * Determine which log entries will fit in the buffer. As an * optimization, skip this if all the entries will clearly fit * in the buffer. (However, get an exact size if we are using * INVARIANTS.) */ #ifndef INVARIANTS if (sopt->sopt_valsize / (sizeof(struct tcp_log_buffer) + sizeof(struct tcp_log_verbose)) >= tp->t_lognum) { log_entry = STAILQ_LAST(&tp->t_logs, tcp_log_mem, tlm_queue); log_next = NULL; outsize = 0; outnum = tp->t_lognum; } else { #endif outsize = outnum = 0; log_entry = NULL; STAILQ_FOREACH(log_next, &tp->t_logs, tlm_queue) { entrysize = sizeof(struct tcp_log_buffer); if (log_next->tlm_buf.tlb_eventflags & TLB_FLAG_VERBOSE) entrysize += sizeof(struct tcp_log_verbose); if ((sopt->sopt_valsize - outsize) < entrysize) break; outsize += entrysize; outnum++; log_entry = log_next; } KASSERT(outsize <= sopt->sopt_valsize, ("%s: calculated output size (%zu) greater than available" "space (%zu)", __func__, outsize, sopt->sopt_valsize)); #ifndef INVARIANTS } #endif /* * Copy traditional sooptcopyout() behavior: if sopt->sopt_val * is NULL, silently skip the copy. However, in this case, we * will leave the list alone and return. Functionally, this * gives userspace a way to poll for an approximate buffer * size they will need to get the log entries. */ if (sopt->sopt_val == NULL) { INP_WUNLOCK(inp); if (outsize == 0) { outsize = outnum * (sizeof(struct tcp_log_buffer) + sizeof(struct tcp_log_verbose)); } if (sopt->sopt_valsize > outsize) sopt->sopt_valsize = outsize; return (0); } /* * Break apart the list. We'll save the ones we want to copy * out locally and remove them from the TCPCB list. We can * then drop the INPCB lock while we do the copyout. * * There are roughly three cases: * 1. There was nothing to copy out. That's easy: drop the * lock and return. * 2. 
We are copying out the entire list. Again, that's easy: * move the whole list. * 3. We are copying out a partial list. That's harder. We * need to update the list book-keeping entries. */ if (log_entry != NULL && log_next == NULL) { /* Move entire list. */ KASSERT(outnum == tp->t_lognum, ("%s:%d: outnum (%d) should match tp->t_lognum (%d)", __func__, __LINE__, outnum, tp->t_lognum)); log_tailq = tp->t_logs; tp->t_lognum = 0; STAILQ_INIT(&tp->t_logs); } else if (log_entry != NULL) { /* Move partial list. */ KASSERT(outnum < tp->t_lognum, ("%s:%d: outnum (%d) not less than tp->t_lognum (%d)", __func__, __LINE__, outnum, tp->t_lognum)); STAILQ_FIRST(&log_tailq) = STAILQ_FIRST(&tp->t_logs); STAILQ_FIRST(&tp->t_logs) = STAILQ_NEXT(log_entry, tlm_queue); KASSERT(STAILQ_NEXT(log_entry, tlm_queue) != NULL, ("%s:%d: tp->t_logs is unexpectedly shorter than expected" "(tp: %p, log_tailq: %p, outnum: %d, tp->t_lognum: %d)", __func__, __LINE__, tp, &log_tailq, outnum, tp->t_lognum)); STAILQ_NEXT(log_entry, tlm_queue) = NULL; log_tailq.stqh_last = &STAILQ_NEXT(log_entry, tlm_queue); tp->t_lognum -= outnum; } else STAILQ_INIT(&log_tailq); /* Drop the PCB lock. */ INP_WUNLOCK(inp); /* Copy the data out. */ error = tcp_log_logs_to_buf(sopt, &log_tailq, &out_entry, outnum); if (error) { /* Restore list */ INP_WLOCK(inp); if ((inp->inp_flags & INP_DROPPED) == 0) { tp = intotcpcb(inp); /* Merge the two lists. */ STAILQ_CONCAT(&log_tailq, &tp->t_logs); tp->t_logs = log_tailq; tp->t_lognum += outnum; } INP_WUNLOCK(inp); } else { /* Sanity check entries */ KASSERT(((caddr_t)out_entry - (caddr_t)sopt->sopt_val) == outsize, ("%s: Actual output size (%zu) != " "calculated output size (%zu)", __func__, (size_t)((caddr_t)out_entry - (caddr_t)sopt->sopt_val), outsize)); /* Free the entries we just copied out. */ STAILQ_FOREACH_SAFE(log_entry, &log_tailq, tlm_queue, log_next) { tcp_log_entry_refcnt_rem(log_entry); uma_zfree(tcp_log_zone, log_entry); } } sopt->sopt_valsize = (size_t)((caddr_t)out_entry - (caddr_t)sopt->sopt_val); return (error); } static void tcp_log_free_queue(struct tcp_log_dev_queue *param) { struct tcp_log_dev_log_queue *entry; KASSERT(param != NULL, ("%s: called with NULL param", __func__)); if (param == NULL) return; entry = (struct tcp_log_dev_log_queue *)param; /* Free the entries. */ tcp_log_free_entries(&entry->tldl_entries, &entry->tldl_count); /* Free the buffer, if it is allocated. */ if (entry->tldl_common.tldq_buf != NULL) free(entry->tldl_common.tldq_buf, M_TCPLOGDEV); /* Free the queue entry. */ free(entry, M_TCPLOGDEV); } static struct tcp_log_common_header * tcp_log_expandlogbuf(struct tcp_log_dev_queue *param) { struct tcp_log_dev_log_queue *entry; struct tcp_log_header *hdr; uint8_t *end; struct sockopt sopt; int error; entry = (struct tcp_log_dev_log_queue *)param; /* Take a worst-case guess at space needs. */ sopt.sopt_valsize = sizeof(struct tcp_log_header) + entry->tldl_count * (sizeof(struct tcp_log_buffer) + sizeof(struct tcp_log_verbose)); hdr = malloc(sopt.sopt_valsize, M_TCPLOGDEV, M_NOWAIT); if (hdr == NULL) { #ifdef TCPLOG_DEBUG_COUNTERS counter_u64_add(tcp_log_que_fail5, entry->tldl_count); #endif return (NULL); } sopt.sopt_val = hdr + 1; sopt.sopt_valsize -= sizeof(struct tcp_log_header); sopt.sopt_td = NULL; error = tcp_log_logs_to_buf(&sopt, &entry->tldl_entries, (struct tcp_log_buffer **)&end, entry->tldl_count); if (error) { free(hdr, M_TCPLOGDEV); return (NULL); } /* Free the entries. 
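 * (They have just been serialized into the buffer following hdr, so
 * the originals are no longer needed.)  The finished buffer, a struct
 * tcp_log_header followed immediately by the packed records, is what
 * eventually gets handed to readers of the BB log device.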
*/ tcp_log_free_entries(&entry->tldl_entries, &entry->tldl_count); entry->tldl_count = 0; memset(hdr, 0, sizeof(struct tcp_log_header)); hdr->tlh_version = TCP_LOG_BUF_VER; hdr->tlh_type = TCP_LOG_DEV_TYPE_BBR; hdr->tlh_length = end - (uint8_t *)hdr; hdr->tlh_ie = entry->tldl_ie; hdr->tlh_af = entry->tldl_af; getboottime(&hdr->tlh_offset); strlcpy(hdr->tlh_id, entry->tldl_id, TCP_LOG_ID_LEN); strlcpy(hdr->tlh_tag, entry->tldl_tag, TCP_LOG_TAG_LEN); strlcpy(hdr->tlh_reason, entry->tldl_reason, TCP_LOG_REASON_LEN); return ((struct tcp_log_common_header *)hdr); } /* * Queue the tcpcb's log buffer for transmission via the log buffer facility. * * NOTE: This should be called with a write lock on the PCB. * * how should be M_WAITOK or M_NOWAIT. If M_WAITOK, the function will drop * and reacquire the INP lock if it needs to do so. * * If force is false, this will only dump auto-logged sessions if * tcp_log_auto_all is true or if there is a log ID defined for the session. */ int tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force) { struct tcp_log_dev_log_queue *entry; struct inpcb *inp = tptoinpcb(tp); #ifdef TCPLOG_DEBUG_COUNTERS int num_entries; #endif INP_WLOCK_ASSERT(inp); /* If there are no log entries, there is nothing to do. */ if (tp->t_lognum == 0) return (0); /* Check for a log ID. */ if (tp->t_lib == NULL && (tp->t_flags2 & TF2_LOG_AUTO) && !tcp_log_auto_all && !force) { struct tcp_log_mem *log_entry; /* * We needed a log ID and none was found. Free the log entries * and return success. Also, cancel further logging. If the * session doesn't have a log ID by now, we'll assume it isn't * going to get one. */ while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL) tcp_log_remove_log_head(tp, log_entry); KASSERT(tp->t_lognum == 0, ("%s: After freeing entries, tp->t_lognum=%d (expected 0)", __func__, tp->t_lognum)); tp->_t_logstate = TCP_LOG_STATE_OFF; return (0); } /* * Allocate memory. If we must wait, we'll need to drop the locks * and reacquire them (and do all the related business that goes * along with that). */ entry = malloc(sizeof(struct tcp_log_dev_log_queue), M_TCPLOGDEV, M_NOWAIT); if (entry == NULL && (how & M_NOWAIT)) { #ifdef TCPLOG_DEBUG_COUNTERS counter_u64_add(tcp_log_que_fail3, 1); #endif return (ENOBUFS); } if (entry == NULL) { INP_WUNLOCK(inp); entry = malloc(sizeof(struct tcp_log_dev_log_queue), M_TCPLOGDEV, M_WAITOK); INP_WLOCK(inp); /* * Note that this check is slightly overly-restrictive in * that the TCB can survive either of these events. * However, there is currently not a good way to ensure * that is the case. So, if we hit this M_WAIT path, we * may end up dropping some entries. That seems like a * small price to pay for safety. */ if (inp->inp_flags & INP_DROPPED) { free(entry, M_TCPLOGDEV); #ifdef TCPLOG_DEBUG_COUNTERS counter_u64_add(tcp_log_que_fail2, 1); #endif return (ECONNRESET); } tp = intotcpcb(inp); if (tp->t_lognum == 0) { free(entry, M_TCPLOGDEV); return (0); } } /* Fill in the unique parts of the queue entry. 
*/ if (tp->t_lib != NULL) { strlcpy(entry->tldl_id, tp->t_lib->tlb_id, TCP_LOG_ID_LEN); strlcpy(entry->tldl_tag, tp->t_lib->tlb_tag, TCP_LOG_TAG_LEN); } else { strlcpy(entry->tldl_id, "UNKNOWN", TCP_LOG_ID_LEN); strlcpy(entry->tldl_tag, "UNKNOWN", TCP_LOG_TAG_LEN); } if (reason != NULL) strlcpy(entry->tldl_reason, reason, TCP_LOG_REASON_LEN); else strlcpy(entry->tldl_reason, "UNKNOWN", TCP_LOG_REASON_LEN); entry->tldl_ie = inp->inp_inc.inc_ie; if (inp->inp_inc.inc_flags & INC_ISIPV6) entry->tldl_af = AF_INET6; else entry->tldl_af = AF_INET; entry->tldl_entries = tp->t_logs; entry->tldl_count = tp->t_lognum; /* Fill in the common parts of the queue entry. */ entry->tldl_common.tldq_buf = NULL; entry->tldl_common.tldq_xform = tcp_log_expandlogbuf; entry->tldl_common.tldq_dtor = tcp_log_free_queue; /* Clear the log data from the TCPCB. */ #ifdef TCPLOG_DEBUG_COUNTERS num_entries = tp->t_lognum; #endif tp->t_lognum = 0; STAILQ_INIT(&tp->t_logs); /* Add the entry. If no one is listening, free the entry. */ if (tcp_log_dev_add_log((struct tcp_log_dev_queue *)entry)) { tcp_log_free_queue((struct tcp_log_dev_queue *)entry); #ifdef TCPLOG_DEBUG_COUNTERS counter_u64_add(tcp_log_que_fail1, num_entries); } else { counter_u64_add(tcp_log_queued, num_entries); #endif } return (0); } /* * Queue the log_id_node's log buffers for transmission via the log buffer * facility. * * NOTE: This should be called with the bucket locked and referenced. * * how should be M_WAITOK or M_NOWAIT. If M_WAITOK, the function will drop * and reacquire the bucket lock if it needs to do so. (The caller must * ensure that the tln is no longer on any lists so no one else will mess * with this while the lock is dropped!) */ static int tcp_log_dump_node_logbuf(struct tcp_log_id_node *tln, char *reason, int how) { struct tcp_log_dev_log_queue *entry; struct tcp_log_id_bucket *tlb; tlb = tln->tln_bucket; TCPID_BUCKET_LOCK_ASSERT(tlb); KASSERT(tlb->tlb_refcnt > 0, ("%s:%d: Called with unreferenced bucket (tln=%p, tlb=%p)", __func__, __LINE__, tln, tlb)); KASSERT(tln->tln_closed, ("%s:%d: Called for node with tln_closed==false (tln=%p)", __func__, __LINE__, tln)); /* If there are no log entries, there is nothing to do. */ if (tln->tln_count == 0) return (0); /* * Allocate memory. If we must wait, we'll need to drop the locks * and reacquire them (and do all the related business that goes * along with that). */ entry = malloc(sizeof(struct tcp_log_dev_log_queue), M_TCPLOGDEV, M_NOWAIT); if (entry == NULL && (how & M_NOWAIT)) return (ENOBUFS); if (entry == NULL) { TCPID_BUCKET_UNLOCK(tlb); entry = malloc(sizeof(struct tcp_log_dev_log_queue), M_TCPLOGDEV, M_WAITOK); TCPID_BUCKET_LOCK(tlb); } /* Fill in the common parts of the queue entry.. */ entry->tldl_common.tldq_buf = NULL; entry->tldl_common.tldq_xform = tcp_log_expandlogbuf; entry->tldl_common.tldq_dtor = tcp_log_free_queue; /* Fill in the unique parts of the queue entry. */ strlcpy(entry->tldl_id, tlb->tlb_id, TCP_LOG_ID_LEN); strlcpy(entry->tldl_tag, tlb->tlb_tag, TCP_LOG_TAG_LEN); if (reason != NULL) strlcpy(entry->tldl_reason, reason, TCP_LOG_REASON_LEN); else strlcpy(entry->tldl_reason, "UNKNOWN", TCP_LOG_REASON_LEN); entry->tldl_ie = tln->tln_ie; entry->tldl_entries = tln->tln_entries; entry->tldl_count = tln->tln_count; entry->tldl_af = tln->tln_af; /* Add the entry. If no one is listening, free the entry. 
*/ if (tcp_log_dev_add_log((struct tcp_log_dev_queue *)entry)) tcp_log_free_queue((struct tcp_log_dev_queue *)entry); return (0); } /* * Queue the log buffers for all sessions in a bucket for transmissions via * the log buffer facility. * * NOTE: This should be called with a locked bucket; however, the function * will drop the lock. */ #define LOCAL_SAVE 10 static void tcp_log_dumpbucketlogs(struct tcp_log_id_bucket *tlb, char *reason) { struct tcp_log_id_node local_entries[LOCAL_SAVE]; struct inpcb *inp; struct tcpcb *tp; struct tcp_log_id_node *cur_tln, *prev_tln, *tmp_tln; int i, num_local_entries, tree_locked; bool expireq_locked; TCPID_BUCKET_LOCK_ASSERT(tlb); /* * Take a reference on the bucket to keep it from disappearing until * we are done. */ TCPID_BUCKET_REF(tlb); /* * We'll try to create these without dropping locks. However, we * might very well need to drop locks to get memory. If that's the * case, we'll save up to 10 on the stack, and sacrifice the rest. * (Otherwise, we need to worry about finding our place again in a * potentially changed list. It just doesn't seem worth the trouble * to do that. */ expireq_locked = false; num_local_entries = 0; prev_tln = NULL; tree_locked = TREE_UNLOCKED; SLIST_FOREACH_SAFE(cur_tln, &tlb->tlb_head, tln_list, tmp_tln) { /* * If this isn't associated with a TCPCB, we can pull it off * the list now. We need to be careful that the expire timer * hasn't already taken ownership (tln_expiretime == SBT_MAX). * If so, we let the expire timer code free the data. */ if (cur_tln->tln_closed) { no_inp: /* * Get the expireq lock so we can get a consistent * read of tln_expiretime and so we can remove this * from the expireq. */ if (!expireq_locked) { TCPLOG_EXPIREQ_LOCK(); expireq_locked = true; } /* * We ignore entries with tln_expiretime == SBT_MAX. * The expire timer code already owns those. */ KASSERT(cur_tln->tln_expiretime > (sbintime_t) 0, ("%s:%d: node on the expire queue without positive " "expire time", __func__, __LINE__)); if (cur_tln->tln_expiretime == SBT_MAX) { prev_tln = cur_tln; continue; } /* Remove the entry from the expireq. */ STAILQ_REMOVE(&tcp_log_expireq_head, cur_tln, tcp_log_id_node, tln_expireq); /* Remove the entry from the bucket. */ if (prev_tln != NULL) SLIST_REMOVE_AFTER(prev_tln, tln_list); else SLIST_REMOVE_HEAD(&tlb->tlb_head, tln_list); /* * Drop the INP and bucket reference counts. Due to * lock-ordering rules, we need to drop the expire * queue lock. */ TCPLOG_EXPIREQ_UNLOCK(); expireq_locked = false; /* Drop the INP reference. */ INP_WLOCK(cur_tln->tln_inp); if (!in_pcbrele_wlocked(cur_tln->tln_inp)) INP_WUNLOCK(cur_tln->tln_inp); if (tcp_log_unref_bucket(tlb, &tree_locked, NULL)) { #ifdef INVARIANTS panic("%s: Bucket refcount unexpectedly 0.", __func__); #endif /* * Recover as best we can: free the entry we * own. */ tcp_log_free_entries(&cur_tln->tln_entries, &cur_tln->tln_count); uma_zfree(tcp_log_id_node_zone, cur_tln); goto done; } if (tcp_log_dump_node_logbuf(cur_tln, reason, M_NOWAIT)) { /* * If we have sapce, save the entries locally. * Otherwise, free them. */ if (num_local_entries < LOCAL_SAVE) { local_entries[num_local_entries] = *cur_tln; num_local_entries++; } else { tcp_log_free_entries( &cur_tln->tln_entries, &cur_tln->tln_count); } } /* No matter what, we are done with the node now. */ uma_zfree(tcp_log_id_node_zone, cur_tln); /* * Because we removed this entry from the list, prev_tln * (which tracks the previous entry still on the tlb * list) remains unchanged. 
*/ continue; } /* * If we get to this point, the session data is still held in * the TCPCB. So, we need to pull the data out of that. * * We will need to drop the expireq lock so we can lock the INP. * We can then try to extract the data the "easy" way. If that * fails, we'll save the log entries for later. */ if (expireq_locked) { TCPLOG_EXPIREQ_UNLOCK(); expireq_locked = false; } /* Lock the INP and then re-check the state. */ inp = cur_tln->tln_inp; INP_WLOCK(inp); /* * If we caught this while it was transitioning, the data * might have moved from the TCPCB to the tln (signified by * setting tln_closed to true. If so, treat this like an * inactive connection. */ if (cur_tln->tln_closed) { /* * It looks like we may have caught this connection * while it was transitioning from active to inactive. * Treat this like an inactive connection. */ INP_WUNLOCK(inp); goto no_inp; } /* * Try to dump the data from the tp without dropping the lock. * If this fails, try to save off the data locally. */ tp = cur_tln->tln_tp; if (tcp_log_dump_tp_logbuf(tp, reason, M_NOWAIT, true) && num_local_entries < LOCAL_SAVE) { tcp_log_move_tp_to_node(tp, &local_entries[num_local_entries]); local_entries[num_local_entries].tln_closed = 1; KASSERT(local_entries[num_local_entries].tln_bucket == tlb, ("%s: %d: bucket mismatch for node %p", __func__, __LINE__, cur_tln)); num_local_entries++; } INP_WUNLOCK(inp); /* * We are goint to leave the current tln on the list. It will * become the previous tln. */ prev_tln = cur_tln; } /* Drop our locks, if any. */ KASSERT(tree_locked == TREE_UNLOCKED, ("%s: %d: tree unexpectedly locked", __func__, __LINE__)); switch (tree_locked) { case TREE_WLOCKED: TCPID_TREE_WUNLOCK(); tree_locked = TREE_UNLOCKED; break; case TREE_RLOCKED: TCPID_TREE_RUNLOCK(); tree_locked = TREE_UNLOCKED; break; } if (expireq_locked) { TCPLOG_EXPIREQ_UNLOCK(); expireq_locked = false; } /* * Try again for any saved entries. tcp_log_dump_node_logbuf() is * guaranteed to free the log entries within the node. And, since * the node itself is on our stack, we don't need to free it. */ for (i = 0; i < num_local_entries; i++) tcp_log_dump_node_logbuf(&local_entries[i], reason, M_WAITOK); /* Drop our reference. */ if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL)) TCPID_BUCKET_UNLOCK(tlb); done: /* Drop our locks, if any. */ switch (tree_locked) { case TREE_WLOCKED: TCPID_TREE_WUNLOCK(); break; case TREE_RLOCKED: TCPID_TREE_RUNLOCK(); break; } if (expireq_locked) TCPLOG_EXPIREQ_UNLOCK(); } #undef LOCAL_SAVE /* * Queue the log buffers for all sessions in a bucket for transmissions via * the log buffer facility. * * NOTE: This should be called with a locked INP; however, the function * will drop the lock. */ void tcp_log_dump_tp_bucket_logbufs(struct tcpcb *tp, char *reason) { struct inpcb *inp = tptoinpcb(tp); struct tcp_log_id_bucket *tlb; int tree_locked; /* Figure out our bucket and lock it. */ INP_WLOCK_ASSERT(inp); tlb = tp->t_lib; if (tlb == NULL) { /* * No bucket; treat this like a request to dump a single * session's traces. */ (void)tcp_log_dump_tp_logbuf(tp, reason, M_WAITOK, true); INP_WUNLOCK(inp); return; } TCPID_BUCKET_REF(tlb); INP_WUNLOCK(inp); TCPID_BUCKET_LOCK(tlb); /* If we are the last reference, we have nothing more to do here. 
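 * (Being the last reference implies every node has already left this
 * bucket, so there are no per-connection buffers left to dump.)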
*/ tree_locked = TREE_UNLOCKED; if (tcp_log_unref_bucket(tlb, &tree_locked, NULL)) { switch (tree_locked) { case TREE_WLOCKED: TCPID_TREE_WUNLOCK(); break; case TREE_RLOCKED: TCPID_TREE_RUNLOCK(); break; } return; } /* Turn this over to tcp_log_dumpbucketlogs() to finish the work. */ tcp_log_dumpbucketlogs(tlb, reason); } /* * Mark the end of a flow with the current stack. A stack can add * stack-specific info to this trace event by overriding this * function (see bbr_log_flowend() for example). */ void tcp_log_flowend(struct tcpcb *tp) { if (tp->_t_logstate != TCP_LOG_STATE_OFF) { struct socket *so = tptosocket(tp); TCP_LOG_EVENT(tp, NULL, &so->so_rcv, &so->so_snd, TCP_LOG_FLOWEND, 0, 0, NULL, false); } } void tcp_log_sendfile(struct socket *so, off_t offset, size_t nbytes, int flags) { struct inpcb *inp; struct tcpcb *tp; #ifdef TCP_REQUEST_TRK struct tcp_sendfile_track *ent; int i, fnd; #endif inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_log_sendfile: inp == NULL")); /* quick check to see if logging is enabled for this connection */ tp = intotcpcb(inp); if ((inp->inp_flags & INP_DROPPED) || (tp->_t_logstate == TCP_LOG_STATE_OFF)) { return; } INP_WLOCK(inp); /* double check log state now that we have the lock */ if (inp->inp_flags & INP_DROPPED) goto done; if (tp->_t_logstate != TCP_LOG_STATE_OFF) { struct timeval tv; tcp_log_eventspecific_t log; memset(&log, 0, sizeof(log)); microuptime(&tv); log.u_sf.offset = offset; log.u_sf.length = nbytes; log.u_sf.flags = flags; TCP_LOG_EVENTP(tp, NULL, &tptosocket(tp)->so_rcv, &tptosocket(tp)->so_snd, TCP_LOG_SENDFILE, 0, 0, &log, false, &tv); } #ifdef TCP_REQUEST_TRK if (tp->t_tcpreq_req == 0) { /* No http requests to track */ goto done; } fnd = 0; if (tp->t_tcpreq_closed == 0) { /* No closed end req to track */ goto skip_closed_req; } for(i = 0; i < MAX_TCP_TRK_REQ; i++) { /* Let's see if this one can be found */ ent = &tp->t_tcpreq_info[i]; if (ent->flags == TCP_TRK_TRACK_FLG_EMPTY) { /* Not used */ continue; } if (ent->flags & TCP_TRK_TRACK_FLG_OPEN) { /* This pass does not consider open requests */ continue; } if (ent->flags & TCP_TRK_TRACK_FLG_COMP) { /* Don't look at what we have completed */ continue; } /* If we reach here it's an allocated closed end request */ if ((ent->start == offset) || ((offset > ent->start) && (offset < ent->end))){ /* Is it within this request? */ fnd = 1; } if (fnd) { /* * It is at or past the end, it's complete. */ ent->flags |= TCP_TRK_TRACK_FLG_SEQV; /* * When an entry completes we can take (snd_una + sb_cc) and know where * the end of the range really is. Note that this works since two * requests must be sequential and sendfile now is complete for *this* request. * We must use sb_ccc since the data may still be in-flight in TLS. * * We always cautiously move the end_seq only if our calculations * show it happened (just in case sf has the call to here at the wrong * place). When we go COMP we will stop coming here and hopefully be * left with the correct end_seq.
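 *
 * Illustrative example (the numbers are made up): if snd_una is 1000 and
 * so_snd.sb_ccc is 500 at the time of this call, the candidate end is 1500,
 * and end_seq is only advanced if 1500 is beyond the value recorded so far.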
*/ if (SEQ_GT((tp->snd_una + so->so_snd.sb_ccc), ent->end_seq)) ent->end_seq = tp->snd_una + so->so_snd.sb_ccc; if ((offset + nbytes) >= ent->end) { ent->flags |= TCP_TRK_TRACK_FLG_COMP; tcp_req_log_req_info(tp, ent, i, TCP_TRK_REQ_LOG_COMPLETE, offset, nbytes); } else { tcp_req_log_req_info(tp, ent, i, TCP_TRK_REQ_LOG_MOREYET, offset, nbytes); } /* We assume that sendfile never sends overlapping requests */ goto done; } } skip_closed_req: if (!fnd) { /* Ok, now let's look for open requests */ for(i = 0; i < MAX_TCP_TRK_REQ; i++) { ent = &tp->t_tcpreq_info[i]; if (ent->flags == TCP_TRK_TRACK_FLG_EMPTY) { /* Not used */ continue; } if ((ent->flags & TCP_TRK_TRACK_FLG_OPEN) == 0) continue; /* If we reach here it's an allocated open request */ if (ent->start == offset) { /* It begins this request */ ent->start_seq = tp->snd_una + tptosocket(tp)->so_snd.sb_ccc; ent->flags |= TCP_TRK_TRACK_FLG_SEQV; break; } else if (offset > ent->start) { ent->flags |= TCP_TRK_TRACK_FLG_SEQV; break; } } } #endif done: INP_WUNLOCK(inp); } + +#ifdef DDB +static void +db_print_indent(int indent) +{ + int i; + + for (i = 0; i < indent; i++) + db_printf(" "); +} + +static void +db_print_tcphdr(struct tcp_log_buffer *tlm_buf) +{ + struct sackblk sack; + struct tcphdr *th; + int cnt, i, j, opt, optlen, num_sacks; + uint32_t val, ecr; + uint16_t mss; + uint16_t flags; + + if ((tlm_buf->tlb_eventflags & TLB_FLAG_HDR) == 0) { + return; + } + th = &tlm_buf->tlb_th; + flags = tcp_get_flags(th); + if (flags & TH_FIN) { + db_printf("F"); + } + if (flags & TH_SYN) { + db_printf("S"); + } + if (flags & TH_RST) { + db_printf("R"); + } + if (flags & TH_PUSH) { + db_printf("P"); + } + if (flags & TH_ACK) { + db_printf("."); + } + if (flags & TH_URG) { + db_printf("U"); + } + if (flags & TH_ECE) { + db_printf("E"); + } + if (flags & TH_CWR) { + db_printf("W"); + } + if (flags & TH_AE) { + db_printf("A"); + } + db_printf(" %u:%u(%u)", ntohl(th->th_seq), + ntohl(th->th_seq) + tlm_buf->tlb_len, tlm_buf->tlb_len); + if (flags & TH_ACK) { + db_printf(" ack %u", ntohl(th->th_ack)); + } + db_printf(" win %u", ntohs(th->th_win)); + if (flags & TH_URG) { + db_printf(" urg %u", ntohs(th->th_urp)); + } + cnt = (th->th_off << 2) - sizeof(struct tcphdr); + if (cnt > 0) { + db_printf(" <"); + for (i = 0; i < cnt; i += optlen) { + opt = tlm_buf->tlb_opts[i]; + if (opt == TCPOPT_EOL || opt == TCPOPT_NOP) { + optlen = 1; + } else { + if (cnt - i < 2) { + break; + } + optlen = tlm_buf->tlb_opts[i + 1]; + if (optlen < 2 || optlen > cnt - i) { + break; + } + } + if (i > 0) { + db_printf(","); + } + switch (opt) { + case TCPOPT_EOL: + db_printf("eol"); + break; + case TCPOPT_NOP: + db_printf("nop"); + break; + case TCPOPT_MAXSEG: + if (optlen != TCPOLEN_MAXSEG) { + break; + } + bcopy(tlm_buf->tlb_opts + i + 2, &mss, + sizeof(uint16_t)); + db_printf("mss %u", ntohs(mss)); + break; + case TCPOPT_WINDOW: + if (optlen != TCPOLEN_WINDOW) { + break; + } + db_printf("wscale %u", + tlm_buf->tlb_opts[i + 2]); + break; + case TCPOPT_SACK_PERMITTED: + if (optlen != TCPOLEN_SACK_PERMITTED) { + break; + } + db_printf("sackOK"); + break; + case TCPOPT_SACK: + if (optlen == TCPOLEN_SACKHDR || + (optlen - 2) % TCPOLEN_SACK != 0) { + break; + } + num_sacks = (optlen - 2) / TCPOLEN_SACK; + db_printf("sack"); + for (j = 0; j < num_sacks; j++) { + bcopy(tlm_buf->tlb_opts + i + 2 + + j * TCPOLEN_SACK, &sack, + TCPOLEN_SACK); + db_printf(" %u:%u", ntohl(sack.start), +
ntohl(sack.end)); + } + break; + case TCPOPT_TIMESTAMP: + if (optlen != TCPOLEN_TIMESTAMP) { + break; + } + bcopy(tlm_buf->tlb_opts + i + 2, &val, + sizeof(uint32_t)); + bcopy(tlm_buf->tlb_opts + i + 6, &ecr, + sizeof(uint32_t)); + db_printf("TS val %u ecr %u", ntohl(val), + ntohl(ecr)); + break; + case TCPOPT_SIGNATURE: + db_printf("md5"); + if (optlen > 2) { + db_printf(" "); + } + for (j = 0; j < optlen - 2; j++) { + db_printf("%02x", + tlm_buf->tlb_opts[i + 2 + j]); + } + break; + case TCPOPT_FAST_OPEN: + db_printf("FO"); + if (optlen > 2) { + db_printf(" "); + } + for (j = 0; j < optlen - 2; j++) { + db_printf("%02x", + tlm_buf->tlb_opts[i + 2 + j]); + } + break; + default: + db_printf("opt=%u len=%u", opt, optlen); + break; + } + } + db_printf(">"); + } +} +static void +db_print_pru(struct tcp_log_buffer *tlm_buf) +{ + switch (tlm_buf->tlb_flex1) { + case PRU_ATTACH: + db_printf("ATTACH"); + break; + case PRU_DETACH: + db_printf("DETACH"); + break; + case PRU_BIND: + db_printf("BIND"); + break; + case PRU_LISTEN: + db_printf("LISTEN"); + break; + case PRU_CONNECT: + db_printf("CONNECT"); + break; + case PRU_ACCEPT: + db_printf("ACCEPT"); + break; + case PRU_DISCONNECT: + db_printf("DISCONNECT"); + break; + case PRU_SHUTDOWN: + db_printf("SHUTDOWN"); + break; + case PRU_RCVD: + db_printf("RCVD"); + break; + case PRU_SEND: + db_printf("SEND"); + break; + case PRU_ABORT: + db_printf("ABORT"); + break; + case PRU_CONTROL: + db_printf("CONTROL"); + break; + case PRU_SENSE: + db_printf("SENSE"); + break; + case PRU_RCVOOB: + db_printf("RCVOOB"); + break; + case PRU_SENDOOB: + db_printf("SENDOOB"); + break; + case PRU_SOCKADDR: + db_printf("SOCKADDR"); + break; + case PRU_PEERADDR: + db_printf("PEERADDR"); + break; + case PRU_CONNECT2: + db_printf("CONNECT2"); + break; + case PRU_FASTTIMO: + db_printf("FASTTIMO"); + break; + case PRU_SLOWTIMO: + db_printf("SLOWTIMO"); + break; + case PRU_PROTORCV: + db_printf("PROTORCV"); + break; + case PRU_PROTOSEND: + db_printf("PROTOSEND"); + break; + case PRU_SEND_EOF: + db_printf("SEND_EOF"); + break; + case PRU_SOSETLABEL: + db_printf("SOSETLABEL"); + break; + case PRU_CLOSE: + db_printf("CLOSE"); + break; + case PRU_FLUSH: + db_printf("FLUSH"); + break; + default: + db_printf("Unknown PRU (%u)", tlm_buf->tlb_flex1); + break; + } + if (tlm_buf->tlb_errno >= 0) { + db_printf(", error: %d", tlm_buf->tlb_errno); + } +} + +static void +db_print_rto(struct tcp_log_buffer *tlm_buf) +{ + tt_what what; + tt_which which; + + what = (tlm_buf->tlb_flex1 & 0xffffff00) >> 8; + which = tlm_buf->tlb_flex1 & 0x000000ff; + switch (what) { + case TT_PROCESSING: + db_printf("Processing "); + break; + case TT_PROCESSED: + db_printf("Processed "); + break; + case TT_STARTING: + db_printf("Starting "); + break; + case TT_STOPPING: + db_printf("Stopping "); + break; + default: + db_printf("Unknown operation (%u) for ", what); + break; + } + switch (which) { + case TT_REXMT: + db_printf("Retransmission "); + break; + case TT_PERSIST: + db_printf("Persist "); + break; + case TT_KEEP: + db_printf("Keepalive "); + break; + case TT_2MSL: + db_printf("2 MSL "); + break; + case TT_DELACK: + db_printf("Delayed ACK "); + break; + default: + db_printf("Unknown (%u) ", which); + break; + } + db_printf("timer"); + if (what == TT_STARTING) { + db_printf(": %u ms", tlm_buf->tlb_flex2); + } +} + +static void +db_print_usersend(struct tcp_log_buffer *tlm_buf) +{ + if ((tlm_buf->tlb_eventflags & TLB_FLAG_RXBUF) == 0) { + return; + } + if ((tlm_buf->tlb_eventflags & TLB_FLAG_TXBUF) == 0) { 
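+		/* Without both socket buffer snapshots there is nothing to print. */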
+ return; + } + db_printf("usersend: rcv.acc: %u rcv.ccc: %u snd.acc: %u snd.ccc: %u", + tlm_buf->tlb_rxbuf.tls_sb_acc, tlm_buf->tlb_rxbuf.tls_sb_ccc, + tlm_buf->tlb_txbuf.tls_sb_acc, tlm_buf->tlb_txbuf.tls_sb_ccc); +} + +void +db_print_bblog_entries(struct tcp_log_stailq *log_entries, int indent) +{ + struct tcp_log_mem *log_entry; + struct tcp_log_buffer *tlm_buf, *prev_tlm_buf; + int64_t delta_t; + + indent += 2; + prev_tlm_buf = NULL; + STAILQ_FOREACH(log_entry, log_entries, tlm_queue) { + db_print_indent(indent); + tlm_buf = &log_entry->tlm_buf; + if (prev_tlm_buf == NULL) { + db_printf(" 0.000 "); + } else { + delta_t = sbttoms(tvtosbt(tlm_buf->tlb_tv) - + tvtosbt(prev_tlm_buf->tlb_tv)); + db_printf("+%u.%03u ", (uint32_t)(delta_t / 1000), + (uint32_t)(delta_t % 1000)); + } + switch (tlm_buf->tlb_eventid) { + case TCP_LOG_IN: + db_printf("< "); + db_print_tcphdr(tlm_buf); + break; + case TCP_LOG_OUT: + db_printf("> "); + db_print_tcphdr(tlm_buf); + break; + case TCP_LOG_RTO: + db_print_rto(tlm_buf); + break; + case TCP_LOG_PRU: + db_print_pru(tlm_buf); + break; + case TCP_LOG_USERSEND: + db_print_usersend(tlm_buf); + break; + default: + break; + } + db_printf("\n"); + prev_tlm_buf = tlm_buf; + if (db_pager_quit) + break; + } +} +#endif diff --git a/sys/netinet/tcp_log_buf.h b/sys/netinet/tcp_log_buf.h index 06b2998b19ed..eb33a9c15ebb 100644 --- a/sys/netinet/tcp_log_buf.h +++ b/sys/netinet/tcp_log_buf.h @@ -1,589 +1,593 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2016-2020 Netflix, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef __tcp_log_buf_h__ #define __tcp_log_buf_h__ #define TCP_LOG_REASON_LEN 32 #define TCP_LOG_TAG_LEN 32 #define TCP_LOG_BUF_VER (9) /* * Because the (struct tcp_log_buffer) includes 8-byte uint64_t's, it requires * 8-byte alignment to work properly on all platforms. Therefore, we will * enforce 8-byte alignment for all the structures that may appear by * themselves (instead of being embedded in another structure) in a data * stream. */ #define ALIGN_TCP_LOG __aligned(8) /* Information about the socketbuffer state. 
*/ struct tcp_log_sockbuf { uint32_t tls_sb_acc; /* available chars (sb->sb_acc) */ uint32_t tls_sb_ccc; /* claimed chars (sb->sb_ccc) */ uint32_t tls_sb_spare; /* spare */ }; /* Optional, verbose information that may be appended to an event log. */ struct tcp_log_verbose { #define TCP_FUNC_LEN 32 char tlv_snd_frm[TCP_FUNC_LEN]; /* tcp_output() caller */ char tlv_trace_func[TCP_FUNC_LEN]; /* Function that generated trace */ uint32_t tlv_trace_line; /* Line number that generated trace */ uint8_t _pad[4]; } ALIGN_TCP_LOG; struct tcp_log_bbr { uint64_t cur_del_rate; uint64_t delRate; uint64_t rttProp; uint64_t bw_inuse; uint32_t inflight; uint32_t applimited; uint32_t delivered; uint32_t timeStamp; uint32_t epoch; uint32_t lt_epoch; uint32_t pkts_out; uint32_t flex1; uint32_t flex2; uint32_t flex3; uint32_t flex4; uint32_t flex5; uint32_t flex6; uint32_t lost; uint16_t pacing_gain; uint16_t cwnd_gain; uint16_t flex7; uint8_t bbr_state; uint8_t bbr_substate; uint8_t inhpts; uint8_t __spare; uint8_t use_lt_bw; uint8_t flex8; uint32_t pkt_epoch; }; /* shadows tcp_log_bbr struct element sizes */ struct tcp_log_raw { uint64_t u64_flex[4]; uint32_t u32_flex[14]; uint16_t u16_flex[3]; uint8_t u8_flex[6]; uint32_t u32_flex2[1]; }; struct tcp_log_uint64 { uint64_t u64_flex[13]; }; struct tcp_log_sendfile { uint64_t offset; uint64_t length; uint32_t flags; }; /* * tcp_log_stackspecific is currently being used as "event specific" log * info by all stacks (i.e. struct tcp_log_bbr is used for generic event * logging). Until this is cleaned up more generically and throughout, * allow events to use the same space in the union. */ union tcp_log_stackspecific { struct tcp_log_bbr u_bbr; struct tcp_log_sendfile u_sf; struct tcp_log_raw u_raw; /* "raw" log access */ struct tcp_log_uint64 u64_raw; /* just u64's - used by process info */ }; typedef union tcp_log_stackspecific tcp_log_eventspecific_t; struct tcp_log_buffer { /* Event basics */ struct timeval tlb_tv; /* Timestamp of trace */ uint32_t tlb_ticks; /* Timestamp of trace */ uint32_t tlb_sn; /* Serial number */ uint8_t tlb_stackid; /* Stack ID */ uint8_t tlb_eventid; /* Event ID */ uint16_t tlb_eventflags; /* Flags for the record */ #define TLB_FLAG_RXBUF 0x0001 /* Includes receive buffer info */ #define TLB_FLAG_TXBUF 0x0002 /* Includes send buffer info */ #define TLB_FLAG_HDR 0x0004 /* Includes a TCP header */ #define TLB_FLAG_VERBOSE 0x0008 /* Includes function/line numbers */ #define TLB_FLAG_STACKINFO 0x0010 /* Includes stack-specific info */ int tlb_errno; /* Event error (if any) */ /* Internal session state */ struct tcp_log_sockbuf tlb_rxbuf; /* Receive buffer */ struct tcp_log_sockbuf tlb_txbuf; /* Send buffer */ int tlb_state; /* TCPCB t_state */ uint32_t tlb_starttime; /* TCPCB t_starttime */ uint32_t tlb_iss; /* TCPCB iss */ uint32_t tlb_flags; /* TCPCB flags */ uint32_t tlb_snd_una; /* TCPCB snd_una */ uint32_t tlb_snd_max; /* TCPCB snd_max */ uint32_t tlb_snd_cwnd; /* TCPCB snd_cwnd */ uint32_t tlb_snd_nxt; /* TCPCB snd_nxt */ uint32_t tlb_snd_recover;/* TCPCB snd_recover */ uint32_t tlb_snd_wnd; /* TCPCB snd_wnd */ uint32_t tlb_snd_ssthresh; /* TCPCB snd_ssthresh */ uint32_t tlb_srtt; /* TCPCB t_srtt */ uint32_t tlb_rttvar; /* TCPCB t_rttvar */ uint32_t tlb_rcv_up; /* TCPCB rcv_up */ uint32_t tlb_rcv_adv; /* TCPCB rcv_adv */ uint32_t tlb_flags2; /* TCPCB t_flags2 */ uint32_t tlb_rcv_nxt; /* TCPCB rcv_nxt */ uint32_t tlb_rcv_wnd; /* TCPCB rcv_wnd */ uint32_t tlb_dupacks; /* TCPCB t_dupacks */ int tlb_segqlen; /* TCPCB segqlen */ int 
tlb_snd_numholes; /* TCPCB snd_numholes */ uint32_t tlb_flex1; /* Event specific information */ uint32_t tlb_flex2; /* Event specific information */ uint32_t tlb_fbyte_in; /* TCPCB first byte in time */ uint32_t tlb_fbyte_out; /* TCPCB first byte out time */ uint8_t tlb_snd_scale:4, /* TCPCB snd_scale */ tlb_rcv_scale:4; /* TCPCB rcv_scale */ uint8_t _pad[3]; /* Padding */ /* Per-stack info */ union tcp_log_stackspecific tlb_stackinfo; /* The packet */ uint32_t tlb_len; /* The packet's data length */ struct tcphdr tlb_th; /* The TCP header */ uint8_t tlb_opts[TCP_MAXOLEN]; /* The TCP options */ /* Verbose information (optional) */ struct tcp_log_verbose tlb_verbose[0]; } ALIGN_TCP_LOG; enum tcp_log_events { TCP_LOG_IN = 1, /* Incoming packet 1 */ TCP_LOG_OUT, /* Transmit (without other event) 2 */ TCP_LOG_RTO, /* Retransmit timeout 3 */ TCP_LOG_SB_WAKE, /* Awaken socket buffer 4 */ TCP_UNUSED_5, /* Detected bad retransmission 5 */ TCP_LOG_PRR, /* Doing PRR 6 */ TCP_UNUSED_7, /* Detected reorder 7 */ TCP_LOG_HPTS, /* Hpts sending a packet 8 */ BBR_LOG_BBRUPD, /* We updated BBR info 9 */ BBR_LOG_BBRSND, /* We did a slot calculation and sending is done 10 */ BBR_LOG_ACKCLEAR, /* A ack clears all outstanding 11 */ TCP_UNUSED_12, /* The tcb had a packet input to it 12 */ BBR_LOG_TIMERSTAR, /* Start a timer 13 */ BBR_LOG_TIMERCANC, /* Cancel a timer 14 */ BBR_LOG_ENTREC, /* Entered recovery 15 */ BBR_LOG_EXITREC, /* Exited recovery 16 */ BBR_LOG_CWND, /* Cwnd change 17 */ BBR_LOG_BWSAMP, /* LT B/W sample has been made 18 */ BBR_LOG_MSGSIZE, /* We received a EMSGSIZE error 19 */ BBR_LOG_BBRRTT, /* BBR RTT is updated 20 */ BBR_LOG_JUSTRET, /* We just returned out of output 21 */ BBR_LOG_STATE, /* A BBR state change occurred 22 */ BBR_LOG_PKT_EPOCH, /* A BBR packet epoch occurred 23 */ BBR_LOG_PERSIST, /* BBR changed to/from a persists 24 */ TCP_LOG_FLOWEND, /* End of a flow 25 */ BBR_LOG_RTO, /* BBR's timeout includes BBR info 26 */ BBR_LOG_DOSEG_DONE, /* hpts do_segment completes 27 */ BBR_LOG_EXIT_GAIN, /* hpts do_segment completes 28 */ BBR_LOG_THRESH_CALC, /* Doing threshold calculation 29 */ TCP_LOG_MAPCHG, /* Map Changes to the sendmap 30 */ TCP_LOG_USERSEND, /* User level sends data 31 */ BBR_RSM_CLEARED, /* RSM cleared of ACK flags 32 */ BBR_LOG_STATE_TARGET, /* Log of target at state 33 */ BBR_LOG_TIME_EPOCH, /* A timed based Epoch occurred 34 */ BBR_LOG_TO_PROCESS, /* A to was processed 35 */ BBR_LOG_BBRTSO, /* TSO update 36 */ BBR_LOG_HPTSDIAG, /* Hpts diag insert 37 */ BBR_LOG_LOWGAIN, /* Low gain accounting 38 */ BBR_LOG_PROGRESS, /* Progress timer event 39 */ TCP_LOG_SOCKET_OPT, /* A socket option is set 40 */ BBR_LOG_TIMERPREP, /* A BBR var to debug out TLP issues 41 */ BBR_LOG_ENOBUF_JMP, /* We had a enobuf jump 42 */ BBR_LOG_HPTSI_CALC, /* calc the hptsi time 43 */ BBR_LOG_RTT_SHRINKS, /* We had a log reduction of rttProp 44 */ BBR_LOG_BW_RED_EV, /* B/W reduction events 45 */ BBR_LOG_REDUCE, /* old bbr log reduce for 4.1 and earlier 46*/ TCP_LOG_RTT, /* A rtt (in useconds) is being sampled and applied to the srtt algo 47 */ BBR_LOG_SETTINGS_CHG, /* Settings changed for loss response 48 */ TCP_UNUSED_49, /* SRTT gaining -- now not used 49 */ TCP_LOG_REASS, /* Reassembly buffer logging 50 */ TCP_HDWR_PACE_SIZE, /* TCP pacing size set (rl and rack uses this) 51 */ BBR_LOG_HDWR_PACE, /* TCP Hardware pacing log 52 */ BBR_LOG_TSTMP_VAL, /* Temp debug timestamp validation 53 */ TCP_LOG_CONNEND, /* End of connection 54 */ TCP_LOG_LRO, /* LRO entry 55 */ TCP_SACK_FILTER_RES, /* Results of 
SACK Filter 56 */ TCP_UNUSED_57, /* Sack Attack Detection 57 */ TCP_TIMELY_WORK, /* Logs regarding Timely CC tweaks 58 */ TCP_UNUSED_59, /* User space event data 59 */ TCP_LOG_SENDFILE, /* sendfile() logging for TCP connections 60 */ TCP_LOG_REQ_T, /* logging of request tracking 61 */ TCP_LOG_ACCOUNTING, /* Log of TCP Accounting data 62 */ TCP_LOG_FSB, /* FSB information 63 */ RACK_DSACK_HANDLING, /* Handling of DSACK in rack for reordering window 64 */ TCP_HYSTART, /* TCP Hystart logging 65 */ TCP_CHG_QUERY, /* Change query during fnc_init() 66 */ TCP_RACK_LOG_COLLAPSE, /* Window collapse by peer 67 */ TCP_RACK_TP_TRIGGERED, /* A rack tracepoint is triggered 68 */ TCP_HYBRID_PACING_LOG, /* Hybrid pacing log 69 */ TCP_LOG_PRU, /* TCP protocol user request 70 */ TCP_UNUSED_71, /* old TCP Policer detection, not used 71 */ TCP_PCM_MEASURE, /* TCP Path Capacity Measurement 72 */ TCP_LOG_END /* End (keep at end) 73 */ }; enum tcp_log_states { TCP_LOG_STATE_RATIO_OFF = -2, /* Log ratio evaluation yielded an OFF result. Only used for tlb_logstate */ TCP_LOG_STATE_CLEAR = -1, /* Deactivate and clear tracing. Passed to tcp_log_state_change() but never stored in any logstate variable */ TCP_LOG_STATE_OFF = 0, /* Pause */ /* Positively numbered states represent active logging modes */ TCP_LOG_STATE_TAIL=1, /* Keep the trailing events */ TCP_LOG_STATE_HEAD=2, /* Keep the leading events */ TCP_LOG_STATE_HEAD_AUTO=3, /* Keep the leading events, and automatically dump them to the device */ TCP_LOG_STATE_CONTINUAL=4, /* Continually dump the data when full */ TCP_LOG_STATE_TAIL_AUTO=5, /* Keep the trailing events, and automatically dump them when the session ends */ TCP_LOG_VIA_BBPOINTS=6 /* Log only if the BB point has been configured */ }; /* Use this if we don't know whether the operation succeeded. */ #define ERRNO_UNK (-1) /* * If the user included dev/tcp_log/tcp_log_dev.h, then include our private * headers. Otherwise, there is no reason to pollute all the files with an * additional include. * * This structure is aligned to an 8-byte boundary to match the alignment * requirements of (struct tcp_log_buffer). */ #ifdef __tcp_log_dev_h__ struct tcp_log_header { struct tcp_log_common_header tlh_common; #define tlh_version tlh_common.tlch_version #define tlh_type tlh_common.tlch_type #define tlh_length tlh_common.tlch_length struct in_endpoints tlh_ie; struct timeval tlh_offset; /* Uptime -> UTC offset */ char tlh_id[TCP_LOG_ID_LEN]; char tlh_reason[TCP_LOG_REASON_LEN]; char tlh_tag[TCP_LOG_TAG_LEN]; uint8_t tlh_af; uint8_t _pad[7]; } ALIGN_TCP_LOG; #ifdef _KERNEL struct tcp_log_dev_log_queue { struct tcp_log_dev_queue tldl_common; char tldl_id[TCP_LOG_ID_LEN]; char tldl_reason[TCP_LOG_REASON_LEN]; char tldl_tag[TCP_LOG_TAG_LEN]; struct in_endpoints tldl_ie; struct tcp_log_stailq tldl_entries; int tldl_count; uint8_t tldl_af; }; #endif /* _KERNEL */ #endif /* __tcp_log_dev_h__ */ /* * Defined BBPOINTS that can be used * with TCP_LOG_VIA_BBPOINTS. */ #define TCP_BBPOINT_NONE 0 #define TCP_BBPOINT_REQ_LEVEL_LOGGING 1 /*********************/ /* TCP Trace points */ /*********************/ /* * TCP trace points are interesting points within * the TCP code that the author/debugger may want * to have BB logging enabled if we hit that point. * In order to enable a trace point you set the * sysctl var net.inet.tcp.bb.tp.number to * one of the numbers listed below. You also * must make sure net.inet.tcp.bb.tp.bbmode is * non-zero; the default is 4 for continuous tracing.
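 *
 * Purely as an illustration (example values, not part of this change):
 * setting net.inet.tcp.bb.tp.number to 0x00000006 (TCP_TP_RESET_RCV,
 * defined below) while leaving bbmode at its default of 4 arms continuous
 * BB logging for connections that receive a RST, provided the count
 * described next is still positive.
 *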
* You also set the number of connections you want * to get BB logs in net.inet.tcp.bb.tp.count. * * Count will decrement every time BB logging is assigned * to a connection that hits your tracepoint. * * You can enable all trace points by setting the number * to 0xffffffff. You can disable all trace points by * setting number to zero (or count to 0). * * Below is the enumerated list of tracepoints that * have currently been defined in the code. Add more * as you add a call to rack_trace_point(rack, num), * where num is one of the values defined below. */ #define TCP_TP_HWENOBUF 0x00000001 /* When we are doing hardware pacing and hit enobufs */ #define TCP_TP_ENOBUF 0x00000002 /* When we hit enobufs with software pacing */ #define TCP_TP_COLLAPSED_WND 0x00000003 /* When a peer collapses its rwnd on us */ #define TCP_TP_COLLAPSED_RXT 0x00000004 /* When we actually retransmit a collapsed window rsm */ #define TCP_TP_REQ_LOG_FAIL 0x00000005 /* We tried to allocate a Request log but had no space */ #define TCP_TP_RESET_RCV 0x00000006 /* Triggers when we receive a RST */ #define TCP_TP_POLICER_DET 0x00000007 /* When we detect a policer */ #define TCP_TP_EXCESS_RXT TCP_TP_POLICER_DET /* alias */ #define TCP_TP_SAD_TRIGGERED 0x00000008 /* Sack Attack Detection triggers */ #define TCP_TP_SAD_SUSPECT 0x0000000a /* A sack has suspicious information in it */ #define TCP_TP_PACED_BOTTOM 0x0000000b /* We have paced at the bottom */ #ifdef _KERNEL extern uint32_t tcp_trace_point_config; extern uint32_t tcp_trace_point_bb_mode; extern int32_t tcp_trace_point_count; /* * Returns true if any sort of BB logging is enabled, * commonly used throughout the codebase. */ static inline int tcp_bblogging_on(struct tcpcb *tp) { if (tp->_t_logstate <= TCP_LOG_STATE_OFF) return (0); if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) return (0); return (1); } /* * Returns true if we match a specific bbpoint when * in TCP_LOG_VIA_BBPOINTS, but also returns true * for all the other logging states. */ static inline int tcp_bblogging_point_on(struct tcpcb *tp, uint8_t bbpoint) { if (tp->_t_logstate <= TCP_LOG_STATE_OFF) return (0); if ((tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) && (tp->_t_logpoint == bbpoint)) return (1); else if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) return (0); return (1); } static inline void tcp_set_bblog_state(struct tcpcb *tp, uint8_t ls, uint8_t bbpoint) { if ((ls == TCP_LOG_VIA_BBPOINTS) && (tp->_t_logstate == TCP_LOG_STATE_OFF)){ /* * We don't allow a BBPOINTS set to override * other types of BB logging set by other means such * as the bb_ratio/bb_state URL parameters. In other * words BBlogging must be *off* in order to turn on * a BBpoint.
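 * For example, a (hypothetical) call like
 * tcp_set_bblog_state(tp, TCP_LOG_VIA_BBPOINTS, TCP_BBPOINT_REQ_LEVEL_LOGGING)
 * is silently ignored while _t_logstate is already, say,
 * TCP_LOG_STATE_CONTINUAL.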
*/ tp->_t_logpoint = bbpoint; tp->_t_logstate = ls; } else if (ls < TCP_LOG_VIA_BBPOINTS) { tp->_t_logpoint = TCP_BBPOINT_NONE; tp->_t_logstate = ls; } } static inline uint32_t tcp_get_bblog_state(struct tcpcb *tp) { return (tp->_t_logstate); } static inline void tcp_trace_point(struct tcpcb *tp, int num) { #ifdef TCP_BLACKBOX if (((tcp_trace_point_config == num) || (tcp_trace_point_config == 0xffffffff)) && (tcp_trace_point_bb_mode != 0) && (tcp_trace_point_count > 0) && (tcp_bblogging_on(tp) == 0)) { int res; res = atomic_fetchadd_int(&tcp_trace_point_count, -1); if (res > 0) { tcp_set_bblog_state(tp, tcp_trace_point_bb_mode, TCP_BBPOINT_NONE); } else { /* Lost a race, assure it is zero now */ tcp_trace_point_count = 0; } } #endif } #define TCP_LOG_BUF_DEFAULT_SESSION_LIMIT 5000 #define TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT 5000000 /* * TCP_LOG_EVENT_VERBOSE: The same as TCP_LOG_EVENT, except it always * tries to record verbose information. */ #define TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \ do { \ if (tcp_bblogging_on(tp)) \ tcp_log_event(tp, th, rxbuf, txbuf, eventid, \ errornum, len, stackinfo, th_hostorder, \ tp->t_output_caller, __func__, __LINE__, tv);\ } while (0) /* * TCP_LOG_EVENT: This is a macro so we can capture function/line * information when needed. You can use the macro when you are not * doing a lot of prep in the stack specific information i.e. you * don't add extras (stackinfo). If you are adding extras, which * means filling out a stack variable, instead use the tcp_log_event() * function but enclose the call to the log (and all the setup) in an * if (tcp_bblogging_on(tp)) { * ... setup and logging call ... * } * * Always use the macro tcp_bblogging_on() since sometimes the definitions * do change. * * BBlogging also supports the concept of a BBpoint. The idea behind this * is that when you set a specific BBpoint on and turn the logging into * the BBpoint mode (TCP_LOG_VIA_BBPOINTS) you will be defining very very * few of these points to come out. The point is specific to a piece of code you * want tied to that one BB logging. This allows you to turn on a much broader * scale set of limited logging on more connections without overwhelming the * I/O system with too many BBlogs. This of course means you need to be quite * careful on how many BBlogs go with each point, but you can have multiple points, * only one of which is active at a time. * * To define a point you add it above under the define for TCP_BBPOINT_NONE (which * is the default, i.e. no point is defined). You then, for your point, use the * tcp_bblogging_point_on(struct tcpcb *tp, uint8_t bbpoint) inline to enclose * your call to tcp_log_event. Do not use one of the TCP_LOGGING macros else * your point will never come out. You specify your defined point in the bbpoint * side of the inline. An example of this can be found in rack, where the * TCP_BBPOINT_REQ_LEVEL_LOGGING point is used. There a specific set of logs is generated * for each request that tcp is tracking. * * When turning on BB logging use the inline: * tcp_set_bblog_state(struct tcpcb *tp, uint8_t ls, uint8_t bbpoint) * the ls field is the logging state TCP_LOG_STATE_CONTINUAL etc. The * bbpoint field is ignored unless the ls field is set to TCP_LOG_VIA_BBPOINTS. * Currently there is only a socket option that turns on the non-BBPOINT * logging.
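 *
 * As a rough sketch only (not code from this change; req_id is a made-up
 * placeholder value), a stack using a BBpoint might log like this:
 *
 *	if (tcp_bblogging_point_on(tp, TCP_BBPOINT_REQ_LEVEL_LOGGING)) {
 *		union tcp_log_stackspecific log;
 *
 *		memset(&log, 0, sizeof(log));
 *		log.u_bbr.flex1 = req_id;
 *		tcp_log_event(tp, NULL, NULL, NULL, TCP_LOG_REQ_T, 0, 0,
 *		    &log, false, NULL, __func__, __LINE__, NULL);
 *	}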
* * Prototype: * TCP_LOG_EVENT(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, * struct sockbuf *txbuf, uint8_t eventid, int errornum, * union tcp_log_stackspecific *stackinfo) * * tp is mandatory and must be write locked. * th is optional; if present, it will appear in the record. * rxbuf and txbuf are optional; if present, they will appear in the record. * eventid is mandatory. * errornum is mandatory (it indicates the success or failure of the * operation associated with the event). * len indicates the length of the packet. If no packet, use 0. * stackinfo is optional; if present, it will appear in the record. */ struct tcpcb; #ifdef TCP_LOG_FORCEVERBOSE #define TCP_LOG_EVENT TCP_LOG_EVENT_VERBOSE #else #define TCP_LOG_EVENT(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder) \ do { \ if (tcp_log_verbose) \ TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf, \ eventid, errornum, len, stackinfo, \ th_hostorder, NULL); \ else if (tcp_bblogging_on(tp)) \ tcp_log_event(tp, th, rxbuf, txbuf, eventid, \ errornum, len, stackinfo, th_hostorder, \ NULL, NULL, 0, NULL); \ } while (0) #endif /* TCP_LOG_FORCEVERBOSE */ #define TCP_LOG_EVENTP(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \ do { \ if (tcp_bblogging_on(tp)) \ tcp_log_event(tp, th, rxbuf, txbuf, eventid, \ errornum, len, stackinfo, th_hostorder, \ NULL, NULL, 0, tv); \ } while (0) #ifdef TCP_BLACKBOX extern bool tcp_log_verbose; void tcp_log_drain(struct tcpcb *tp); int tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force); void tcp_log_dump_tp_bucket_logbufs(struct tcpcb *tp, char *reason); struct tcp_log_buffer *tcp_log_event(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len, union tcp_log_stackspecific *stackinfo, int th_hostorder, const char *output_caller, const char *func, int line, const struct timeval *tv); size_t tcp_log_get_id(struct tcpcb *tp, char *buf); size_t tcp_log_get_tag(struct tcpcb *tp, char *buf); u_int tcp_log_get_id_cnt(struct tcpcb *tp); int tcp_log_getlogbuf(struct sockopt *sopt, struct tcpcb *tp); void tcp_log_init(void); int tcp_log_set_id(struct tcpcb *tp, char *id); int tcp_log_set_tag(struct tcpcb *tp, char *tag); int tcp_log_state_change(struct tcpcb *tp, int state); void tcp_log_tcpcbinit(struct tcpcb *tp); void tcp_log_tcpcbfini(struct tcpcb *tp); void tcp_log_flowend(struct tcpcb *tp); void tcp_log_sendfile(struct socket *so, off_t offset, size_t nbytes, int flags); int tcp_log_apply_ratio(struct tcpcb *tp, int ratio); #else /* !TCP_BLACKBOX */ #define tcp_log_verbose (false) static inline struct tcp_log_buffer * tcp_log_event(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len, union tcp_log_stackspecific *stackinfo, int th_hostorder, const char *output_caller, const char *func, int line, const struct timeval *tv) { return (NULL); } #endif /* TCP_BLACKBOX */ +#ifdef DDB +void db_print_bblog_entries(struct tcp_log_stailq *log_entries, int indent); +#endif + #endif /* _KERNEL */ #endif /* __tcp_log_buf_h__ */ diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index a7a1d98fd193..1eee45e3c2ac 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1,3248 +1,3255 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * Copyright (c) 2006-2007 Robert N. M. 
Watson * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_kern_tls.h" #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #include #endif #include #include #include #include #include #include #include #include #include #include #ifdef TCP_OFFLOAD #include #endif #include #include #include #include #include #include #include /* * TCP protocol interface to socket abstraction. */ #ifdef INET static int tcp_connect(struct tcpcb *, struct sockaddr_in *, struct thread *td); #endif /* INET */ #ifdef INET6 static int tcp6_connect(struct tcpcb *, struct sockaddr_in6 *, struct thread *td); #endif /* INET6 */ static void tcp_disconnect(struct tcpcb *); static void tcp_usrclosed(struct tcpcb *); static void tcp_fill_info(const struct tcpcb *, struct tcp_info *); static int tcp_pru_options_support(struct tcpcb *tp, int flags); static void tcp_bblog_pru(struct tcpcb *tp, uint32_t pru, int error) { struct tcp_log_buffer *lgb; KASSERT(tp != NULL, ("tcp_bblog_pru: tp == NULL")); INP_WLOCK_ASSERT(tptoinpcb(tp)); if (tcp_bblogging_on(tp)) { lgb = tcp_log_event(tp, NULL, NULL, NULL, TCP_LOG_PRU, error, 0, NULL, false, NULL, NULL, 0, NULL); } else { lgb = NULL; } if (lgb != NULL) { if (error >= 0) { lgb->tlb_errno = (uint32_t)error; } lgb->tlb_flex1 = pru; } } /* * TCP attaches to socket via pr_attach(), reserving space, * and an internet control block. 
*/ static int tcp_usr_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct tcpcb *tp = NULL; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL")); error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace); if (error) goto out; so->so_rcv.sb_flags |= SB_AUTOSIZE; so->so_snd.sb_flags |= SB_AUTOSIZE; error = in_pcballoc(so, &V_tcbinfo); if (error) goto out; inp = sotoinpcb(so); tp = tcp_newtcpcb(inp, NULL); if (tp == NULL) { error = ENOBUFS; in_pcbfree(inp); goto out; } tp->t_state = TCPS_CLOSED; tcp_bblog_pru(tp, PRU_ATTACH, error); INP_WUNLOCK(inp); TCPSTATES_INC(TCPS_CLOSED); out: TCP_PROBE2(debug__user, tp, PRU_ATTACH); return (error); } /* * tcp_usr_detach is called when the socket layer loses its final reference * to the socket, be it a file descriptor reference, a reference from TCP, * etc. At this point, there is only one case in which we will keep around * inpcb state: time wait. */ static void tcp_usr_detach(struct socket *so) { struct inpcb *inp; struct tcpcb *tp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); INP_WLOCK(inp); KASSERT(so->so_pcb == inp && inp->inp_socket == so, ("%s: socket %p inp %p mismatch", __func__, so, inp)); tp = intotcpcb(inp); KASSERT(inp->inp_flags & INP_DROPPED || tp->t_state < TCPS_SYN_SENT, ("%s: inp %p not dropped or embryonic", __func__, inp)); tcp_discardcb(tp); in_pcbfree(inp); } #ifdef INET /* * Give the socket an address. */ static int tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in *sinp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (EINVAL); } tp = intotcpcb(inp); sinp = (struct sockaddr_in *)nam; if (nam->sa_family != AF_INET) { /* * Preserve compatibility with old programs. */ if (nam->sa_family != AF_UNSPEC || nam->sa_len < offsetof(struct sockaddr_in, sin_zero) || sinp->sin_addr.s_addr != INADDR_ANY) { error = EAFNOSUPPORT; goto out; } nam->sa_family = AF_INET; } if (nam->sa_len != sizeof(*sinp)) { error = EINVAL; goto out; } /* * Must check for multicast addresses and disallow binding * to them. */ if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { error = EAFNOSUPPORT; goto out; } INP_HASH_WLOCK(&V_tcbinfo); error = in_pcbbind(inp, sinp, V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); out: tcp_bblog_pru(tp, PRU_BIND, error); TCP_PROBE2(debug__user, tp, PRU_BIND); INP_WUNLOCK(inp); return (error); } #endif /* INET */ #ifdef INET6 static int tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in6 *sin6; u_char vflagsav; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (EINVAL); } tp = intotcpcb(inp); vflagsav = inp->inp_vflag; sin6 = (struct sockaddr_in6 *)nam; if (nam->sa_family != AF_INET6) { error = EAFNOSUPPORT; goto out; } if (nam->sa_len != sizeof(*sin6)) { error = EINVAL; goto out; } /* * Must check for multicast addresses and disallow binding * to them. 
*/ if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { error = EAFNOSUPPORT; goto out; } INP_HASH_WLOCK(&V_tcbinfo); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) inp->inp_vflag |= INP_IPV4; else if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { error = EAFNOSUPPORT; INP_HASH_WUNLOCK(&V_tcbinfo); goto out; } inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = in_pcbbind(inp, &sin, 0, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); goto out; } } #endif error = in6_pcbbind(inp, sin6, V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); out: if (error != 0) inp->inp_vflag = vflagsav; tcp_bblog_pru(tp, PRU_BIND, error); TCP_PROBE2(debug__user, tp, PRU_BIND); INP_WUNLOCK(inp); return (error); } #endif /* INET6 */ #ifdef INET /* * Prepare to accept connections. */ static int tcp_usr_listen(struct socket *so, int backlog, struct thread *td) { struct inpcb *inp; struct tcpcb *tp; int error = 0; bool already_listening; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (EINVAL); } tp = intotcpcb(inp); SOCK_LOCK(so); already_listening = SOLISTENING(so); error = solisten_proto_check(so); if (error != 0) { SOCK_UNLOCK(so); goto out; } if (inp->inp_lport == 0) { INP_HASH_WLOCK(&V_tcbinfo); error = in_pcbbind(inp, NULL, V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); } if (error == 0) { tcp_state_change(tp, TCPS_LISTEN); solisten_proto(so, backlog); #ifdef TCP_OFFLOAD if ((so->so_options & SO_NO_OFFLOAD) == 0) tcp_offload_listen_start(tp); #endif } else { solisten_proto_abort(so); } SOCK_UNLOCK(so); if (already_listening) goto out; if (error == 0) in_pcblisten(inp); if (tp->t_flags & TF_FASTOPEN) tp->t_tfo_pending = tcp_fastopen_alloc_counter(); out: tcp_bblog_pru(tp, PRU_LISTEN, error); TCP_PROBE2(debug__user, tp, PRU_LISTEN); INP_WUNLOCK(inp); return (error); } #endif /* INET */ #ifdef INET6 static int tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) { struct inpcb *inp; struct tcpcb *tp; u_char vflagsav; int error = 0; bool already_listening; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (EINVAL); } tp = intotcpcb(inp); vflagsav = inp->inp_vflag; SOCK_LOCK(so); already_listening = SOLISTENING(so); error = solisten_proto_check(so); if (error != 0) { SOCK_UNLOCK(so); goto out; } INP_HASH_WLOCK(&V_tcbinfo); if (inp->inp_lport == 0) { inp->inp_vflag &= ~INP_IPV4; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; error = in6_pcbbind(inp, NULL, V_tcp_bind_all_fibs ? 
0 : INPBIND_FIB, td->td_ucred); } INP_HASH_WUNLOCK(&V_tcbinfo); if (error == 0) { tcp_state_change(tp, TCPS_LISTEN); solisten_proto(so, backlog); #ifdef TCP_OFFLOAD if ((so->so_options & SO_NO_OFFLOAD) == 0) tcp_offload_listen_start(tp); #endif } else { solisten_proto_abort(so); } SOCK_UNLOCK(so); if (already_listening) goto out; if (error == 0) in_pcblisten(inp); if (tp->t_flags & TF_FASTOPEN) tp->t_tfo_pending = tcp_fastopen_alloc_counter(); if (error != 0) inp->inp_vflag = vflagsav; out: tcp_bblog_pru(tp, PRU_LISTEN, error); TCP_PROBE2(debug__user, tp, PRU_LISTEN); INP_WUNLOCK(inp); return (error); } #endif /* INET6 */ #ifdef INET /* * Initiate connection to peer. * Create a template for use in transmissions on this connection. * Enter SYN_SENT state, and mark socket as connecting. * Start keep-alive timer, and seed output sequence space. * Send initial segment on connection. */ static int tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct epoch_tracker et; int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in *sinp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNREFUSED); } tp = intotcpcb(inp); sinp = (struct sockaddr_in *)nam; if (nam->sa_family != AF_INET) { error = EAFNOSUPPORT; goto out; } if (nam->sa_len != sizeof (*sinp)) { error = EINVAL; goto out; } /* * Must disallow TCP ``connections'' to multicast addresses. */ if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { error = EAFNOSUPPORT; goto out; } if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) { error = EACCES; goto out; } if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0) goto out; if (SOLISTENING(so) || so->so_options & SO_REUSEPORT_LB) { error = EOPNOTSUPP; goto out; } NET_EPOCH_ENTER(et); if ((error = tcp_connect(tp, sinp, td)) != 0) goto out_in_epoch; #ifdef TCP_OFFLOAD if (registered_toedevs > 0 && (so->so_options & SO_NO_OFFLOAD) == 0 && (error = tcp_offload_connect(so, nam)) == 0) goto out_in_epoch; #endif tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); error = tcp_output(tp); KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()" ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error)); out_in_epoch: NET_EPOCH_EXIT(et); out: tcp_bblog_pru(tp, PRU_CONNECT, error); TCP_PROBE2(debug__user, tp, PRU_CONNECT); INP_WUNLOCK(inp); return (error); } #endif /* INET */ #ifdef INET6 static int tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct epoch_tracker et; int error = 0; struct inpcb *inp; struct tcpcb *tp; struct sockaddr_in6 *sin6; u_int8_t incflagsav; u_char vflagsav; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNREFUSED); } tp = intotcpcb(inp); vflagsav = inp->inp_vflag; incflagsav = inp->inp_inc.inc_flags; sin6 = (struct sockaddr_in6 *)nam; if (nam->sa_family != AF_INET6) { error = EAFNOSUPPORT; goto out; } if (nam->sa_len != sizeof (*sin6)) { error = EINVAL; goto out; } /* * Must disallow TCP ``connections'' to multicast addresses. */ if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { error = EAFNOSUPPORT; goto out; } if (SOLISTENING(so) || so->so_options & SO_REUSEPORT_LB) { error = EOPNOTSUPP; goto out; } #ifdef INET /* * XXXRW: Some confusion: V4/V6 flags relate to binding, and * therefore probably require the hash lock, which isn't held here. 
* Is this a significant problem? */ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { struct sockaddr_in sin; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { error = EINVAL; goto out; } if ((inp->inp_vflag & INP_IPV4) == 0) { error = EAFNOSUPPORT; goto out; } in6_sin6_2_sin(&sin, sin6); if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { error = EAFNOSUPPORT; goto out; } if (ntohl(sin.sin_addr.s_addr) == INADDR_BROADCAST) { error = EACCES; goto out; } if ((error = prison_remote_ip4(td->td_ucred, &sin.sin_addr)) != 0) goto out; inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; NET_EPOCH_ENTER(et); if ((error = tcp_connect(tp, &sin, td)) != 0) goto out_in_epoch; #ifdef TCP_OFFLOAD if (registered_toedevs > 0 && (so->so_options & SO_NO_OFFLOAD) == 0 && (error = tcp_offload_connect(so, nam)) == 0) goto out_in_epoch; #endif error = tcp_output(tp); goto out_in_epoch; } else { if ((inp->inp_vflag & INP_IPV6) == 0) { error = EAFNOSUPPORT; goto out; } } #endif if ((error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr)) != 0) goto out; inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; inp->inp_inc.inc_flags |= INC_ISIPV6; NET_EPOCH_ENTER(et); if ((error = tcp6_connect(tp, sin6, td)) != 0) goto out_in_epoch; #ifdef TCP_OFFLOAD if (registered_toedevs > 0 && (so->so_options & SO_NO_OFFLOAD) == 0 && (error = tcp_offload_connect(so, nam)) == 0) goto out_in_epoch; #endif tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); error = tcp_output(tp); out_in_epoch: NET_EPOCH_EXIT(et); out: KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()" ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error)); /* * If the implicit bind in the connect call fails, restore * the flags we modified. */ if (error != 0 && inp->inp_lport == 0) { inp->inp_vflag = vflagsav; inp->inp_inc.inc_flags = incflagsav; } tcp_bblog_pru(tp, PRU_CONNECT, error); TCP_PROBE2(debug__user, tp, PRU_CONNECT); INP_WUNLOCK(inp); return (error); } #endif /* INET6 */ /* * Initiate disconnect from peer. * If connection never passed embryonic stage, just drop; * else if don't need to let data drain, then can just drop anyways, * else have to begin TCP shutdown process: mark socket disconnecting, * drain unread data, state switch to reflect user close, and * send segment (e.g. FIN) to peer. Socket will be really disconnected * when peer sends FIN and acks ours. * * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. */ static int tcp_usr_disconnect(struct socket *so) { struct inpcb *inp; struct tcpcb *tp = NULL; struct epoch_tracker et; NET_EPOCH_ENTER(et); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); INP_WLOCK(inp); tp = intotcpcb(inp); if (tp->t_state == TCPS_TIME_WAIT) goto out; tcp_disconnect(tp); out: tcp_bblog_pru(tp, PRU_DISCONNECT, 0); TCP_PROBE2(debug__user, tp, PRU_DISCONNECT); INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); return (0); } #ifdef INET /* * Accept a connection. Essentially all the work is done at higher levels; * just return the address of the peer, storing through addr. 
*/ static int tcp_usr_accept(struct socket *so, struct sockaddr *sa) { struct inpcb *inp; struct tcpcb *tp; int error = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNABORTED); } tp = intotcpcb(inp); if (so->so_state & SS_ISDISCONNECTED) error = ECONNABORTED; else *(struct sockaddr_in *)sa = (struct sockaddr_in ){ .sin_family = AF_INET, .sin_len = sizeof(struct sockaddr_in), .sin_port = inp->inp_fport, .sin_addr = inp->inp_faddr, }; tcp_bblog_pru(tp, PRU_ACCEPT, error); TCP_PROBE2(debug__user, tp, PRU_ACCEPT); INP_WUNLOCK(inp); return (error); } #endif /* INET */ #ifdef INET6 static int tcp6_usr_accept(struct socket *so, struct sockaddr *sa) { struct inpcb *inp; struct tcpcb *tp; int error = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNABORTED); } tp = intotcpcb(inp); if (so->so_state & SS_ISDISCONNECTED) { error = ECONNABORTED; } else { if (inp->inp_vflag & INP_IPV4) { struct sockaddr_in sin = { .sin_family = AF_INET, .sin_len = sizeof(struct sockaddr_in), .sin_port = inp->inp_fport, .sin_addr = inp->inp_faddr, }; in6_sin_2_v4mapsin6(&sin, (struct sockaddr_in6 *)sa); } else { *(struct sockaddr_in6 *)sa = (struct sockaddr_in6 ){ .sin6_family = AF_INET6, .sin6_len = sizeof(struct sockaddr_in6), .sin6_port = inp->inp_fport, .sin6_addr = inp->in6p_faddr, }; /* XXX: should catch errors */ (void)sa6_recoverscope((struct sockaddr_in6 *)sa); } } tcp_bblog_pru(tp, PRU_ACCEPT, error); TCP_PROBE2(debug__user, tp, PRU_ACCEPT); INP_WUNLOCK(inp); return (error); } #endif /* INET6 */ /* * Mark the connection as being incapable of further output. */ static int tcp_usr_shutdown(struct socket *so, enum shutdown_how how) { struct epoch_tracker et; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); int error = 0; SOCK_LOCK(so); if (SOLISTENING(so)) { if (how != SHUT_WR) { so->so_error = ECONNABORTED; solisten_wakeup(so); /* unlocks so */ } else SOCK_UNLOCK(so); return (ENOTCONN); } else if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) { SOCK_UNLOCK(so); return (ENOTCONN); } SOCK_UNLOCK(so); switch (how) { case SHUT_RD: sorflush(so); break; case SHUT_RDWR: sorflush(so); /* FALLTHROUGH */ case SHUT_WR: /* * XXXGL: mimicing old soshutdown() here. But shouldn't we * return ECONNRESEST for SHUT_RD as well? */ INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNRESET); } socantsendmore(so); NET_EPOCH_ENTER(et); tcp_usrclosed(tp); error = tcp_output_nodrop(tp); tcp_bblog_pru(tp, PRU_SHUTDOWN, error); TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN); error = tcp_unlock_or_drop(tp, error); NET_EPOCH_EXIT(et); } wakeup(&so->so_timeo); return (error); } /* * After a receive, possibly send window update to peer. */ static int tcp_usr_rcvd(struct socket *so, int flags) { struct epoch_tracker et; struct inpcb *inp; struct tcpcb *tp; int outrv = 0, error = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); NET_EPOCH_ENTER(et); /* * For passively-created TFO connections, don't attempt a window * update while still in SYN_RECEIVED as this may trigger an early * SYN|ACK. 
It is preferable to have the SYN|ACK be sent along with * application response data, or failing that, when the DELACK timer * expires. */ if ((tp->t_flags & TF_FASTOPEN) && (tp->t_state == TCPS_SYN_RECEIVED)) goto out; #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) tcp_offload_rcvd(tp); else #endif outrv = tcp_output_nodrop(tp); out: tcp_bblog_pru(tp, PRU_RCVD, error); TCP_PROBE2(debug__user, tp, PRU_RCVD); (void) tcp_unlock_or_drop(tp, outrv); NET_EPOCH_EXIT(et); return (error); } /* * Do a send by putting data in output queue and updating urgent * marker if URG set. Possibly send more data. Unlike the other * pr_*() routines, the mbuf chains are our responsibility. We * must either enqueue them or free them. The other pr_*() routines * generally are caller-frees. */ static int tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct epoch_tracker et; int error = 0; struct inpcb *inp; struct tcpcb *tp; #ifdef INET #ifdef INET6 struct sockaddr_in sin; #endif struct sockaddr_in *sinp; #endif #ifdef INET6 struct sockaddr_in6 *sin6; int isipv6; #endif u_int8_t incflagsav; u_char vflagsav; bool restoreflags; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { if (m != NULL && (flags & PRUS_NOTREADY) == 0) m_freem(m); INP_WUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); vflagsav = inp->inp_vflag; incflagsav = inp->inp_inc.inc_flags; restoreflags = false; NET_EPOCH_ENTER(et); if (control != NULL) { /* TCP doesn't do control messages (rights, creds, etc) */ if (control->m_len > 0) { m_freem(control); error = EINVAL; goto out; } m_freem(control); /* empty control, just free it */ } if ((flags & PRUS_OOB) != 0 && (error = tcp_pru_options_support(tp, PRUS_OOB)) != 0) goto out; if (nam != NULL && tp->t_state < TCPS_SYN_SENT) { if (tp->t_state == TCPS_LISTEN) { error = EINVAL; goto out; } switch (nam->sa_family) { #ifdef INET case AF_INET: sinp = (struct sockaddr_in *)nam; if (sinp->sin_len != sizeof(struct sockaddr_in)) { error = EINVAL; goto out; } if ((inp->inp_vflag & INP_IPV6) != 0) { error = EAFNOSUPPORT; goto out; } if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { error = EAFNOSUPPORT; goto out; } if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) { error = EACCES; goto out; } if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr))) goto out; #ifdef INET6 isipv6 = 0; #endif break; #endif /* INET */ #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)nam; if (sin6->sin6_len != sizeof(*sin6)) { error = EINVAL; goto out; } if ((inp->inp_vflag & INP_IPV6PROTO) == 0) { error = EAFNOSUPPORT; goto out; } if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { error = EAFNOSUPPORT; goto out; } if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { error = EINVAL; goto out; } if ((inp->inp_vflag & INP_IPV4) == 0) { error = EAFNOSUPPORT; goto out; } restoreflags = true; inp->inp_vflag &= ~INP_IPV6; sinp = &sin; in6_sin6_2_sin(sinp, sin6); if (IN_MULTICAST( ntohl(sinp->sin_addr.s_addr))) { error = EAFNOSUPPORT; goto out; } if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr))) goto out; isipv6 = 0; #else /* !INET */ error = EAFNOSUPPORT; goto out; #endif /* INET */ } else { if ((inp->inp_vflag & INP_IPV6) == 0) { error = EAFNOSUPPORT; goto out; } restoreflags = true; inp->inp_vflag &= ~INP_IPV4; inp->inp_inc.inc_flags |= INC_ISIPV6; if ((error = prison_remote_ip6(td->td_ucred, 
&sin6->sin6_addr))) goto out; isipv6 = 1; } break; #endif /* INET6 */ default: error = EAFNOSUPPORT; goto out; } } if (!(flags & PRUS_OOB)) { if (tp->t_acktime == 0) tp->t_acktime = ticks; sbappendstream(&so->so_snd, m, flags); m = NULL; if (nam && tp->t_state < TCPS_SYN_SENT) { KASSERT(tp->t_state == TCPS_CLOSED, ("%s: tp %p is listening", __func__, tp)); /* * Do implied connect if not yet connected, * initialize window to default value, and * initialize maxseg using peer's cached MSS. */ #ifdef INET6 if (isipv6) error = tcp6_connect(tp, sin6, td); #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET error = tcp_connect(tp, sinp, td); #endif /* * The bind operation in tcp_connect succeeded. We * no longer want to restore the flags if later * operations fail. */ if (error == 0 || inp->inp_lport != 0) restoreflags = false; if (error) { /* m is freed if PRUS_NOTREADY is unset. */ sbflush(&so->so_snd); goto out; } if (tp->t_flags & TF_FASTOPEN) tcp_fastopen_connect(tp); else { tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); } } if (flags & PRUS_EOF) { /* * Close the send side of the connection after * the data is sent. */ socantsendmore(so); tcp_usrclosed(tp); } if (TCPS_HAVEESTABLISHED(tp->t_state) && ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) && (tp->t_fbyte_out == 0) && (so->so_snd.sb_ccc > 0)) { tp->t_fbyte_out = ticks; if (tp->t_fbyte_out == 0) tp->t_fbyte_out = 1; if (tp->t_fbyte_out && tp->t_fbyte_in) tp->t_flags2 |= TF2_FBYTES_COMPLETE; } if (!(inp->inp_flags & INP_DROPPED) && !(flags & PRUS_NOTREADY)) { if (flags & PRUS_MORETOCOME) tp->t_flags |= TF_MORETOCOME; error = tcp_output_nodrop(tp); if (flags & PRUS_MORETOCOME) tp->t_flags &= ~TF_MORETOCOME; } } else { /* * XXXRW: PRUS_EOF not implemented with PRUS_OOB? */ SOCK_SENDBUF_LOCK(so); if (sbspace(&so->so_snd) < -512) { SOCK_SENDBUF_UNLOCK(so); error = ENOBUFS; goto out; } /* * According to RFC961 (Assigned Protocols), * the urgent pointer points to the last octet * of urgent data. We continue, however, * to consider it to indicate the first octet * of data past the urgent section. * Otherwise, snd_up should be one lower. */ if (tp->t_acktime == 0) tp->t_acktime = ticks; sbappendstream_locked(&so->so_snd, m, flags); SOCK_SENDBUF_UNLOCK(so); m = NULL; if (nam && tp->t_state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected, * initialize window to default value, and * initialize maxseg using peer's cached MSS. */ /* * Not going to contemplate SYN|URG */ if (tp->t_flags & TF_FASTOPEN) tp->t_flags &= ~TF_FASTOPEN; #ifdef INET6 if (isipv6) error = tcp6_connect(tp, sin6, td); #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET error = tcp_connect(tp, sinp, td); #endif /* * The bind operation in tcp_connect succeeded. We * no longer want to restore the flags if later * operations fail. */ if (error == 0 || inp->inp_lport != 0) restoreflags = false; if (error != 0) { /* m is freed if PRUS_NOTREADY is unset. */ sbflush(&so->so_snd); goto out; } tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); } tp->snd_up = tp->snd_una + sbavail(&so->so_snd); if ((flags & PRUS_NOTREADY) == 0) { tp->t_flags |= TF_FORCEDATA; error = tcp_output_nodrop(tp); tp->t_flags &= ~TF_FORCEDATA; } } TCP_LOG_EVENT(tp, NULL, &inp->inp_socket->so_rcv, &inp->inp_socket->so_snd, TCP_LOG_USERSEND, error, 0, NULL, false); out: /* * In case of PRUS_NOTREADY, the caller or tcp_usr_ready() is * responsible for freeing memory. 
*/ if (m != NULL && (flags & PRUS_NOTREADY) == 0) m_freem(m); /* * If the request was unsuccessful and we changed flags, * restore the original flags. */ if (error != 0 && restoreflags) { inp->inp_vflag = vflagsav; inp->inp_inc.inc_flags = incflagsav; } tcp_bblog_pru(tp, (flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND), error); TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); error = tcp_unlock_or_drop(tp, error); NET_EPOCH_EXIT(et); return (error); } static int tcp_usr_ready(struct socket *so, struct mbuf *m, int count) { struct epoch_tracker et; struct inpcb *inp; struct tcpcb *tp; int error; inp = sotoinpcb(so); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); mb_free_notready(m, count); return (ECONNRESET); } tp = intotcpcb(inp); SOCK_SENDBUF_LOCK(so); error = sbready(&so->so_snd, m, count); SOCK_SENDBUF_UNLOCK(so); if (error) { INP_WUNLOCK(inp); return (error); } NET_EPOCH_ENTER(et); error = tcp_output_unlock(tp); NET_EPOCH_EXIT(et); return (error); } /* * Abort the TCP. Drop the connection abruptly. */ static void tcp_usr_abort(struct socket *so) { struct inpcb *inp; struct tcpcb *tp; struct epoch_tracker et; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL")); NET_EPOCH_ENTER(et); INP_WLOCK(inp); KASSERT(inp->inp_socket != NULL, ("tcp_usr_abort: inp_socket == NULL")); /* * If we still have full TCP state, and we're not dropped, drop. */ if (!(inp->inp_flags & INP_DROPPED)) { tp = intotcpcb(inp); tp = tcp_drop(tp, ECONNABORTED); if (tp == NULL) goto dropped; tcp_bblog_pru(tp, PRU_ABORT, 0); TCP_PROBE2(debug__user, tp, PRU_ABORT); } if (!(inp->inp_flags & INP_DROPPED)) { soref(so); inp->inp_flags |= INP_SOCKREF; } INP_WUNLOCK(inp); dropped: NET_EPOCH_EXIT(et); } /* * TCP socket is closed. Start friendly disconnect. */ static void tcp_usr_close(struct socket *so) { struct inpcb *inp; struct tcpcb *tp; struct epoch_tracker et; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL")); NET_EPOCH_ENTER(et); INP_WLOCK(inp); KASSERT(inp->inp_socket != NULL, ("tcp_usr_close: inp_socket == NULL")); /* * If we are still connected and we're not dropped, initiate * a disconnect. */ if (!(inp->inp_flags & INP_DROPPED)) { tp = intotcpcb(inp); if (tp->t_state != TCPS_TIME_WAIT) { tp->t_flags |= TF_CLOSED; tcp_disconnect(tp); tcp_bblog_pru(tp, PRU_CLOSE, 0); TCP_PROBE2(debug__user, tp, PRU_CLOSE); } } if (!(inp->inp_flags & INP_DROPPED)) { soref(so); inp->inp_flags |= INP_SOCKREF; } INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); } static int tcp_pru_options_support(struct tcpcb *tp, int flags) { /* * If the specific TCP stack has a pru_options * specified then it does not always support * all the PRU_XX options and we must ask it. * If the function is not specified then all * of the PRU_XX options are supported. */ int ret = 0; if (tp->t_fb->tfb_pru_options) { ret = (*tp->t_fb->tfb_pru_options)(tp, flags); } return (ret); } /* * Receive out-of-band data. 
*/ static int tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) { int error = 0; struct inpcb *inp; struct tcpcb *tp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); error = tcp_pru_options_support(tp, PRUS_OOB); if (error) { goto out; } if ((so->so_oobmark == 0 && (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || so->so_options & SO_OOBINLINE || tp->t_oobflags & TCPOOB_HADDATA) { error = EINVAL; goto out; } if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { error = EWOULDBLOCK; goto out; } m->m_len = 1; *mtod(m, caddr_t) = tp->t_iobc; if ((flags & MSG_PEEK) == 0) tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); out: tcp_bblog_pru(tp, PRU_RCVOOB, error); TCP_PROBE2(debug__user, tp, PRU_RCVOOB); INP_WUNLOCK(inp); return (error); } #ifdef INET struct protosw tcp_protosw = { .pr_type = SOCK_STREAM, .pr_protocol = IPPROTO_TCP, .pr_flags = PR_CONNREQUIRED | PR_IMPLOPCL | PR_WANTRCVD | PR_CAPATTACH, .pr_ctloutput = tcp_ctloutput, .pr_abort = tcp_usr_abort, .pr_accept = tcp_usr_accept, .pr_attach = tcp_usr_attach, .pr_bind = tcp_usr_bind, .pr_connect = tcp_usr_connect, .pr_control = in_control, .pr_detach = tcp_usr_detach, .pr_disconnect = tcp_usr_disconnect, .pr_listen = tcp_usr_listen, .pr_peeraddr = in_getpeeraddr, .pr_rcvd = tcp_usr_rcvd, .pr_rcvoob = tcp_usr_rcvoob, .pr_send = tcp_usr_send, .pr_sendfile_wait = sendfile_wait_generic, .pr_ready = tcp_usr_ready, .pr_shutdown = tcp_usr_shutdown, .pr_sockaddr = in_getsockaddr, .pr_sosetlabel = in_pcbsosetlabel, .pr_close = tcp_usr_close, }; #endif /* INET */ #ifdef INET6 struct protosw tcp6_protosw = { .pr_type = SOCK_STREAM, .pr_protocol = IPPROTO_TCP, .pr_flags = PR_CONNREQUIRED | PR_IMPLOPCL |PR_WANTRCVD | PR_CAPATTACH, .pr_ctloutput = tcp_ctloutput, .pr_abort = tcp_usr_abort, .pr_accept = tcp6_usr_accept, .pr_attach = tcp_usr_attach, .pr_bind = tcp6_usr_bind, .pr_connect = tcp6_usr_connect, .pr_control = in6_control, .pr_detach = tcp_usr_detach, .pr_disconnect = tcp_usr_disconnect, .pr_listen = tcp6_usr_listen, .pr_peeraddr = in6_mapped_peeraddr, .pr_rcvd = tcp_usr_rcvd, .pr_rcvoob = tcp_usr_rcvoob, .pr_send = tcp_usr_send, .pr_sendfile_wait = sendfile_wait_generic, .pr_ready = tcp_usr_ready, .pr_shutdown = tcp_usr_shutdown, .pr_sockaddr = in6_mapped_sockaddr, .pr_sosetlabel = in_pcbsosetlabel, .pr_close = tcp_usr_close, }; #endif /* INET6 */ #ifdef INET /* * Common subroutine to open a TCP connection to remote host specified * by struct sockaddr_in. Call in_pcbconnect() to choose local host address * and assign a local port number and install the inpcb into the hash. * Initialize connection parameters and enter SYN-SENT state. */ static int tcp_connect(struct tcpcb *tp, struct sockaddr_in *sin, struct thread *td) { struct inpcb *inp = tptoinpcb(tp); struct socket *so = tptosocket(tp); int error; NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(inp); if (__predict_false((so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING | SS_ISDISCONNECTED)) != 0)) return (EISCONN); INP_HASH_WLOCK(&V_tcbinfo); error = in_pcbconnect(inp, sin, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); if (error != 0) return (error); /* * Compute window scaling to request: * Scale to fit into sweet spot. See tcp_syncache.c. * XXX: This should move to tcp_output(). 
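 * As a worked illustration (assuming sb_max is at its stock 2 MB
 * default of 2097152 bytes): 65535 << 5 = 2097120 is still below
 * sb_max, while 65535 << 6 = 4194240 is not, so the loop below
 * settles on request_r_scale = 6.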
*/ while (tp->request_r_scale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << tp->request_r_scale) < sb_max) tp->request_r_scale++; soisconnecting(so); TCPSTAT_INC(tcps_connattempt); tcp_state_change(tp, TCPS_SYN_SENT); tp->iss = tcp_new_isn(&inp->inp_inc); if (tp->t_flags & TF_REQ_TSTMP) tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc); tcp_sendseqinit(tp); return (0); } #endif /* INET */ #ifdef INET6 static int tcp6_connect(struct tcpcb *tp, struct sockaddr_in6 *sin6, struct thread *td) { struct inpcb *inp = tptoinpcb(tp); struct socket *so = tptosocket(tp); int error; NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(inp); if (__predict_false((so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED)) != 0)) return (EISCONN); INP_HASH_WLOCK(&V_tcbinfo); error = in6_pcbconnect(inp, sin6, td->td_ucred, true); INP_HASH_WUNLOCK(&V_tcbinfo); if (error != 0) return (error); /* Compute window scaling to request. */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << tp->request_r_scale) < sb_max) tp->request_r_scale++; soisconnecting(so); TCPSTAT_INC(tcps_connattempt); tcp_state_change(tp, TCPS_SYN_SENT); tp->iss = tcp_new_isn(&inp->inp_inc); if (tp->t_flags & TF_REQ_TSTMP) tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc); tcp_sendseqinit(tp); return (0); } #endif /* INET6 */ /* * Export TCP internal state information via a struct tcp_info, based on the * Linux 2.6 API. Not ABI compatible as our constants are mapped differently * (TCP state machine, etc). We export all information using FreeBSD-native * constants -- for example, the numeric values for tcpi_state will differ * from Linux. */ void tcp_fill_info(const struct tcpcb *tp, struct tcp_info *ti) { INP_LOCK_ASSERT(tptoinpcb(tp)); bzero(ti, sizeof(*ti)); ti->tcpi_state = tp->t_state; if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; if (tp->t_flags & TF_SACK_PERMIT) ti->tcpi_options |= TCPI_OPT_SACK; if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { ti->tcpi_options |= TCPI_OPT_WSCALE; ti->tcpi_snd_wscale = tp->snd_scale; ti->tcpi_rcv_wscale = tp->rcv_scale; } switch (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) { case TF2_ECN_PERMIT: ti->tcpi_options |= TCPI_OPT_ECN; break; case TF2_ACE_PERMIT: /* FALLTHROUGH */ case TF2_ECN_PERMIT | TF2_ACE_PERMIT: ti->tcpi_options |= TCPI_OPT_ACE; break; default: break; } if (tp->t_flags & TF_FASTOPEN) ti->tcpi_options |= TCPI_OPT_TFO; ti->tcpi_rto = tp->t_rxtcur * tick; ti->tcpi_last_data_recv = ((uint32_t)ticks - tp->t_rcvtime) * tick; ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT; ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT; ti->tcpi_snd_ssthresh = tp->snd_ssthresh; ti->tcpi_snd_cwnd = tp->snd_cwnd; /* * FreeBSD-specific extension fields for tcp_info. */ ti->tcpi_rcv_space = tp->rcv_wnd; ti->tcpi_rcv_nxt = tp->rcv_nxt; ti->tcpi_snd_wnd = tp->snd_wnd; ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */ ti->tcpi_snd_nxt = tp->snd_nxt; ti->tcpi_snd_mss = tp->t_maxseg; ti->tcpi_rcv_mss = tp->t_maxseg; ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack; ti->tcpi_rcv_ooopack = tp->t_rcvoopack; ti->tcpi_snd_zerowin = tp->t_sndzerowin; ti->tcpi_snd_una = tp->snd_una; ti->tcpi_snd_max = tp->snd_max; ti->tcpi_rcv_numsacks = tp->rcv_numsacks; ti->tcpi_rcv_adv = tp->rcv_adv; ti->tcpi_dupacks = tp->t_dupacks; ti->tcpi_rttmin = tp->t_rttlow; #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) { ti->tcpi_options |= TCPI_OPT_TOE; tcp_offload_tcp_info(tp, ti); } #endif /* * AccECN related counters. 
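 * AccECN initializes its CE packet counter to 5 rather than 0, so the
 * offset applied below keeps tcpi_delivered_ce at the number of
 * CE-marked packets actually counted instead of including that bias.
 * Userland reads these fields, like the rest of the structure, with
 * something along the lines of (illustrative, error handling omitted):
 *
 *	struct tcp_info ti;
 *	socklen_t len = sizeof(ti);
 *
 *	getsockopt(s, IPPROTO_TCP, TCP_INFO, &ti, &len);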
*/ if ((tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) == (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) /* * Internal counter starts at 5 for AccECN * but 0 for RFC3168 ECN. */ ti->tcpi_delivered_ce = tp->t_scep - 5; else ti->tcpi_delivered_ce = tp->t_scep; ti->tcpi_received_ce = tp->t_rcep; } /* * tcp_ctloutput() must drop the inpcb lock before performing copyin on * socket option arguments. When it re-acquires the lock after the copy, it * has to revalidate that the connection is still valid for the socket * option. */ #define INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) do { \ INP_WLOCK(inp); \ if (inp->inp_flags & INP_DROPPED) { \ INP_WUNLOCK(inp); \ cleanup; \ return (ECONNRESET); \ } \ tp = intotcpcb(inp); \ } while(0) #define INP_WLOCK_RECHECK(inp) INP_WLOCK_RECHECK_CLEANUP((inp), /* noop */) int tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt) { struct socket *so = inp->inp_socket; struct tcpcb *tp = intotcpcb(inp); int error = 0; MPASS(sopt->sopt_dir == SOPT_SET); INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("inp_flags == %x", inp->inp_flags)); KASSERT(so != NULL, ("inp_socket == NULL")); if (sopt->sopt_level != IPPROTO_TCP) { INP_WUNLOCK(inp); #ifdef INET6 if (inp->inp_vflag & INP_IPV6PROTO) error = ip6_ctloutput(so, sopt); #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET error = ip_ctloutput(so, sopt); #endif /* * When an IP-level socket option affects TCP, pass control * down to stack tfb_tcp_ctloutput, otherwise return what * IP level returned. */ switch (sopt->sopt_level) { #ifdef INET6 case IPPROTO_IPV6: if ((inp->inp_vflag & INP_IPV6PROTO) == 0) return (error); switch (sopt->sopt_name) { case IPV6_TCLASS: /* Notify tcp stacks that care (e.g. RACK). */ break; case IPV6_USE_MIN_MTU: /* Update t_maxseg accordingly. */ break; default: return (error); } break; #endif #ifdef INET case IPPROTO_IP: switch (sopt->sopt_name) { case IP_TOS: inp->inp_ip_tos &= ~IPTOS_ECN_MASK; break; case IP_TTL: /* Notify tcp stacks that care (e.g. RACK). */ break; default: return (error); } break; #endif default: return (error); } INP_WLOCK_RECHECK(inp); } else if (sopt->sopt_name == TCP_FUNCTION_BLK) { /* * Protect the TCP option TCP_FUNCTION_BLK so * that a sub-function can *never* overwrite this. */ struct tcp_function_set fsn; struct tcp_function_block *blk; void *ptr = NULL; INP_WUNLOCK(inp); error = sooptcopyin(sopt, &fsn, sizeof fsn, sizeof fsn); if (error) return (error); INP_WLOCK_RECHECK(inp); blk = find_and_ref_tcp_functions(&fsn); if (blk == NULL) { INP_WUNLOCK(inp); return (ENOENT); } if (tp->t_fb == blk) { /* You already have this */ refcount_release(&blk->tfb_refcnt); INP_WUNLOCK(inp); return (0); } if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) { refcount_release(&blk->tfb_refcnt); INP_WUNLOCK(inp); return (ENOENT); } error = (*blk->tfb_tcp_handoff_ok)(tp); if (error) { refcount_release(&blk->tfb_refcnt); INP_WUNLOCK(inp); return (error); } /* * Ensure the new stack takes ownership with a * clean slate on peak rate threshold. */ if (tp->t_fb->tfb_tcp_timer_stop_all != NULL) tp->t_fb->tfb_tcp_timer_stop_all(tp); if (blk->tfb_tcp_fb_init) { error = (*blk->tfb_tcp_fb_init)(tp, &ptr); if (error) { /* * Release the ref count the lookup * acquired. */ refcount_release(&blk->tfb_refcnt); /* * Now there is a chance that the * init() function mucked with some * things before it failed, such as * hpts or inp_flags2 or timer granularity. * It should not of, but lets give the old * stack a chance to reset to a known good state. 
*/ if (tp->t_fb->tfb_switch_failed) { (*tp->t_fb->tfb_switch_failed)(tp); } goto err_out; } } if (tp->t_fb->tfb_tcp_fb_fini) { struct epoch_tracker et; /* * Tell the stack to cleanup with 0 i.e. * the tcb is not going away. */ NET_EPOCH_ENTER(et); (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0); NET_EPOCH_EXIT(et); } /* * Release the old refcnt, the * lookup acquired a ref on the * new one already. */ refcount_release(&tp->t_fb->tfb_refcnt); /* * Set in the new stack. */ tp->t_fb = blk; tp->t_fb_ptr = ptr; #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) { tcp_offload_ctloutput(tp, sopt->sopt_dir, sopt->sopt_name); } #endif err_out: INP_WUNLOCK(inp); return (error); } /* Pass in the INP locked, callee must unlock it. */ return (tp->t_fb->tfb_tcp_ctloutput(tp, sopt)); } static int tcp_ctloutput_get(struct inpcb *inp, struct sockopt *sopt) { struct socket *so = inp->inp_socket; struct tcpcb *tp = intotcpcb(inp); int error = 0; MPASS(sopt->sopt_dir == SOPT_GET); INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("inp_flags == %x", inp->inp_flags)); KASSERT(so != NULL, ("inp_socket == NULL")); if (sopt->sopt_level != IPPROTO_TCP) { INP_WUNLOCK(inp); #ifdef INET6 if (inp->inp_vflag & INP_IPV6PROTO) error = ip6_ctloutput(so, sopt); #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET error = ip_ctloutput(so, sopt); #endif return (error); } if (((sopt->sopt_name == TCP_FUNCTION_BLK) || (sopt->sopt_name == TCP_FUNCTION_ALIAS))) { struct tcp_function_set fsn; if (sopt->sopt_name == TCP_FUNCTION_ALIAS) { memset(&fsn, 0, sizeof(fsn)); find_tcp_function_alias(tp->t_fb, &fsn); } else { strncpy(fsn.function_set_name, tp->t_fb->tfb_tcp_block_name, TCP_FUNCTION_NAME_LEN_MAX); fsn.function_set_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0'; } fsn.pcbcnt = tp->t_fb->tfb_refcnt; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &fsn, sizeof fsn); return (error); } /* Pass in the INP locked, callee must unlock it. */ return (tp->t_fb->tfb_tcp_ctloutput(tp, sopt)); } int tcp_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); return (ECONNRESET); } if (sopt->sopt_dir == SOPT_SET) return (tcp_ctloutput_set(inp, sopt)); else if (sopt->sopt_dir == SOPT_GET) return (tcp_ctloutput_get(inp, sopt)); else panic("%s: sopt_dir $%d", __func__, sopt->sopt_dir); } /* * If this assert becomes untrue, we need to change the size of the buf * variable in tcp_default_ctloutput(). */ #ifdef CTASSERT CTASSERT(TCP_CA_NAME_MAX <= TCP_LOG_ID_LEN); CTASSERT(TCP_LOG_REASON_LEN <= TCP_LOG_ID_LEN); #endif extern struct cc_algo newreno_cc_algo; static int tcp_set_cc_mod(struct inpcb *inp, struct sockopt *sopt) { struct cc_algo *algo; void *ptr = NULL; struct tcpcb *tp; struct cc_var cc_mem; char buf[TCP_CA_NAME_MAX]; size_t mem_sz; int error; INP_WUNLOCK(inp); error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1); if (error) return(error); buf[sopt->sopt_valsize] = '\0'; CC_LIST_RLOCK(); STAILQ_FOREACH(algo, &cc_list, entries) { if (strncmp(buf, algo->name, TCP_CA_NAME_MAX) == 0) { if (algo->flags & CC_MODULE_BEING_REMOVED) { /* We can't "see" modules being unloaded */ continue; } break; } } if (algo == NULL) { CC_LIST_RUNLOCK(); return(ESRCH); } /* * With a reference the algorithm cannot be removed * so we hold a reference through the change process. 
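 * (The temporary reference taken here is dropped again at the end of
 * tcp_set_cc_mod() whether or not the switch succeeds; only the later
 * cc_attach() holds the long-lived reference for the connection.)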
*/ cc_refer(algo); CC_LIST_RUNLOCK(); if (algo->cb_init != NULL) { /* We can now pre-get the memory for the CC */ mem_sz = (*algo->cc_data_sz)(); if (mem_sz == 0) { goto no_mem_needed; } ptr = malloc(mem_sz, M_CC_MEM, M_WAITOK); } else { no_mem_needed: mem_sz = 0; ptr = NULL; } /* * Make sure its all clean and zero and also get * back the inplock. */ memset(&cc_mem, 0, sizeof(cc_mem)); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); if (ptr) free(ptr, M_CC_MEM); /* Release our temp reference */ CC_LIST_RLOCK(); cc_release(algo); CC_LIST_RUNLOCK(); return (ECONNRESET); } tp = intotcpcb(inp); if (ptr != NULL) memset(ptr, 0, mem_sz); cc_mem.tp = tp; /* * We once again hold a write lock over the tcb so it's * safe to do these things without ordering concerns. * Note here we init into stack memory. */ if (algo->cb_init != NULL) error = algo->cb_init(&cc_mem, ptr); else error = 0; /* * The CC algorithms, when given their memory * should not fail we could in theory have a * KASSERT here. */ if (error == 0) { /* * Touchdown, lets go ahead and move the * connection to the new CC module by * copying in the cc_mem after we call * the old ones cleanup (if any). */ if (CC_ALGO(tp)->cb_destroy != NULL) CC_ALGO(tp)->cb_destroy(&tp->t_ccv); /* Detach the old CC from the tcpcb */ cc_detach(tp); /* Copy in our temp memory that was inited */ memcpy(&tp->t_ccv, &cc_mem, sizeof(struct cc_var)); /* Now attach the new, which takes a reference */ cc_attach(tp, algo); /* Ok now are we where we have gotten past any conn_init? */ if (TCPS_HAVEESTABLISHED(tp->t_state) && (CC_ALGO(tp)->conn_init != NULL)) { /* Yep run the connection init for the new CC */ CC_ALGO(tp)->conn_init(&tp->t_ccv); } } else if (ptr) free(ptr, M_CC_MEM); INP_WUNLOCK(inp); /* Now lets release our temp reference */ CC_LIST_RLOCK(); cc_release(algo); CC_LIST_RUNLOCK(); return (error); } int tcp_default_ctloutput(struct tcpcb *tp, struct sockopt *sopt) { struct inpcb *inp = tptoinpcb(tp); int error, opt, optval; u_int ui; struct tcp_info ti; #ifdef KERN_TLS struct tls_enable tls; struct socket *so = inp->inp_socket; #endif char *pbuf, buf[TCP_LOG_ID_LEN]; #ifdef STATS struct statsblob *sbp; #endif size_t len; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("inp_flags == %x", inp->inp_flags)); KASSERT(inp->inp_socket != NULL, ("inp_socket == NULL")); switch (sopt->sopt_level) { #ifdef INET6 case IPPROTO_IPV6: MPASS(inp->inp_vflag & INP_IPV6PROTO); switch (sopt->sopt_name) { case IPV6_USE_MIN_MTU: tcp6_use_min_mtu(tp); /* FALLTHROUGH */ } INP_WUNLOCK(inp); return (0); #endif #ifdef INET case IPPROTO_IP: INP_WUNLOCK(inp); return (0); #endif } /* * For TCP_CCALGOOPT forward the control to CC module, for both * SOPT_SET and SOPT_GET. 
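 * For context, the module these options are forwarded to is the one
 * previously selected through tcp_set_cc_mod() above; from userland
 * that selection looks roughly like the following, with "cubic"
 * standing in for any loaded congestion control module name
 * (illustrative, error handling omitted):
 *
 *	setsockopt(s, IPPROTO_TCP, TCP_CONGESTION, "cubic",
 *	    strlen("cubic"));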
*/ switch (sopt->sopt_name) { case TCP_CCALGOOPT: INP_WUNLOCK(inp); if (sopt->sopt_valsize > CC_ALGOOPT_LIMIT) return (EINVAL); pbuf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO); error = sooptcopyin(sopt, pbuf, sopt->sopt_valsize, sopt->sopt_valsize); if (error) { free(pbuf, M_TEMP); return (error); } INP_WLOCK_RECHECK_CLEANUP(inp, free(pbuf, M_TEMP)); if (CC_ALGO(tp)->ctl_output != NULL) error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, sopt, pbuf); else error = ENOENT; INP_WUNLOCK(inp); if (error == 0 && sopt->sopt_dir == SOPT_GET) error = sooptcopyout(sopt, pbuf, sopt->sopt_valsize); free(pbuf, M_TEMP); return (error); } switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) case TCP_MD5SIG: INP_WUNLOCK(inp); if (!TCPMD5_ENABLED()) return (ENOPROTOOPT); error = TCPMD5_PCBCTL(inp, sopt); if (error) return (error); INP_WLOCK_RECHECK(inp); goto unlock_and_done; #endif /* IPSEC */ case TCP_NODELAY: case TCP_NOOPT: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); INP_WLOCK_RECHECK(inp); switch (sopt->sopt_name) { case TCP_NODELAY: opt = TF_NODELAY; break; case TCP_NOOPT: opt = TF_NOOPT; break; default: opt = 0; /* dead code to fool gcc */ break; } if (optval) tp->t_flags |= opt; else tp->t_flags &= ~opt; unlock_and_done: #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) { tcp_offload_ctloutput(tp, sopt->sopt_dir, sopt->sopt_name); } #endif INP_WUNLOCK(inp); break; case TCP_NOPUSH: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); INP_WLOCK_RECHECK(inp); if (optval) tp->t_flags |= TF_NOPUSH; else if (tp->t_flags & TF_NOPUSH) { tp->t_flags &= ~TF_NOPUSH; if (TCPS_HAVEESTABLISHED(tp->t_state)) { struct epoch_tracker et; NET_EPOCH_ENTER(et); error = tcp_output_nodrop(tp); NET_EPOCH_EXIT(et); } } goto unlock_and_done; case TCP_REMOTE_UDP_ENCAPS_PORT: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); if ((optval < TCP_TUNNELING_PORT_MIN) || (optval > TCP_TUNNELING_PORT_MAX)) { /* Its got to be in range */ return (EINVAL); } if ((V_tcp_udp_tunneling_port == 0) && (optval != 0)) { /* You have to have enabled a UDP tunneling port first */ return (EINVAL); } INP_WLOCK_RECHECK(inp); if (tp->t_state != TCPS_CLOSED) { /* You can't change after you are connected */ error = EINVAL; } else { /* Ok we are all good set the port */ tp->t_port = htons(optval); } goto unlock_and_done; case TCP_MAXSEG: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); INP_WLOCK_RECHECK(inp); if (optval > 0 && optval <= tp->t_maxseg && optval + 40 >= V_tcp_minmss) { tp->t_maxseg = optval; if (tp->t_maxseg < V_tcp_mssdflt) { /* * The MSS is so small we should not process incoming * SACK's since we are subject to attack in such a * case. 
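 * (Accepting SACK blocks from a peer that advertised such a tiny MSS
 * would let it force a disproportionate amount of per-segment
 * bookkeeping on us, hence SACK processing is prohibited instead.)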
*/ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; } else { tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; } } else error = EINVAL; goto unlock_and_done; case TCP_INFO: INP_WUNLOCK(inp); error = EINVAL; break; case TCP_STATS: INP_WUNLOCK(inp); #ifdef STATS error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); if (optval > 0) sbp = stats_blob_alloc( V_tcp_perconn_stats_dflt_tpl, 0); else sbp = NULL; INP_WLOCK_RECHECK(inp); if ((tp->t_stats != NULL && sbp == NULL) || (tp->t_stats == NULL && sbp != NULL)) { struct statsblob *t = tp->t_stats; tp->t_stats = sbp; sbp = t; } INP_WUNLOCK(inp); stats_blob_destroy(sbp); #else return (EOPNOTSUPP); #endif /* !STATS */ break; case TCP_CONGESTION: error = tcp_set_cc_mod(inp, sopt); break; case TCP_REUSPORT_LB_NUMA: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); INP_WLOCK_RECHECK(inp); if (!error) error = in_pcblbgroup_numa(inp, optval); INP_WUNLOCK(inp); break; #ifdef KERN_TLS case TCP_TXTLS_ENABLE: INP_WUNLOCK(inp); error = ktls_copyin_tls_enable(sopt, &tls); if (error != 0) break; error = ktls_enable_tx(so, &tls); ktls_cleanup_tls_enable(&tls); break; case TCP_TXTLS_MODE: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); if (error != 0) return (error); INP_WLOCK_RECHECK(inp); error = ktls_set_tx_mode(so, ui); INP_WUNLOCK(inp); break; case TCP_RXTLS_ENABLE: INP_WUNLOCK(inp); error = ktls_copyin_tls_enable(sopt, &tls); if (error != 0) break; error = ktls_enable_rx(so, &tls); ktls_cleanup_tls_enable(&tls); break; #endif case TCP_MAXUNACKTIME: case TCP_KEEPIDLE: case TCP_KEEPINTVL: case TCP_KEEPINIT: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); if (error) return (error); if (ui > (UINT_MAX / hz)) { error = EINVAL; break; } ui *= hz; INP_WLOCK_RECHECK(inp); switch (sopt->sopt_name) { case TCP_MAXUNACKTIME: tp->t_maxunacktime = ui; break; case TCP_KEEPIDLE: tp->t_keepidle = ui; /* * XXX: better check current remaining * timeout and "merge" it with new value. */ if ((tp->t_state > TCPS_LISTEN) && (tp->t_state <= TCPS_CLOSING)) tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); break; case TCP_KEEPINTVL: tp->t_keepintvl = ui; if ((tp->t_state == TCPS_FIN_WAIT_2) && (TP_MAXIDLE(tp) > 0)) tcp_timer_activate(tp, TT_2MSL, TP_MAXIDLE(tp)); break; case TCP_KEEPINIT: tp->t_keepinit = ui; if (tp->t_state == TCPS_SYN_RECEIVED || tp->t_state == TCPS_SYN_SENT) tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); break; } goto unlock_and_done; case TCP_KEEPCNT: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); if (error) return (error); INP_WLOCK_RECHECK(inp); tp->t_keepcnt = ui; if ((tp->t_state == TCPS_FIN_WAIT_2) && (TP_MAXIDLE(tp) > 0)) tcp_timer_activate(tp, TT_2MSL, TP_MAXIDLE(tp)); goto unlock_and_done; case TCP_FASTOPEN: { struct tcp_fastopen tfo_optval; INP_WUNLOCK(inp); if (!V_tcp_fastopen_client_enable && !V_tcp_fastopen_server_enable) return (EPERM); error = sooptcopyin(sopt, &tfo_optval, sizeof(tfo_optval), sizeof(int)); if (error) return (error); INP_WLOCK_RECHECK(inp); if ((tp->t_state != TCPS_CLOSED) && (tp->t_state != TCPS_LISTEN)) { error = EINVAL; goto unlock_and_done; } if (tfo_optval.enable) { if (tp->t_state == TCPS_LISTEN) { if (!V_tcp_fastopen_server_enable) { error = EPERM; goto unlock_and_done; } if (tp->t_tfo_pending == NULL) tp->t_tfo_pending = tcp_fastopen_alloc_counter(); } else { /* * If a pre-shared key was provided, * stash it in the client cookie * field of the tcpcb for use during * connect. 
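 * An application supplies that key by passing the full structure, for
 * example (illustrative, error handling omitted):
 *
 *	struct tcp_fastopen tfo = { .enable = 1 };
 *
 *	memcpy(tfo.psk, key, TCP_FASTOPEN_PSK_LEN);
 *	setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &tfo, sizeof(tfo));
 *
 * whereas passing just a plain int enables TFO without a PSK, as the
 * size check below reflects.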
*/ if (sopt->sopt_valsize == sizeof(tfo_optval)) { memcpy(tp->t_tfo_cookie.client, tfo_optval.psk, TCP_FASTOPEN_PSK_LEN); tp->t_tfo_client_cookie_len = TCP_FASTOPEN_PSK_LEN; } } tp->t_flags |= TF_FASTOPEN; } else tp->t_flags &= ~TF_FASTOPEN; goto unlock_and_done; } #ifdef TCP_BLACKBOX case TCP_LOG: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) return (error); INP_WLOCK_RECHECK(inp); error = tcp_log_state_change(tp, optval); goto unlock_and_done; case TCP_LOGBUF: INP_WUNLOCK(inp); error = EINVAL; break; case TCP_LOGID: INP_WUNLOCK(inp); error = sooptcopyin(sopt, buf, TCP_LOG_ID_LEN - 1, 0); if (error) break; buf[sopt->sopt_valsize] = '\0'; INP_WLOCK_RECHECK(inp); error = tcp_log_set_id(tp, buf); /* tcp_log_set_id() unlocks the INP. */ break; case TCP_LOGDUMP: case TCP_LOGDUMPID: INP_WUNLOCK(inp); error = sooptcopyin(sopt, buf, TCP_LOG_REASON_LEN - 1, 0); if (error) break; buf[sopt->sopt_valsize] = '\0'; INP_WLOCK_RECHECK(inp); if (sopt->sopt_name == TCP_LOGDUMP) { error = tcp_log_dump_tp_logbuf(tp, buf, M_WAITOK, true); INP_WUNLOCK(inp); } else { tcp_log_dump_tp_bucket_logbufs(tp, buf); /* * tcp_log_dump_tp_bucket_logbufs() drops the * INP lock. */ } break; #endif default: INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } break; case SOPT_GET: tp = intotcpcb(inp); switch (sopt->sopt_name) { #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) case TCP_MD5SIG: INP_WUNLOCK(inp); if (!TCPMD5_ENABLED()) return (ENOPROTOOPT); error = TCPMD5_PCBCTL(inp, sopt); break; #endif case TCP_NODELAY: optval = tp->t_flags & TF_NODELAY; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_MAXSEG: optval = tp->t_maxseg; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_REMOTE_UDP_ENCAPS_PORT: optval = ntohs(tp->t_port); INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_NOOPT: optval = tp->t_flags & TF_NOOPT; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_NOPUSH: optval = tp->t_flags & TF_NOPUSH; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_INFO: tcp_fill_info(tp, &ti); INP_WUNLOCK(inp); error = sooptcopyout(sopt, &ti, sizeof ti); break; case TCP_STATS: { #ifdef STATS int nheld; TYPEOF_MEMBER(struct statsblob, flags) sbflags = 0; error = 0; socklen_t outsbsz = sopt->sopt_valsize; if (tp->t_stats == NULL) error = ENOENT; else if (outsbsz >= tp->t_stats->cursz) outsbsz = tp->t_stats->cursz; else if (outsbsz >= sizeof(struct statsblob)) outsbsz = sizeof(struct statsblob); else error = EINVAL; INP_WUNLOCK(inp); if (error) break; sbp = sopt->sopt_val; nheld = atop(round_page(((vm_offset_t)sbp) + (vm_size_t)outsbsz) - trunc_page((vm_offset_t)sbp)); vm_page_t ma[nheld]; if (vm_fault_quick_hold_pages( &curproc->p_vmspace->vm_map, (vm_offset_t)sbp, outsbsz, VM_PROT_READ | VM_PROT_WRITE, ma, nheld) < 0) { error = EFAULT; break; } if ((error = copyin_nofault(&(sbp->flags), &sbflags, SIZEOF_MEMBER(struct statsblob, flags)))) goto unhold; INP_WLOCK_RECHECK(inp); error = stats_blob_snapshot(&sbp, outsbsz, tp->t_stats, sbflags | SB_CLONE_USRDSTNOFAULT); INP_WUNLOCK(inp); sopt->sopt_valsize = outsbsz; unhold: vm_page_unhold_pages(ma, nheld); #else INP_WUNLOCK(inp); error = EOPNOTSUPP; #endif /* !STATS */ break; } case TCP_CONGESTION: len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX); INP_WUNLOCK(inp); error = sooptcopyout(sopt, buf, len + 1); break; case TCP_MAXUNACKTIME: case TCP_KEEPIDLE: 
case TCP_KEEPINTVL: case TCP_KEEPINIT: case TCP_KEEPCNT: switch (sopt->sopt_name) { case TCP_MAXUNACKTIME: ui = TP_MAXUNACKTIME(tp) / hz; break; case TCP_KEEPIDLE: ui = TP_KEEPIDLE(tp) / hz; break; case TCP_KEEPINTVL: ui = TP_KEEPINTVL(tp) / hz; break; case TCP_KEEPINIT: ui = TP_KEEPINIT(tp) / hz; break; case TCP_KEEPCNT: ui = TP_KEEPCNT(tp); break; } INP_WUNLOCK(inp); error = sooptcopyout(sopt, &ui, sizeof(ui)); break; case TCP_FASTOPEN: optval = tp->t_flags & TF_FASTOPEN; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; #ifdef TCP_BLACKBOX case TCP_LOG: optval = tcp_get_bblog_state(tp); INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof(optval)); break; case TCP_LOGBUF: /* tcp_log_getlogbuf() does INP_WUNLOCK(inp) */ error = tcp_log_getlogbuf(sopt, tp); break; case TCP_LOGID: len = tcp_log_get_id(tp, buf); INP_WUNLOCK(inp); error = sooptcopyout(sopt, buf, len + 1); break; case TCP_LOGDUMP: case TCP_LOGDUMPID: INP_WUNLOCK(inp); error = EINVAL; break; #endif #ifdef KERN_TLS case TCP_TXTLS_MODE: error = ktls_get_tx_mode(so, &optval); INP_WUNLOCK(inp); if (error == 0) error = sooptcopyout(sopt, &optval, sizeof(optval)); break; case TCP_RXTLS_MODE: error = ktls_get_rx_mode(so, &optval); INP_WUNLOCK(inp); if (error == 0) error = sooptcopyout(sopt, &optval, sizeof(optval)); break; #endif default: INP_WUNLOCK(inp); error = ENOPROTOOPT; break; } break; } return (error); } #undef INP_WLOCK_RECHECK #undef INP_WLOCK_RECHECK_CLEANUP /* * Initiate (or continue) disconnect. * If embryonic state, just send reset (once). * If in ``let data drain'' option and linger null, just drop. * Otherwise (hard), mark socket disconnecting and drop * current input data; switch states based on user close, and * send segment to peer (with FIN). */ static void tcp_disconnect(struct tcpcb *tp) { struct inpcb *inp = tptoinpcb(tp); struct socket *so = tptosocket(tp); NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(inp); /* * Neither tcp_close() nor tcp_drop() should return NULL, as the * socket is still open. */ if (tp->t_state < TCPS_ESTABLISHED && !(tp->t_state > TCPS_LISTEN && (tp->t_flags & TF_FASTOPEN))) { tp = tcp_close(tp); KASSERT(tp != NULL, ("tcp_disconnect: tcp_close() returned NULL")); } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) { tp = tcp_drop(tp, 0); KASSERT(tp != NULL, ("tcp_disconnect: tcp_drop() returned NULL")); } else { soisdisconnecting(so); sbflush(&so->so_rcv); tcp_usrclosed(tp); if (!(inp->inp_flags & INP_DROPPED)) /* Ignore stack's drop request, we already at it. */ (void)tcp_output_nodrop(tp); } } /* * User issued close, and wish to trail through shutdown states: * if never received SYN, just forget it. If got a SYN from peer, * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. * If already got a FIN from peer, then almost done; go to LAST_ACK * state. In all other cases, have already sent FIN to peer (e.g. * after PRU_SHUTDOWN), and just have to play tedious game waiting * for peer to send FIN or not respond to keep-alives, etc. * We can let the user exit from the close as soon as the FIN is acked. */ static void tcp_usrclosed(struct tcpcb *tp) { NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(tptoinpcb(tp)); switch (tp->t_state) { case TCPS_LISTEN: #ifdef TCP_OFFLOAD tcp_offload_listen_stop(tp); #endif tcp_state_change(tp, TCPS_CLOSED); /* FALLTHROUGH */ case TCPS_CLOSED: tp = tcp_close(tp); /* * tcp_close() should never return NULL here as the socket is * still open. 
*/ KASSERT(tp != NULL, ("tcp_usrclosed: tcp_close() returned NULL")); break; case TCPS_SYN_SENT: case TCPS_SYN_RECEIVED: tp->t_flags |= TF_NEEDFIN; break; case TCPS_ESTABLISHED: tcp_state_change(tp, TCPS_FIN_WAIT_1); break; case TCPS_CLOSE_WAIT: tcp_state_change(tp, TCPS_LAST_ACK); break; } if (tp->t_acktime == 0) tp->t_acktime = ticks; if (tp->t_state >= TCPS_FIN_WAIT_2) { tcp_free_sackholes(tp); soisdisconnected(tptosocket(tp)); /* Prevent the connection hanging in FIN_WAIT_2 forever. */ if (tp->t_state == TCPS_FIN_WAIT_2) { int timeout; timeout = (tcp_fast_finwait2_recycle) ? tcp_finwait2_timeout : TP_MAXIDLE(tp); tcp_timer_activate(tp, TT_2MSL, timeout); } } } #ifdef DDB static void db_print_indent(int indent) { int i; for (i = 0; i < indent; i++) db_printf(" "); } static void db_print_tstate(int t_state) { switch (t_state) { case TCPS_CLOSED: db_printf("TCPS_CLOSED"); return; case TCPS_LISTEN: db_printf("TCPS_LISTEN"); return; case TCPS_SYN_SENT: db_printf("TCPS_SYN_SENT"); return; case TCPS_SYN_RECEIVED: db_printf("TCPS_SYN_RECEIVED"); return; case TCPS_ESTABLISHED: db_printf("TCPS_ESTABLISHED"); return; case TCPS_CLOSE_WAIT: db_printf("TCPS_CLOSE_WAIT"); return; case TCPS_FIN_WAIT_1: db_printf("TCPS_FIN_WAIT_1"); return; case TCPS_CLOSING: db_printf("TCPS_CLOSING"); return; case TCPS_LAST_ACK: db_printf("TCPS_LAST_ACK"); return; case TCPS_FIN_WAIT_2: db_printf("TCPS_FIN_WAIT_2"); return; case TCPS_TIME_WAIT: db_printf("TCPS_TIME_WAIT"); return; default: db_printf("unknown"); return; } } static void db_print_tflags(u_int t_flags) { int comma; comma = 0; if (t_flags & TF_ACKNOW) { db_printf("%sTF_ACKNOW", comma ? ", " : ""); comma = 1; } if (t_flags & TF_DELACK) { db_printf("%sTF_DELACK", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NODELAY) { db_printf("%sTF_NODELAY", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NOOPT) { db_printf("%sTF_NOOPT", comma ? ", " : ""); comma = 1; } if (t_flags & TF_SENTFIN) { db_printf("%sTF_SENTFIN", comma ? ", " : ""); comma = 1; } if (t_flags & TF_REQ_SCALE) { db_printf("%sTF_REQ_SCALE", comma ? ", " : ""); comma = 1; } if (t_flags & TF_RCVD_SCALE) { db_printf("%sTF_RECVD_SCALE", comma ? ", " : ""); comma = 1; } if (t_flags & TF_REQ_TSTMP) { db_printf("%sTF_REQ_TSTMP", comma ? ", " : ""); comma = 1; } if (t_flags & TF_RCVD_TSTMP) { db_printf("%sTF_RCVD_TSTMP", comma ? ", " : ""); comma = 1; } if (t_flags & TF_SACK_PERMIT) { db_printf("%sTF_SACK_PERMIT", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NEEDSYN) { db_printf("%sTF_NEEDSYN", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NEEDFIN) { db_printf("%sTF_NEEDFIN", comma ? ", " : ""); comma = 1; } if (t_flags & TF_NOPUSH) { db_printf("%sTF_NOPUSH", comma ? ", " : ""); comma = 1; } if (t_flags & TF_PREVVALID) { db_printf("%sTF_PREVVALID", comma ? ", " : ""); comma = 1; } if (t_flags & TF_WAKESOR) { db_printf("%sTF_WAKESOR", comma ? ", " : ""); comma = 1; } if (t_flags & TF_GPUTINPROG) { db_printf("%sTF_GPUTINPROG", comma ? ", " : ""); comma = 1; } if (t_flags & TF_MORETOCOME) { db_printf("%sTF_MORETOCOME", comma ? ", " : ""); comma = 1; } if (t_flags & TF_SONOTCONN) { db_printf("%sTF_SONOTCONN", comma ? ", " : ""); comma = 1; } if (t_flags & TF_LASTIDLE) { db_printf("%sTF_LASTIDLE", comma ? ", " : ""); comma = 1; } if (t_flags & TF_RXWIN0SENT) { db_printf("%sTF_RXWIN0SENT", comma ? ", " : ""); comma = 1; } if (t_flags & TF_FASTRECOVERY) { db_printf("%sTF_FASTRECOVERY", comma ? ", " : ""); comma = 1; } if (t_flags & TF_WASFRECOVERY) { db_printf("%sTF_WASFRECOVERY", comma ? 
", " : ""); comma = 1; } if (t_flags & TF_SIGNATURE) { db_printf("%sTF_SIGNATURE", comma ? ", " : ""); comma = 1; } if (t_flags & TF_FORCEDATA) { db_printf("%sTF_FORCEDATA", comma ? ", " : ""); comma = 1; } if (t_flags & TF_TSO) { db_printf("%sTF_TSO", comma ? ", " : ""); comma = 1; } if (t_flags & TF_TOE) { db_printf("%sTF_TOE", comma ? ", " : ""); comma = 1; } if (t_flags & TF_CLOSED) { db_printf("%sTF_CLOSED", comma ? ", " : ""); comma = 1; } if (t_flags & TF_SENTSYN) { db_printf("%sTF_SENTSYN", comma ? ", " : ""); comma = 1; } if (t_flags & TF_LRD) { db_printf("%sTF_LRD", comma ? ", " : ""); comma = 1; } if (t_flags & TF_CONGRECOVERY) { db_printf("%sTF_CONGRECOVERY", comma ? ", " : ""); comma = 1; } if (t_flags & TF_WASCRECOVERY) { db_printf("%sTF_WASCRECOVERY", comma ? ", " : ""); comma = 1; } if (t_flags & TF_FASTOPEN) { db_printf("%sTF_FASTOPEN", comma ? ", " : ""); comma = 1; } } static void db_print_tflags2(u_int t_flags2) { int comma; comma = 0; if (t_flags2 & TF2_PLPMTU_BLACKHOLE) { db_printf("%sTF2_PLPMTU_BLACKHOLE", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_PLPMTU_PMTUD) { db_printf("%sTF2_PLPMTU_PMTUD", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_PLPMTU_MAXSEGSNT) { db_printf("%sTF2_PLPMTU_MAXSEGSNT", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_LOG_AUTO) { db_printf("%sTF2_LOG_AUTO", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_DROP_AF_DATA) { db_printf("%sTF2_DROP_AF_DATA", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_ECN_PERMIT) { db_printf("%sTF2_ECN_PERMIT", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_ECN_SND_CWR) { db_printf("%sTF2_ECN_SND_CWR", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_ECN_SND_ECE) { db_printf("%sTF2_ECN_SND_ECE", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_ACE_PERMIT) { db_printf("%sTF2_ACE_PERMIT", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_HPTS_CPU_SET) { db_printf("%sTF2_HPTS_CPU_SET", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_FBYTES_COMPLETE) { db_printf("%sTF2_FBYTES_COMPLETE", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_ECN_USE_ECT1) { db_printf("%sTF2_ECN_USE_ECT1", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_TCP_ACCOUNTING) { db_printf("%sTF2_TCP_ACCOUNTING", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_HPTS_CALLS) { db_printf("%sTF2_HPTS_CALLS", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_MBUF_L_ACKS) { db_printf("%sTF2_MBUF_L_ACKS", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_MBUF_ACKCMP) { db_printf("%sTF2_MBUF_ACKCMP", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_SUPPORTS_MBUFQ) { db_printf("%sTF2_SUPPORTS_MBUFQ", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_MBUF_QUEUE_READY) { db_printf("%sTF2_MBUF_QUEUE_READY", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_DONT_SACK_QUEUE) { db_printf("%sTF2_DONT_SACK_QUEUE", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_CANNOT_DO_ECN) { db_printf("%sTF2_CANNOT_DO_ECN", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_PROC_SACK_PROHIBIT) { db_printf("%sTF2_PROC_SACK_PROHIBIT", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_IPSEC_TSO) { db_printf("%sTF2_IPSEC_TSO", comma ? ", " : ""); comma = 1; } if (t_flags2 & TF2_NO_ISS_CHECK) { db_printf("%sTF2_NO_ISS_CHECK", comma ? ", " : ""); comma = 1; } } static void db_print_toobflags(char t_oobflags) { int comma; comma = 0; if (t_oobflags & TCPOOB_HAVEDATA) { db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : ""); comma = 1; } if (t_oobflags & TCPOOB_HADDATA) { db_printf("%sTCPOOB_HADDATA", comma ? 
", " : ""); comma = 1; } } static void db_print_bblog_state(int state) { switch (state) { case TCP_LOG_STATE_RATIO_OFF: db_printf("TCP_LOG_STATE_RATIO_OFF"); break; case TCP_LOG_STATE_CLEAR: db_printf("TCP_LOG_STATE_CLEAR"); break; case TCP_LOG_STATE_OFF: db_printf("TCP_LOG_STATE_OFF"); break; case TCP_LOG_STATE_TAIL: db_printf("TCP_LOG_STATE_TAIL"); break; case TCP_LOG_STATE_HEAD: db_printf("TCP_LOG_STATE_HEAD"); break; case TCP_LOG_STATE_HEAD_AUTO: db_printf("TCP_LOG_STATE_HEAD_AUTO"); break; case TCP_LOG_STATE_CONTINUAL: db_printf("TCP_LOG_STATE_CONTINUAL"); break; case TCP_LOG_STATE_TAIL_AUTO: db_printf("TCP_LOG_STATE_TAIL_AUTO"); break; case TCP_LOG_VIA_BBPOINTS: db_printf("TCP_LOG_STATE_BBPOINTS"); break; default: db_printf("UNKNOWN(%d)", state); break; } } static void -db_print_tcpcb(struct tcpcb *tp, const char *name, int indent) +db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog) { db_print_indent(indent); db_printf("%s at %p\n", name, tp); indent += 2; db_print_indent(indent); db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n", TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks); db_print_indent(indent); db_printf("t_callout: %p t_timers: %p\n", &tp->t_callout, &tp->t_timers); db_print_indent(indent); db_printf("t_state: %d (", tp->t_state); db_print_tstate(tp->t_state); db_printf(")\n"); db_print_indent(indent); db_printf("t_flags: 0x%x (", tp->t_flags); db_print_tflags(tp->t_flags); db_printf(")\n"); db_print_indent(indent); db_printf("t_flags2: 0x%x (", tp->t_flags2); db_print_tflags2(tp->t_flags2); db_printf(")\n"); db_print_indent(indent); db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: 0x%08x\n", tp->snd_una, tp->snd_max, tp->snd_nxt); db_print_indent(indent); db_printf("snd_up: 0x%08x snd_wl1: 0x%08x snd_wl2: 0x%08x\n", tp->snd_up, tp->snd_wl1, tp->snd_wl2); db_print_indent(indent); db_printf("iss: 0x%08x irs: 0x%08x rcv_nxt: 0x%08x\n", tp->iss, tp->irs, tp->rcv_nxt); db_print_indent(indent); db_printf("rcv_adv: 0x%08x rcv_wnd: %u rcv_up: 0x%08x\n", tp->rcv_adv, tp->rcv_wnd, tp->rcv_up); db_print_indent(indent); db_printf("snd_wnd: %u snd_cwnd: %u\n", tp->snd_wnd, tp->snd_cwnd); db_print_indent(indent); db_printf("snd_ssthresh: %u snd_recover: " "0x%08x\n", tp->snd_ssthresh, tp->snd_recover); db_print_indent(indent); db_printf("t_rcvtime: %u t_startime: %u\n", tp->t_rcvtime, tp->t_starttime); db_print_indent(indent); db_printf("t_rttime: %u t_rtsq: 0x%08x\n", tp->t_rtttime, tp->t_rtseq); db_print_indent(indent); db_printf("t_rxtcur: %d t_maxseg: %u t_srtt: %d\n", tp->t_rxtcur, tp->t_maxseg, tp->t_srtt); db_print_indent(indent); db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin); db_print_indent(indent); db_printf("t_rttupdated: %u max_sndwnd: %u t_softerror: %d\n", tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror); db_print_indent(indent); db_printf("t_oobflags: 0x%x (", tp->t_oobflags); db_print_toobflags(tp->t_oobflags); db_printf(") t_iobc: 0x%02x\n", tp->t_iobc); db_print_indent(indent); db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n", tp->snd_scale, tp->rcv_scale, tp->request_r_scale); db_print_indent(indent); db_printf("ts_recent: %u ts_recent_age: %u\n", tp->ts_recent, tp->ts_recent_age); db_print_indent(indent); db_printf("ts_offset: %u last_ack_sent: 0x%08x snd_cwnd_prev: " "%u\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev); db_print_indent(indent); db_printf("snd_ssthresh_prev: %u snd_recover_prev: 0x%08x " "t_badrxtwin: %u\n", 
	    tp->snd_ssthresh_prev, tp->snd_recover_prev, tp->t_badrxtwin);
	db_print_indent(indent);
	db_printf("snd_numholes: %d snd_holes first: %p\n", tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
	db_print_indent(indent);
	db_printf("snd_fack: 0x%08x rcv_numsacks: %d\n", tp->snd_fack, tp->rcv_numsacks);
	/* Skip sackblks, sackhint. */
	db_print_indent(indent);
	db_printf("t_rttlow: %d rfbuf_ts: %u rfbuf_cnt: %d\n", tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
	db_print_indent(indent);
	db_printf("t_fb.tfb_tcp_block_name: %s\n", tp->t_fb->tfb_tcp_block_name);
	db_print_indent(indent);
	db_printf("t_cc.name: %s\n", tp->t_cc->name);
	db_print_indent(indent);
	db_printf("_t_logstate: %d (", tp->_t_logstate);
	db_print_bblog_state(tp->_t_logstate);
	db_printf(")\n");
	db_print_indent(indent);
	db_printf("t_lognum: %d t_loglimit: %d t_logsn: %u\n", tp->t_lognum, tp->t_loglimit, tp->t_logsn);
+
+	if (show_bblog) {
+		db_print_bblog_entries(&tp->t_logs, indent);
+	}
}

DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
{
	struct tcpcb *tp;
+	bool show_bblog;

	if (!have_addr) {
		db_printf("usage: show tcpcb \n");
		return;
	}
+	show_bblog = strchr(modif, 'b') != NULL;
	tp = (struct tcpcb *)addr;
-	db_print_tcpcb(tp, "tcpcb", 0);
+	db_print_tcpcb(tp, "tcpcb", 0, show_bblog);
}

DB_SHOW_ALL_COMMAND(tcpcbs, db_show_all_tcpcbs)
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct inpcb *inp;
-	bool only_locked;
+	bool only_locked, show_bblog;

	only_locked = strchr(modif, 'l') != NULL;
+	show_bblog = strchr(modif, 'b') != NULL;
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		CK_LIST_FOREACH(inp, &V_tcbinfo.ipi_listhead, inp_list) {
			if (only_locked && inp->inp_lock.rw_lock == RW_UNLOCKED)
				continue;
-			db_print_tcpcb(intotcpcb(inp), "tcpcb", 0);
+			db_print_tcpcb(intotcpcb(inp), "tcpcb", 0, show_bblog);
			if (db_pager_quit)
				break;
		}
		CURVNET_RESTORE();
		if (db_pager_quit)
			break;
	}
}
#endif
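For reference, the new modifier is used from the debugger prompt by appending
it to the existing commands, for example (the address is illustrative):

	db> show tcpcb/b 0xfffff80012345678
	db> show all tcpcbs/lb

where "b" requests the black box log entries and "l", for the "all" variant,
keeps the existing filter that only prints locked PCBs.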