Index: head/share/man/man4/ddb.4
===================================================================
--- head/share/man/man4/ddb.4	(revision 352111)
+++ head/share/man/man4/ddb.4	(revision 352112)
@@ -1,1597 +1,1611 @@
 .\"
 .\" Mach Operating System
 .\" Copyright (c) 1991,1990 Carnegie Mellon University
 .\" Copyright (c) 2007 Robert N. M. Watson
 .\" All Rights Reserved.
 .\"
 .\" Permission to use, copy, modify and distribute this software and its
 .\" documentation is hereby granted, provided that both the copyright
 .\" notice and this permission notice appear in all copies of the
 .\" software, derivative works or modified versions, and any portions
 .\" thereof, and that both notices appear in supporting documentation.
 .\"
 .\" CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 .\" CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 .\" ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 .\"
 .\" Carnegie Mellon requests users of this software to return to
 .\"
 .\"  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 .\"  School of Computer Science
 .\"  Carnegie Mellon University
 .\"  Pittsburgh PA 15213-3890
 .\"
 .\" any improvements or extensions that they make and grant Carnegie Mellon
 .\" the rights to redistribute these changes.
 .\"
 .\" changed a \# to #, since groff choked on it.
 .\"
 .\" HISTORY
 .\" ddb.4,v
 .\" Revision 1.1  1993/07/15  18:41:02  brezak
 .\" Man page for DDB
 .\"
 .\" Revision 2.6  92/04/08  08:52:57  rpd
 .\" 	Changes from OSF.
 .\" 	[92/01/17  14:19:22  jsb]
 .\" 	Changes for OSF debugger modifications.
 .\" 	[91/12/12            tak]
 .\"
 .\" Revision 2.5  91/06/25  13:50:22  rpd
 .\" 	Added some watchpoint explanation.
 .\" 	[91/06/25            rpd]
 .\"
 .\" Revision 2.4  91/06/17  15:47:31  jsb
 .\" 	Added documentation for continue/c, match, search, and watchpoints.
 .\" 	I've not actually explained what a watchpoint is; maybe Rich can
 .\" 	do that (hint, hint).
 .\" 	[91/06/17  10:58:08  jsb]
 .\"
 .\" Revision 2.3  91/05/14  17:04:23  mrt
 .\" 	Correcting copyright
 .\"
 .\" Revision 2.2  91/02/14  14:10:06  mrt
 .\" 	Changed to new Mach copyright
 .\" 	[91/02/12  18:10:12  mrt]
 .\"
 .\" Revision 2.2  90/08/30  14:23:15  dbg
 .\" 	Created.
 .\" 	[90/08/30            dbg]
 .\"
 .\" $FreeBSD$
 .\"
-.Dd November 30, 2018
+.Dd September 9, 2019
 .Dt DDB 4
 .Os
 .Sh NAME
 .Nm ddb
 .Nd interactive kernel debugger
 .Sh SYNOPSIS
 In order to enable kernel debugging facilities include:
 .Bd -ragged -offset indent
 .Cd options KDB
 .Cd options DDB
 .Ed
 .Pp
 To prevent activation of the debugger on kernel
 .Xr panic 9 :
 .Bd -ragged -offset indent
 .Cd options KDB_UNATTENDED
 .Ed
 .Pp
 In order to print a stack trace of the current thread on the console
 for a panic:
 .Bd -ragged -offset indent
 .Cd options KDB_TRACE
 .Ed
 .Pp
 To print the numerical value of symbols in addition to the symbolic
 representation, define:
 .Bd -ragged -offset indent
 .Cd options DDB_NUMSYM
 .Ed
 .Pp
 To enable the
 .Xr gdb 1
 backend, so that remote debugging with
 .Xr kgdb 1
 is possible, include:
 .Bd -ragged -offset indent
 .Cd options GDB
 .Ed
 .Sh DESCRIPTION
 The
 .Nm
 kernel debugger is an interactive debugger with a syntax inspired by
 .Xr gdb 1 .
 If linked into the running kernel,
 it can be invoked locally with the
 .Ql debug
 .Xr keymap 5
 action, usually mapped to Ctrl+Alt+Esc, or by setting the
 .Va debug.kdb.enter
 sysctl to 1.
 The debugger is also invoked on kernel
 .Xr panic 9
 if the
 .Va debug.debugger_on_panic
 .Xr sysctl 8
 MIB variable is set non-zero,
 which is the default
 unless the
 .Dv KDB_UNATTENDED
 option is specified.
 .Pp
 The current location is called
 .Va dot .
 The
 .Va dot
 is displayed with
 a hexadecimal format at a prompt.
 The commands
 .Ic examine
 and
 .Ic write
 update
 .Va dot
 to the address of the last line
 examined or the last location modified, and set
 .Va next
 to the address of
 the next location to be examined or changed.
 Other commands do not change
 .Va dot ,
 and set
 .Va next
 to be the same as
 .Va dot .
 .Pp
 The general command syntax is:
 .Ar command Ns Op Li / Ns Ar modifier
 .Oo Ar addr Oc Ns Op , Ns Ar count
 .Pp
 A blank line repeats the previous command from the address
 .Va next
 with
 count 1 and no modifiers.
 Specifying
 .Ar addr
 sets
 .Va dot
 to the address.
 Omitting
 .Ar addr
 uses
 .Va dot .
 A missing
 .Ar count
 is taken
 to be 1 for printing commands or infinity for stack traces.
 A
 .Ar count
 of -1 is equivalent to a missing
 .Ar count .
 Options that are supplied but not supported by the given
 .Ar command
 are usually ignored.
 .Pp
 The
 .Nm
 debugger has a pager feature (like the
 .Xr more 1
 command)
 for the output.
 If the number of output lines exceeds the number set in the
 .Va lines
 variable, it displays
 .Dq Li --More--
 and waits for a response.
 The valid responses for it are:
 .Pp
 .Bl -tag -compact -width ".Li SPC"
 .It Li SPC
 one more page
 .It Li RET
 one more line
 .It Li q
 abort the current command, and return to the command input mode
 .El
 .Pp
 Finally,
 .Nm
 provides a small (currently 10 items) command history, and offers
 simple
 .Nm emacs Ns -style
 command line editing capabilities.
 In addition to
 the
 .Nm emacs
 control keys, the usual
 .Tn ANSI
 arrow keys may be used to
 browse through the history buffer, and move the cursor within the
 current line.
 .Sh COMMANDS
 .Bl -tag -width indent -compact
 .It Xo
 .Ic examine Ns Op Li / Ns Cm AISabcdghilmorsuxz ...
 .Oo Ar addr Oc Ns Op , Ns Ar count
 .Xc
 .It Xo
 .Ic x       Ns Op Li / Ns Cm AISabcdghilmorsuxz ...
 .Oo Ar addr Oc Ns Op , Ns Ar count
 .Xc
 Display the addressed locations according to the formats in the modifier.
 Multiple modifier formats display multiple locations.
 If no format is specified, the last format specified for this command
 is used.
 .Pp
 The format characters are:
 .Bl -tag -compact -width indent
 .It Cm b
 look at by bytes (8 bits)
 .It Cm h
 look at by half words (16 bits)
 .It Cm l
 look at by long words (32 bits)
 .It Cm g
 look at by quad words (64 bits)
 .It Cm a
 print the location being displayed
 .It Cm A
 print the location with a line number if possible
 .It Cm x
 display in unsigned hex
 .It Cm z
 display in signed hex
 .It Cm o
 display in unsigned octal
 .It Cm d
 display in signed decimal
 .It Cm u
 display in unsigned decimal
 .It Cm r
 display in current radix, signed
 .It Cm c
 display low 8 bits as a character.
 Non-printing characters are displayed as an octal escape code (e.g.,
 .Ql \e000 ) .
 .It Cm s
 display the null-terminated string at the location.
 Non-printing characters are displayed as octal escapes.
 .It Cm m
 display in unsigned hex with character dump at the end of each line.
 The location is also displayed in hex at the beginning of each line.
 .It Cm i
 display as a disassembled instruction
 .It Cm I
 display as a disassembled instruction with possible alternate formats depending on the
 machine.
 On i386, this selects the alternate format for the instruction decoding
 (16 bits in a 32-bit code segment and vice versa).
 .It Cm S
 display a symbol name for the pointer stored at the address
 .El
 .Pp
 .It Ic xf
 Examine forward:
 execute an
 .Ic examine
 command with the last specified parameters to it
 except that the next address displayed by it is used as the start address.
 .Pp
 .It Ic xb
 Examine backward:
 execute an
 .Ic examine
 command with the last specified parameters to it
 except that the last start address minus the size displayed by it
 is used as the start address.
 .Pp
 .It Ic print Ns Op Li / Ns Cm acdoruxz
 .It Ic p Ns Op Li / Ns Cm acdoruxz
 Print
 .Ar addr Ns s
 according to the modifier character (as described above for
 .Cm examine ) .
 Valid formats are:
 .Cm a , x , z , o , d , u , r ,
 and
 .Cm c .
 If no modifier is specified, the last one specified to it is used.
 The argument
 .Ar addr
 can be a string, in which case it is printed as it is.
 For example:
 .Bd -literal -offset indent
 print/x "eax = " $eax "\enecx = " $ecx "\en"
 .Ed
 .Pp
 will print like:
 .Bd -literal -offset indent
 eax = xxxxxx
 ecx = yyyyyy
 .Ed
 .Pp
 .It Xo
 .Ic write Ns Op Li / Ns Cm bhl
 .Ar addr expr1 Op Ar expr2 ...
 .Xc
 .It Xo
 .Ic w Ns Op Li / Ns Cm bhl
 .Ar addr expr1 Op Ar expr2 ...
 .Xc
 Write the expressions specified after
 .Ar addr
 on the command line at succeeding locations starting with
 .Ar addr .
 The write unit size can be specified in the modifier with a letter
 .Cm b
 (byte),
 .Cm h
 (half word) or
 .Cm l
 (long word) respectively.
 If omitted,
 long word is assumed.
 .Pp
 .Sy Warning :
 since there is no delimiter between expressions, strange
 things may happen.
 It is best to enclose each expression in parentheses.
 .Pp
 .It Ic set Li $ Ns Ar variable Oo Li = Oc Ar expr
 Set the named variable or register with the value of
 .Ar expr .
 Valid variable names are described below.
 .Pp
 .It Ic break Ns Oo Li / Ns Cm u Oc Oo Ar addr Oc Ns Op , Ns Ar count
 .It Ic b     Ns Oo Li / Ns Cm u Oc Oo Ar addr Oc Ns Op , Ns Ar count
 Set a break point at
 .Ar addr .
 If
 .Ar count
 is supplied, the
 .Ic continue
 command will not stop at this break point on the first
 .Ar count
 \- 1 times that it is hit.
 If the break point is set, a break point number is
 printed with
 .Ql # .
 This number can be used in deleting the break point
 or adding conditions to it.
 .Pp
 If the
 .Cm u
 modifier is specified, this command sets a break point in user
 address space.
 Without the
 .Cm u
 option, the address is considered to be in the kernel
 space, and a wrong space address is rejected with an error message.
 This modifier can be used only if it is supported by machine dependent
 routines.
 .Pp
 .Sy Warning :
 If a user text is shadowed by a normal user space debugger,
 user space break points may not work correctly.
 Setting a break
 point at the low-level code paths may also cause strange behavior.
 .Pp
 .It Ic delete Op Ar addr
 .It Ic d      Op Ar addr
 .It Ic delete Li # Ns Ar number
 .It Ic d      Li # Ns Ar number
 Delete the specified break point.
 The break point can be specified by a
 break point number with
 .Ql # ,
 or by using the same
 .Ar addr
 specified in the original
 .Ic break
 command, or by omitting
 .Ar addr
 to get the default address of
 .Va dot .
 .Pp
 .It Ic watch Oo Ar addr Oc Ns Op , Ns Ar size
 Set a watchpoint for a region.
 Execution stops when an attempt to modify the region occurs.
 The
 .Ar size
 argument defaults to 4.
 If you specify a wrong space address, the request is rejected
 with an error message.
 .Pp
 .Sy Warning :
 Attempts to watch wired kernel memory
 may cause an unrecoverable error in some systems such as i386.
 Watchpoints on user addresses work best.
 .Pp
 .It Ic hwatch Oo Ar addr Oc Ns Op , Ns Ar size
 Set a hardware watchpoint for a region if supported by the
 architecture.
 Execution stops when an attempt to modify the region occurs.
 The
 .Ar size
 argument defaults to 4.
 .Pp
 .Sy Warning :
 The hardware debug facilities do not have a concept of separate
 address spaces like the watch command does.
 Use
 .Ic hwatch
 for setting watchpoints on kernel address locations only, and avoid
 its use on user mode address spaces.
 .Pp
 .It Ic dhwatch Oo Ar addr Oc Ns Op , Ns Ar size
 Delete specified hardware watchpoint.
 .Pp
 .It Ic step Ns Oo Li / Ns Cm p Oc Ns Op , Ns Ar count
 .It Ic s    Ns Oo Li / Ns Cm p Oc Ns Op , Ns Ar count
 Single step
 .Ar count
 times.
 If the
 .Cm p
 modifier is specified, print each instruction at each step.
 Otherwise, only print the last instruction.
 .Pp
 .Sy Warning :
 depending on machine type, it may not be possible to
 single-step through some low-level code paths or user space code.
 On machines with software-emulated single-stepping (e.g., pmax),
 stepping through code executed by interrupt handlers will probably
 do the wrong thing.
 .Pp
 .It Ic continue Ns Op Li / Ns Cm c
 .It Ic c Ns Op Li / Ns Cm c
 Continue execution until a breakpoint or watchpoint.
 If the
 .Cm c
 modifier is specified, count instructions while executing.
 Some machines (e.g., pmax) also count loads and stores.
 .Pp
 .Sy Warning :
 when counting, the debugger is really silently single-stepping.
 This means that single-stepping on low-level code may cause strange
 behavior.
 .Pp
 .It Ic until Ns Op Li / Ns Cm p
 Stop at the next call or return instruction.
 If the
 .Cm p
 modifier is specified, print the call nesting depth and the
 cumulative instruction count at each call or return.
 Otherwise,
 only print when the matching return is hit.
 .Pp
 .It Ic next Ns Op Li / Ns Cm p
 .It Ic match Ns Op Li / Ns Cm p
 Stop at the matching return instruction.
 If the
 .Cm p
 modifier is specified, print the call nesting depth and the
 cumulative instruction count at each call or return.
 Otherwise, only print when the matching return is hit.
 .Pp
 .It Xo
 .Ic trace Ns Op Li / Ns Cm u
 .Op Ar pid | tid Ns
 .Op , Ns Ar count
 .Xc
 .It Xo
 .Ic t Ns Op Li / Ns Cm u
 .Op Ar pid | tid Ns
 .Op , Ns Ar count
 .Xc
 .It Xo
 .Ic where Ns Op Li / Ns Cm u
 .Op Ar pid | tid Ns
 .Op , Ns Ar count
 .Xc
 .It Xo
 .Ic bt Ns Op Li / Ns Cm u
 .Op Ar pid | tid Ns
 .Op , Ns Ar count
 .Xc
 Stack trace.
 The
 .Cm u
 option traces user space; if omitted,
 .Ic trace
 only traces
 kernel space.
 The optional argument
 .Ar count
 is the number of frames to be traced.
 If
 .Ar count
 is omitted, all frames are printed.
 .Pp
 .Sy Warning :
 User space stack trace is valid
 only if the machine dependent code supports it.
 .Pp
 .It Xo
 .Ic search Ns Op Li / Ns Cm bhl
 .Ar addr
 .Ar value
 .Op Ar mask Ns
 .Op , Ns Ar count
 .Xc
 Search memory for
 .Ar value .
 The optional
 .Ar count
 argument limits the search.
 .\"
 .Pp
 .It Xo
 .Ic findstack
 .Ar addr
 .Xc
 Prints the address of the thread whose kernel-mode stack contains the
 specified address.
 If the thread is not found, searches the thread stack cache and prints the
 cached stack address.
 Otherwise, prints nothing.
 .Pp
 .It Ic show Cm all procs Ns Op Li / Ns Cm a
 .It Ic ps Ns Op Li / Ns Cm a
 Display all process information.
 The process information may not be shown if it is not
 supported in the machine, or the bottom of the stack of the
 target process is not in the main memory at that time.
 The
 .Cm a
 modifier will print command line arguments for each process.
 .\"
 .Pp
 .It Ic show Cm all trace
 .It Ic alltrace
 Show a stack trace for every thread in the system.
 .Pp
 .It Ic show Cm all ttys
 Show all TTYs within the system.
 Output is similar to
 .Xr pstat 8 ,
 but also includes the address of the TTY structure.
 .\"
 .Pp
 .It Ic show Cm all vnets
 Show the same output as "show vnet" does, but lists all
 virtualized network stacks within the system.
 .\"
 .Pp
 .It Ic show Cm allchains
 Show the same information as "show lockchain" does, but
 for every thread in the system.
 .\"
 .Pp
 .It Ic show Cm alllocks
 Show all locks that are currently held.
 This command is only available if
 .Xr witness 4
 is included in the kernel.
 .\"
 .Pp
 .It Ic show Cm allpcpu
 The same as "show pcpu", but for every CPU present in the system.
 .\"
 .Pp
 .It Ic show Cm allrman
 Show information related to resource management, including
 interrupt request lines, DMA request lines, I/O ports, I/O memory
 addresses, and Resource IDs.
 .\"
 .Pp
 .It Ic show Cm apic
 Dump data about APIC IDT vector mappings.
 .\"
 .Pp
 .It Ic show Cm breaks
 Show breakpoints set with the "break" command.
 .\"
 .Pp
 .It Ic show Cm bio Ar addr
 Show information about the bio structure
 .Vt struct bio
 present at
 .Ar addr .
 See the
 .Pa sys/bio.h
 header file and
 .Xr g_bio 9
 for more details on the exact meaning of the structure fields.
 .\"
 .Pp
 .It Ic show Cm buffer Ar addr
 Show information about the buf structure
 .Vt struct buf
 present at
 .Ar addr .
 See the
 .Pa sys/buf.h
 header file for more details on the exact meaning of the structure fields.
 .\"
 .Pp
 .It Ic show Cm callout Ar addr
 Show information about the callout structure
 .Vt struct callout
 present at
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm cbstat
 Show brief information about the TTY subsystem.
 .\"
 .Pp
 .It Ic show Cm cdev
 Without argument, show the list of all created cdev's, consisting of devfs
 node name and struct cdev address.
 When address of cdev is supplied, show some internal devfs state of the cdev.
 .\"
 .Pp
 .It Ic show Cm conifhk
 Lists hooks currently waiting for completion in
 run_interrupt_driven_config_hooks().
 .\"
 .Pp
 .It Ic show Cm cpusets
 Print numbered root and assigned CPU affinity sets.
 See
 .Xr cpuset 2
 for more details.
 .\"
 .Pp
 .It Ic show Cm cyrixreg
 Show registers specific to the Cyrix processor.
 .\"
 .Pp
 .It Ic show Cm devmap
 Prints the contents of the static device mapping table.
 Currently only available on the
 ARM
 architecture.
 .\"
 .Pp
 .It Ic show Cm domain Ar addr
 Print protocol domain structure
 .Vt struct domain
 at address
 .Ar addr .
 See the
 .Pa sys/domain.h
 header file for more details on the exact meaning of the structure fields.
 .\"
 .Pp
 .It Ic show Cm ffs Op Ar addr
 Show brief information about ffs mount at the address
 .Ar addr ,
 if argument is given.
 Otherwise, provides the summary about each ffs mount.
 .\"
 .Pp
 .It Ic show Cm file Ar addr
 Show information about the file structure
 .Vt struct file
 present at address
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm files
 Show information about every file structure in the system.
 .\"
 .Pp
 .It Ic show Cm freepages
 Show the number of physical pages in each of the free lists.
 .\"
 .Pp
 .It Ic show Cm geom Op Ar addr
 If the
 .Ar addr
 argument is not given, displays the entire GEOM topology.
 If
 .Ar addr
 is given, displays details about the given GEOM object (class, geom,
 provider or consumer).
 .\"
 .Pp
 .It Ic show Cm idt
 Show IDT layout.
 The first column specifies the IDT vector.
 The second one is the name of the interrupt/trap handler.
 Those functions are machine dependent.
 .\"
 .Pp
 .It Ic show Cm igi_list Ar addr
 Show information about the IGMP structure
 .Vt struct igmp_ifsoftc
 present at
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm inodedeps Op Ar addr
 Show brief information about each inodedep structure.
 If
 .Ar addr
 is given, only inodedeps belonging to the fs located at the
 supplied address are shown.
 .\"
 .Pp
 .It Ic show Cm inpcb Ar addr
 Show information on IP Control Block
 .Vt struct in_pcb
 present at
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm intr
 Dump information about interrupt handlers.
 .\"
 .Pp
 .It Ic show Cm intrcnt
 Dump the interrupt statistics.
 .\"
 .Pp
 .It Ic show Cm irqs
 Show interrupt lines and their respective kernel threads.
 .\"
 .Pp
 .It Ic show Cm jails
 Show the list of
 .Xr jail 8
 instances.
 In addition to what
 .Xr jls 8
 shows, also list kernel internal details.
 .\"
 .Pp
 .It Ic show Cm lapic
 Show information from the local APIC registers for this CPU.
 .\"
 .Pp
 .It Ic show Cm lock Ar addr
 Show lock structure.
 The output format is as follows:
 .Bl -tag -width "flags"
 .It Ic class:
 Class of the lock.
 Possible types include
 .Xr mutex 9 ,
 .Xr rmlock 9 ,
 .Xr rwlock 9 ,
 .Xr sx 9 .
 .It Ic name:
 Name of the lock.
 .It Ic flags:
 Flags passed to the lock initialization function.
 .Em flags
 values are lock class specific.
 .It Ic state:
 Current state of a lock.
 .Em state
 values are lock class specific.
 .It Ic owner:
 Lock owner.
 .El
 .\"
 .Pp
 .It Ic show Cm lockchain Ar addr
 Show all threads a particular thread at address
 .Ar addr
 is waiting on based on non-spin locks.
 .\"
 .Pp
 .It Ic show Cm lockedbufs
 Show the same information as "show buf", but for every locked
 .Vt struct buf
 object.
 .\"
 .Pp
 .It Ic show Cm lockedvnods
 List all locked vnodes in the system.
 .\"
 .Pp
 .It Ic show Cm locks
 Prints all locks that are currently acquired.
 This command is only available if
 .Xr witness 4
 is included in the kernel.
 .\"
 .Pp
 .It Ic show Cm locktree
 .\"
 .Pp
 .It Ic show Cm malloc
 Prints
 .Xr malloc 9
 memory allocator statistics.
 The output format is as follows:
 .Pp
 .Bl -tag -compact -offset indent -width "Requests"
 .It Ic Type
 Specifies a type of memory.
 It is the same as a description string used while defining the
 given memory type with
 .Xr MALLOC_DECLARE 9 .
 .It Ic InUse
 Number of memory allocations of the given type, for which
 .Xr free 9
 has not been called yet.
 .It Ic MemUse
 Total memory consumed by the given allocation type.
 .It Ic Requests
 Number of memory allocation requests for the given
 memory type.
 .El
 .Pp
 The same information can be gathered in userspace with
 .Dq Nm vmstat Fl m .
 .\"
 .Pp
 .It Ic show Cm map Ns Oo Li / Ns Cm f Oc Ar addr
 Prints the VM map at
 .Ar addr .
 If the
 .Cm f
 modifier is specified the
 complete map is printed.
 .\"
 .Pp
 .It Ic show Cm msgbuf
 Print the system's message buffer.
 It is the same output as in the
 .Dq Nm dmesg
 case.
 It is useful if you got a kernel panic, attached a serial cable
 to the machine and want to get the boot messages from before the
 system hang.
 .\"
 .It Ic show Cm mount
 Displays short info about all currently mounted file systems.
 .Pp
 .It Ic show Cm mount Ar addr
 Displays details about the given mount point.
 .\"
 .Pp
 .It Ic show Cm object Ns Oo Li / Ns Cm f Oc Ar addr
 Prints the VM object at
 .Ar addr .
 If the
 .Cm f
 option is specified the
 complete object is printed.
 .\"
 .Pp
 .It Ic show Cm panic
 Print the panic message if set.
 .\"
 .Pp
 .It Ic show Cm page
 Show statistics on VM pages.
 .\"
 .Pp
 .It Ic show Cm pageq
 Show statistics on VM page queues.
 .\"
 .Pp
 .It Ic show Cm pciregs
 Print PCI bus registers.
 The same information can be gathered in userspace by running
 .Dq Nm pciconf Fl lv .
 .\"
 .Pp
 .It Ic show Cm pcpu
 Print current processor state.
 The output format is as follows:
 .Pp
 .Bl -tag -compact -offset indent -width "spin locks held:"
 .It Ic cpuid
 Processor identifier.
 .It Ic curthread
 Thread pointer, process identifier and the name of the process.
 .It Ic curpcb
 Control block pointer.
 .It Ic fpcurthread
 FPU thread pointer.
 .It Ic idlethread
 Idle thread pointer.
 .It Ic APIC ID
 CPU identifier coming from APIC.
 .It Ic currentldt
 LDT pointer.
 .It Ic spin locks held
 Names of spin locks held.
 .El
 .\"
 .Pp
 .It Ic show Cm pgrpdump
 Dump process groups present within the system.
 .\"
 .Pp
 .It Ic show Cm proc Op Ar addr
 If no
 .Op Ar addr
 is specified, print information about the current process.
 Otherwise, show information about the process at address
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm procvm
 Show process virtual memory layout.
 .\"
 .Pp
 .It Ic show Cm protosw Ar addr
 Print protocol switch structure
 .Vt struct protosw
 at address
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm registers Ns Op Li / Ns Cm u
 Display the register set.
 If the
 .Cm u
 modifier is specified, it displays user registers instead of
 kernel registers or the currently saved one.
 .Pp
 .Sy Warning :
 The support of the
 .Cm u
 modifier depends on the machine.
 If not supported, incorrect information will be displayed.
 .\"
 .Pp
 .It Ic show Cm rman Ar addr
 Show resource manager object
 .Vt struct rman
 at address
 .Ar addr .
 Addresses of particular pointers can be gathered with the "show allrman"
 command.
+.\"
+.Pp
+.It Ic show Cm route Ar addr
+Show route table result for destination
+.Ar addr .
+At this time, INET and INET6 formatted addresses are supported.
+.\"
+.Pp
+.It Ic show Cm routetable Oo Ar af Oc
+Show full route table or tables.
+If
+.Ar af
+is specified, show only routes for the given numeric address family.
+If no argument is specified, dump the route table for all address families.
 .\"
 .Pp
 .It Ic show Cm rtc
 Show real time clock value.
 Useful for long debugging sessions.
 .\"
 .Pp
 .It Ic show Cm sleepchain
 Deprecated.
 Now an alias for
 .Ic show Cm lockchain .
 .\"
 .Pp
 .It Ic show Cm sleepq
 .It Ic show Cm sleepqueue
 Both commands provide the same functionality.
 They show sleepqueue
 .Vt struct sleepqueue
 structure.
 Sleepqueues are used within the
 .Fx
 kernel to implement sleepable
 synchronization primitives (thread holding a lock might sleep or
 be context switched), which at the time of writing are:
 .Xr condvar 9 ,
 .Xr sx 9
 and standard
 .Xr msleep 9
 interface.
 .\"
 .Pp
 .It Ic show Cm sockbuf Ar addr
 .It Ic show Cm socket Ar addr
 Those commands print
 .Vt struct sockbuf
 and
 .Vt struct socket
 objects placed at
 .Ar addr .
 Output consists of all values present in structures mentioned.
 For exact interpretation and more details, visit
 .Pa sys/socket.h
 header file.
 .\"
 .Pp
 .It Ic show Cm sysregs
 Show system registers (e.g.,
 .Li cr0-4
 on i386.)
 Not present on some platforms.
 .\"
 .Pp
 .It Ic show Cm tcpcb Ar addr
 Print TCP control block
 .Vt struct tcpcb
 lying at address
 .Ar addr .
 For exact interpretation of output, visit
 .Pa netinet/tcp.h
 header file.
 .\"
 .Pp
 .It Ic show Cm thread Op Ar addr | tid
 If no
 .Ar addr
 or
 .Ar tid
 is specified, show detailed information about current thread.
 Otherwise, print information about the thread with ID
 .Ar tid
 or kernel address
 .Ar addr .
 (If the argument is a decimal number, it is assumed to be a tid.)
 .\"
 .Pp
 .It Ic show Cm threads
 Show all threads within the system.
 Output format is as follows:
 .Pp
 .Bl -tag -compact -offset indent -width "Second column"
 .It Ic First column
 Thread identifier (TID)
 .It Ic Second column
 Thread structure address
 .It Ic Third column
 Backtrace.
 .El
 .\"
 .Pp
 .It Ic show Cm tty Ar addr
 Display the contents of a TTY structure in a readable form.
 .\"
 .Pp
 .It Ic show Cm turnstile Ar addr
 Show turnstile
 .Vt struct turnstile
 structure at address
 .Ar addr .
 Turnstiles are structures used within the
 .Fx
 kernel to implement
 synchronization primitives which, while holding a specific type of lock, cannot
 sleep or context switch to another thread.
 Currently, those are:
 .Xr mutex 9 ,
 .Xr rwlock 9 ,
 .Xr rmlock 9 .
 .\"
 .Pp
 .It Ic show Cm uma
 Show UMA allocator statistics.
 Output consists of five columns:
 .Pp
 .Bl -tag -compact -offset indent -width "Requests"
 .It Cm "Zone"
 Name of the UMA zone.
 The same string that was passed to
 .Xr uma_zcreate 9
 as a first argument.
 .It Cm "Size"
 Size of a given memory object (slab).
 .It Cm "Used"
 Number of slabs being currently used.
 .It Cm "Free"
 Number of free slabs within the UMA zone.
 .It Cm "Requests"
 Number of allocation requests to the given zone.
 .El
 .Pp
 The very same information might be gathered in the userspace
 with the help of
 .Dq Nm vmstat Fl z .
 .\"
 .Pp
 .It Ic show Cm unpcb Ar addr
 Shows UNIX domain socket private control block
 .Vt struct unpcb
 present at the address
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm vmochk
 Prints whether the internal VM objects are in a map somewhere
 and none have zero ref counts.
 .\"
 .Pp
 .It Ic show Cm vmopag
 This is supposed to show physical addresses consumed by a
 VM object.
 Currently, it is not possible to use this command when
 .Xr witness 4
 is compiled in the kernel.
 .\"
 .Pp
 .It Ic show Cm vnet Ar addr
 Prints virtualized network stack
 .Vt struct vnet
 structure present at the address
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm vnode Op Ar addr
 Prints vnode
 .Vt struct vnode
 structure lying at
 .Op Ar addr .
 For the exact interpretation of the output, look at the
 .Pa sys/vnode.h
 header file.
 .\"
 .Pp
 .It Ic show Cm vnodebufs Ar addr
 Shows clean/dirty buffer lists of the vnode located at
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm vpath Ar addr
 Walk the namecache to look up the pathname of the vnode located at
 .Ar addr .
 .\"
 .Pp
 .It Ic show Cm watches
 Displays all watchpoints.
 Shows watchpoints set with the "watch" command.
 .\"
 .Pp
 .It Ic show Cm witness
 Shows information about lock acquisition coming from the
 .Xr witness 4
 subsystem.
 .\"
 .Pp
 .It Ic gdb
 Toggles between remote GDB and DDB mode.
 In remote GDB mode, another machine is required that runs
 .Xr gdb 1
 using the remote debug feature, with a connection to the serial
 console port on the target machine.
 Currently only available on the
 i386
 architecture.
 .Pp
 .It Ic halt
 Halt the system.
 .Pp
 .It Ic kill Ar sig pid
 Send signal
 .Ar sig
 to process
 .Ar pid .
 The signal is acted on upon returning from the debugger.
 This command can be used to kill a process causing resource contention
 in the case of a hung system.
 See
 .Xr signal 3
 for a list of signals.
 Note that the arguments are reversed relative to
 .Xr kill 2 .
 .Pp
 .It Ic reboot Op Ar seconds
 .It Ic reset Op Ar seconds
 Hard reset the system.
 If the optional argument
 .Ar seconds
 is given, the debugger will wait for this long, at most a week,
 before rebooting.
 .Pp
 .It Ic help
 Print a short summary of the available commands and command
 abbreviations.
 .Pp
 .It Ic capture on
 .It Ic capture off
 .It Ic capture reset
 .It Ic capture status
 .Nm
 supports a basic output capture facility, which can be used to retrieve the
 results of debugging commands from userspace using
 .Xr sysctl 3 .
 .Ic capture on
 enables output capture;
 .Ic capture off
 disables capture.
 .Ic capture reset
 will clear the capture buffer and disable capture.
 .Ic capture status
 will report current buffer use, buffer size, and disposition of output
 capture.
 .Pp
 Userspace processes may inspect and manage
 .Nm
 capture state using
 .Xr sysctl 8 :
 .Pp
 .Va debug.ddb.capture.bufsize
 may be used to query or set the current capture buffer size.
 .Pp
 .Va debug.ddb.capture.maxbufsize
 may be used to query the compile-time limit on the capture buffer size.
 .Pp
 .Va debug.ddb.capture.bytes
 may be used to query the number of bytes of output currently in the capture
 buffer.
 .Pp
 .Va debug.ddb.capture.data
 returns the contents of the buffer as a string to an appropriately privileged
 process.
 .Pp
 This facility is particularly useful in concert with the scripting and
 .Xr textdump 4
 facilities, allowing scripted debugging output to be captured and
 committed to disk as part of a textdump for later analysis.
 The contents of the capture buffer may also be inspected in a kernel core dump
 using
 .Xr kgdb 1 .
 .Pp
 .It Ic run
 .It Ic script
 .It Ic scripts
 .It Ic unscript
 Run, define, list, and delete scripts.
 See the
 .Sx SCRIPTING
 section for more information on the scripting facility.
 .Pp
 .It Ic textdump dump
 .It Ic textdump set
 .It Ic textdump status
 .It Ic textdump unset
 Use the
 .Ic textdump dump
 command to immediately perform a textdump.
 More information may be found in
 .Xr textdump 4 .
 The
 .Ic textdump set
 command may be used to force the next kernel core dump to be a textdump
 rather than a traditional memory dump or minidump.
 .Ic textdump status
 reports whether a textdump has been scheduled.
 .Ic textdump unset
 cancels a request to perform a textdump as the next kernel core dump.
 .Pp
 .It Ic thread Ar addr | tid
 Switch the debugger to the thread with ID
 .Ar tid ,
 if the argument is a decimal number, or address
 .Ar addr ,
 otherwise.
 .El
 .Sh VARIABLES
 The debugger accesses registers and variables as
 .Li $ Ns Ar name .
 Register names are as in the
 .Dq Ic show Cm registers
 command.
 Some variables are suffixed with numbers, and may have some modifier
 following a colon immediately after the variable name.
 For example, register variables can have a
 .Cm u
 modifier to indicate user register (e.g.,
 .Dq Li $eax:u ) .
 .Pp
 Built-in variables currently supported are:
 .Pp
 .Bl -tag -width ".Va tabstops" -compact
 .It Va radix
 Input and output radix.
 .It Va maxoff
 Addresses are printed as
 .Dq Ar symbol Ns Li + Ns Ar offset
 unless
 .Ar offset
 is greater than
 .Va maxoff .
 .It Va maxwidth
 The width of the displayed line.
 .It Va lines
 The number of lines.
 It is used by the built-in pager.
 Setting it to 0 disables paging.
 .It Va tabstops
 Tab stop width.
 .It Va work Ns Ar xx
 Work variable;
 .Ar xx
 can take values from 0 to 31.
 .El
 .Sh EXPRESSIONS
 Most expression operators in C are supported except
 .Ql ~ ,
 .Ql ^ ,
 and unary
 .Ql & .
 Special rules in
 .Nm
 are:
 .Bl -tag -width ".No Identifiers"
 .It Identifiers
 The name of a symbol is translated to the value of the symbol, which
 is the address of the corresponding object.
 .Ql \&.
 and
 .Ql \&:
 can be used in the identifier.
 If supported by an object format dependent routine,
 .Sm off
 .Oo Ar filename : Oc Ar func : lineno ,
 .Sm on
 .Oo Ar filename : Oc Ns Ar variable ,
 and
 .Oo Ar filename : Oc Ns Ar lineno
 can be accepted as a symbol.
 .It Numbers
 Radix is determined by the first two letters:
 .Ql 0x :
 hex,
 .Ql 0o :
 octal,
 .Ql 0t :
 decimal; otherwise, follow current radix.
 .It Li \&.
 .Va dot
 .It Li +
 .Va next
 .It Li ..
 address of the start of the last line examined.
 Unlike
 .Va dot
 or
 .Va next ,
 this is only changed by
 .Ic examine
 or
 .Ic write
 command.
 .It Li '
 last address explicitly specified.
 .It Li $ Ns Ar variable
 Translated to the value of the specified variable.
 It may be followed by a
 .Ql \&:
 and modifiers as described above.
 .It Ar a Ns Li # Ns Ar b
 A binary operator which rounds up the left hand side to the next
 multiple of right hand side.
 .It Li * Ns Ar expr
 Indirection.
 It may be followed by a
 .Ql \&:
 and modifiers as described above.
 .El
 .Sh SCRIPTING
 .Nm
 supports a basic scripting facility to allow automating tasks or responses to
 specific events.
 Each script consists of a list of DDB commands to be executed sequentially,
 and is assigned a unique name.
 Certain script names have special meaning, and will be automatically run on
 various
 .Nm
 events if scripts by those names have been defined.
 .Pp
 The
 .Ic script
 command may be used to define a script by name.
 Scripts consist of a series of
 .Nm
 commands separated with the
 .Ql \&;
 character.
 For example:
 .Bd -literal -offset indent
 script kdb.enter.panic=bt; show pcpu
 script lockinfo=show alllocks; show lockedvnods
 .Ed
 .Pp
 The
 .Ic scripts
 command lists currently defined scripts.
 .Pp
 The
 .Ic run
 command executes a script by name.
 For example:
 .Bd -literal -offset indent
 run lockinfo
 .Ed
 .Pp
 The
 .Ic unscript
 command may be used to delete a script by name.
 For example:
 .Bd -literal -offset indent
 unscript kdb.enter.panic
 .Ed
 .Pp
 These functions may also be performed from userspace using the
 .Xr ddb 8
 command.
 .Pp
 Certain scripts are run automatically, if defined, for specific
 .Nm
 events.
 The following scripts are run when various events occur:
 .Bl -tag -width kdb.enter.powerfail
 .It Va kdb.enter.acpi
 The kernel debugger was entered as a result of an
 .Xr acpi 4
 event.
 .It Va kdb.enter.bootflags
 The kernel debugger was entered at boot as a result of the debugger boot
 flag being set.
 .It Va kdb.enter.break
 The kernel debugger was entered as a result of a serial or console break.
 .It Va kdb.enter.cam
 The kernel debugger was entered as a result of a
 .Xr CAM 4
 event.
 .It Va kdb.enter.mac
 The kernel debugger was entered as a result of an assertion failure in the
 .Xr mac_test 4
 module of the
 TrustedBSD MAC Framework.
 .It Va kdb.enter.ndis
 The kernel debugger was entered as a result of an
 .Xr ndis 4
 breakpoint event.
 .It Va kdb.enter.netgraph
 The kernel debugger was entered as a result of a
 .Xr netgraph 4
 event.
 .It Va kdb.enter.panic
 .Xr panic 9
 was called.
 .It Va kdb.enter.powerfail
 The kernel debugger was entered as a result of a powerfail NMI on the sparc64
 platform.
 .It Va kdb.enter.powerpc
 The kernel debugger was entered as a result of an unimplemented interrupt
 type on the powerpc platform.
 .It Va kdb.enter.sysctl
 The kernel debugger was entered as a result of the
 .Va debug.kdb.enter
 sysctl being set.
 .It Va kdb.enter.trapsig
 The kernel debugger was entered as a result of a trapsig event on the sparc64
 platform.
 .It Va kdb.enter.unionfs
 The kernel debugger was entered as a result of an assertion failure in the
 union file system.
 .It Va kdb.enter.unknown
 The kernel debugger was entered, but no reason has been set.
 .It Va kdb.enter.vfslock
 The kernel debugger was entered as a result of a VFS lock violation.
 .It Va kdb.enter.watchdog
 The kernel debugger was entered as a result of a watchdog firing.
 .It Va kdb.enter.witness
 The kernel debugger was entered as a result of a
 .Xr witness 4
 violation.
 .El
 .Pp
 In the event that none of these scripts is found,
 .Nm
 will attempt to execute a default script:
 .Bl -tag -width kdb.enter.powerfail
 .It Va kdb.enter.default
 The kernel debugger was entered, but a script exactly matching the reason for
 entering was not defined.
 This can be used as a catch-all to handle cases not specifically of interest;
 for example,
 .Va kdb.enter.witness
 might be defined to have special handling, and
 .Va kdb.enter.default
 might be defined to simply panic and reboot.
 .El
 .Sh HINTS
 On machines with an ISA expansion bus, a simple NMI generation card can be
 constructed by connecting a push button between the A01 and B01 (CHCHK# and
 GND) card fingers.
 Momentarily shorting these two fingers together may cause the bridge chipset to
 generate an NMI, which causes the kernel to pass control to
 .Nm .
 Some bridge chipsets do not generate an NMI on CHCHK#, so your mileage may vary.
 The NMI allows one to break into the debugger on a wedged machine to
 diagnose problems.
 Bridge chipsets for other buses may be able to generate an NMI using
 bus-specific methods.
 There are many PCI and PCIe add-in cards which can generate an NMI for
 debugging.
 Modern server systems typically use IPMI to generate signals to enter the
 debugger.
 The
 .Va devel/ipmitool
 port can be used to send the
 .Cd chassis power diag
 command which delivers an NMI to the processor.
 Embedded systems often use JTAG for debugging, but rarely use it in
 combination with
 .Nm .
 .Pp
 For serial consoles, you can enter the debugger by sending a BREAK
 condition on the serial line if
 .Cd options BREAK_TO_DEBUGGER
 is specified in the kernel.
 Most terminal emulation programs can send a break sequence with a
 special key sequence or via a menu item.
 However, in some setups, sending the break can be difficult to arrange
 or happens spuriously, so if the kernel contains
 .Cd options ALT_BREAK_TO_DEBUGGER
 then the sequence of CR TILDE CTRL-B enters the debugger;
 CR TILDE CTRL-P causes a panic instead of entering the
 debugger; and
 CR TILDE CTRL-R causes an immediate reboot.
 In all the above sequences, CR is a Carriage Return and is usually
 sent by hitting the Enter or Return key.
 TILDE is the ASCII tilde character (~).
 CTRL-x is Control x, created by holding down the control key, pressing x,
 and then releasing both.
 .Pp
 Entering the debugger with a break may be enabled at run-time
 by setting the
 .Xr sysctl 8
 .Va debug.kdb.break_to_debugger
 to 1.
 Entering the debugger with the alternate sequence may be enabled
 at run-time by setting the
 .Xr sysctl 8
 .Va debug.kdb.alt_break_to_debugger
 to 1.
 The debugger may be entered by setting the
 .Xr sysctl 8
 .Va debug.kdb.enter
 to 1.
 .Sh FILES
 Header files mentioned in this manual page can be found under the
 .Pa /usr/include
 directory.
 .Pp
 .Bl -dash -compact
 .It
 .Pa sys/buf.h
 .It
 .Pa sys/domain.h
 .It
 .Pa netinet/in_pcb.h
 .It
 .Pa sys/socket.h
 .It
 .Pa sys/vnode.h
 .El
 .Sh SEE ALSO
 .Xr gdb 1 ,
 .Xr kgdb 1 ,
 .Xr acpi 4 ,
 .Xr CAM 4 ,
 .Xr mac_test 4 ,
 .Xr ndis 4 ,
 .Xr netgraph 4 ,
 .Xr textdump 4 ,
 .Xr witness 4 ,
 .Xr ddb 8 ,
 .Xr sysctl 8 ,
 .Xr panic 9
 .Sh HISTORY
 The
 .Nm
 debugger was developed for Mach, and ported to
 .Bx 386 0.1 .
 This manual page was translated from
 .Xr man 7
 macros by
 .An Garrett Wollman .
 .Pp
 .An Robert N. M. Watson
 added support for
 .Nm
 output capture,
 .Xr textdump 4
 and scripting in
 .Fx 7.1 .
Index: head/sys/net/rtsock.c
===================================================================
--- head/sys/net/rtsock.c	(revision 352111)
+++ head/sys/net/rtsock.c	(revision 352112)
@@ -1,1997 +1,2404 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1988, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
  * $FreeBSD$
  */
+#include "opt_ddb.h"
 #include "opt_mpath.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/domain.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/rmlock.h>
 #include <sys/rwlock.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
+#ifdef DDB
+#include <ddb/ddb.h>
+#include <ddb/db_lex.h>
+#endif
+
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_llatbl.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/raw_cb.h>
 #include <net/route.h>
 #include <net/route_var.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip_carp.h>
 #ifdef INET6
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #endif
 
 #ifdef COMPAT_FREEBSD32
 #include <sys/mount.h>
 #include <compat/freebsd32/freebsd32.h>
 
 /*
  * Interface message header built from fixed-width fields; only compiled
  * under COMPAT_FREEBSD32.
  * NOTE(review): presumably mirrors struct if_msghdr as seen by 32-bit
  * processes -- confirm against the code that fills it in.
  */
 struct if_msghdr32 {
 	uint16_t ifm_msglen;
 	uint8_t	ifm_version;
 	uint8_t	ifm_type;
 	int32_t	ifm_addrs;
 	int32_t	ifm_flags;
 	uint16_t ifm_index;
 	uint16_t _ifm_spare1;
 	struct	if_data ifm_data;
 };
 
 /*
  * Extended interface message header built from fixed-width fields; only
  * compiled under COMPAT_FREEBSD32.  Compared with struct if_msghdr32 it
  * additionally carries ifm_len, ifm_data_off and a spare word ahead of
  * ifm_data.
  * NOTE(review): presumably mirrors struct if_msghdrl for 32-bit
  * processes -- confirm.
  */
 struct if_msghdrl32 {
 	uint16_t ifm_msglen;
 	uint8_t	ifm_version;
 	uint8_t	ifm_type;
 	int32_t	ifm_addrs;
 	int32_t	ifm_flags;
 	uint16_t ifm_index;
 	uint16_t _ifm_spare1;
 	uint16_t ifm_len;
 	uint16_t ifm_data_off;
 	uint32_t _ifm_spare2;
 	struct	if_data ifm_data;
 };
 
 /*
  * Extended interface-address message header built from fixed-width
  * fields; only compiled under COMPAT_FREEBSD32.
  * NOTE(review): presumably mirrors struct ifa_msghdrl for 32-bit
  * processes -- confirm.
  */
 struct ifa_msghdrl32 {
 	uint16_t ifam_msglen;
 	uint8_t	ifam_version;
 	uint8_t	ifam_type;
 	int32_t	ifam_addrs;
 	int32_t	ifam_flags;
 	uint16_t ifam_index;
 	uint16_t _ifam_spare1;
 	uint16_t ifam_len;
 	uint16_t ifam_data_off;
 	int32_t	ifam_metric;
 	struct	if_data ifam_data;
 };
 
 /*
  * Space occupied by sockaddr `sa' when padded to a multiple of
  * sizeof(int): sa_len rounded up to the next multiple of sizeof(int), or
  * sizeof(int) when sa_len is 0.  The argument is evaluated more than once.
  */
 #define SA_SIZE32(sa)						\
     (  (((struct sockaddr *)(sa))->sa_len == 0) ?		\
 	sizeof(int)		:				\
 	1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(int) - 1) ) )
 
 #endif /* COMPAT_FREEBSD32 */
 
 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
 
 /* NB: these are not modified */
 static struct	sockaddr route_src = { 2, PF_ROUTE, };
 static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
 
 /* These are external hooks for CARP. */
 int	(*carp_get_vhid_p)(struct ifaddr *);
 
 /*
  * Used by rtsock/raw_input callback code to decide whether to filter the update
  * notification to a socket bound to a particular FIB.
  */
 #define	RTS_FILTER_FIB	M_PROTO8
 
 /*
  * Per-VNET counters of attached routing sockets.  They are incremented in
  * rts_attach() and decremented in rts_detach() while RTSOCK_LOCK() is held.
  */
 typedef struct {
 	int	ip_count;	/* attached w/ AF_INET */
 	int	ip6_count;	/* attached w/ AF_INET6 */
 	int	any_count;	/* total attached */
 } route_cb_t;
 VNET_DEFINE_STATIC(route_cb_t, route_cb);
 #define	V_route_cb VNET(route_cb)
 
 struct mtx rtsock_mtx;
 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
 
 #define	RTSOCK_LOCK()	mtx_lock(&rtsock_mtx)
 #define	RTSOCK_UNLOCK()	mtx_unlock(&rtsock_mtx)
 #define	RTSOCK_LOCK_ASSERT()	mtx_assert(&rtsock_mtx, MA_OWNED)
 
 static SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
 
 /*
  * Working state passed to rtsock_msg_buffer() and to the sysctl list
  * helpers declared below.
  */
 struct walkarg {
 	int	w_tmemsize;	/* size of the buffer at w_tmem */
 	int	w_op, w_arg;	/* NOTE(review): presumably sysctl op and argument -- confirm */
 	caddr_t	w_tmem;		/* buffer the message is assembled in */
 	struct sysctl_req *w_req;	/* NOTE(review): presumably the sysctl request being served -- confirm */
 };
 
 static void	rts_input(struct mbuf *m);
 static struct mbuf *rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo);
 static int	rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo,
 			struct walkarg *w, int *plen);
 static int	rt_xaddrs(caddr_t cp, caddr_t cplim,
 			struct rt_addrinfo *rtinfo);
 static int	sysctl_dumpentry(struct radix_node *rn, void *vw);
 static int	sysctl_iflist(int af, struct walkarg *w);
 static int	sysctl_ifmalist(int af, struct walkarg *w);
 static int	route_output(struct mbuf *m, struct socket *so, ...);
 static void	rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out);
 static void	rt_dispatch(struct mbuf *, sa_family_t);
 static struct sockaddr	*rtsock_fix_netmask(struct sockaddr *dst,
 			struct sockaddr *smask, struct sockaddr_storage *dmask);
 
 /*
  * netisr handler description for routing socket messages: protocol
  * NETISR_ROUTE is handled by rts_input().  Registered in vnet_rts_init().
  */
 static struct netisr_handler rtsock_nh = {
 	.nh_name = "rtsock",
 	.nh_handler = rts_input,
 	.nh_proto = NETISR_ROUTE,
 	.nh_policy = NETISR_POLICY_SOURCE,
 };
 
 static int
 sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
 {
 	int error, qlimit;
 
 	netisr_getqlimit(&rtsock_nh, &qlimit);
 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
         if (error || !req->newptr)
                 return (error);
 	if (qlimit < 1)
 		return (EINVAL);
 	return (netisr_setqlimit(&rtsock_nh, qlimit));
 }
 SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
     0, 0, sysctl_route_netisr_maxqlen, "I",
     "maximum routing socket dispatch queue length");
 
 /*
  * Per-VNET initialisation: register the rtsock netisr handler.  For the
  * default VNET the queue limit may first be overridden by the
  * net.route.netisr_maxqlen tunable; other VNETs (VIMAGE kernels only)
  * register through netisr_register_vnet().
  */
 static void
 vnet_rts_init(void)
 {
 	int qlimit;

 	if (!IS_DEFAULT_VNET(curvnet)) {
 #ifdef VIMAGE
 		netisr_register_vnet(&rtsock_nh);
 #endif
 		return;
 	}

 	if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &qlimit))
 		rtsock_nh.nh_qlimit = qlimit;
 	netisr_register(&rtsock_nh);
 }
 VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
     vnet_rts_init, 0);
 
 #ifdef VIMAGE
 /*
  * Per-VNET teardown (VIMAGE kernels only): unregister the rtsock netisr
  * handler for the VNET being destroyed.
  */
 static void
 vnet_rts_uninit(void)
 {
 
 	netisr_unregister_vnet(&rtsock_nh);
 }
 VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
     vnet_rts_uninit, 0);
 #endif
 
 /*
  * Filter callback handed to raw_input_ext() by rts_input().
  *
  * Returns 1 when the socket behind rp must be skipped for mbuf m, and 0
  * when it may receive it.  A socket is only skipped when the mbuf carries
  * RTS_FILTER_FIB, the protocol family is PF_ROUTE, the control block has a
  * socket, and that socket's FIB differs from the mbuf's FIB.
  */
 static int
 raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src,
     struct rawcb *rp)
 {
 	int mbuf_fib;
 
 	KASSERT(m != NULL, ("%s: m is NULL", __func__));
 	KASSERT(proto != NULL, ("%s: proto is NULL", __func__));
 	KASSERT(rp != NULL, ("%s: rp is NULL", __func__));
 
 	/* The sender did not ask for per-FIB filtering. */
 	if ((m->m_flags & RTS_FILTER_FIB) == 0)
 		return (0);
 
 	mbuf_fib = M_GETFIB(m);
 
 	/* Not a routing socket message: never filtered here. */
 	if (proto->sp_family != PF_ROUTE)
 		return (0);
 	/* No socket to compare against. */
 	if (rp->rcb_socket == NULL)
 		return (0);
 
 	/* Skip the socket only when its FIB does not match. */
 	return (rp->rcb_socket->so_fibnum == mbuf_fib ? 0 : 1);
 }
 
 /*
  * netisr handler for routing messages.  The address family stored in the
  * PACKET_TAG_RTSOCKFAM mbuf tag (if any) becomes the protocol of the
  * sockproto handed to raw_input_ext(); without a tag the protocol is 0.
  * The tag is removed before the mbuf is passed on.
  */
 static void
 rts_input(struct mbuf *m)
 {
 	struct sockproto route_proto;
 	struct m_tag *fam_tag;
 
 	route_proto.sp_family = PF_ROUTE;
 	route_proto.sp_protocol = 0;
 
 	fam_tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
 	if (fam_tag != NULL) {
 		/* The family is stored right behind the tag header. */
 		route_proto.sp_protocol = *(unsigned short *)(fam_tag + 1);
 		m_tag_delete(m, fam_tag);
 	}
 
 	raw_input_ext(m, &route_proto, &route_src, raw_input_rts_cb);
 }
 
 /*
  * It really doesn't make any sense at all for this code to share much
  * with raw_usrreq.c, since its functionality is so restricted.  XXX
  */
 /* pru_abort: forwarded unchanged to the raw socket implementation. */
 static void
 rts_abort(struct socket *so)
 {
 
 	raw_usrreqs.pru_abort(so);
 }
 
 /* pru_close: forwarded unchanged to the raw socket implementation. */
 static void
 rts_close(struct socket *so)
 {
 
 	raw_usrreqs.pru_close(so);
 }
 
 /* pru_accept is EOPNOTSUPP */
 
 static int
 rts_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct rawcb *rp;
 	int error;
 
 	KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
 
 	/* XXX */
 	rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
 
 	so->so_pcb = (caddr_t)rp;
 	so->so_fibnum = td->td_proc->p_fibnum;
 	error = raw_attach(so, proto);
 	rp = sotorawcb(so);
 	if (error) {
 		so->so_pcb = NULL;
 		free(rp, M_PCB);
 		return error;
 	}
 	RTSOCK_LOCK();
 	switch(rp->rcb_proto.sp_protocol) {
 	case AF_INET:
 		V_route_cb.ip_count++;
 		break;
 	case AF_INET6:
 		V_route_cb.ip6_count++;
 		break;
 	}
 	V_route_cb.any_count++;
 	RTSOCK_UNLOCK();
 	soisconnected(so);
 	so->so_options |= SO_USELOOPBACK;
 	return 0;
 }
 
 /* pru_bind: forwarded to the raw socket implementation; returns its result. */
 static int
 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
 }
 
 /* pru_connect: forwarded to the raw socket implementation; returns its result. */
 static int
 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
 }
 
 /* pru_connect2 is EOPNOTSUPP */
 /* pru_control is EOPNOTSUPP */
 
 /*
  * pru_detach: undo the listener accounting done in rts_attach() and then
  * let the raw socket implementation detach the control block.
  */
 static void
 rts_detach(struct socket *so)
 {
 	struct rawcb *rcb;
 
 	rcb = sotorawcb(so);
 	KASSERT(rcb != NULL, ("rts_detach: rp == NULL"));
 
 	RTSOCK_LOCK();
 	if (rcb->rcb_proto.sp_protocol == AF_INET)
 		V_route_cb.ip_count--;
 	else if (rcb->rcb_proto.sp_protocol == AF_INET6)
 		V_route_cb.ip6_count--;
 	V_route_cb.any_count--;
 	RTSOCK_UNLOCK();
 
 	raw_usrreqs.pru_detach(so);
 }
 
 /* pru_disconnect: forwarded to the raw socket implementation; returns its result. */
 static int
 rts_disconnect(struct socket *so)
 {
 
 	return (raw_usrreqs.pru_disconnect(so));
 }
 
 /* pru_listen is EOPNOTSUPP */
 
 /* pru_peeraddr: forwarded to the raw socket implementation; returns its result. */
 static int
 rts_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 
 	return (raw_usrreqs.pru_peeraddr(so, nam));
 }
 
 /* pru_rcvd is EOPNOTSUPP */
 /* pru_rcvoob is EOPNOTSUPP */
 
 /* pru_send: forwarded to the raw socket implementation; returns its result. */
 static int
 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 	 struct mbuf *control, struct thread *td)
 {
 
 	return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
 }
 
 /* pru_sense is null */
 
 /* pru_shutdown: forwarded to the raw socket implementation; returns its result. */
 static int
 rts_shutdown(struct socket *so)
 {
 
 	return (raw_usrreqs.pru_shutdown(so));
 }
 
 /* pru_sockaddr: forwarded to the raw socket implementation; returns its result. */
 static int
 rts_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 
 	return (raw_usrreqs.pru_sockaddr(so, nam));
 }
 
 /*
  * User-request switch for routing sockets.  Every entry points at one of
  * the rts_*() functions above; entries not listed here are left at their
  * defaults.
  */
 static struct pr_usrreqs route_usrreqs = {
 	.pru_abort =		rts_abort,
 	.pru_attach =		rts_attach,
 	.pru_bind =		rts_bind,
 	.pru_connect =		rts_connect,
 	.pru_detach =		rts_detach,
 	.pru_disconnect =	rts_disconnect,
 	.pru_peeraddr =		rts_peeraddr,
 	.pru_send =		rts_send,
 	.pru_shutdown =		rts_shutdown,
 	.pru_sockaddr =		rts_sockaddr,
 	.pru_close =		rts_close,
 };
 
 #ifndef _SOCKADDR_UNION_DEFINED
 #define	_SOCKADDR_UNION_DEFINED
 /*
  * The union of all possible address formats we handle.
  * The _SOCKADDR_UNION_DEFINED guard keeps this from being defined twice
  * when another header has already provided it.
  */
 union sockaddr_union {
 	struct sockaddr		sa;	/* generic view */
 	struct sockaddr_in	sin;	/* AF_INET view */
 	struct sockaddr_in6	sin6;	/* AF_INET6 view */
 };
 #endif /* _SOCKADDR_UNION_DEFINED */
 
 /*
  * Choose the interface address (RTAX_IFA) reported for route rt to the
  * credential cred.
  *
  * If prison_if() accepts the route's own interface address, that address
  * is used as-is.  Otherwise, depending on the address family of the
  * destination in info, the addresses configured on ifp are searched for
  * one accepted by prison_check_ip4()/prison_check_ip6(); if none is found,
  * prison_get_ip4()/prison_get_ip6() is asked for a fallback.  In these
  * cases the address is built in *saun and info->rti_info[RTAX_IFA] points
  * into it, so *saun must stay valid for as long as info is used.
  *
  * Returns 0 on success, or ESRCH when the address family is not handled,
  * no address could be obtained, or the IPv6 scope could not be recovered.
  */
 static int
 rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
     struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred)
 {
 #if defined(INET) || defined(INET6)
 	struct epoch_tracker et;
 #endif
 
 	/* First, see if the returned address is part of the jail. */
 	if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) {
 		info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 		return (0);
 	}
 
 	switch (info->rti_info[RTAX_DST]->sa_family) {
 #ifdef INET
 	case AF_INET:
 	{
 		struct in_addr ia;
 		struct ifaddr *ifa;
 		int found;
 
 		found = 0;
 		/*
 		 * Try to find an address on the given outgoing interface
 		 * that belongs to the jail.
 		 */
 		NET_EPOCH_ENTER(et);
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			struct sockaddr *sa;
 			sa = ifa->ifa_addr;
 			if (sa->sa_family != AF_INET)
 				continue;
 			/* Copy the address out; ia is what gets reported. */
 			ia = ((struct sockaddr_in *)sa)->sin_addr;
 			if (prison_check_ip4(cred, &ia) == 0) {
 				found = 1;
 				break;
 			}
 		}
 		NET_EPOCH_EXIT(et);
 		if (!found) {
 			/*
 			 * As a last resort return the 'default' jail address.
 			 */
 			ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)->
 			    sin_addr;
 			if (prison_get_ip4(cred, &ia) != 0)
 				return (ESRCH);
 		}
 		/* Build the reply sockaddr in caller-provided storage. */
 		bzero(&saun->sin, sizeof(struct sockaddr_in));
 		saun->sin.sin_len = sizeof(struct sockaddr_in);
 		saun->sin.sin_family = AF_INET;
 		saun->sin.sin_addr.s_addr = ia.s_addr;
 		info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
 		break;
 	}
 #endif
 #ifdef INET6
 	case AF_INET6:
 	{
 		struct in6_addr ia6;
 		struct ifaddr *ifa;
 		int found;
 
 		found = 0;
 		/*
 		 * Try to find an address on the given outgoing interface
 		 * that belongs to the jail.
 		 */
 		NET_EPOCH_ENTER(et);
 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			struct sockaddr *sa;
 			sa = ifa->ifa_addr;
 			if (sa->sa_family != AF_INET6)
 				continue;
 			/* Copy the address out; ia6 is what gets reported. */
 			bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
 			    &ia6, sizeof(struct in6_addr));
 			if (prison_check_ip6(cred, &ia6) == 0) {
 				found = 1;
 				break;
 			}
 		}
 		NET_EPOCH_EXIT(et);
 		if (!found) {
 			/*
 			 * As a last resort return the 'default' jail address.
 			 */
 			ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)->
 			    sin6_addr;
 			if (prison_get_ip6(cred, &ia6) != 0)
 				return (ESRCH);
 		}
 		/* Build the reply sockaddr in caller-provided storage. */
 		bzero(&saun->sin6, sizeof(struct sockaddr_in6));
 		saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
 		saun->sin6.sin6_family = AF_INET6;
 		bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
 		if (sa6_recoverscope(&saun->sin6) != 0)
 			return (ESRCH);
 		info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
 		break;
 	}
 #endif
 	default:
 		/* Address family not handled (or not compiled in). */
 		return (ESRCH);
 	}
 	return (0);
 }
 
 /*
  * Process one routing message written to routing socket so.
  *
  * The message held in mbuf chain m is copied into a temporary buffer and
  * validated (total length, RTM_VERSION, address list, address families).
  * It is then dispatched on rtm_type:
  *   RTM_ADD, RTM_CHANGE, RTM_DELETE - rtrequest1_fib(), or lla_rt_output()
  *       for RTF_LLDATA requests whose gateway is an AF_LINK address;
  *   RTM_GET - lookup in the radix head of the socket's FIB.
  * Any type other than RTM_GET requires PRIV_NET_ROUTE.  For RTM_GET and a
  * successful RTM_DELETE the reply is rebuilt from the route entry at the
  * "report" label.  Finally the (possibly updated) message is copied back
  * into m and handed to rt_dispatch(), unless the socket has loopback
  * disabled and no other routing socket is attached.
  *
  * The variable arguments are never read.
  *
  * Returns 0 or an errno value; when a reply is produced a non-zero error
  * is also stored in its rtm_errno field.
  */
 /*ARGSUSED*/
 static int
 route_output(struct mbuf *m, struct socket *so, ...)
 {
 	RIB_RLOCK_TRACKER;
 	struct rt_msghdr *rtm = NULL;
 	struct rtentry *rt = NULL;
 	struct rib_head *rnh;
 	struct rt_addrinfo info;
 	struct sockaddr_storage ss;
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 	int i, rti_need_deembed = 0;
 #endif
 	int alloc_len = 0, len, error = 0, fibnum;
 	struct ifnet *ifp = NULL;
 	union sockaddr_union saun;
 	sa_family_t saf = AF_UNSPEC;
 	struct rawcb *rp = NULL;
 	struct walkarg w;
 
 	fibnum = so->so_fibnum;
 
 /* Record the error and jump to the common exit path at "flush". */
 #define senderr(e) { error = e; goto flush;}
 	if (m == NULL || ((m->m_len < sizeof(long)) &&
 		       (m = m_pullup(m, sizeof(long))) == NULL))
 		return (ENOBUFS);
 	if ((m->m_flags & M_PKTHDR) == 0)
 		panic("route_output");
 	/* The packet length must match the length claimed in the header. */
 	len = m->m_pkthdr.len;
 	if (len < sizeof(*rtm) ||
 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen)
 		senderr(EINVAL);
 
 	/*
 	 * Most of current messages are in range 200-240 bytes,
 	 * minimize possible re-allocation on reply using larger size
 	 * buffer aligned on 1k boundary.
 	 */
 	alloc_len = roundup2(len, 1024);
 	if ((rtm = malloc(alloc_len, M_TEMP, M_NOWAIT)) == NULL)
 		senderr(ENOBUFS);
 
 	m_copydata(m, 0, len, (caddr_t)rtm);
 	bzero(&info, sizeof(info));
 	bzero(&w, sizeof(w));
 
 	if (rtm->rtm_version != RTM_VERSION) {
 		/* Do not touch message since format is unknown */
 		free(rtm, M_TEMP);
 		rtm = NULL;
 		senderr(EPROTONOSUPPORT);
 	}
 
 	/*
 	 * Starting from here, it is possible
 	 * to alter original message and insert
 	 * caller PID and error value.
 	 */
 
 	rtm->rtm_pid = curproc->p_pid;
 	info.rti_addrs = rtm->rtm_addrs;
 
 	info.rti_mflags = rtm->rtm_inits;
 	info.rti_rmx = &rtm->rtm_rmx;
 
 	/*
 	 * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6
 	 * link-local address because rtrequest requires addresses with
 	 * embedded scope id.
 	 */
 	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info))
 		senderr(EINVAL);
 
 	/* RTF_RNH_LOCKED may not be supplied in a message. */
 	if (rtm->rtm_flags & RTF_RNH_LOCKED)
 		senderr(EINVAL);
 	info.rti_flags = rtm->rtm_flags;
 	/* A destination is mandatory; families must be below AF_MAX. */
 	if (info.rti_info[RTAX_DST] == NULL ||
 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
 	     info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
 		senderr(EINVAL);
 	saf = info.rti_info[RTAX_DST]->sa_family;
 	/*
 	 * Verify that the caller has the appropriate privilege; RTM_GET
 	 * is the only operation the non-superuser is allowed.
 	 */
 	if (rtm->rtm_type != RTM_GET) {
 		error = priv_check(curthread, PRIV_NET_ROUTE);
 		if (error)
 			senderr(error);
 	}
 
 	/*
 	 * The given gateway address may be an interface address.
 	 * For example, issuing a "route change" command on a route
 	 * entry that was created from a tunnel, and the gateway
 	 * address given is the local end point. In this case the 
 	 * RTF_GATEWAY flag must be cleared or the destination will
 	 * not be reachable even though there is no error message.
 	 */
 	if (info.rti_info[RTAX_GATEWAY] != NULL &&
 	    info.rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) {
 		struct rt_addrinfo ginfo;
 		struct sockaddr *gdst;
 
 		bzero(&ginfo, sizeof(ginfo));
 		bzero(&ss, sizeof(ss));
 		ss.ss_len = sizeof(ss);
 
 		/* ss receives the gateway of the route found for gdst. */
 		ginfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&ss;
 		gdst = info.rti_info[RTAX_GATEWAY];
 
 		/* 
 		 * A host route through the loopback interface is 
 		 * installed for each interface address. In pre 8.0
 		 * releases the interface address of a PPP link type
 		 * is not reachable locally. This behavior is fixed as 
 		 * part of the new L2/L3 redesign and rewrite work. The
 		 * signature of this interface address route is the
 		 * AF_LINK sa_family type of the rt_gateway, and the
 		 * rt_ifp has the IFF_LOOPBACK flag set.
 		 */
 		if (rib_lookup_info(fibnum, gdst, NHR_REF, 0, &ginfo) == 0) {
 			if (ss.ss_family == AF_LINK &&
 			    ginfo.rti_ifp->if_flags & IFF_LOOPBACK) {
 				info.rti_flags &= ~RTF_GATEWAY;
 				info.rti_flags |= RTF_GWFLAG_COMPAT;
 			}
 			rib_free_info(&ginfo);
 		}
 	}
 
 	switch (rtm->rtm_type) {
 		struct rtentry *saved_nrt;
 
 	case RTM_ADD:
 	case RTM_CHANGE:
 		/* Adding a route requires a gateway. */
 		if (rtm->rtm_type == RTM_ADD) {
 			if (info.rti_info[RTAX_GATEWAY] == NULL)
 				senderr(EINVAL);
 		}
 		saved_nrt = NULL;
 
 		/* support for new ARP code */
 		if (info.rti_info[RTAX_GATEWAY] != NULL &&
 		    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
 		    (rtm->rtm_flags & RTF_LLDATA) != 0) {
 			error = lla_rt_output(rtm, &info);
 #ifdef INET6
 			if (error == 0)
 				rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
 #endif
 			break;
 		}
 		error = rtrequest1_fib(rtm->rtm_type, &info, &saved_nrt,
 		    fibnum);
 		if (error == 0 && saved_nrt != NULL) {
 #ifdef INET6
 			rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
 #endif
 			/* Report the interface index and drop our reference. */
 			RT_LOCK(saved_nrt);
 			rtm->rtm_index = saved_nrt->rt_ifp->if_index;
 			RT_REMREF(saved_nrt);
 			RT_UNLOCK(saved_nrt);
 		}
 		break;
 
 	case RTM_DELETE:
 		saved_nrt = NULL;
 		/* support for new ARP code */
 		if (info.rti_info[RTAX_GATEWAY] && 
 		    (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) &&
 		    (rtm->rtm_flags & RTF_LLDATA) != 0) {
 			error = lla_rt_output(rtm, &info);
 #ifdef INET6
 			if (error == 0)
 				rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
 #endif
 			break;
 		}
 		error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, fibnum);
 		if (error == 0) {
 			/*
 			 * Build the reply from the deleted entry; the
 			 * reference is dropped by RTFREE() at "flush".
 			 */
 			RT_LOCK(saved_nrt);
 			rt = saved_nrt;
 			goto report;
 		}
 #ifdef INET6
 		/* rt_msg2() will not be used when RTM_DELETE fails. */
 		rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
 #endif
 		break;
 
 	case RTM_GET:
 		rnh = rt_tables_get_rnh(fibnum, saf);
 		if (rnh == NULL)
 			senderr(EAFNOSUPPORT);
 
 		RIB_RLOCK(rnh);
 
 		if (info.rti_info[RTAX_NETMASK] == NULL &&
 		    rtm->rtm_type == RTM_GET) {
 			/*
 			 * Provide longest prefix match for
 			 * address lookup (no mask).
 			 * 'route -n get addr'
 			 */
 			rt = (struct rtentry *) rnh->rnh_matchaddr(
 			    info.rti_info[RTAX_DST], &rnh->head);
 		} else
 			rt = (struct rtentry *) rnh->rnh_lookup(
 			    info.rti_info[RTAX_DST],
 			    info.rti_info[RTAX_NETMASK], &rnh->head);
 
 		if (rt == NULL) {
 			RIB_RUNLOCK(rnh);
 			senderr(ESRCH);
 		}
 #ifdef RADIX_MPATH
 		/*
 		 * for RTM_CHANGE/LOCK, if we got multipath routes,
 		 * we require users to specify a matching RTAX_GATEWAY.
 		 *
 		 * for RTM_GET, gate is optional even with multipath.
 		 * if gate == NULL the first match is returned.
 		 * (no need to call rt_mpath_matchgate if gate == NULL)
 		 */
 		if (rt_mpath_capable(rnh) &&
 		    (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
 			rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
 			if (!rt) {
 				RIB_RUNLOCK(rnh);
 				senderr(ESRCH);
 			}
 		}
 #endif
 		/*
 		 * If performing proxied L2 entry insertion, and
 		 * the actual PPP host entry is found, perform
 		 * another search to retrieve the prefix route of
 		 * the local end point of the PPP link.
 		 */
 		if (rtm->rtm_flags & RTF_ANNOUNCE) {
 			/*
 			 * NOTE(review): laddr is not initialized here and
 			 * stays that way if ifa_ifwithnet() below returns
 			 * NULL, yet it is passed to rnh_matchaddr()
 			 * afterwards.  It is also a bare struct sockaddr;
 			 * confirm rt_maskedcopy() cannot write more than
 			 * sizeof(laddr) bytes for larger address families.
 			 */
 			struct sockaddr laddr;
 
 			if (rt->rt_ifp != NULL && 
 			    rt->rt_ifp->if_type == IFT_PROPVIRTUAL) {
 				struct epoch_tracker et;
 				struct ifaddr *ifa;
 
 				NET_EPOCH_ENTER(et);
 				ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1,
 						RT_ALL_FIBS);
 				if (ifa != NULL)
 					rt_maskedcopy(ifa->ifa_addr,
 						      &laddr,
 						      ifa->ifa_netmask);
 				NET_EPOCH_EXIT(et);
 			} else
 				rt_maskedcopy(rt->rt_ifa->ifa_addr,
 					      &laddr,
 					      rt->rt_ifa->ifa_netmask);
 			/* 
 			 * refactor rt and no lock operation necessary
 			 */
 			rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr,
 			    &rnh->head);
 			if (rt == NULL) {
 				RIB_RUNLOCK(rnh);
 				senderr(ESRCH);
 			}
 		} 
 		/* Take our own reference before dropping the table lock. */
 		RT_LOCK(rt);
 		RT_ADDREF(rt);
 		RIB_RUNLOCK(rnh);
 
 report:
 		/* Reached with rt locked, from RTM_GET or RTM_DELETE. */
 		RT_LOCK_ASSERT(rt);
 		if ((rt->rt_flags & RTF_HOST) == 0
 		    ? jailed_without_vnet(curthread->td_ucred)
 		    : prison_if(curthread->td_ucred,
 		    rt_key(rt)) != 0) {
 			RT_UNLOCK(rt);
 			senderr(ESRCH);
 		}
 		/* Describe the route entry itself in the reply. */
 		info.rti_info[RTAX_DST] = rt_key(rt);
 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 		info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
 		    rt_mask(rt), &ss);
 		info.rti_info[RTAX_GENMASK] = 0;
 		if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
 			ifp = rt->rt_ifp;
 			if (ifp) {
 				info.rti_info[RTAX_IFP] =
 				    ifp->if_addr->ifa_addr;
 				error = rtm_get_jailed(&info, ifp, rt,
 				    &saun, curthread->td_ucred);
 				if (error != 0) {
 					RT_UNLOCK(rt);
 					senderr(error);
 				}
 				if (ifp->if_flags & IFF_POINTOPOINT)
 					info.rti_info[RTAX_BRD] =
 					    rt->rt_ifa->ifa_dstaddr;
 				rtm->rtm_index = ifp->if_index;
 			} else {
 				info.rti_info[RTAX_IFP] = NULL;
 				info.rti_info[RTAX_IFA] = NULL;
 			}
 		} else if ((ifp = rt->rt_ifp) != NULL) {
 			rtm->rtm_index = ifp->if_index;
 		}
 
 		/* Check if we need to realloc storage */
 		/* This call is made without a walkarg; its len result sizes the reply. */
 		rtsock_msg_buffer(rtm->rtm_type, &info, NULL, &len);
 		if (len > alloc_len) {
 			struct rt_msghdr *new_rtm;
 			new_rtm = malloc(len, M_TEMP, M_NOWAIT);
 			if (new_rtm == NULL) {
 				RT_UNLOCK(rt);
 				senderr(ENOBUFS);
 			}
 			bcopy(rtm, new_rtm, rtm->rtm_msglen);
 			free(rtm, M_TEMP);
 			rtm = new_rtm;
 			alloc_len = len;
 		}
 
 		/* Second call: rtm is supplied as the working buffer. */
 		w.w_tmem = (caddr_t)rtm;
 		w.w_tmemsize = alloc_len;
 		rtsock_msg_buffer(rtm->rtm_type, &info, &w, &len);
 
 		/* RTF_GWFLAG_COMPAT is reported to the caller as RTF_GATEWAY. */
 		if (rt->rt_flags & RTF_GWFLAG_COMPAT)
 			rtm->rtm_flags = RTF_GATEWAY | 
 				(rt->rt_flags & ~RTF_GWFLAG_COMPAT);
 		else
 			rtm->rtm_flags = rt->rt_flags;
 		rt_getmetrics(rt, &rtm->rtm_rmx);
 		rtm->rtm_addrs = info.rti_addrs;
 
 		RT_UNLOCK(rt);
 		break;
 
 	default:
 		senderr(EOPNOTSUPP);
 	}
 
 flush:
 	/* Common exit path: release the route entry, then emit the reply. */
 	if (rt != NULL)
 		RTFREE(rt);
 	/*
 	 * Check to see if we don't want our own messages.
 	 */
 	if ((so->so_options & SO_USELOOPBACK) == 0) {
 		if (V_route_cb.any_count <= 1) {
 			if (rtm != NULL)
 				free(rtm, M_TEMP);
 			m_freem(m);
 			return (error);
 		}
 		/* There is another listener, so construct message */
 		rp = sotorawcb(so);
 	}
 
 	if (rtm != NULL) {
 #ifdef INET6
 		if (rti_need_deembed) {
 			/* sin6_scope_id is recovered before sending rtm. */
 			sin6 = (struct sockaddr_in6 *)&ss;
 			for (i = 0; i < RTAX_MAX; i++) {
 				if (info.rti_info[i] == NULL)
 					continue;
 				if (info.rti_info[i]->sa_family != AF_INET6)
 					continue;
 				bcopy(info.rti_info[i], sin6, sizeof(*sin6));
 				if (sa6_recoverscope(sin6) == 0)
 					bcopy(sin6, info.rti_info[i],
 						    sizeof(*sin6));
 			}
 		}
 #endif
 		if (error != 0)
 			rtm->rtm_errno = error;
 		else
 			rtm->rtm_flags |= RTF_DONE;
 
 		/* Copy the reply over the request and fix up the mbuf length. */
 		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
 			m_freem(m);
 			m = NULL;
 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
 
 		free(rtm, M_TEMP);
 	}
 	if (m != NULL) {
 		/* Tag the mbuf so raw_input_rts_cb() filters by FIB. */
 		M_SETFIB(m, fibnum);
 		m->m_flags |= RTS_FILTER_FIB;
 		if (rp) {
 			/*
 			 * XXX insure we don't get a copy by
 			 * invalidating our protocol
 			 */
 			unsigned short family = rp->rcb_proto.sp_family;
 			rp->rcb_proto.sp_family = 0;
 			rt_dispatch(m, saf);
 			rp->rcb_proto.sp_family = family;
 		} else
 			rt_dispatch(m, saf);
 	}
 
 	return (error);
 }
 
 /*
  * Export the metrics of @rt into the userland-visible @out structure.
  * Every field that is not explicitly filled in below is left zeroed.
  */
 static void
 rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
 {
 
 	bzero(out, sizeof(*out));
 	out->rmx_pksent = counter_u64_fetch(rt->rt_pksent);
 	out->rmx_weight = rt->rt_weight;
 	out->rmx_mtu = rt->rt_mtu;
 	/*
 	 * Kernel -> userland timebase conversion.  A zero rt_expire is
 	 * reported as zero, which the bzero() above already provides.
 	 */
 	if (rt->rt_expire != 0)
 		out->rmx_expire = rt->rt_expire - time_uptime + time_second;
 }
 
 /*
  * Extract the addresses of the passed sockaddrs.
  * Do a little sanity checking so as to avoid bad memory references.
  * This data is derived straight from userland.
  *
  * @cp points at the first packed sockaddr and @cplim just past the last
  * byte of the message.  For every bit set in rtinfo->rti_addrs the next
  * sockaddr is recorded in rtinfo->rti_info[]; the stored pointers refer
  * directly into the [cp, cplim) buffer, nothing is copied.
  *
  * Returns 0 on success or EINVAL if a sockaddr would extend past @cplim.
  */
 static int
 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
 {
 	struct sockaddr *sa;
 	int i;
 
 	/* Stop at the last address slot or when the buffer is exhausted. */
 	for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
 		/* Slot i is not present in the message. */
 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
 			continue;
 		sa = (struct sockaddr *)cp;
 		/*
 		 * It won't fit.
 		 */
 		if (cp + sa->sa_len > cplim)
 			return (EINVAL);
 		/*
 		 * there are no more.. quit now
 		 * If there are more bits, they are in error.
 		 * I've seen this. route(1) can evidently generate these. 
 		 * This causes kernel to core dump.
 		 * for compatibility, If we see this, point to a safe address.
 		 */
 		if (sa->sa_len == 0) {
 			rtinfo->rti_info[i] = &sa_zero;
 			return (0); /* should be EINVAL but for compat */
 		}
 		/* accept it */
 #ifdef INET6
 		/* Convert the IPv6 address to the embedded-scope form. */
 		if (sa->sa_family == AF_INET6)
 			sa6_embedscope((struct sockaddr_in6 *)sa,
 			    V_ip6_use_defzone);
 #endif
 		rtinfo->rti_info[i] = sa;
 		/* Advance by the padded sockaddr size, not the raw sa_len. */
 		cp += SA_SIZE(sa);
 	}
 	return (0);
 }
 
 /*
  * Build in @dmask a netmask sockaddr that has the same length and address
  * family as @dst, taking the mask bytes from @smask.  @smask itself is
  * never modified.  Mostly used with radix netmasks.
  *
  * Returns @dmask cast to a sockaddr pointer, or NULL when either @dst or
  * @smask is NULL.
  */
 static struct sockaddr *
 rtsock_fix_netmask(struct sockaddr *dst, struct sockaddr *smask,
     struct sockaddr_storage *dmask)
 {
 	if (dst == NULL)
 		return (NULL);
 	if (smask == NULL)
 		return (NULL);
 
 	/* Clear as many bytes as @dst occupies, then overlay the mask. */
 	memset(dmask, 0, dst->sa_len);
 	memcpy(dmask, smask, smask->sa_len);
 
 	/* Length and family always come from the destination. */
 	dmask->ss_family = dst->sa_family;
 	dmask->ss_len = dst->sa_len;
 
 	return ((struct sockaddr *)dmask);
 }
 
 /*
  * Writes information related to @rtinfo object to newly-allocated mbuf.
  * Assumes MCLBYTES is enough to construct any message.
  * Used for OS notifications of various events (if/ifa announces,etc)
  *
  * Only rtm_msglen, rtm_version and rtm_type of the header are filled in
  * here; the rest of the header is zeroed and left to the caller.  As a
  * side effect, a bit is set in rtinfo->rti_addrs for every sockaddr that
  * was appended.
  *
  * Returns allocated mbuf or NULL on failure.
  */
 static struct mbuf *
 rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo)
 {
 	struct rt_msghdr *rtm;
 	struct mbuf *m;
 	int i;
 	struct sockaddr *sa;
 #ifdef INET6
 	struct sockaddr_storage ss;
 	struct sockaddr_in6 *sin6;
 #endif
 	int len, dlen;
 
 	/* The fixed header size depends on the message type. */
 	switch (type) {
 
 	case RTM_DELADDR:
 	case RTM_NEWADDR:
 		len = sizeof(struct ifa_msghdr);
 		break;
 
 	case RTM_DELMADDR:
 	case RTM_NEWMADDR:
 		len = sizeof(struct ifma_msghdr);
 		break;
 
 	case RTM_IFINFO:
 		len = sizeof(struct if_msghdr);
 		break;
 
 	case RTM_IFANNOUNCE:
 	case RTM_IEEE80211:
 		len = sizeof(struct if_announcemsghdr);
 		break;
 
 	default:
 		len = sizeof(struct rt_msghdr);
 	}
 
 	/* XXXGL: can we use MJUMPAGESIZE cluster here? */
 	KASSERT(len <= MCLBYTES, ("%s: message too big", __func__));
 	/* Use a cluster only when the header does not fit a plain mbuf. */
 	if (len > MHLEN)
 		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 	else
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (m);
 
 	m->m_pkthdr.len = m->m_len = len;
 	rtm = mtod(m, struct rt_msghdr *);
 	bzero((caddr_t)rtm, len);
 	/* Append each present sockaddr right after the header. */
 	for (i = 0; i < RTAX_MAX; i++) {
 		if ((sa = rtinfo->rti_info[i]) == NULL)
 			continue;
 		rtinfo->rti_addrs |= (1 << i);
 		dlen = SA_SIZE(sa);
 #ifdef INET6
 		/*
 		 * When de-embedding is enabled, emit a private copy with the
 		 * scope recovered; on failure the original is sent as is.
 		 */
 		if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
 			sin6 = (struct sockaddr_in6 *)&ss;
 			bcopy(sa, sin6, sizeof(*sin6));
 			if (sa6_recoverscope(sin6) == 0)
 				sa = (struct sockaddr *)sin6;
 		}
 #endif
 		m_copyback(m, len, dlen, (caddr_t)sa);
 		len += dlen;
 	}
 	/*
 	 * The packet length must match what was appended; a mismatch is
 	 * treated as failure (presumably m_copyback() could not extend the
 	 * chain -- confirm against mbuf(9)).
 	 */
 	if (m->m_pkthdr.len != len) {
 		m_freem(m);
 		return (NULL);
 	}
 	rtm->rtm_msglen = len;
 	rtm->rtm_version = RTM_VERSION;
 	rtm->rtm_type = type;
 	return (m);
 }
 
 /*
  * Writes information related to @rtinfo object to preallocated buffer.
  * Stores needed size in @plen. If @w is NULL, calculates size without
  * writing.
  * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation.
  *
  * The buffer is described by w->w_tmem / w->w_tmemsize.  rtinfo->rti_addrs
  * is recomputed from the non-NULL entries of rtinfo->rti_info[].  The
  * size stored in @plen is rounded up with ALIGN() and is always the full
  * size needed, even when the buffer turned out to be too small.
  *
  * Returns 0 on success, or ENOBUFS when @w was supplied but its buffer
  * could not hold the message.
  *
  */
 static int
 rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen)
 {
 	int i;
 	int len, buflen = 0, dlen;
 	caddr_t cp = NULL;
 	struct rt_msghdr *rtm = NULL;
 #ifdef INET6
 	struct sockaddr_storage ss;
 	struct sockaddr_in6 *sin6;
 #endif
 #ifdef COMPAT_FREEBSD32
 	bool compat32 = false;
 #endif
 
 	/* Pick the fixed header size for this message type and ABI. */
 	switch (type) {
 
 	case RTM_DELADDR:
 	case RTM_NEWADDR:
 		if (w != NULL && w->w_op == NET_RT_IFLISTL) {
 #ifdef COMPAT_FREEBSD32
 			if (w->w_req->flags & SCTL_MASK32) {
 				len = sizeof(struct ifa_msghdrl32);
 				compat32 = true;
 			} else
 #endif
 				len = sizeof(struct ifa_msghdrl);
 		} else
 			len = sizeof(struct ifa_msghdr);
 		break;
 
 	case RTM_IFINFO:
 #ifdef COMPAT_FREEBSD32
 		if (w != NULL && w->w_req->flags & SCTL_MASK32) {
 			if (w->w_op == NET_RT_IFLISTL)
 				len = sizeof(struct if_msghdrl32);
 			else
 				len = sizeof(struct if_msghdr32);
 			compat32 = true;
 			break;
 		}
 #endif
 		if (w != NULL && w->w_op == NET_RT_IFLISTL)
 			len = sizeof(struct if_msghdrl);
 		else
 			len = sizeof(struct if_msghdr);
 		break;
 
 	case RTM_NEWMADDR:
 		len = sizeof(struct ifma_msghdr);
 		break;
 
 	default:
 		len = sizeof(struct rt_msghdr);
 	}
 
 	/*
 	 * With a buffer, sockaddrs are written right after the header.
 	 * From here on cp == NULL means "only count, do not write".
 	 */
 	if (w != NULL) {
 		rtm = (struct rt_msghdr *)w->w_tmem;
 		buflen = w->w_tmemsize - len;
 		cp = (caddr_t)w->w_tmem + len;
 	}
 
 	rtinfo->rti_addrs = 0;
 	for (i = 0; i < RTAX_MAX; i++) {
 		struct sockaddr *sa;
 
 		if ((sa = rtinfo->rti_info[i]) == NULL)
 			continue;
 		rtinfo->rti_addrs |= (1 << i);
 #ifdef COMPAT_FREEBSD32
 		if (compat32)
 			dlen = SA_SIZE32(sa);
 		else
 #endif
 			dlen = SA_SIZE(sa);
 		if (cp != NULL && buflen >= dlen) {
 #ifdef INET6
 			/*
 			 * When de-embedding is enabled, write a private copy
 			 * with the scope recovered; on failure the original
 			 * sockaddr is written unchanged.
 			 */
 			if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
 				sin6 = (struct sockaddr_in6 *)&ss;
 				bcopy(sa, sin6, sizeof(*sin6));
 				if (sa6_recoverscope(sin6) == 0)
 					sa = (struct sockaddr *)sin6;
 			}
 #endif
 			bcopy((caddr_t)sa, cp, (unsigned)dlen);
 			cp += dlen;
 			buflen -= dlen;
 		} else if (cp != NULL) {
 			/*
 			 * Buffer too small. Count needed size
 			 * and return with error.
 			 */
 			cp = NULL;
 		}
 
 		/* The required size is accumulated whether or not we wrote. */
 		len += dlen;
 	}
 
 	/* Zero the alignment padding at the tail, if it still fits. */
 	if (cp != NULL) {
 		dlen = ALIGN(len) - len;
 		if (buflen < dlen)
 			cp = NULL;
 		else {
 			bzero(cp, dlen);
 			cp += dlen;
 			buflen -= dlen;
 		}
 	}
 	len = ALIGN(len);
 
 	if (cp != NULL) {
 		/* fill header iff buffer is large enough */
 		rtm->rtm_version = RTM_VERSION;
 		rtm->rtm_type = type;
 		rtm->rtm_msglen = len;
 	}
 
 	*plen = len;
 
 	/* A buffer was given but writing had to be abandoned. */
 	if (w != NULL && cp == NULL)
 		return (ENOBUFS);
 
 	return (0);
 }
 
 /*
  * This routine is called to generate a message from the routing
  * socket indicating that a redirect has occurred, a routing lookup
  * has failed, or that a protocol has detected timeouts to a particular
  * destination.
  */
 void
 rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error,
     int fibnum)
 {
 	struct rt_msghdr *rtm;
 	struct mbuf *m;
 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
 
 	if (V_route_cb.any_count == 0)
 		return;
 	m = rtsock_msg_mbuf(type, rtinfo);
 	if (m == NULL)
 		return;
 
 	if (fibnum != RT_ALL_FIBS) {
 		KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
 		    "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
 		M_SETFIB(m, fibnum);
 		m->m_flags |= RTS_FILTER_FIB;
 	}
 
 	rtm = mtod(m, struct rt_msghdr *);
 	rtm->rtm_flags = RTF_DONE | flags;
 	rtm->rtm_errno = error;
 	rtm->rtm_addrs = rtinfo->rti_addrs;
 	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
 }
 
 void
 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
 {
 
 	rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS);
 }
 
 /*
  * Generate an RTM_IFINFO routing socket message announcing that the
  * status of network interface @ifp has changed.  The message carries the
  * interface index, flags and statistics but no sockaddrs.
  */
 void
 rt_ifmsg(struct ifnet *ifp)
 {
 	struct rt_addrinfo info;
 	struct if_msghdr *hdr;
 	struct mbuf *msg;
 
 	/* Nobody is listening. */
 	if (V_route_cb.any_count == 0)
 		return;
 
 	bzero((caddr_t)&info, sizeof(info));
 	msg = rtsock_msg_mbuf(RTM_IFINFO, &info);
 	if (msg == NULL)
 		return;
 
 	hdr = mtod(msg, struct if_msghdr *);
 	hdr->ifm_addrs = 0;
 	hdr->ifm_index = ifp->if_index;
 	hdr->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 	if_data_copy(ifp, &hdr->ifm_data);
 	rt_dispatch(msg, AF_UNSPEC);
 }
 
 /*
  * Announce interface address arrival/withdraw.
  * Please do not call directly, use rt_addrmsg().
  * Assume input data to be valid.
  * Returns 0 on success.
  */
 int
 rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
 {
 	struct rt_addrinfo info;
 	struct sockaddr *sa;
 	int ncmd;
 	struct mbuf *m;
 	struct ifa_msghdr *ifam;
 	struct ifnet *ifp = ifa->ifa_ifp;
 	struct sockaddr_storage ss;
 
 	if (V_route_cb.any_count == 0)
 		return (0);
 
 	ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
 
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
 	info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
 	info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
 	    info.rti_info[RTAX_IFP], ifa->ifa_netmask, &ss);
 	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
 	if ((m = rtsock_msg_mbuf(ncmd, &info)) == NULL)
 		return (ENOBUFS);
 	ifam = mtod(m, struct ifa_msghdr *);
 	ifam->ifam_index = ifp->if_index;
 	ifam->ifam_metric = ifa->ifa_ifp->if_metric;
 	ifam->ifam_flags = ifa->ifa_flags;
 	ifam->ifam_addrs = info.rti_addrs;
 
 	if (fibnum != RT_ALL_FIBS) {
 		M_SETFIB(m, fibnum);
 		m->m_flags |= RTS_FILTER_FIB;
 	}
 
 	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
 
 	return (0);
 }
 
 /*
  * Announce route addition/removal.
  * Please do not call directly, use rt_routemsg().
  * Note that @rt data MAY be inconsistent/invalid:
  * if some userland app sends us "invalid" route message (invalid mask,
  * no dst, wrong address families, etc...) we need to pass it back
  * to app (and any other rtsock consumers) with rtm_errno field set to
  * non-zero value.
  *
  * Returns 0 on success.
  */
 int
 rtsock_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt,
     int fibnum)
 {
 	struct rt_addrinfo info;
 	struct sockaddr *sa;
 	struct mbuf *m;
 	struct rt_msghdr *rtm;
 	struct sockaddr_storage ss;
 
 	if (V_route_cb.any_count == 0)
 		return (0);
 
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_DST] = sa = rt_key(rt);
 	info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(sa, rt_mask(rt), &ss);
 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 	if ((m = rtsock_msg_mbuf(cmd, &info)) == NULL)
 		return (ENOBUFS);
 	rtm = mtod(m, struct rt_msghdr *);
 	rtm->rtm_index = ifp->if_index;
 	rtm->rtm_flags |= rt->rt_flags;
 	rtm->rtm_errno = error;
 	rtm->rtm_addrs = info.rti_addrs;
 
 	if (fibnum != RT_ALL_FIBS) {
 		M_SETFIB(m, fibnum);
 		m->m_flags |= RTS_FILTER_FIB;
 	}
 
 	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
 
 	return (0);
 }
 
 /*
  * This is the analogue to the rt_newaddrmsg which performs the same
  * function but for multicast group memberhips.  This is easier since
  * there is no route state to worry about.
  */
 void
 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
 {
 	struct rt_addrinfo info;
 	struct mbuf *m = NULL;
 	struct ifnet *ifp = ifma->ifma_ifp;
 	struct ifma_msghdr *ifmam;
 
 	if (V_route_cb.any_count == 0)
 		return;
 
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
 	if (ifp && ifp->if_addr)
 		info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
 	else
 		info.rti_info[RTAX_IFP] = NULL;
 	/*
 	 * If a link-layer address is present, present it as a ``gateway''
 	 * (similarly to how ARP entries, e.g., are presented).
 	 */
 	info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
 	m = rtsock_msg_mbuf(cmd, &info);
 	if (m == NULL)
 		return;
 	ifmam = mtod(m, struct ifma_msghdr *);
 	KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
 	    __func__));
 	ifmam->ifmam_index = ifp->if_index;
 	ifmam->ifmam_addrs = info.rti_addrs;
 	rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC);
 }
 
 /*
  * Build an interface announcement message of the given @type for @ifp.
  * @info is cleared and used as scratch space, so the message carries no
  * sockaddrs.  The header receives the interface index, its name and the
  * @what code.
  *
  * Returns the new mbuf, or NULL when there are no routing socket
  * listeners or the allocation failed.
  */
 static struct mbuf *
 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
 	struct rt_addrinfo *info)
 {
 	struct if_announcemsghdr *hdr;
 	struct mbuf *msg;
 
 	if (V_route_cb.any_count == 0)
 		return (NULL);
 
 	bzero((caddr_t)info, sizeof(*info));
 	msg = rtsock_msg_mbuf(type, info);
 	if (msg == NULL)
 		return (NULL);
 
 	hdr = mtod(msg, struct if_announcemsghdr *);
 	hdr->ifan_what = what;
 	hdr->ifan_index = ifp->if_index;
 	strlcpy(hdr->ifan_name, ifp->if_xname, sizeof(hdr->ifan_name));
 
 	return (msg);
 }
 
 /*
  * This is called to generate routing socket messages indicating
  * IEEE80211 wireless events.
  * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
  *
  * @data / @data_len describe an opaque event payload that is appended
  * after the if_announcemsghdr.  The message is silently dropped when no
  * mbuf can be allocated or when the payload does not fit into a single
  * additional mbuf.
  */
 void
 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
 {
 	struct mbuf *m;
 	struct rt_addrinfo info;
 
 	m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
 	if (m == NULL)
 		return;
 
 	/*
 	 * Append the ieee80211 data.  Try to stick it in the
 	 * mbuf containing the ifannounce msg; otherwise allocate
 	 * a new mbuf and append.
 	 *
 	 * NB: we assume m is a single mbuf.
 	 */
 	if (data_len > M_TRAILINGSPACE(m)) {
 		struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
 		if (n == NULL) {
 			m_freem(m);
 			return;
 		}
 		/*
 		 * The payload must fit into the freshly allocated mbuf;
 		 * copying more than that would write past its storage.
 		 * Drop the event instead of corrupting memory.
 		 */
 		if (data_len > M_TRAILINGSPACE(n)) {
 			m_freem(n);
 			m_freem(m);
 			return;
 		}
 		bcopy(data, mtod(n, void *), data_len);
 		n->m_len = data_len;
 		m->m_next = n;
 	} else if (data_len > 0) {
 		bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
 		m->m_len += data_len;
 	}
 	/* Account for the payload in both the packet and message length. */
 	if (m->m_flags & M_PKTHDR)
 		m->m_pkthdr.len += data_len;
 	mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
 	rt_dispatch(m, AF_UNSPEC);
 }
 
 /*
  * Generate an RTM_IFANNOUNCE routing socket message indicating network
  * interface arrival or departure; @what is stored in the message as
  * the announcement code.
  */
 void
 rt_ifannouncemsg(struct ifnet *ifp, int what)
 {
 	struct rt_addrinfo info;
 	struct mbuf *msg;
 
 	msg = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
 	if (msg == NULL)
 		return;
 	rt_dispatch(msg, AF_UNSPEC);
 }
 
 /*
  * Hand a finished routing message @m to the NETISR_ROUTE netisr queue.
  * @saf is the address family the message relates to, or AF_UNSPEC.
  * The mbuf is consumed in all cases; it is freed here on failure.
  */
 static void
 rt_dispatch(struct mbuf *m, sa_family_t saf)
 {
 	struct m_tag *famtag;
 
 	/*
 	 * Preserve the family from the sockaddr, if any, in an m_tag for
 	 * use when injecting the mbuf into the routing socket buffer from
 	 * the netisr.
 	 */
 	if (saf != AF_UNSPEC) {
 		famtag = m_tag_get(PACKET_TAG_RTSOCKFAM,
 		    sizeof(unsigned short), M_NOWAIT);
 		if (famtag == NULL) {
 			m_freem(m);
 			return;
 		}
 		/* The tag payload follows the m_tag header directly. */
 		*(unsigned short *)(famtag + 1) = saf;
 		m_tag_prepend(m, famtag);
 	}
 #ifdef VIMAGE
 	/* Without a loopback interface the message is dropped. */
 	if (V_loif == NULL) {
 		m_freem(m);
 		return;
 	}
 	m->m_pkthdr.rcvif = V_loif;
 #endif
 	netisr_queue(NETISR_ROUTE, m);	/* mbuf is free'd on failure. */
 }
 
 /*
  * This is used in dumping the kernel table via sysctl().
  *
  * Tree-walk callback: @rn is the route entry and @vw the struct walkarg
  * describing the request and the scratch buffer.  Formats one RTM_GET
  * message for the entry and copies it out with SYSCTL_OUT().
  *
  * Returns 0 to continue the walk (also for entries that are filtered
  * out), or an error from rtsock_msg_buffer() / SYSCTL_OUT().
  */
 static int
 sysctl_dumpentry(struct radix_node *rn, void *vw)
 {
 	struct walkarg *w = vw;
 	struct rtentry *rt = (struct rtentry *)rn;
 	int error = 0, size;
 	struct rt_addrinfo info;
 	struct sockaddr_storage ss;
 
 	NET_EPOCH_ASSERT();
 
 	/* NET_RT_FLAGS: skip entries with none of the requested flags. */
 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
 		return 0;
 	/*
 	 * Jail filtering: non-host routes are checked with
 	 * jailed_without_vnet(), host routes with prison_if() on the key.
 	 */
 	if ((rt->rt_flags & RTF_HOST) == 0
 	    ? jailed_without_vnet(w->w_req->td->td_ucred)
 	    : prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0)
 		return (0);
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_DST] = rt_key(rt);
 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 	info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
 	    rt_mask(rt), &ss);
 	info.rti_info[RTAX_GENMASK] = 0;
 	/* Interface addresses are reported only for non-dying interfaces. */
 	if (rt->rt_ifp && !(rt->rt_ifp->if_flags & IFF_DYING)) {
 		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
 	}
 	if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0)
 		return (error);
 	if (w->w_req && w->w_tmem) {
 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
 
 		/* Clear every header field from rtm_index to the end. */
 		bzero(&rtm->rtm_index,
 		    sizeof(*rtm) - offsetof(struct rt_msghdr, rtm_index));
 		/* Report the compat gateway bit as plain RTF_GATEWAY. */
 		if (rt->rt_flags & RTF_GWFLAG_COMPAT)
 			rtm->rtm_flags = RTF_GATEWAY | 
 				(rt->rt_flags & ~RTF_GWFLAG_COMPAT);
 		else
 			rtm->rtm_flags = rt->rt_flags;
 		rt_getmetrics(rt, &rtm->rtm_rmx);
 		/*
 		 * NOTE(review): rt->rt_ifp is dereferenced unconditionally
 		 * here although it is NULL-checked above -- confirm that a
 		 * route entry always has an interface at this point.
 		 */
 		rtm->rtm_index = rt->rt_ifp->if_index;
 		rtm->rtm_addrs = info.rti_addrs;
 		error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
 		return (error);
 	}
 	return (error);
 }
 
 /*
  * Fill in the if_msghdrl header (or its 32-bit compat layout) located at
  * the start of w->w_tmem for interface @ifp, copy the interface
  * statistics from @src_ifd into it and copy @len bytes of the buffer out
  * to the sysctl request.
  *
  * Returns the result of SYSCTL_OUT().
  */
 static int
 sysctl_iflist_ifml(struct ifnet *ifp, const struct if_data *src_ifd,
     struct rt_addrinfo *info, struct walkarg *w, int len)
 {
 	struct if_msghdrl *ifm;
 	struct if_data *ifd;
 
 	ifm = (struct if_msghdrl *)w->w_tmem;
 
 #ifdef COMPAT_FREEBSD32
 	/* Request flagged SCTL_MASK32: use the 32-bit header layout. */
 	if (w->w_req->flags & SCTL_MASK32) {
 		struct if_msghdrl32 *ifm32;
 
 		ifm32 = (struct if_msghdrl32 *)ifm;
 		ifm32->ifm_addrs = info->rti_addrs;
 		ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 		ifm32->ifm_index = ifp->if_index;
 		ifm32->_ifm_spare1 = 0;
 		ifm32->ifm_len = sizeof(*ifm32);
 		ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
 		ifm32->_ifm_spare2 = 0;
 		ifd = &ifm32->ifm_data;
 	} else
 #endif
 	{
 		ifm->ifm_addrs = info->rti_addrs;
 		ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 		ifm->ifm_index = ifp->if_index;
 		ifm->_ifm_spare1 = 0;
 		ifm->ifm_len = sizeof(*ifm);
 		ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
 		ifm->_ifm_spare2 = 0;
 		ifd = &ifm->ifm_data;
 	}
 
 	/* ifd points at the statistics area of whichever layout was used. */
 	memcpy(ifd, src_ifd, sizeof(*ifd));
 
 	return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
 }
 
 /*
  * Fill in the if_msghdr header (or its 32-bit compat layout) located at
  * the start of w->w_tmem for interface @ifp, copy the interface
  * statistics from @src_ifd into it and copy @len bytes of the buffer out
  * to the sysctl request.
  *
  * Returns the result of SYSCTL_OUT().
  */
 static int
 sysctl_iflist_ifm(struct ifnet *ifp, const struct if_data *src_ifd,
     struct rt_addrinfo *info, struct walkarg *w, int len)
 {
 	struct if_msghdr *ifm;
 	struct if_data *ifd;
 
 	ifm = (struct if_msghdr *)w->w_tmem;
 
 #ifdef COMPAT_FREEBSD32
 	/* Request flagged SCTL_MASK32: use the 32-bit header layout. */
 	if (w->w_req->flags & SCTL_MASK32) {
 		struct if_msghdr32 *ifm32;
 
 		ifm32 = (struct if_msghdr32 *)ifm;
 		ifm32->ifm_addrs = info->rti_addrs;
 		ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 		ifm32->ifm_index = ifp->if_index;
 		ifm32->_ifm_spare1 = 0;
 		ifd = &ifm32->ifm_data;
 	} else
 #endif
 	{
 		ifm->ifm_addrs = info->rti_addrs;
 		ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 		ifm->ifm_index = ifp->if_index;
 		ifm->_ifm_spare1 = 0;
 		ifd = &ifm->ifm_data;
 	}
 
 	/* ifd points at the statistics area of whichever layout was used. */
 	memcpy(ifd, src_ifd, sizeof(*ifd));
 
 	return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
 }
 
 /*
  * Fill in the ifa_msghdrl header (or its 32-bit compat layout) located at
  * the start of w->w_tmem for interface address @ifa, including the
  * per-address traffic counters, and copy @len bytes of the buffer out to
  * the sysctl request.
  *
  * Returns the result of SYSCTL_OUT().
  */
 static int
 sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
     struct walkarg *w, int len)
 {
 	struct ifa_msghdrl *ifam;
 	struct if_data *ifd;
 
 	ifam = (struct ifa_msghdrl *)w->w_tmem;
 
 #ifdef COMPAT_FREEBSD32
 	/* Request flagged SCTL_MASK32: use the 32-bit header layout. */
 	if (w->w_req->flags & SCTL_MASK32) {
 		struct ifa_msghdrl32 *ifam32;
 
 		ifam32 = (struct ifa_msghdrl32 *)ifam;
 		ifam32->ifam_addrs = info->rti_addrs;
 		ifam32->ifam_flags = ifa->ifa_flags;
 		ifam32->ifam_index = ifa->ifa_ifp->if_index;
 		ifam32->_ifam_spare1 = 0;
 		ifam32->ifam_len = sizeof(*ifam32);
 		ifam32->ifam_data_off =
 		    offsetof(struct ifa_msghdrl32, ifam_data);
 		ifam32->ifam_metric = ifa->ifa_ifp->if_metric;
 		ifd = &ifam32->ifam_data;
 	} else
 #endif
 	{
 		ifam->ifam_addrs = info->rti_addrs;
 		ifam->ifam_flags = ifa->ifa_flags;
 		ifam->ifam_index = ifa->ifa_ifp->if_index;
 		ifam->_ifam_spare1 = 0;
 		ifam->ifam_len = sizeof(*ifam);
 		ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
 		ifam->ifam_metric = ifa->ifa_ifp->if_metric;
 		ifd = &ifam->ifam_data;
 	}
 
 	/*
 	 * Only the data length, the packet/byte counters and (below) the
 	 * vhid are reported per address; everything else stays zero.
 	 */
 	bzero(ifd, sizeof(*ifd));
 	ifd->ifi_datalen = sizeof(struct if_data);
 	ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets);
 	ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets);
 	ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes);
 	ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes);
 
 	/* Fixup if_data carp(4) vhid. */
 	if (carp_get_vhid_p != NULL)
 		ifd->ifi_vhid = (*carp_get_vhid_p)(ifa);
 
 	return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
 }
 
 /*
  * Fill in the ifa_msghdr header located at the start of w->w_tmem for
  * interface address @ifa and copy @len bytes of the buffer out to the
  * sysctl request.
  *
  * Returns the result of SYSCTL_OUT().
  */
 static int
 sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info,
     struct walkarg *w, int len)
 {
 	struct ifa_msghdr *hdr = (struct ifa_msghdr *)w->w_tmem;
 
 	hdr->_ifam_spare1 = 0;
 	hdr->ifam_index = ifa->ifa_ifp->if_index;
 	hdr->ifam_metric = ifa->ifa_ifp->if_metric;
 	hdr->ifam_flags = ifa->ifa_flags;
 	hdr->ifam_addrs = info->rti_addrs;
 
 	return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
 }
 
 /*
  * Dump the interface list (NET_RT_IFLIST / NET_RT_IFLISTL) to the sysctl
  * request described by @w.  For each interface an RTM_IFINFO message is
  * emitted, followed by one RTM_NEWADDR message for each of its addresses
  * that passes the filters.
  *
  * @af, when non-zero, restricts the addresses to that family; a non-zero
  * w->w_arg restricts the dump to the interface with that index.
  *
  * Returns 0 or the first error from message formatting or copy-out.
  */
 static int
 sysctl_iflist(int af, struct walkarg *w)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct if_data ifd;
 	struct rt_addrinfo info;
 	int len, error = 0;
 	struct sockaddr_storage ss;
 	struct epoch_tracker et;
 
 	bzero((caddr_t)&info, sizeof(info));
 	bzero(&ifd, sizeof(ifd));
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if (w->w_arg && w->w_arg != ifp->if_index)
 			continue;
 		if_data_copy(ifp, &ifd);
 		ifa = ifp->if_addr;
 		/* The interface message carries only RTAX_IFP. */
 		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
 		error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len);
 		if (error != 0)
 			goto done;
 		/* RTAX_IFP is cleared so address messages do not repeat it. */
 		info.rti_info[RTAX_IFP] = NULL;
 		if (w->w_req && w->w_tmem) {
 			if (w->w_op == NET_RT_IFLISTL)
 				error = sysctl_iflist_ifml(ifp, &ifd, &info, w,
 				    len);
 			else
 				error = sysctl_iflist_ifm(ifp, &ifd, &info, w,
 				    len);
 			if (error)
 				goto done;
 		}
 		/* Walk the addresses that follow ifp->if_addr on the list. */
 		while ((ifa = CK_STAILQ_NEXT(ifa, ifa_link)) != NULL) {
 			if (af && af != ifa->ifa_addr->sa_family)
 				continue;
 			/* Skip addresses prison_if() rejects for the caller. */
 			if (prison_if(w->w_req->td->td_ucred,
 			    ifa->ifa_addr) != 0)
 				continue;
 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
 			info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
 			    ifa->ifa_addr, ifa->ifa_netmask, &ss);
 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
 			error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len);
 			if (error != 0)
 				goto done;
 			if (w->w_req && w->w_tmem) {
 				if (w->w_op == NET_RT_IFLISTL)
 					error = sysctl_iflist_ifaml(ifa, &info,
 					    w, len);
 				else
 					error = sysctl_iflist_ifam(ifa, &info,
 					    w, len);
 				if (error)
 					goto done;
 			}
 		}
 		/* Reset per-address slots before the next interface. */
 		info.rti_info[RTAX_IFA] = NULL;
 		info.rti_info[RTAX_NETMASK] = NULL;
 		info.rti_info[RTAX_BRD] = NULL;
 	}
 done:
 	NET_EPOCH_EXIT(et);
 	return (error);
 }
 
 /*
  * Dump the multicast memberships of all interfaces (NET_RT_IFMALIST) to
  * the sysctl request described by @w, one RTM_NEWMADDR message per
  * membership.
  *
  * @af, when non-zero, restricts the memberships to that family; a
  * non-zero w->w_arg restricts the dump to the interface with that index.
  *
  * Returns 0 or the first error from message formatting or copy-out.
  */
 static int
 sysctl_ifmalist(int af, struct walkarg *w)
 {
 	struct rt_addrinfo info;
 	struct epoch_tracker et;
 	struct ifaddr *ifa;
 	struct ifmultiaddr *ifma;
 	struct ifnet *ifp;
 	int error, len;
 
 	error = 0;
 	bzero((caddr_t)&info, sizeof(info));
 
 	NET_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if (w->w_arg && w->w_arg != ifp->if_index)
 			continue;
 		ifa = ifp->if_addr;
 		info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
 		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			if (af && af != ifma->ifma_addr->sa_family)
 				continue;
 			/* Skip addresses prison_if() rejects for the caller. */
 			if (prison_if(w->w_req->td->td_ucred,
 			    ifma->ifma_addr) != 0)
 				continue;
 			info.rti_info[RTAX_IFA] = ifma->ifma_addr;
 			/*
 			 * The link-layer address is reported as the gateway
 			 * unless the membership itself is an AF_LINK one.
 			 */
 			info.rti_info[RTAX_GATEWAY] =
 			    (ifma->ifma_addr->sa_family != AF_LINK) ?
 			    ifma->ifma_lladdr : NULL;
 			error = rtsock_msg_buffer(RTM_NEWMADDR, &info, w, &len);
 			if (error != 0)
 				break;
 			if (w->w_req && w->w_tmem) {
 				struct ifma_msghdr *ifmam;
 
 				ifmam = (struct ifma_msghdr *)w->w_tmem;
 				ifmam->ifmam_index = ifma->ifma_ifp->if_index;
 				ifmam->ifmam_flags = 0;
 				ifmam->ifmam_addrs = info.rti_addrs;
 				ifmam->_ifmam_spare1 = 0;
 				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
 				if (error != 0)
 					break;
 			}
 		}
 		/* Propagate an inner-loop failure out of the outer loop. */
 		if (error != 0)
 			break;
 	}
 	NET_EPOCH_EXIT(et);
 	return (error);
 }
 
 /*
  * Handler for the read-only net.routetable sysctl node.
  *
  * After the first name component is skipped, the remaining components
  * are read as: name[0] address family, name[1] operation (NET_RT_*),
  * name[2] operation argument and, for NET_RT_DUMP only, an optional
  * name[3] FIB number.
  *
  * Returns EPERM on write attempts, EISDIR/ENOTDIR for a wrong number of
  * name components, EINVAL for an out-of-range family or FIB, otherwise
  * the result of the selected dump routine.
  */
 static int
 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
 {
 	RIB_RLOCK_TRACKER;
 	int	*name = (int *)arg1;
 	u_int	namelen = arg2;
 	struct rib_head *rnh = NULL; /* silence compiler. */
 	int	i, lim, error = EINVAL;
 	int	fib = 0;
 	u_char	af;
 	struct	walkarg w;
 
 	/* Skip the first name component. */
 	name ++;
 	namelen--;
 	if (req->newptr)
 		return (EPERM);
 	/*
 	 * NOTE(review): name[1] is read here before namelen has been
 	 * validated -- confirm the caller guarantees enough components.
 	 */
 	if (name[1] == NET_RT_DUMP) {
 		if (namelen == 3)
 			fib = req->td->td_proc->p_fibnum;
 		else if (namelen == 4)
 			/* RT_ALL_FIBS selects the calling process' FIB. */
 			fib = (name[3] == RT_ALL_FIBS) ?
 			    req->td->td_proc->p_fibnum : name[3];
 		else
 			return ((namelen < 3) ? EISDIR : ENOTDIR);
 		if (fib < 0 || fib >= rt_numfibs)
 			return (EINVAL);
 	} else if (namelen != 3)
 		return ((namelen < 3) ? EISDIR : ENOTDIR);
 	af = name[0];
 	if (af > AF_MAX)
 		return (EINVAL);
 	bzero(&w, sizeof(w));
 	w.w_op = name[1];
 	w.w_arg = name[2];
 	w.w_req = req;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error)
 		return (error);
 	
 	/*
 	 * Allocate reply buffer in advance.
 	 * All rtsock messages has maximum length of u_short.
 	 */
 	w.w_tmemsize = 65536;
 	w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK);
 
 	switch (w.w_op) {
 
 	case NET_RT_DUMP:
 	case NET_RT_FLAGS:
 		if (af == 0) {			/* dump all tables */
 			i = 1;
 			lim = AF_MAX;
 		} else				/* dump only one table */
 			i = lim = af;
 
 		/*
 		 * take care of llinfo entries, the caller must
 		 * specify an AF
 		 */
 		if (w.w_op == NET_RT_FLAGS &&
 		    (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) {
 			if (af != 0)
 				error = lltable_sysctl_dumparp(af, w.w_req);
 			else
 				error = EINVAL;
 			break;
 		}
 		/*
 		 * take care of routing entries
 		 */
 		for (error = 0; error == 0 && i <= lim; i++) {
 			rnh = rt_tables_get_rnh(fib, i);
 			if (rnh != NULL) {
 				struct epoch_tracker et;
 
 				/* Walk the table under its read lock. */
 				RIB_RLOCK(rnh); 
 				NET_EPOCH_ENTER(et);
 			    	error = rnh->rnh_walktree(&rnh->head,
 				    sysctl_dumpentry, &w);
 				NET_EPOCH_EXIT(et);
 				RIB_RUNLOCK(rnh);
 			} else if (af != 0)
 				/* A specific family was requested but has no table. */
 				error = EAFNOSUPPORT;
 		}
 		break;
 
 	case NET_RT_IFLIST:
 	case NET_RT_IFLISTL:
 		error = sysctl_iflist(af, &w);
 		break;
 
 	case NET_RT_IFMALIST:
 		error = sysctl_ifmalist(af, &w);
 		break;
 	}
 
 	/* An unrecognized operation leaves error at 0 and dumps nothing. */
 	free(w.w_tmem, M_TEMP);
 	return (error);
 }
 
 /* Read-only net.routetable node; all requests go to sysctl_rtsock(). */
 static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
 
 /*
  * Definitions of protocols supported in the ROUTE domain.
  */
 
 static struct domain routedomain;		/* or at least forward */
 
 /* The route domain's protocol table: a single SOCK_RAW protocol. */
 static struct protosw routesw[] = {
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&routedomain,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_output =		route_output,
 	.pr_ctlinput =		raw_ctlinput,
 	.pr_init =		raw_init,
 	.pr_usrreqs =		&route_usrreqs
 }
 };
 
 /* The PF_ROUTE domain itself, covering every entry of routesw[]. */
 static struct domain routedomain = {
 	.dom_family =		PF_ROUTE,
 	.dom_name =		 "route",
 	.dom_protosw =		routesw,
 	.dom_protoswNPROTOSW =	&routesw[nitems(routesw)]
 };
 
 VNET_DOMAIN_SET(route);
+
+#ifdef DDB
+/*
+ * Names of the route flags, indexed by bit position (bit 0 is the first
+ * entry).  RTF_ values are expressed as raw masks rather than powers of 2,
+ * so they cannot serve as initializer indices; bit numbers are spelled out
+ * instead.  Positions without an entry (24-27 and 29) are NULL.
+ */
+static const char * const rtf_flag_strings[] = {
+	[0] = "UP",
+	[1] = "GATEWAY",
+	[2] = "HOST",
+	[3] = "REJECT",
+	[4] = "DYNAMIC",
+	[5] = "MODIFIED",
+	[6] = "DONE",
+	[7] = "UNUSED_0x80",
+	[8] = "UNUSED_0x100",
+	[9] = "XRESOLVE",
+	[10] = "LLDATA",
+	[11] = "STATIC",
+	[12] = "BLACKHOLE",
+	[13] = "UNUSED_0x2000",
+	[14] = "PROTO2",
+	[15] = "PROTO1",
+	[16] = "UNUSED_0x10000",
+	[17] = "UNUSED_0x20000",
+	[18] = "PROTO3",
+	[19] = "FIXEDMTU",
+	[20] = "PINNED",
+	[21] = "LOCAL",
+	[22] = "BROADCAST",
+	[23] = "MULTICAST",
+	/* Big gap. */
+	[28] = "STICKY",
+	[30] = "RNH_LOCKED",
+	[31] = "GWFLAG_COMPAT",
+};
+
+/*
+ * Map a route flag bit position to its printable name.  Positions past
+ * the end of rtf_flag_strings[] yield "INVALID_FLAG"; positions inside
+ * the table that have no name yield "UNKNOWN".
+ */
+static const char * __pure
+rt_flag_name(unsigned idx)
+{
+	const char *label;
+
+	if (idx >= nitems(rtf_flag_strings))
+		return ("INVALID_FLAG");
+
+	label = rtf_flag_strings[idx];
+	return (label != NULL ? label : "UNKNOWN");
+}
+
+/*
+ * Print "@name <address> " to the debugger console.  IPv4 and IPv6
+ * addresses are shown in presentation form, AF_LINK as "on link" and a
+ * NULL @sa as "NULL".  Any other family, or a failed inet_ntop()
+ * conversion, is shown as "<af:N>".
+ */
+static void
+rt_dumpaddr_ddb(const char *name, const struct sockaddr *sa)
+{
+	char addrbuf[INET6_ADDRSTRLEN], *text;
+
+	if (sa == NULL) {
+		db_printf("%s <%s> ", name, "NULL");
+		return;
+	}
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		text = inet_ntop(AF_INET,
+		    &((const struct sockaddr_in *)sa)->sin_addr,
+		    addrbuf, sizeof(addrbuf));
+		break;
+	case AF_INET6:
+		text = inet_ntop(AF_INET6,
+		    &((const struct sockaddr_in6 *)sa)->sin6_addr,
+		    addrbuf, sizeof(addrbuf));
+		break;
+	case AF_LINK:
+		text = "on link";
+		break;
+	default:
+		text = NULL;
+		break;
+	}
+
+	if (text == NULL) {
+		db_printf("%s <af:%d> ", name, sa->sa_family);
+		return;
+	}
+	db_printf("%s <%s> ", name, text);
+}
+
+/*
+ * Tree-walk callback for the DDB "show routetable" command: print one
+ * line describing the route entry @rn (destination, gateway, netmask,
+ * interface addresses when the interface is not dying, and the flag
+ * names).  Always returns 0 so the walk continues.
+ */
+static int
+rt_dumpentry_ddb(struct radix_node *rn, void *arg __unused)
+{
+	struct sockaddr_storage ss;
+	struct rtentry *rt;
+	int flags, idx;
+
+	/* If RNTORT is important, put it in a header. */
+	rt = (void *)rn;
+
+	rt_dumpaddr_ddb("dst", rt_key(rt));
+	rt_dumpaddr_ddb("gateway", rt->rt_gateway);
+	rt_dumpaddr_ddb("netmask", rtsock_fix_netmask(rt_key(rt), rt_mask(rt),
+	    &ss));
+	if (rt->rt_ifp != NULL && (rt->rt_ifp->if_flags & IFF_DYING) == 0) {
+		rt_dumpaddr_ddb("ifp", rt->rt_ifp->if_addr->ifa_addr);
+		rt_dumpaddr_ddb("ifa", rt->rt_ifa->ifa_addr);
+	}
+
+	db_printf("flags ");
+	flags = rt->rt_flags;
+	if (flags == 0)
+		db_printf("none");
+
+	/* Peel off the lowest set bit each round until none remain. */
+	while ((idx = ffs(flags)) > 0) {
+		idx--;
+
+		/* Once a bit has been cleared, this is not the first name. */
+		if (flags != rt->rt_flags)
+			db_printf(",");
+		/* Never pass the name itself as the format string. */
+		db_printf("%s", rt_flag_name(idx));
+
+		flags &= ~(1ul << idx);
+	}
+
+	db_printf("\n");
+	return (0);
+}
+
+/*
+ * DDB "show routetable [af]": dump the FIB 0 routing table of the given
+ * address family, or of every family from 1 to AF_MAX that has a table
+ * when no argument is supplied.
+ */
+DB_SHOW_COMMAND(routetable, db_show_routetable_cmd)
+{
+	struct rib_head *rnh;
+	int error, i, lim;
+
+	if (have_addr) {
+		/*
+		 * The family comes straight from the debugger command line;
+		 * reject values outside 0..AF_MAX before using it to look
+		 * up a table, as sysctl_rtsock() does for its input.
+		 */
+		if (addr < 0 || addr > AF_MAX) {
+			db_printf("%s: AF %ld out of range (0-%d)\n",
+			    __func__, (long)addr, AF_MAX);
+			return;
+		}
+		i = lim = addr;
+	} else {
+		i = 1;
+		lim = AF_MAX;
+	}
+
+	for (; i <= lim; i++) {
+		rnh = rt_tables_get_rnh(0, i);
+		if (rnh == NULL) {
+			/* Only complain when a family was asked for. */
+			if (have_addr) {
+				db_printf("%s: AF %d not supported?\n",
+				    __func__, i);
+				break;
+			}
+			continue;
+		}
+
+		/* Blank line between tables when dumping all families. */
+		if (!have_addr && i > 1)
+			db_printf("\n");
+
+		db_printf("Route table for AF %d%s%s%s:\n", i,
+		    (i == AF_INET || i == AF_INET6) ? " (" : "",
+		    (i == AF_INET) ? "INET" : (i == AF_INET6) ? "INET6" : "",
+		    (i == AF_INET || i == AF_INET6) ? ")" : "");
+
+		error = rnh->rnh_walktree(&rnh->head, rt_dumpentry_ddb, NULL);
+		if (error != 0)
+			db_printf("%s: walktree(%d): %d\n", __func__, i,
+			    error);
+	}
+}
+
+/*
+ * ddb 'show route <address>': look up and print the route to a destination.
+ *
+ * The command is registered with CS_OWN and reads its own tokens so that it
+ * can accept either a dotted-decimal INET address or a colon-separated INET6
+ * address, including the compact '::' form.  The address family is not known
+ * up front; it is inferred from the first separator seen.  Parsed components
+ * are collected in hextets[] (one octet per slot for INET, one hextet per
+ * slot for INET6), converted into a sockaddr, and passed to rtalloc1() with
+ * the current vnet set to vnet0.  A route that is found is printed with
+ * rt_dumpentry_ddb().
+ *
+ * On a parse error a message is printed and the rest of the input line is
+ * discarded.  'show route' with no argument, or with a leading '-', prints
+ * a usage message.
+ */
+_DB_FUNC(_show, route, db_show_route_cmd, db_show_table, CS_OWN, NULL)
+{
+	char buf[INET6_ADDRSTRLEN], *bp;
+	const void *dst_addrp;
+	struct sockaddr *dstp;
+	struct rtentry *rt;
+	union {
+		struct sockaddr_in dest_sin;
+		struct sockaddr_in6 dest_sin6;
+	} u;
+	uint16_t hextets[8];
+	unsigned i, tets;
+	int t, af, exp, tokflags;
+
+	/*
+	 * Undecoded address family.  No double-colon expansion seen yet.
+	 * 'exp' later records the hextets[] index that stands for the '::'.
+	 */
+	af = -1;
+	exp = -1;
+	/* Assume INET6 to start; we can work back if guess was wrong. */
+	tokflags = DRT_WSPACE | DRT_HEX | DRT_HEXADECIMAL;
+
+	/*
+	 * db_command has lexed 'show route' for us.
+	 */
+	t = db_read_token_flags(tokflags);
+	if (t == tWSPACE)
+		t = db_read_token_flags(tokflags);
+
+	/*
+	 * tEOL: Just 'show route' isn't a valid mode.
+	 * tMINUS: It's either '-h' or some invalid option.  Regardless, usage.
+	 */
+	if (t == tEOL || t == tMINUS)
+		goto usage;
+
+	/* Push the first address token back for the parsing loop below. */
+	db_unread_token(t);
+
+	/* Start with the INET6 component count; reduced to 4 for INET. */
+	tets = nitems(hextets);
+
+	/*
+	 * Each loop iteration, we expect to read one octet (v4) or hextet
+	 * (v6), followed by an appropriate field separator ('.' or ':' or
+	 * '::').
+	 *
+	 * At the start of each loop, we're looking for a number (octet or
+	 * hextet).
+	 *
+	 * INET6 addresses have a special case where they may begin with '::'.
+	 */
+	for (i = 0; i < tets; i++) {
+		t = db_read_token_flags(tokflags);
+
+		if (t == tCOLONCOLON) {
+			/* INET6 with leading '::' or invalid. */
+			if (i != 0) {
+				db_printf("Parse error: unexpected extra "
+				    "colons.\n");
+				goto exit;
+			}
+
+			af = AF_INET6;
+			exp = i;
+			hextets[i] = 0;
+			continue;
+		} else if (t == tNUMBER) {
+			/*
+			 * Lexer separates out '-' as tMINUS, but make the
+			 * assumption explicit here.
+			 */
+			MPASS(db_tok_number >= 0);
+
+			if (af == AF_INET && db_tok_number > UINT8_MAX) {
+				db_printf("Not a valid v4 octet: %ld\n",
+				    (long)db_tok_number);
+				goto exit;
+			}
+			/*
+			 * hextets[] holds 16-bit values.  Reject anything
+			 * larger rather than silently truncating it and
+			 * looking up a different address than was typed.
+			 * This also covers the first number, whose family is
+			 * not yet known.
+			 */
+			if (db_tok_number > UINT16_MAX) {
+				db_printf("Address component out of range: "
+				    "%lx\n", (unsigned long)db_tok_number);
+				goto exit;
+			}
+			hextets[i] = db_tok_number;
+		} else if (t == tEOL) {
+			/*
+			 * We can only detect the end of an IPv6 address in
+			 * compact representation with EOL.
+			 */
+			if (af != AF_INET6 || exp < 0) {
+				db_printf("Parse failed.  Got unexpected EOF "
+				    "when the address is not a compact-"
+				    "representation IPv6 address.\n");
+				goto exit;
+			}
+			break;
+		} else {
+			db_printf("Parse failed.  Unexpected token %d.\n", t);
+			goto exit;
+		}
+
+		/* Next, look for a separator, if appropriate. */
+		if (i == tets - 1)
+			continue;
+
+		t = db_read_token_flags(tokflags);
+		if (af < 0) {
+			/* First separator: it decides the address family. */
+			if (t == tCOLON) {
+				af = AF_INET6;
+				continue;
+			}
+			if (t == tCOLONCOLON) {
+				af = AF_INET6;
+				/* The '::' occupies a slot of its own. */
+				i++;
+				hextets[i] = 0;
+				exp = i;
+				continue;
+			}
+			if (t == tDOT) {
+				unsigned hn, dn;
+
+				af = AF_INET;
+				/*
+				 * Need to fixup the first parsed number.  It
+				 * was lexed as hexadecimal before the family
+				 * was known, so its hex digits are really the
+				 * decimal digits of the octet: at most "255",
+				 * and no digit above 9.
+				 */
+				if (hextets[0] > 0x255 ||
+				    (hextets[0] & 0xf0) > 0x90 ||
+				    (hextets[0] & 0xf) > 9) {
+					db_printf("Not a valid v4 octet: %x\n",
+					    hextets[0]);
+					goto exit;
+				}
+
+				/* Reassemble the digits as a decimal value. */
+				hn = hextets[0];
+				dn = (hn >> 8) * 100 +
+				    ((hn >> 4) & 0xf) * 10 +
+				    (hn & 0xf);
+
+				hextets[0] = dn;
+
+				/* Switch to decimal for remaining octets. */
+				tokflags &= ~DRT_RADIX_MASK;
+				tokflags |= DRT_DECIMAL;
+
+				tets = 4;
+				continue;
+			}
+
+			db_printf("Parse error.  Unexpected token %d.\n", t);
+			goto exit;
+		} else if (af == AF_INET) {
+			if (t == tDOT)
+				continue;
+			db_printf("Expected '.' (%d) between octets but got "
+			    "(%d).\n", tDOT, t);
+			goto exit;
+
+		} else if (af == AF_INET6) {
+			if (t == tCOLON)
+				continue;
+			if (t == tCOLONCOLON) {
+				if (exp < 0) {
+					/* The '::' occupies its own slot. */
+					i++;
+					hextets[i] = 0;
+					exp = i;
+					continue;
+				}
+				db_printf("Got bogus second '::' in v6 "
+				    "address.\n");
+				goto exit;
+			}
+			if (t == tEOL) {
+				/*
+				 * Handle in the earlier part of the loop
+				 * because we need to handle trailing :: too.
+				 */
+				db_unread_token(t);
+				continue;
+			}
+
+			db_printf("Expected ':' (%d) or '::' (%d) between "
+			    "hextets but got (%d).\n", tCOLON, tCOLONCOLON, t);
+			goto exit;
+		}
+	}
+
+	/*
+	 * Check for trailing garbage.  Only needed when the loop ran to
+	 * completion; the early 'break' above has already consumed tEOL.
+	 */
+	if (i == tets) {
+		t = db_read_token_flags(tokflags);
+		if (t != tEOL) {
+			db_printf("Got unexpected garbage after address "
+			    "(%d).\n", t);
+			goto exit;
+		}
+	}
+
+	/*
+	 * Need to expand compact INET6 addresses.
+	 *
+	 * Technically '::' for a single ':0:' is MUST NOT but just in case,
+	 * don't bother expanding that form (exp >= 0 && i == tets case).
+	 *
+	 * Here 'i' is the number of slots filled, including the one for the
+	 * '::'.  Slide everything after that slot to the end of the array,
+	 * then zero the gap that opens up.
+	 */
+	if (af == AF_INET6 && exp >= 0 && i < tets) {
+		if (exp + 1 < i) {
+			memmove(&hextets[exp + 1 + (nitems(hextets) - i)],
+			    &hextets[exp + 1],
+			    (i - (exp + 1)) * sizeof(hextets[0]));
+		}
+		memset(&hextets[exp + 1], 0, (nitems(hextets) - i) *
+		    sizeof(hextets[0]));
+	}
+
+	/* Build the destination sockaddr in network byte order. */
+	memset(&u, 0, sizeof(u));
+	if (af == AF_INET) {
+		u.dest_sin.sin_family = AF_INET;
+		u.dest_sin.sin_len = sizeof(u.dest_sin);
+		u.dest_sin.sin_addr.s_addr = htonl(
+		    ((uint32_t)hextets[0] << 24) |
+		    ((uint32_t)hextets[1] << 16) |
+		    ((uint32_t)hextets[2] << 8) |
+		    (uint32_t)hextets[3]);
+		dstp = (void *)&u.dest_sin;
+		dst_addrp = &u.dest_sin.sin_addr;
+	} else if (af == AF_INET6) {
+		u.dest_sin6.sin6_family = AF_INET6;
+		u.dest_sin6.sin6_len = sizeof(u.dest_sin6);
+		for (i = 0; i < nitems(hextets); i++)
+			u.dest_sin6.sin6_addr.s6_addr16[i] = htons(hextets[i]);
+		dstp = (void *)&u.dest_sin6;
+		dst_addrp = &u.dest_sin6.sin6_addr;
+	} else
+		/*
+		 * Not expected: every path out of the loop above either set
+		 * 'af' to one of the two families or jumped to 'exit'.
+		 */
+		MPASS(false);
+
+	/* Echo the address as parsed so the user can confirm it. */
+	bp = inet_ntop(af, dst_addrp, buf, sizeof(buf));
+	if (bp != NULL)
+		db_printf("Looking up route to destination '%s'\n", bp);
+
+	/*
+	 * NOTE(review): RTF_RNH_LOCKED is passed as rtalloc1()'s third
+	 * argument, presumably so the lookup does not try to acquire the
+	 * table lock from within the debugger -- confirm against rtalloc1().
+	 */
+	CURVNET_SET(vnet0);
+	rt = rtalloc1(dstp, 0, RTF_RNH_LOCKED);
+	CURVNET_RESTORE();
+
+	if (rt == NULL) {
+		db_printf("Could not get route for that server.\n");
+		return;
+	}
+
+	rt_dumpentry_ddb((void *)rt, NULL);
+	RTFREE_LOCKED(rt);
+
+	return;
+usage:
+	db_printf("Usage: 'show route <address>'\n"
+	    "  Currently accepts only dotted-decimal INET or colon-separated\n"
+	    "  hextet INET6 addresses.\n");
+	/* FALLTHROUGH: discard whatever is left on the input line. */
+exit:
+	db_skip_to_eol();
+}
+#endif