Index: head/sbin/ipfw/ipfw.8
===================================================================
--- head/sbin/ipfw/ipfw.8	(revision 178887)
+++ head/sbin/ipfw/ipfw.8	(revision 178888)
@@ -1,2738 +1,2750 @@
 .\"
 .\" $FreeBSD$
 .\"
 .Dd November 26, 2007
 .Dt IPFW 8
 .Os
 .Sh NAME
 .Nm ipfw
 .Nd IP firewall and traffic shaper control program
 .Sh SYNOPSIS
 .Nm
 .Op Fl cq
 .Cm add
 .Ar rule
 .Nm
 .Op Fl acdefnNStT
 .Op Cm set Ar N
 .Brq Cm list | show
 .Op Ar rule | first-last ...
 .Nm
 .Op Fl f | q
 .Op Cm set Ar N
 .Cm flush
 .Nm
 .Op Fl q
 .Op Cm set Ar N
 .Brq Cm delete | zero | resetlog
 .Op Ar number ...
 .Nm
 .Cm enable
 .Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive
 .Nm
 .Cm disable
 .Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive
 .Pp
 .Nm
 .Cm set Oo Cm disable Ar number ... Oc Op Cm enable Ar number ...
 .Nm
 .Cm set move
 .Op Cm rule
 .Ar number Cm to Ar number
 .Nm
 .Cm set swap Ar number number
 .Nm
 .Cm set show
 .Pp
 .Nm
 .Cm table Ar number Cm add Ar addr Ns Oo / Ns Ar masklen Oc Op Ar value
 .Nm
 .Cm table Ar number Cm delete Ar addr Ns Op / Ns Ar masklen
 .Nm
 .Cm table Ar number Cm flush
 .Nm
 .Cm table Ar number Cm list
 .Pp
 .Nm
 .Brq Cm pipe | queue
 .Ar number
 .Cm config
 .Ar config-options
 .Nm
 .Op Fl s Op Ar field
 .Brq Cm pipe | queue
 .Brq Cm delete | list | show
 .Op Ar number ...
 .Pp
 .Nm
 .Cm nat
 .Ar number
 .Cm config
 .Ar config-options
 .Pp
 .Nm
 .Op Fl cfnNqS
 .Oo
 .Fl p Ar preproc
 .Oo
 .Ar preproc-flags
 .Oc
 .Oc
 .Ar pathname
 .Sh DESCRIPTION
 The
 .Nm
 utility is the user interface for controlling the
 .Xr ipfw 4
 firewall and the
 .Xr dummynet 4
 traffic shaper in
 .Fx .
 .Pp
 An
 .Nm
 configuration, or
 .Em ruleset ,
 is made of a list of
 .Em rules
 numbered from 1 to 65535.
 Packets are passed to
 .Nm
 from a number of different places in the protocol stack
 (depending on the source and destination of the packet,
 it is possible that
 .Nm
 is invoked multiple times on the same packet).
 The packet passed to the firewall is compared
 against each of the rules in the firewall
 .Em ruleset .
 When a match is found, the action corresponding to the
 matching rule is performed.
 .Pp
 Depending on the action and certain system settings, packets
 can be reinjected into the firewall at some rule after the
 matching one for further processing.
 .Pp
 An
 .Nm
 ruleset always includes a
 .Em default
 rule (numbered 65535) which cannot be modified or deleted,
 and matches all packets.
 The action associated with the
 .Em default
 rule can be either
 .Cm deny
 or
 .Cm allow
 depending on how the kernel is configured.
 .Pp
 If the ruleset includes one or more rules with the
 .Cm keep-state
 or
 .Cm limit
 option, then
 .Nm
 assumes a
 .Em stateful
 behaviour, i.e., upon a match it will create dynamic rules matching
 the exact parameters (addresses and ports) of the matching packet.
 .Pp
 These dynamic rules, which have a limited lifetime, are checked
 at the first occurrence of a
 .Cm check-state ,
 .Cm keep-state
 or
 .Cm limit
 rule, and are typically used to open the firewall on-demand to
 legitimate traffic only.
 See the
 .Sx STATEFUL FIREWALL
 and
 .Sx EXAMPLES
 Sections below for more information on the stateful behaviour of
 .Nm .
 .Pp
 All rules (including dynamic ones) have a few associated counters:
 a packet count, a byte count, a log count and a timestamp
 indicating the time of the last match.
 Counters can be displayed or reset with
 .Nm
 commands.
 .Pp
 Rules can be added with the
 .Cm add
 command; deleted individually or in groups with the
 .Cm delete
 command, and globally (except those in set 31) with the
 .Cm flush
 command; displayed, optionally with the content of the
 counters, using the
 .Cm show
 and
 .Cm list
 commands.
 Finally, counters can be reset with the
 .Cm zero
 and
 .Cm resetlog
 commands.
 .Pp
 Also, each rule belongs to one of 32 different
 .Em sets ,
 and there are
 .Nm
 commands to atomically manipulate sets, such as enable,
 disable, swap sets, move all rules in a set to another
 one, delete all rules in a set.
 These can be useful to
 install temporary configurations, or to test them.
 See Section
 .Sx SETS OF RULES
 for more information on
 .Em sets .
 .Pp
 The following options are available:
 .Bl -tag -width indent
 .It Fl a
 While listing, show counter values.
 The
 .Cm show
 command just implies this option.
 .It Fl b
 Only show the action and the comment, not the body of a rule.
 Implies
 .Fl c .
 .It Fl c
 When entering or showing rules, print them in compact form,
 i.e., without the optional "ip from any to any" string
 when this does not carry any additional information.
 .It Fl d
 While listing, show dynamic rules in addition to static ones.
 .It Fl e
 While listing, if the
 .Fl d
 option was specified, also show expired dynamic rules.
 .It Fl f
 Do not ask for confirmation for commands that can cause problems
 if misused,
 .No i.e. Cm flush .
 If there is no tty associated with the process, this is implied.
 .It Fl i
 While listing a table (see the
 .Sx LOOKUP TABLES
 section below for more information on lookup tables), format values
 as IP addresses. By default, values are shown as integers.
 .It Fl n
 Only check syntax of the command strings, without actually passing
 them to the kernel.
 .It Fl N
 Try to resolve addresses and service names in output.
 .It Fl q
 While
 .Cm add Ns ing ,
 .Cm zero Ns ing ,
 .Cm resetlog Ns ging
 or
 .Cm flush Ns ing ,
 be quiet about actions
 (implies
 .Fl f ) .
 This is useful for adjusting rules by executing multiple
 .Nm
 commands in a script
 (e.g.,
 .Ql sh\ /etc/rc.firewall ) ,
 or by processing a file of many
 .Nm
 rules across a remote login session.
 It also stops a table add or delete
 from failing if the entry already exists or is not present.
 If a
 .Cm flush
 is performed in normal (verbose) mode (with the default kernel
 configuration), it prints a message.
 Because all rules are flushed, the message might not be delivered
 to the login session, causing the remote login session to be closed
 and the remainder of the ruleset to not be processed.
 Access to the console would then be required to recover.
 .It Fl S
 While listing rules, show the
 .Em set
 each rule belongs to.
 If this flag is not specified, disabled rules will not be
 listed.
 .It Fl s Op Ar field
 While listing pipes, sort according to one of the four
 counters (total or current packets or bytes).
 .It Fl t
 While listing, show last match timestamp (converted with ctime()).
 .It Fl T
 While listing, show last match timestamp (as seconds from the epoch).
 This form can be more convenient for postprocessing by scripts.
 .El
 .Pp
 To ease configuration, rules can be put into a file which is
 processed using
 .Nm
 as shown in the last synopsis line.
 An absolute
 .Ar pathname
 must be used.
 The file will be read line by line and applied as arguments to the
 .Nm
 utility.
 .Pp
 Optionally, a preprocessor can be specified using
 .Fl p Ar preproc
 where
 .Ar pathname
 is to be piped through.
 Useful preprocessors include
 .Xr cpp 1
 and
 .Xr m4 1 .
 If
 .Ar preproc
 does not start with a slash
 .Pq Ql /
 as its first character, the usual
 .Ev PATH
 name search is performed.
 Care should be taken with this in environments where not all
 file systems are mounted (yet) by the time
 .Nm
 is being run (e.g.\& when they are mounted over NFS).
 Once
 .Fl p
 has been specified, any additional arguments are passed on to the preprocessor
 for interpretation.
 This allows for flexible configuration files (like conditionalizing
 them on the local hostname) and the use of macros to centralize
 frequently required arguments like IP addresses.
 .Pp
 The
 .Nm
 .Cm pipe
 and
 .Cm queue
 commands are used to configure the traffic shaper, as shown in the
 .Sx TRAFFIC SHAPER (DUMMYNET) CONFIGURATION
 Section below.
 .Pp
 If the world and the kernel get out of sync the
 .Nm
 ABI may break, preventing you from being able to add any rules.
 This can
 adversely affect the booting process.
 You can use
 .Nm
 .Cm disable
 .Cm firewall
 to temporarily disable the firewall to regain access to the network,
 allowing you to fix the problem.
 .Sh PACKET FLOW
 A packet is checked against the active ruleset in multiple places
 in the protocol stack, under control of several sysctl variables.
 These places and variables are shown below, and it is important to
 have this picture in mind in order to design a correct ruleset.
 .Bd -literal -offset indent
        ^    to upper layers    V
        |                       |
        +----------->-----------+
        ^                       V
  [ip(6)_input]           [ip(6)_output]     net.inet(6).ip(6).fw.enable=1
        |                       |
        ^                       V
  [ether_demux]        [ether_output_frame]  net.link.ether.ipfw=1
        |                       |
        +-->--[bdg_forward]-->--+            net.link.bridge.ipfw=1
        ^                       V
        |      to devices       |
 .Ed
 .Pp
 As can be noted from the above picture, the number of
 times the same packet goes through the firewall can
 vary between 0 and 4 depending on packet source and
 destination, and system configuration.
 .Pp
 Note that as packets flow through the stack, headers can be
 stripped or added to it, and so they may or may not be available
 for inspection.
 E.g., incoming packets will include the MAC header when
 .Nm
 is invoked from
 .Cm ether_demux() ,
 but the same packets will have the MAC header stripped off when
 .Nm
 is invoked from
 .Cm ip_input()
 or
 .Cm ip6_input() .
 .Pp
 Also note that each packet is always checked against the complete ruleset,
 irrespective of the place where the check occurs, or the source of the packet.
 If a rule contains some match patterns or actions which are not valid
 for the place of invocation (e.g.\& trying to match a MAC header within
 .Cm ip_input
 or
 .Cm ip6_input ),
 the match pattern will not match, but a
 .Cm not
 operator in front of such patterns
 .Em will
 cause the pattern to
 .Em always
 match on those packets.
 It is thus the responsibility of
 the programmer, if necessary, to write a suitable ruleset to
 differentiate among the possible places.
 .Cm skipto
 rules can be useful here, as an example:
 .Bd -literal -offset indent
 # packets from ether_demux or bdg_forward
 ipfw add 10 skipto 1000 all from any to any layer2 in
 # packets from ip_input
 ipfw add 10 skipto 2000 all from any to any not layer2 in
 # packets from ip_output
 ipfw add 10 skipto 3000 all from any to any not layer2 out
 # packets from ether_output_frame
 ipfw add 10 skipto 4000 all from any to any layer2 out
 .Ed
 .Pp
 (yes, at the moment there is no way to differentiate between
 ether_demux and bdg_forward).
 .Sh SYNTAX
 In general, each keyword or argument must be provided as
 a separate command line argument, with no leading or trailing
 spaces.
 Keywords are case-sensitive, whereas arguments may
 or may not be case-sensitive depending on their nature
 (e.g.\& uid's are, hostnames are not).
 .Pp
 In
 .Nm ipfw2
 you can introduce spaces after commas ',' to make
 the line more readable.
 You can also put the entire
 command (including flags) into a single argument.
 E.g., the following forms are equivalent:
 .Bd -literal -offset indent
 ipfw -q add deny src-ip 10.0.0.0/24,127.0.0.1/8
 ipfw -q add deny src-ip 10.0.0.0/24, 127.0.0.1/8
 ipfw "-q add deny src-ip 10.0.0.0/24, 127.0.0.1/8"
 .Ed
 .Sh RULE FORMAT
 The format of
 .Nm
 rules is the following:
 .Bd -ragged -offset indent
 .Bk -words
 .Op Ar rule_number
 .Op Cm set Ar set_number
 .Op Cm prob Ar match_probability
 .Ar action
 .Op Cm log Op Cm logamount Ar number
 .Op Cm altq Ar queue
 .Oo
 .Bro Cm tag | untag
 .Brc Ar number
 .Oc
 .Ar body
 .Ek
 .Ed
 .Pp
 where the body of the rule specifies which information is used
 for filtering packets, among the following:
 .Pp
 .Bl -tag -width "Source and dest. addresses and ports" -offset XXX -compact
 .It Layer-2 header fields
 When available
 .It IPv4 and IPv6 Protocol
 TCP, UDP, ICMP, etc.
 .It Source and dest. addresses and ports
 .It Direction
 See Section
 .Sx PACKET FLOW
 .It Transmit and receive interface
 By name or address
 .It Misc. IP header fields
 Version, type of service, datagram length, identification,
 fragment flag (non-zero IP offset),
 Time To Live
 .It IP options
 .It IPv6 Extension headers
 Fragmentation, Hop-by-Hop options,
 Routing Headers, Source routing rthdr0, Mobile IPv6 rthdr2, IPSec options.
 .It IPv6 Flow-ID
 .It Misc. TCP header fields
 TCP flags (SYN, FIN, ACK, RST, etc.),
 sequence number, acknowledgment number,
 window
 .It TCP options
 .It ICMP types
 for ICMP packets
 .It ICMP6 types
 for ICMP6 packets
 .It User/group ID
 When the packet can be associated with a local socket.
 .It Divert status
 Whether a packet came from a divert socket (e.g.,
 .Xr natd 8 ) .
+.It Fib annotation state
+Whether a packet has been tagged for using a specific FIB (routing table)
+in future forwarding decisions.
 .El
 .Pp
 Note that some of the above information, e.g.\& source MAC or IP addresses and
 TCP/UDP ports, could easily be spoofed, so filtering on those fields
 alone might not guarantee the desired results.
 .Bl -tag -width indent
 .It Ar rule_number
 Each rule is associated with a
 .Ar rule_number
 in the range 1..65535, with the latter reserved for the
 .Em default
 rule.
 Rules are checked sequentially by rule number.
 Multiple rules can have the same number, in which case they are
 checked (and listed) according to the order in which they have
 been added.
 If a rule is entered without specifying a number, the kernel will
 assign one in such a way that the rule becomes the last one
 before the
 .Em default
 rule.
 Automatic rule numbers are assigned by incrementing the last
 non-default rule number by the value of the sysctl variable
 .Ar net.inet.ip.fw.autoinc_step
 which defaults to 100.
 If this is not possible (e.g.\& because we would go beyond the
 maximum allowed rule number), the number of the last
 non-default value is used instead.
 .It Cm set Ar set_number
 Each rule is associated with a
 .Ar set_number
 in the range 0..31.
 Sets can be individually disabled and enabled, so this parameter
 is of fundamental importance for atomic ruleset manipulation.
 It can be also used to simplify deletion of groups of rules.
 If a rule is entered without specifying a set number,
 set 0 will be used.
 .br
 Set 31 is special in that it cannot be disabled,
 and rules in set 31 are not deleted by the
 .Nm ipfw flush
 command (but you can delete them with the
 .Nm ipfw delete set 31
 command).
 Set 31 is also used for the
 .Em default
 rule.
 .It Cm prob Ar match_probability
 A match is only declared with the specified probability
 (floating point number between 0 and 1).
 This can be useful for a number of applications such as
 random packet drop or
 (in conjunction with
 .Nm dummynet )
 to simulate the effect of multiple paths leading to out-of-order
 packet delivery.
 .Pp
 Note: this condition is checked before any other condition, including
 ones such as keep-state or check-state which might have side effects.
 .It Cm log Op Cm logamount Ar number
 When a packet matches a rule with the
 .Cm log
 keyword, a message will be
 logged to
 .Xr syslogd 8
 with a
 .Dv LOG_SECURITY
 facility.
 The logging only occurs if the sysctl variable
 .Va net.inet.ip.fw.verbose
 is set to 1
 (which is the default when the kernel is compiled with
 .Dv IPFIREWALL_VERBOSE )
 and the number of packets logged so far for that
 particular rule does not exceed the
 .Cm logamount
 parameter.
 If no
 .Cm logamount
 is specified, the limit is taken from the sysctl variable
 .Va net.inet.ip.fw.verbose_limit .
 In both cases, a value of 0 removes the logging limit.
 .Pp
 Once the limit is reached, logging can be re-enabled by
 clearing the logging counter or the packet counter for that entry, see the
 .Cm resetlog
 command.
 .Pp
 Note: logging is done after all other packet matching conditions
 have been successfully verified, and before performing the final
 action (accept, deny, etc.) on the packet.
 .It Cm tag Ar number
 When a packet matches a rule with the
 .Cm tag
 keyword, the numeric tag for the given
 .Ar number
 in the range 1..65534 will be attached to the packet.
 The tag acts as an internal marker (it is not sent out over
 the wire) that can be used to identify these packets later on.
 This can be used, for example, to provide trust between interfaces
 and to start doing policy-based filtering.
 A packet can have multiple tags at the same time.
 Tags are "sticky", meaning once a tag is applied to a packet by a
 matching rule it exists until explicit removal.
 Tags are kept with the packet everywhere within the kernel, but are
 lost when packet leaves the kernel, for example, on transmitting
 packet out to the network or sending packet to a
 .Xr divert 4
 socket.
 .Pp
 To check for previously applied tags, use the
 .Cm tagged
 rule option.
 To delete a previously applied tag, use the
 .Cm untag
 keyword.
 .Pp
 Note: since tags are kept with the packet everywhere in kernelspace,
 they can be set and unset anywhere in kernel network subsystem
 (using
 .Xr mbuf_tags 9
 facility), not only by means of
 .Xr ipfw 4
 .Cm tag
 and
 .Cm untag
 keywords.
 For example, there can be a specialized
 .Xr netgraph 4
 node doing traffic analyzing and tagging for later inspecting
 in firewall.
 .It Cm untag Ar number
 When a packet matches a rule with the
 .Cm untag
 keyword, the tag with the number
 .Ar number
 is searched among the tags attached to this packet and,
 if found, removed from it.
 Other tags bound to the packet, if present, are left untouched.
 .It Cm altq Ar queue
 When a packet matches a rule with the
 .Cm altq
 keyword, the ALTQ identifier for the given
 .Ar queue
 (see
 .Xr altq 4 )
 will be attached.
 Note that this ALTQ tag is only meaningful for packets going "out" of IPFW,
 and not being rejected or going to divert sockets.
 Note that if there is insufficient memory at the time the packet is
 processed, it will not be tagged, so it is wise to make your ALTQ
 "default" queue policy account for this.
 If multiple
 .Cm altq
 rules match a single packet, only the first one adds the ALTQ classification
 tag.
 In doing so, traffic may be shaped by using
 .Cm count Cm altq Ar queue
 rules for classification early in the ruleset, then later applying
 the filtering decision.
 For example,
 .Cm check-state
 and
 .Cm keep-state
 rules may come later and provide the actual filtering decisions in
 addition to the fallback ALTQ tag.
 .Pp
 You must run
 .Xr pfctl 8
 to set up the queues before IPFW will be able to look them up by name,
 and if the ALTQ disciplines are rearranged, the rules containing the
 queue identifiers in the kernel will likely have gone stale and need
 to be reloaded.
 Stale queue identifiers will probably result in misclassification.
 .Pp
 All system ALTQ processing can be turned on or off via
 .Nm
 .Cm enable Ar altq
 and
 .Nm
 .Cm disable Ar altq .
 The usage of
 .Va net.inet.ip.fw.one_pass
 is irrelevant to ALTQ traffic shaping, as the actual rule action is followed
 always after adding an ALTQ tag.
 .El
 .Ss RULE ACTIONS
 A rule can be associated with one of the following actions, which
 will be executed when the packet matches the body of the rule.
 .Bl -tag -width indent
 .It Cm allow | accept | pass | permit
 Allow packets that match rule.
 The search terminates.
 .It Cm check-state
 Checks the packet against the dynamic ruleset.
 If a match is found, execute the action associated with
 the rule which generated this dynamic rule, otherwise
 move to the next rule.
 .br
 .Cm Check-state
 rules do not have a body.
 If no
 .Cm check-state
 rule is found, the dynamic ruleset is checked at the first
 .Cm keep-state
 or
 .Cm limit
 rule.
 .It Cm count
 Update counters for all packets that match rule.
 The search continues with the next rule.
 .It Cm deny | drop
 Discard packets that match this rule.
 The search terminates.
 .It Cm divert Ar port
 Divert packets that match this rule to the
 .Xr divert 4
 socket bound to port
 .Ar port .
 The search terminates.
 .It Cm fwd | forward Ar ipaddr | tablearg Ns Op , Ns Ar port
 Change the next-hop on matching packets to
 .Ar ipaddr ,
 which can be an IP address or a host name.
 The next hop can also be supplied by the last table
 looked up for the packet by using the
 .Cm tablearg
 keyword instead of an explicit address.
 The search terminates if this rule matches.
 .Pp
 If
 .Ar ipaddr
 is a local address, then matching packets will be forwarded to
 .Ar port
 (or the port number in the packet if one is not specified in the rule)
 on the local machine.
 .br
 If
 .Ar ipaddr
 is not a local address, then the port number
 (if specified) is ignored, and the packet will be
 forwarded to the remote address, using the route as found in
 the local routing table for that IP.
 .br
 A
 .Cm fwd
 rule will not match layer-2 packets (those received
 on ether_input, ether_output, or bridged).
 .br
 The
 .Cm fwd
 action does not change the contents of the packet at all.
 In particular, the destination address remains unmodified, so
 packets forwarded to another system will usually be rejected by that system
 unless there is a matching rule on that system to capture them.
 For packets forwarded locally,
 the local address of the socket will be
 set to the original destination address of the packet.
 This makes the
 .Xr netstat 1
 entry look rather weird but is intended for
 use with transparent proxy servers.
 .Pp
 To enable
 .Cm fwd
 a custom kernel needs to be compiled with the option
 .Cd "options IPFIREWALL_FORWARD" .
 .It Cm nat Ar nat_nr
 Pass packet to a
 nat instance
 (for network address translation, address redirect, etc.):
 see the
 .Sx NETWORK ADDRESS TRANSLATION (NAT)
 Section for further information.
 .It Cm pipe Ar pipe_nr
 Pass packet to a
 .Nm dummynet
 .Dq pipe
 (for bandwidth limitation, delay, etc.).
 See the
 .Sx TRAFFIC SHAPER (DUMMYNET) CONFIGURATION
 Section for further information.
 The search terminates; however, on exit from the pipe and if
 the
 .Xr sysctl 8
 variable
 .Va net.inet.ip.fw.one_pass
 is not set, the packet is passed again to the firewall code
 starting from the next rule.
 .It Cm queue Ar queue_nr
 Pass packet to a
 .Nm dummynet
 .Dq queue
 (for bandwidth limitation using WF2Q+).
 .It Cm reject
 (Deprecated).
 Synonym for
 .Cm unreach host .
 .It Cm reset
 Discard packets that match this rule, and if the
 packet is a TCP packet, try to send a TCP reset (RST) notice.
 The search terminates.
 .It Cm reset6
 Discard packets that match this rule, and if the
 packet is a TCP packet, try to send a TCP reset (RST) notice.
 The search terminates.
 .It Cm skipto Ar number
 Skip all subsequent rules numbered less than
 .Ar number .
 The search continues with the first rule numbered
 .Ar number
 or higher.
 .It Cm tee Ar port
 Send a copy of packets matching this rule to the
 .Xr divert 4
 socket bound to port
 .Ar port .
 The search continues with the next rule.
 .It Cm unreach Ar code
 Discard packets that match this rule, and try to send an ICMP
 unreachable notice with code
 .Ar code ,
 where
 .Ar code
 is a number from 0 to 255, or one of these aliases:
 .Cm net , host , protocol , port ,
 .Cm needfrag , srcfail , net-unknown , host-unknown ,
 .Cm isolated , net-prohib , host-prohib , tosnet ,
 .Cm toshost , filter-prohib , host-precedence
 or
 .Cm precedence-cutoff .
 The search terminates.
 .It Cm unreach6 Ar code
 Discard packets that match this rule, and try to send an ICMPv6
 unreachable notice with code
 .Ar code ,
 where
 .Ar code
 is one of the numbers 0, 1, 3 or 4, or one of these aliases:
 .Cm no-route , admin-prohib , address
 or
 .Cm port .
 The search terminates.
 .It Cm netgraph Ar cookie
 Divert packet into netgraph with given
 .Ar cookie .
 The search terminates.
 If packet is later returned from netgraph it is either
 accepted or continues with the next rule, depending on
 .Va net.inet.ip.fw.one_pass
 sysctl variable.
 .It Cm ngtee Ar cookie
 A copy of packet is diverted into netgraph, original
 packet is either accepted or continues with the next rule, depending on
 .Va net.inet.ip.fw.one_pass
 sysctl variable.
 See
 .Xr ng_ipfw 4
 for more information on
 .Cm netgraph
 and
 .Cm ngtee
 actions.
+.It Cm setfib Ar fibnum
+The packet is tagged so as to use the FIB (routing table)
+.Ar fibnum
+in any subsequent forwarding decisions.
+Initially this is limited to the values 0 through 15; see
+.Xr setfib 8 .
 .El
 .Ss RULE BODY
 The body of a rule contains zero or more patterns (such as
 specific source and destination addresses or ports,
 protocol options, incoming or outgoing interfaces, etc.)
 that the packet must match in order to be recognised.
 In general, the patterns are connected by (implicit)
 .Cm and
 operators -- i.e., all must match in order for the
 rule to match.
 Individual patterns can be prefixed by the
 .Cm not
 operator to reverse the result of the match, as in
 .Pp
 .Dl "ipfw add 100 allow ip from not 1.2.3.4 to any"
 .Pp
 Additionally, sets of alternative match patterns
 .Pq Em or-blocks
 can be constructed by putting the patterns in
 lists enclosed between parentheses ( ) or braces { }, and
 using the
 .Cm or
 operator as follows:
 .Pp
 .Dl "ipfw add 100 allow ip from { x or not y or z } to any"
 .Pp
 Only one level of parentheses is allowed.
 Beware that most shells have special meanings for parentheses
 or braces, so it is advisable to put a backslash \\ in front of them
 to prevent such interpretations.
 .Pp
 The body of a rule must in general include a source and destination
 address specifier.
 The keyword
 .Ar any
 can be used in various places to specify that the content of
 a required field is irrelevant.
 .Pp
 The rule body has the following format:
 .Bd -ragged -offset indent
 .Op Ar proto Cm from Ar src Cm to Ar dst
 .Op Ar options
 .Ed
 .Pp
 The first part (proto from src to dst) is for backward
 compatibility with earlier versions of
 .Fx .
 In modern
 .Fx
 any match pattern (including MAC headers, IP protocols,
 addresses and ports) can be specified in the
 .Ar options
 section.
 .Pp
 Rule fields have the following meaning:
 .Bl -tag -width indent
 .It Ar proto : protocol | Cm { Ar protocol Cm or ... }
 .It Ar protocol : Oo Cm not Oc Ar protocol-name | protocol-number
 An IP protocol specified by number or name
 (for a complete list see
 .Pa /etc/protocols ) ,
 or one of the following keywords:
 .Bl -tag -width indent
 .It Cm ip4 | ipv4
 Matches IPv4 packets.
 .It Cm ip6 | ipv6
 Matches IPv6 packets.
 .It Cm ip | all
 Matches any packet.
 .El
 .Pp
 The
 .Cm ipv6
 in
 .Cm proto
 option will be treated as inner protocol.
 And, the
 .Cm ipv4
 is not available in
 .Cm proto
 option.
 .Pp
 The
 .Cm { Ar protocol Cm or ... }
 format (an
 .Em or-block )
 is provided for convenience only but its use is deprecated.
 .It Ar src No and Ar dst : Bro Cm addr | Cm { Ar addr Cm or ... } Brc Op Oo Cm not Oc Ar ports
 An address (or a list, see below)
 optionally followed by
 .Ar ports
 specifiers.
 .Pp
 The second format
 .Em ( or-block
 with multiple addresses) is provided for convenience only and
 its use is discouraged.
 .It Ar addr : Oo Cm not Oc Bro
 .Cm any | me | me6 |
 .Cm table Ns Pq Ar number Ns Op , Ns Ar value
 .Ar | addr-list | addr-set
 .Brc
 .It Cm any
 matches any IP address.
 .It Cm me
 matches any IP address configured on an interface in the system.
 .It Cm me6
 matches any IPv6 address configured on an interface in the system.
 The address list is evaluated at the time the packet is
 analysed.
 .It Cm table Ns Pq Ar number Ns Op , Ns Ar value
 Matches any IPv4 address for which an entry exists in the lookup table
 .Ar number .
 If an optional 32-bit unsigned
 .Ar value
 is also specified, an entry will match only if it has this value.
 See the
 .Sx LOOKUP TABLES
 section below for more information on lookup tables.
 .It Ar addr-list : ip-addr Ns Op Ns , Ns Ar addr-list
 .It Ar ip-addr :
 A host or subnet address specified in one of the following ways:
 .Bl -tag -width indent
 .It Ar numeric-ip | hostname
 Matches a single IPv4 address, specified as dotted-quad or a hostname.
 Hostnames are resolved at the time the rule is added to the firewall list.
 .It Ar addr Ns / Ns Ar masklen
 Matches all addresses with base
 .Ar addr
 (specified as an IP address, a network number, or a hostname)
 and mask width of
 .Ar masklen
 bits.
 As an example, 1.2.3.4/25 or 1.2.3.0/25 will match
 all IP numbers from 1.2.3.0 to 1.2.3.127 .
 .It Ar addr Ns : Ns Ar mask
 Matches all addresses with base
 .Ar addr
 (specified as an IP address, a network number, or a hostname)
 and the mask of
 .Ar mask ,
 specified as a dotted quad.
 As an example, 1.2.3.4:255.0.255.0 or 1.0.3.0:255.0.255.0 will match
 1.*.3.*.
 This form is advised only for non-contiguous
 masks.
 It is better to resort to the
 .Ar addr Ns / Ns Ar masklen
 format for contiguous masks, which is more compact and less
 error-prone.
 .El
 .It Ar addr-set : addr Ns Oo Ns / Ns Ar masklen Oc Ns Cm { Ns Ar list Ns Cm }
 .It Ar list : Bro Ar num | num-num Brc Ns Op Ns , Ns Ar list
 Matches all addresses with base address
 .Ar addr
 (specified as an IP address, a network number, or a hostname)
 and whose last byte is in the list between braces { } .
 Note that there must be no spaces between braces and
 numbers (spaces after commas are allowed).
 Elements of the list can be specified as single entries
 or ranges.
 The
 .Ar masklen
 field is used to limit the size of the set of addresses,
 and can have any value between 24 and 32.
 If not specified,
 it will be assumed as 24.
 .br
 This format is particularly useful to handle sparse address sets
 within a single rule.
 Because the matching occurs using a
 bitmask, it takes constant time and dramatically reduces
 the complexity of rulesets.
 .br
 As an example, an address specified as 1.2.3.4/24{128,35-55,89}
 or 1.2.3.0/24{128,35-55,89}
 will match the following IP addresses:
 .br
 1.2.3.128, 1.2.3.35 to 1.2.3.55, 1.2.3.89 .
 .It Ar addr6-list : ip6-addr Ns Op Ns , Ns Ar addr6-list
 .It Ar ip6-addr :
 A host or subnet specified in one of the following ways:
 .Pp
 .Bl -tag -width indent
 .It Ar numeric-ip | hostname
 Matches a single IPv6 address as allowed by
 .Xr inet_pton 3
 or a hostname.
 Hostnames are resolved at the time the rule is added to the firewall
 list.
 .It Ar addr Ns / Ns Ar masklen
 Matches all IPv6 addresses with base
 .Ar addr
 (specified as allowed by
 .Xr inet_pton 3
 or a hostname)
 and mask width of
 .Ar masklen
 bits.
 .El
 .Pp
 No support for sets of IPv6 addresses is provided because IPv6 addresses
 are typically random past the initial prefix.
 .It Ar ports : Bro Ar port | port Ns \&- Ns Ar port Ns Brc Ns Op , Ns Ar ports
 For protocols which support port numbers (such as TCP and UDP), optional
 .Cm ports
 may be specified as one or more ports or port ranges, separated
 by commas but no spaces, and an optional
 .Cm not
 operator.
 The
 .Ql \&-
 notation specifies a range of ports (including boundaries).
 .Pp
 Service names (from
 .Pa /etc/services )
 may be used instead of numeric port values.
 The length of the port list is limited to 30 ports or ranges,
 though one can specify larger ranges by using an
 .Em or-block
 in the
 .Cm options
 section of the rule.
 .Pp
 A backslash
 .Pq Ql \e
 can be used to escape the dash
 .Pq Ql -
 character in a service name (from a shell, the backslash must be
 typed twice to avoid the shell itself interpreting it as an escape
 character).
 .Pp
 .Dl "ipfw add count tcp from any ftp\e\e-data-ftp to any"
 .Pp
 Fragmented packets which have a non-zero offset (i.e., not the first
 fragment) will never match a rule which has one or more port
 specifications.
 See the
 .Cm frag
 option for details on matching fragmented packets.
 .El
 .Ss RULE OPTIONS (MATCH PATTERNS)
 Additional match patterns can be used within
 rules.
 Zero or more of these so-called
 .Em options
 can be present in a rule, optionally prefixed by the
 .Cm not
 operand, and possibly grouped into
 .Em or-blocks .
 .Pp
 The following match patterns can be used (listed in alphabetical order):
 .Bl -tag -width indent
 .It Cm // this is a comment.
 Inserts the specified text as a comment in the rule.
 Everything following // is considered as a comment and stored in the rule.
 You can have comment-only rules, which are listed as having a
 .Cm count
 action followed by the comment.
 .It Cm bridged
 Alias for
 .Cm layer2 .
 .It Cm diverted
 Matches only packets generated by a divert socket.
 .It Cm diverted-loopback
 Matches only packets coming from a divert socket back into the IP stack
 input for delivery.
 .It Cm diverted-output
 Matches only packets going from a divert socket back outward to the IP
 stack output for delivery.
 .It Cm dst-ip Ar ip-address
 Matches IPv4 packets whose destination IP is one of the address(es)
 specified as argument.
 .It Bro Cm dst-ip6 | dst-ipv6 Brc Ar ip6-address
 Matches IPv6 packets whose destination IP is one of the address(es)
 specified as argument.
 .It Cm dst-port Ar ports
 Matches IP packets whose destination port is one of the port(s)
 specified as argument.
 .It Cm established
 Matches TCP packets that have the RST or ACK bits set.
 .It Cm ext6hdr Ar header
 Matches IPv6 packets containing the extended header given by
 .Ar header .
 Supported headers are:
 .Pp
 Fragment
 .Pq Cm frag ,
 Hop-by-hop options
 .Pq Cm hopopt ,
 any type of Routing Header
 .Pq Cm route ,
 Source routing Routing Header Type 0
 .Pq Cm rthdr0 ,
 Mobile IPv6 Routing Header Type 2
 .Pq Cm rthdr2 ,
 Destination options
 .Pq Cm dstopt ,
 IPSec authentication headers
 .Pq Cm ah ,
 and IPSec encapsulated security payload headers
 .Pq Cm esp .
+.It Cm fib Ar fibnum
+Matches a packet that has been tagged to use
+the given FIB (routing table) number.
 .It Cm flow-id Ar labels
 Matches IPv6 packets containing any of the flow labels given in
 .Ar labels .
 .Ar labels
 is a comma separated list of numeric flow labels.
 .It Cm frag
 Matches packets that are fragments and not the first
 fragment of an IP datagram.
 Note that these packets will not have
 the next protocol header (e.g.\& TCP, UDP) so options that look into
 these headers cannot match.
 .It Cm gid Ar group
 Matches all TCP or UDP packets sent by or received for a
 .Ar group .
 A
 .Ar group
 may be specified by name or number.
 .It Cm jail Ar prisonID
 Matches all TCP or UDP packets sent by or received for the
 jail whose prison ID is
 .Ar prisonID .
 .It Cm icmptypes Ar types
 Matches ICMP packets whose ICMP type is in the list
 .Ar types .
 The list may be specified as any combination of
 individual types (numeric) separated by commas.
 .Em Ranges are not allowed .
 The supported ICMP types are:
 .Pp
 echo reply
 .Pq Cm 0 ,
 destination unreachable
 .Pq Cm 3 ,
 source quench
 .Pq Cm 4 ,
 redirect
 .Pq Cm 5 ,
 echo request
 .Pq Cm 8 ,
 router advertisement
 .Pq Cm 9 ,
 router solicitation
 .Pq Cm 10 ,
 time-to-live exceeded
 .Pq Cm 11 ,
 IP header bad
 .Pq Cm 12 ,
 timestamp request
 .Pq Cm 13 ,
 timestamp reply
 .Pq Cm 14 ,
 information request
 .Pq Cm 15 ,
 information reply
 .Pq Cm 16 ,
 address mask request
 .Pq Cm 17
 and address mask reply
 .Pq Cm 18 .
 .It Cm icmp6types Ar types
 Matches ICMP6 packets whose ICMP6 type is in the list of
 .Ar types .
 The list may be specified as any combination of
 individual types (numeric) separated by commas.
 .Em Ranges are not allowed .
 .It Cm in | out
 Matches incoming or outgoing packets, respectively.
 .Cm in
 and
 .Cm out
 are mutually exclusive (in fact,
 .Cm out
 is implemented as
 .Cm not in Ns No ).
 .It Cm ipid Ar id-list
 Matches IPv4 packets whose
 .Cm ip_id
 field has value included in
 .Ar id-list ,
 which is either a single value or a list of values or ranges
 specified in the same way as
 .Ar ports .
 .It Cm iplen Ar len-list
 Matches IP packets whose total length, including header and data, is
 in the set
 .Ar len-list ,
 which is either a single value or a list of values or ranges
 specified in the same way as
 .Ar ports .
 .It Cm ipoptions Ar spec
 Matches packets whose IPv4 header contains the comma separated list of
 options specified in
 .Ar spec .
 The supported IP options are:
 .Pp
 .Cm ssrr
 (strict source route),
 .Cm lsrr
 (loose source route),
 .Cm rr
 (record packet route) and
 .Cm ts
 (timestamp).
 The absence of a particular option may be denoted
 with a
 .Ql \&! .
 .It Cm ipprecedence Ar precedence
 Matches IPv4 packets whose precedence field is equal to
 .Ar precedence .
 .It Cm ipsec
 Matches packets that have IPSEC history associated with them
 (i.e., the packet comes encapsulated in IPSEC, the kernel
 has IPSEC support and IPSEC_FILTERTUNNEL option, and can correctly
 decapsulate it).
 .Pp
 Note that specifying
 .Cm ipsec
 is different from specifying
 .Cm proto Ar ipsec
 as the latter will only look at the specific IP protocol field,
 irrespective of IPSEC kernel support and the validity of the IPSEC data.
 .Pp
 Further note that this flag is silently ignored in kernels without
 IPSEC support.
 It does not affect rule processing when given and the
 rules are handled as if with no
 .Cm ipsec
 flag.
 .It Cm iptos Ar spec
 Matches IPv4 packets whose
 .Cm tos
 field contains the comma separated list of
 service types specified in
 .Ar spec .
 The supported IP types of service are:
 .Pp
 .Cm lowdelay
 .Pq Dv IPTOS_LOWDELAY ,
 .Cm throughput
 .Pq Dv IPTOS_THROUGHPUT ,
 .Cm reliability
 .Pq Dv IPTOS_RELIABILITY ,
 .Cm mincost
 .Pq Dv IPTOS_MINCOST ,
 .Cm congestion
 .Pq Dv IPTOS_ECN_CE .
 The absence of a particular type may be denoted
 with a
 .Ql \&! .
 .It Cm ipttl Ar ttl-list
 Matches IPv4 packets whose time to live is included in
 .Ar ttl-list ,
 which is either a single value or a list of values or ranges
 specified in the same way as
 .Ar ports .
 .It Cm ipversion Ar ver
 Matches IP packets whose IP version field is
 .Ar ver .
 .It Cm keep-state
 Upon a match, the firewall will create a dynamic rule, whose
 default behaviour is to match bidirectional traffic between
 source and destination IP/port using the same protocol.
 The rule has a limited lifetime (controlled by a set of
 .Xr sysctl 8
 variables), and the lifetime is refreshed every time a matching
 packet is found.
 .It Cm layer2
 Matches only layer2 packets, i.e., those passed to
 .Nm
 from ether_demux() and ether_output_frame().
 .It Cm limit Bro Cm src-addr | src-port | dst-addr | dst-port Brc Ar N
 The firewall will only allow
 .Ar N
 connections with the same
 set of parameters as specified in the rule.
 One or more
 of source and destination addresses and ports can be
 specified.
 Currently,
 only IPv4 flows are supported.
 .It Cm { MAC | mac } Ar dst-mac src-mac
 Match packets with a given
 .Ar dst-mac
 and
 .Ar src-mac
 addresses, specified as the
 .Cm any
 keyword (matching any MAC address), or six groups of hex digits
 separated by colons,
 and optionally followed by a mask indicating the significant bits.
 The mask may be specified using either of the following methods:
 .Bl -enum -width indent
 .It
 A slash
 .Pq /
 followed by the number of significant bits.
 For example, an address with 33 significant bits could be specified as:
 .Pp
 .Dl "MAC 10:20:30:40:50:60/33 any"
 .Pp
 .It
 An ampersand
 .Pq &
 followed by a bitmask specified as six groups of hex digits separated
 by colons.
 For example, an address in which the last 16 bits are significant could
 be specified as:
 .Pp
 .Dl "MAC 10:20:30:40:50:60&00:00:00:00:ff:ff any"
 .Pp
 Note that the ampersand character has a special meaning in many shells
 and should generally be escaped.
 .Pp
 .El
 Note that the order of MAC addresses (destination first,
 source second) is
 the same as on the wire, but the opposite of the one used for
 IP addresses.
 .It Cm mac-type Ar mac-type
 Matches packets whose Ethernet Type field
 corresponds to one of those specified as argument.
 .Ar mac-type
 is specified in the same way as
 .Cm port numbers
 (i.e., one or more comma-separated single values or ranges).
 You can use symbolic names for known values such as
 .Em vlan , ipv4 , ipv6 .
 Values can be entered as decimal or hexadecimal (if prefixed by 0x),
 and they are always printed as hexadecimal (unless the
 .Cm -N
 option is used, in which case symbolic resolution will be attempted).
 .It Cm proto Ar protocol
 Matches packets with the corresponding IP protocol.
 .It Cm recv | xmit | via Brq Ar ifX | Ar if Ns Cm * | Ar ipno | Ar any
 Matches packets received, transmitted or going through,
 respectively, the interface specified by exact name
 .Ns No ( Ar ifX Ns No ),
 by device name
 .Ns No ( Ar if Ns Ar * Ns No ),
 by IP address, or through some interface.
 .Pp
 The
 .Cm via
 keyword causes the interface to always be checked.
 If
 .Cm recv
 or
 .Cm xmit
 is used instead of
 .Cm via ,
 then only the receive or transmit interface (respectively)
 is checked.
 By specifying both, it is possible to match packets based on
 both receive and transmit interface, e.g.:
 .Pp
 .Dl "ipfw add deny ip from any to any out recv ed0 xmit ed1"
 .Pp
 The
 .Cm recv
 interface can be tested on either incoming or outgoing packets,
 while the
 .Cm xmit
 interface can only be tested on outgoing packets.
 So
 .Cm out
 is required (and
 .Cm in
 is invalid) whenever
 .Cm xmit
 is used.
 .Pp
 A packet may not have a receive or transmit interface: packets
 originating from the local host have no receive interface,
 while packets destined for the local host have no transmit
 interface.
 .It Cm setup
 Matches TCP packets that have the SYN bit set but no ACK bit.
 This is the short form of
 .Dq Li tcpflags\ syn,!ack .
 .It Cm src-ip Ar ip-address
 Matches IPv4 packets whose source IP is one of the address(es)
 specified as an argument.
 .It Cm src-ip6 Ar ip6-address
 Matches IPv6 packets whose source IP is one of the address(es)
 specified as an argument.
 .It Cm src-port Ar ports
 Matches IP packets whose source port is one of the port(s)
 specified as argument.
 .It Cm tagged Ar tag-list
 Matches packets whose tags are included in
 .Ar tag-list ,
 which is either a single value or a list of values or ranges
 specified in the same way as
 .Ar ports .
 Tags can be applied to the packet using
 .Cm tag
 rule action parameter (see its description for details on tags).
 .It Cm tcpack Ar ack
 TCP packets only.
 Match if the TCP header acknowledgment number field is set to
 .Ar ack .
 .It Cm tcpdatalen Ar tcpdatalen-list
 Matches TCP packets whose length of TCP data is
 .Ar tcpdatalen-list ,
 which is either a single value or a list of values or ranges
 specified in the same way as
 .Ar ports .
 .It Cm tcpflags Ar spec
 TCP packets only.
 Match if the TCP header contains the comma separated list of
 flags specified in
 .Ar spec .
 The supported TCP flags are:
 .Pp
 .Cm fin ,
 .Cm syn ,
 .Cm rst ,
 .Cm psh ,
 .Cm ack
 and
 .Cm urg .
 The absence of a particular flag may be denoted
 with a
 .Ql \&! .
 A rule which contains a
 .Cm tcpflags
 specification can never match a fragmented packet which has
 a non-zero offset.
 See the
 .Cm frag
 option for details on matching fragmented packets.
 .It Cm tcpseq Ar seq
 TCP packets only.
 Match if the TCP header sequence number field is set to
 .Ar seq .
 .It Cm tcpwin Ar win
 TCP packets only.
 Match if the TCP header window field is set to
 .Ar win .
 .It Cm tcpoptions Ar spec
 TCP packets only.
 Match if the TCP header contains the comma separated list of
 options specified in
 .Ar spec .
 The supported TCP options are:
 .Pp
 .Cm mss
 (maximum segment size),
 .Cm window
 (tcp window advertisement),
 .Cm sack
 (selective ack),
 .Cm ts
 (rfc1323 timestamp) and
 .Cm cc
 (rfc1644 t/tcp connection count).
 The absence of a particular option may be denoted
 with a
 .Ql \&! .
 .It Cm uid Ar user
 Match all TCP or UDP packets sent by or received for a
 .Ar user .
 A
 .Ar user
 may be matched by name or identification number.
 .It Cm verrevpath
 For incoming packets,
 a routing table lookup is done on the packet's source address.
 If the interface on which the packet entered the system matches the
 outgoing interface for the route,
 the packet matches.
 If the interfaces do not match up,
 the packet does not match.
 All outgoing packets or packets with no incoming interface match.
 .Pp
 The name and functionality of the option is intentionally similar to
 the Cisco IOS command:
 .Pp
 .Dl ip verify unicast reverse-path
 .Pp
 This option can be used to make anti-spoofing rules to reject all
 packets with source addresses not from this interface.
 See also the option
 .Cm antispoof .
 .It Cm versrcreach
 For incoming packets,
 a routing table lookup is done on the packet's source address.
 If a route to the source address exists, but not the default route
 or a blackhole/reject route, the packet matches.
 Otherwise, the packet does not match.
 All outgoing packets match.
 .Pp
 The name and functionality of the option is intentionally similar to
 the Cisco IOS command:
 .Pp
 .Dl ip verify unicast source reachable-via any
 .Pp
 This option can be used to make anti-spoofing rules to reject all
 packets whose source address is unreachable.
 .It Cm antispoof
 For incoming packets, the packet's source address is checked to see if it
 belongs to a directly connected network.
 If the network is directly connected, then the interface the packet
 came in on is compared to the interface the network is connected to.
 When incoming interface and directly connected interface are not the
 same, the packet does not match.
 Otherwise, the packet does match.
 All outgoing packets match.
 .Pp
 This option can be used to make anti-spoofing rules to reject all
 packets that pretend to be from a directly connected network but do
 not come in through that interface.
 This option is similar to but more restricted than
 .Cm verrevpath
 because it engages only on packets with source addresses of directly
 connected networks instead of all source addresses.
 .El
 .Sh LOOKUP TABLES
 Lookup tables are useful to handle large sparse address sets,
 typically from a hundred to several thousands of entries.
 There may be up to 128 different lookup tables, numbered 0 to 127.
 .Pp
 Each entry is represented by an
 .Ar addr Ns Op / Ns Ar masklen
 and will match all addresses with base
 .Ar addr
 (specified as an IP address or a hostname)
 and mask width of
 .Ar masklen
 bits.
 If
 .Ar masklen
 is not specified, it defaults to 32.
 When looking up an IP address in a table, the most specific
 entry will match.
 Associated with each entry is a 32-bit unsigned
 .Ar value ,
 which can optionally be checked by a rule matching code.
 When adding an entry, if
 .Ar value
 is not specified, it defaults to 0.
 .Pp
 An entry can be added to a table
 .Pq Cm add ,
 removed from a table
 .Pq Cm delete ,
 a table can be examined
 .Pq Cm list
 or flushed
 .Pq Cm flush .
 .Pp
 Internally, each table is stored in a Radix tree, the same way as
 the routing table (see
 .Xr route 4 ) .
 .Pp
 Lookup tables currently support IPv4 addresses only.
 .Pp
 The
 .Cm tablearg
 feature provides the ability to use a value, looked up in the table, as
 the argument for a rule action, action parameter or rule option.
 This can significantly reduce the number of rules in some configurations.
 The
 .Cm tablearg
 argument can be used with the following actions:
 .Cm nat , pipe , queue , divert , tee , netgraph , ngtee , fwd ;
 action parameters:
 .Cm tag , untag ;
 rule options:
 .Cm limit , tagged .
 .Pp
 When used with
 .Cm fwd
 it is possible to supply table entries with values
 that are in the form of IP addresses or hostnames.
 See the
 .Sx EXAMPLES
 Section for example usage of tables and the tablearg keyword.
 .Sh SETS OF RULES
 Each rule belongs to one of 32 different
 .Em sets ,
 numbered 0 to 31.
 Set 31 is reserved for the default rule.
 .Pp
 By default, rules are put in set 0, unless you use the
 .Cm set N
 attribute when entering a new rule.
 Sets can be individually and atomically enabled or disabled,
 so this mechanism permits an easy way to store multiple configurations
 of the firewall and quickly (and atomically) switch between them.
 The command to enable/disable sets is
 .Bd -ragged -offset indent
 .Nm
 .Cm set Oo Cm disable Ar number ... Oc Op Cm enable Ar number ...
 .Ed
 .Pp
 where multiple
 .Cm enable
 or
 .Cm disable
 sections can be specified.
 Command execution is atomic on all the sets specified in the command.
 By default, all sets are enabled.
 .Pp
 When you disable a set, its rules behave as if they do not exist
 in the firewall configuration, with only one exception:
 .Bd -ragged -offset indent
 dynamic rules created from a rule before it had been disabled
 will still be active until they expire.
 In order to delete
 dynamic rules you have to explicitly delete the parent rule
 which generated them.
 .Ed
 .Pp
 The set number of rules can be changed with the command
 .Bd -ragged -offset indent
 .Nm
 .Cm set move
 .Brq Cm rule Ar rule-number | old-set
 .Cm to Ar new-set
 .Ed
 .Pp
 Also, you can atomically swap two rulesets with the command
 .Bd -ragged -offset indent
 .Nm
 .Cm set swap Ar first-set second-set
 .Ed
 .Pp
 See the
 .Sx EXAMPLES
 Section on some possible uses of sets of rules.
 .Sh STATEFUL FIREWALL
 Stateful operation is a way for the firewall to dynamically
 create rules for specific flows when packets that
 match a given pattern are detected.
 Support for stateful
 operation comes through the
 .Cm check-state , keep-state
 and
 .Cm limit
 options of
 .Nm
 rules.
 .Pp
 Dynamic rules are created when a packet matches a
 .Cm keep-state
 or
 .Cm limit
 rule, causing the creation of a
 .Em dynamic
 rule which will match all and only packets with
 a given
 .Em protocol
 between a
 .Em src-ip/src-port dst-ip/dst-port
 pair of addresses
 .Em ( src
 and
 .Em dst
 are used here only to denote the initial match addresses, but they
 are completely equivalent afterwards).
 Dynamic rules will be checked at the first
 .Cm check-state , keep-state
 or
 .Cm limit
 occurrence, and the action performed upon a match will be the same
 as in the parent rule.
 .Pp
 Note that no additional attributes other than protocol and IP addresses
 and ports are checked on dynamic rules.
 .Pp
 The typical use of dynamic rules is to keep a closed firewall configuration,
 but let the first TCP SYN packet from the inside network install a
 dynamic rule for the flow so that packets belonging to that session
 will be allowed through the firewall:
 .Pp
 .Dl "ipfw add check-state"
 .Dl "ipfw add allow tcp from my-subnet to any setup keep-state"
 .Dl "ipfw add deny tcp from any to any"
 .Pp
 A similar approach can be used for UDP, where a UDP packet coming
 from the inside will install a dynamic rule to let the response through
 the firewall:
 .Pp
 .Dl "ipfw add check-state"
 .Dl "ipfw add allow udp from my-subnet to any keep-state"
 .Dl "ipfw add deny udp from any to any"
 .Pp
 Dynamic rules expire after some time, which depends on the status
 of the flow and the setting of some
 .Cm sysctl
 variables.
 See Section
 .Sx SYSCTL VARIABLES
 for more details.
 For TCP sessions, dynamic rules can be instructed to periodically
 send keepalive packets to refresh the state of the rule when it is
 about to expire.
 .Pp
 See Section
 .Sx EXAMPLES
 for more examples on how to use dynamic rules.
 .Sh TRAFFIC SHAPER (DUMMYNET) CONFIGURATION
 .Nm
 is also the user interface for the
 .Nm dummynet
 traffic shaper.
 .Pp
 .Nm dummynet
 operates by first using the firewall to classify packets and divide them into
 .Em flows ,
 using any match pattern that can be used in
 .Nm
 rules.
 Depending on local policies, a flow can contain packets for a single
 TCP connection, or from/to a given host, or entire subnet, or a
 protocol type, etc.
 .Pp
 There are two modes of
 .Nm dummynet
 operation:
 .Dq normal
 and
 .Dq fast .
 The
 .Dq normal
 mode tries to emulate a real link: the
 .Nm dummynet
 scheduler ensures that the packet will not leave the pipe faster than it
 would on the real link with a given bandwidth.
 The
 .Dq fast
 mode allows certain packets to bypass the
 .Nm dummynet
 scheduler (if packet flow does not exceed pipe's bandwidth).
 This is the reason why the
 .Dq fast
 mode requires fewer CPU cycles per packet (on average) and packet latency
 can be significantly lower in comparison to a real link with the same
 bandwidth.
 The default mode is
 .Dq normal .
 The
 .Dq fast
 mode can be enabled by setting the
 .Va net.inet.ip.dummynet.io_fast
 .Xr sysctl 8
 variable to a non-zero value.
 .Pp
 Packets belonging to the same flow are then passed to either of two
 different objects, which implement the traffic regulation:
 .Bl -hang -offset XXXX
 .It Em pipe
 A pipe emulates a link with given bandwidth, propagation delay,
 queue size and packet loss rate.
 Packets are queued in front of the pipe as they come out from the classifier,
 and then transferred to the pipe according to the pipe's parameters.
 .Pp
 .It Em queue
 A queue
 is an abstraction used to implement the WF2Q+
 (Worst-case Fair Weighted Fair Queueing) policy, which is
 an efficient variant of the WFQ policy.
 .br
 The queue associates a
 .Em weight
 and a reference pipe to each flow, and then all backlogged (i.e.,
 with packets queued) flows linked to the same pipe share the pipe's
 bandwidth proportionally to their weights.
 Note that weights are not priorities; a flow with a lower weight
 is still guaranteed to get its fraction of the bandwidth even if a
 flow with a higher weight is permanently backlogged.
 .Pp
 .El
 In practice,
 .Em pipes
 can be used to set hard limits to the bandwidth that a flow can use, whereas
 .Em queues
 can be used to determine how different flows share the available bandwidth.
 .Pp
 The
 .Em pipe
 and
 .Em queue
 configuration commands are the following:
 .Bd -ragged -offset indent
 .Cm pipe Ar number Cm config Ar pipe-configuration
 .Pp
 .Cm queue Ar number Cm config Ar queue-configuration
 .Ed
 .Pp
 The following parameters can be configured for a pipe:
 .Pp
 .Bl -tag -width indent -compact
 .It Cm bw Ar bandwidth | device
 Bandwidth, measured in
 .Sm off
 .Op Cm K | M
 .Brq Cm bit/s | Byte/s .
 .Sm on
 .Pp
 A value of 0 (default) means unlimited bandwidth.
 The unit must immediately follow the number, as in
 .Pp
 .Dl "ipfw pipe 1 config bw 300Kbit/s"
 .Pp
 If a device name is specified instead of a numeric value, as in
 .Pp
 .Dl "ipfw pipe 1 config bw tun0"
 .Pp
 then the transmit clock is supplied by the specified device.
 At the moment only the
 .Xr tun 4
 device supports this
 functionality, for use in conjunction with
 .Xr ppp 8 .
 .Pp
 .It Cm delay Ar ms-delay
 Propagation delay, measured in milliseconds.
 The value is rounded to the next multiple of the clock tick
 (typically 10ms, but it is a good practice to run kernels
 with
 .Dq "options HZ=1000"
 to reduce
 the granularity to 1ms or less).
 Default value is 0, meaning no delay.
 .El
 .Pp
 The following parameters can be configured for a queue:
 .Pp
 .Bl -tag -width indent -compact
 .It Cm pipe Ar pipe_nr
 Connects a queue to the specified pipe.
 Multiple queues (with the same or different weights) can be connected to
 the same pipe, which specifies the aggregate rate for the set of queues.
 .Pp
 .It Cm weight Ar weight
 Specifies the weight to be used for flows matching this queue.
 The weight must be in the range 1..100, and defaults to 1.
 .El
 .Pp
 Finally, the following parameters can be configured for both
 pipes and queues:
 .Pp
 .Bl -tag -width XXXX -compact
 .Pp
 .It Cm buckets Ar hash-table-size
 Specifies the size of the hash table used for storing the
 various queues.
 Default value is 64, controlled by the
 .Xr sysctl 8
 variable
 .Va net.inet.ip.dummynet.hash_size ,
 allowed range is 16 to 65536.
 .Pp
 .It Cm mask Ar mask-specifier
 Packets sent to a given pipe or queue by an
 .Nm
 rule can be further classified into multiple flows, each of which is then
 sent to a different
 .Em dynamic
 pipe or queue.
 A flow identifier is constructed by masking the IP addresses,
 ports and protocol types as specified with the
 .Cm mask
 options in the configuration of the pipe or queue.
 For each different flow identifier, a new pipe or queue is created
 with the same parameters as the original object, and matching packets
 are sent to it.
 .Pp
 Thus, when
 .Em dynamic pipes
 are used, each flow will get the same bandwidth as defined by the pipe,
 whereas when
 .Em dynamic queues
 are used, each flow will share the parent's pipe bandwidth evenly
 with other flows generated by the same queue (note that other queues
 with different weights might be connected to the same pipe).
 .br
 Available mask specifiers are a combination of one or more of the following:
 .Pp
 .Cm dst-ip Ar mask ,
 .Cm dst-ip6 Ar mask ,
 .Cm src-ip Ar mask ,
 .Cm src-ip6 Ar mask ,
 .Cm dst-port Ar mask ,
 .Cm src-port Ar mask ,
 .Cm flow-id Ar mask ,
 .Cm proto Ar mask
 or
 .Cm all ,
 .Pp
 where the latter means all bits in all fields are significant.
 .Pp
 .It Cm noerror
 When a packet is dropped by a
 .Nm dummynet
 queue or pipe, the error
 is normally reported to the caller routine in the kernel, in the
 same way as it happens when a device queue fills up.
 Setting this
 option reports the packet as successfully delivered, which can be
 needed for some experimental setups where you want to simulate
 loss or congestion at a remote router.
 .Pp
 .It Cm plr Ar packet-loss-rate
 Packet loss rate.
 Argument
 .Ar packet-loss-rate
 is a floating-point number between 0 and 1, with 0 meaning no
 loss, 1 meaning 100% loss.
 The loss rate is internally represented on 31 bits.
 .Pp
 .It Cm queue Brq Ar slots | size Ns Cm Kbytes
 Queue size, in
 .Ar slots
 or
 .Cm KBytes .
 Default value is 50 slots, which
 is the typical queue size for Ethernet devices.
 Note that for slow speed links you should keep the queue
 size short or your traffic might be affected by a significant
 queueing delay.
 E.g., 50 max-sized ethernet packets (1500 bytes) mean 600Kbit
 or 20s of queue on a 30Kbit/s pipe.
 Even worse effects can result if you get packets from an
 interface with a much larger MTU, e.g.\& the loopback interface
 with its 16KB packets.
 The
 .Xr sysctl 8
 variables
 .Em net.inet.ip.dummynet.pipe_byte_limit
 and
 .Em net.inet.ip.dummynet.pipe_slot_limit
 control the maximum lengths that can be specified.
 .Pp
 .It Cm red | gred Ar w_q Ns / Ns Ar min_th Ns / Ns Ar max_th Ns / Ns Ar max_p
 Make use of the RED (Random Early Detection) queue management algorithm.
 .Ar w_q
 and
 .Ar max_p
 are floating
 point numbers between 0 and 1 (0 not included), while
 .Ar min_th
 and
 .Ar max_th
 are integer numbers specifying thresholds for queue management
 (thresholds are computed in bytes if the queue has been defined
 in bytes, in slots otherwise).
 The
 .Nm dummynet
 also supports the gentle RED variant (gred).
 Three
 .Xr sysctl 8
 variables can be used to control the RED behaviour:
 .Bl -tag -width indent
 .It Va net.inet.ip.dummynet.red_lookup_depth
 specifies the accuracy in computing the average queue
 when the link is idle (defaults to 256, must be greater than zero)
 .It Va net.inet.ip.dummynet.red_avg_pkt_size
 specifies the expected average packet size (defaults to 512, must be
 greater than zero)
 .It Va net.inet.ip.dummynet.red_max_pkt_size
 specifies the expected maximum packet size, only used when queue
 thresholds are in bytes (defaults to 1500, must be greater than zero).
 .El
 .El
 .Pp
 When used with IPv6 data,
 .Nm dummynet
 currently has several limitations.
 Information necessary to route link-local packets to an
 interface is not available after processing by
 .Nm dummynet
 so those packets are dropped in the output path.
 Care should be taken to ensure that link-local packets are not passed to
 .Nm dummynet .
 .Sh CHECKLIST
 Here are some important points to consider when designing your
 rules:
 .Bl -bullet
 .It
 Remember that you filter both packets going
 .Cm in
 and
 .Cm out .
 Most connections need packets going in both directions.
 .It
 Remember to test very carefully.
 It is a good idea to be near the console when doing this.
 If you cannot be near the console,
 use an auto-recovery script such as the one in
 .Pa /usr/share/examples/ipfw/change_rules.sh .
 .It
 Do not forget the loopback interface.
 .El
 .Sh FINE POINTS
 .Bl -bullet
 .It
 There are circumstances where fragmented datagrams are unconditionally
 dropped.
 TCP packets are dropped if they do not contain at least 20 bytes of
 TCP header, UDP packets are dropped if they do not contain a full 8
 byte UDP header, and ICMP packets are dropped if they do not contain
 4 bytes of ICMP header, enough to specify the ICMP type, code, and
 checksum.
 These packets are simply logged as
 .Dq pullup failed
 since there may not be enough good data in the packet to produce a
 meaningful log entry.
 .It
 Another type of packet is unconditionally dropped, a TCP packet with a
 fragment offset of one.
 This is a valid packet, but it only has one use, to try
 to circumvent firewalls.
 When logging is enabled, these packets are
 reported as being dropped by rule -1.
 .It
 If you are logged in over a network, loading the
 .Xr kld 4
 version of
 .Nm
 is probably not as straightforward as you would think.
 I recommend the following command line:
 .Bd -literal -offset indent
 kldload ipfw && \e
 ipfw add 32000 allow ip from any to any
 .Ed
 .Pp
 Along the same lines, doing an
 .Bd -literal -offset indent
 ipfw flush
 .Ed
 .Pp
 in similar surroundings is also a bad idea.
 .It
 The
 .Nm
 filter list may not be modified if the system security level
 is set to 3 or higher
 (see
 .Xr init 8
 for information on system security levels).
 .El
 .Sh PACKET DIVERSION
 A
 .Xr divert 4
 socket bound to the specified port will receive all packets
 diverted to that port.
 If no socket is bound to the destination port, or if the divert module is
 not loaded, or if the kernel was not compiled with divert socket support,
 the packets are dropped.
 .Sh NETWORK ADDRESS TRANSLATION (NAT)
 The nat configuration command is the following:
 .Bd -ragged -offset indent
 .Bk -words
 .Cm nat 
 .Ar nat_number 
 .Cm config 
 .Ar nat-configuration
 .Ek
 .Ed
 .Pp
 .
 The following parameters can be configured:
 .Bl -tag -width indent
 .It Cm ip Ar ip_address
 Define an ip address to use for aliasing.
 .It Cm if Ar nic
 Use ip address of NIC for aliasing, dynamically changing
 it if NIC's ip address changes.
 .It Cm log
 Enable logging on this nat instance.
 .It Cm deny_in
 Deny any incoming connection from outside world.
 .It Cm same_ports
 Try to leave the alias port numbers unchanged from
 the actual local port numbers.
 .It Cm unreg_only
 Traffic on the local network not originating from an
 unregistered address space will be ignored.
 .It Cm reset
 Reset table of the packet aliasing engine on address change.
 .It Cm reverse
 Reverse the way libalias handles aliasing.
 .It Cm proxy_only
 Obey transparent proxy rules only, packet aliasing is not performed.
 .El
 .Pp
 To let the packet continue after being (de)aliased, set the sysctl variable
 .Va net.inet.ip.fw.one_pass 
 to 0.
 For more information about aliasing modes, refer to
 .Xr libalias 3 .
 See Section
 .Sx EXAMPLES
 for some examples about nat usage.
 .Sh REDIRECT AND LSNAT SUPPORT IN IPFW
 Redirect and LSNAT support follow closely the syntax used in
 .Xr natd 8 .
 See Section
 .Sx EXAMPLES
 for some examples on how to do redirect and lsnat.
 .Sh SYSCTL VARIABLES
 A set of
 .Xr sysctl 8
 variables controls the behaviour of the firewall and
 associated modules
 .Pq Nm dummynet , bridge .
 These are shown below together with their default value
 (but always check with the
 .Xr sysctl 8
 command what value is actually in use) and meaning:
 .Bl -tag -width indent
 .It Va net.inet.ip.dummynet.expire : No 1
 Lazily delete dynamic pipes/queues once they have no pending traffic.
 You can disable this by setting the variable to 0, in which case
 the pipes/queues will only be deleted when the threshold is reached.
 .It Va net.inet.ip.dummynet.hash_size : No 64
 Default size of the hash table used for dynamic pipes/queues.
 This value is used when no
 .Cm buckets
 option is specified when configuring a pipe/queue.
 .It Va net.inet.ip.dummynet.io_fast : No 0
 If set to a non-zero value,
 the
 .Dq fast
 mode of
 .Nm dummynet
 operation (see above) is enabled.
 .It Va net.inet.ip.dummynet.io_pkt
 Number of packets passed to
 .Nm dummynet .
 .It Va net.inet.ip.dummynet.io_pkt_drop
 Number of packets dropped by
 .Nm dummynet .
 .It Va net.inet.ip.dummynet.io_pkt_fast
 Number of packets bypassed by the
 .Nm dummynet
 scheduler.
 .It Va net.inet.ip.dummynet.max_chain_len : No 16
 Target value for the maximum number of pipes/queues in a hash bucket.
 The product
 .Cm max_chain_len*hash_size
 is used to determine the threshold over which empty pipes/queues
 will be expired even when
 .Cm net.inet.ip.dummynet.expire=0 .
 .It Va net.inet.ip.dummynet.red_lookup_depth : No 256
 .It Va net.inet.ip.dummynet.red_avg_pkt_size : No 512
 .It Va net.inet.ip.dummynet.red_max_pkt_size : No 1500
 Parameters used in the computations of the drop probability
 for the RED algorithm.
 .It Va net.inet.ip.dummynet.pipe_byte_limit : No 1048576
 .It Va net.inet.ip.dummynet.pipe_slot_limit : No 100
 The maximum queue size that can be specified in bytes or packets.
 These limits prevent accidental exhaustion of resources such as mbufs.
 If you raise these limits,
 you should make sure the system is configured so that sufficient resources
 are available.
 .It Va net.inet.ip.fw.autoinc_step : No 100
 Delta between rule numbers when auto-generating them.
 The value must be in the range 1..1000.
 .It Va net.inet.ip.fw.curr_dyn_buckets : Va net.inet.ip.fw.dyn_buckets
 The current number of buckets in the hash table for dynamic rules
 (read-only).
 .It Va net.inet.ip.fw.debug : No 1
 Controls debugging messages produced by
 .Nm .
 .It Va net.inet.ip.fw.dyn_buckets : No 256
 The number of buckets in the hash table for dynamic rules.
 Must be a power of 2, up to 65536.
 It only takes effect when all dynamic rules have expired, so you
 are advised to use a
 .Cm flush
 command to make sure that the hash table is resized.
 .It Va net.inet.ip.fw.dyn_count : No 3
 Current number of dynamic rules
 (read-only).
 .It Va net.inet.ip.fw.dyn_keepalive : No 1
 Enables generation of keepalive packets for
 .Cm keep-state
 rules on TCP sessions.
 A keepalive is generated to both
 sides of the connection every 5 seconds for the last 20
 seconds of the lifetime of the rule.
 .It Va net.inet.ip.fw.dyn_max : No 8192
 Maximum number of dynamic rules.
 When you hit this limit, no more dynamic rules can be
 installed until old ones expire.
 .It Va net.inet.ip.fw.dyn_ack_lifetime : No 300
 .It Va net.inet.ip.fw.dyn_syn_lifetime : No 20
 .It Va net.inet.ip.fw.dyn_fin_lifetime : No 1
 .It Va net.inet.ip.fw.dyn_rst_lifetime : No 1
 .It Va net.inet.ip.fw.dyn_udp_lifetime : No 5
 .It Va net.inet.ip.fw.dyn_short_lifetime : No 30
 These variables control the lifetime, in seconds, of dynamic
 rules.
 Upon the initial SYN exchange the lifetime is kept short,
 then increased after both SYN have been seen, then decreased
 again during the final FIN exchange or when a RST is received.
 Both
 .Em dyn_fin_lifetime
 and
 .Em dyn_rst_lifetime
 must be strictly lower than 5 seconds, the period of
 repetition of keepalives.
 The firewall enforces that.
 .It Va net.inet.ip.fw.enable : No 1
 Enables the firewall.
 Setting this variable to 0 lets you run your machine without
 firewall even if compiled in.
 .It Va net.inet6.ip6.fw.enable : No 1
 Provides the same functionality as above for the IPv6 case.
 .It Va net.inet.ip.fw.one_pass : No 1
 When set, the packet exiting from the
 .Nm dummynet
 pipe or from
 .Xr ng_ipfw 4
 node is not passed through the firewall again.
 Otherwise, after an action, the packet is
 reinjected into the firewall at the next rule.
 .It Va net.inet.ip.fw.verbose : No 1
 Enables verbose messages.
 .It Va net.inet.ip.fw.verbose_limit : No 0
 Limits the number of messages produced by a verbose firewall.
 .It Va net.inet6.ip6.fw.deny_unknown_exthdrs : No 1
 If enabled, packets with unknown IPv6 Extension Headers will be denied.
 .It Va net.link.ether.ipfw : No 0
 Controls whether layer-2 packets are passed to
 .Nm .
 Default is no.
 .It Va net.link.bridge.ipfw : No 0
 Controls whether bridged packets are passed to
 .Nm .
 Default is no.
 .El
 .Pp
 .Sh EXAMPLES
 There are far too many possible uses of
 .Nm
 so this Section will only give a small set of examples.
 .Pp
 .Ss BASIC PACKET FILTERING
 This command adds an entry which denies all tcp packets from
 .Em cracker.evil.org
 to the telnet port of
 .Em wolf.tambov.su
 from being forwarded by the host:
 .Pp
 .Dl "ipfw add deny tcp from cracker.evil.org to wolf.tambov.su telnet"
 .Pp
 This one disallows any connection from the entire cracker's
 network to my host:
 .Pp
 .Dl "ipfw add deny ip from 123.45.67.0/24 to my.host.org"
 .Pp
 A first and efficient way to limit access (not using dynamic rules)
 is the use of the following rules:
 .Pp
 .Dl "ipfw add allow tcp from any to any established"
 .Dl "ipfw add allow tcp from net1 portlist1 to net2 portlist2 setup"
 .Dl "ipfw add allow tcp from net3 portlist3 to net3 portlist3 setup"
 .Dl "..."
 .Dl "ipfw add deny tcp from any to any"
 .Pp
 The first rule will be a quick match for normal TCP packets,
 but it will not match the initial SYN packet, which will be
 matched by the
 .Cm setup
 rules only for selected source/destination pairs.
 All other SYN packets will be rejected by the final
 .Cm deny
 rule.
 .Pp
 If you administer one or more subnets, you can take advantage
 of the address sets and or-blocks and write extremely
 compact rulesets which selectively enable services to blocks
 of clients, as below:
 .Pp
 .Dl "goodguys=\*q{ 10.1.2.0/24{20,35,66,18} or 10.2.3.0/28{6,3,11} }\*q"
 .Dl "badguys=\*q10.1.2.0/24{8,38,60}\*q"
 .Dl ""
 .Dl "ipfw add allow ip from ${goodguys} to any"
 .Dl "ipfw add deny ip from ${badguys} to any"
 .Dl "... normal policies ..."
 .Pp
 The
 .Cm verrevpath
 option could be used to do automated anti-spoofing by adding the
 following to the top of a ruleset:
 .Pp
 .Dl "ipfw add deny ip from any to any not verrevpath in"
 .Pp
 This rule drops all incoming packets that appear to be coming to the
 system on the wrong interface.
 For example, a packet with a source
 address belonging to a host on a protected internal network would be
 dropped if it tried to enter the system from an external interface.
 .Pp
 The
 .Cm antispoof
 option could be used to do similar but more restricted anti-spoofing
 by adding the following to the top of a ruleset:
 .Pp
 .Dl "ipfw add deny ip from any to any not antispoof in"
 .Pp
 This rule drops all incoming packets that appear to be coming from another
 directly connected system but on the wrong interface.
 For example, a packet with a source address of
 .Li 192.168.0.0/24 ,
 configured on
 .Li fxp0 ,
 but coming in on
 .Li fxp1
 would be dropped.
 .Ss DYNAMIC RULES
 In order to protect a site from flood attacks involving fake
 TCP packets, it is safer to use dynamic rules:
 .Pp
 .Dl "ipfw add check-state"
 .Dl "ipfw add deny tcp from any to any established"
 .Dl "ipfw add allow tcp from my-net to any setup keep-state"
 .Pp
 This will let the firewall install dynamic rules only for
 those connections which start with a regular SYN packet coming
 from the inside of our network.
 Dynamic rules are checked when encountering the first
 .Cm check-state
 or
 .Cm keep-state
 rule.
 A
 .Cm check-state
 rule should usually be placed near the beginning of the
 ruleset to minimize the amount of work scanning the ruleset.
 Your mileage may vary.
 .Pp
 To limit the number of connections a user can open
 you can use the following type of rules:
 .Pp
 .Dl "ipfw add allow tcp from my-net/24 to any setup limit src-addr 10"
 .Dl "ipfw add allow tcp from any to me setup limit src-addr 4"
 .Pp
 The former (assuming it runs on a gateway) will allow each host
 on a /24 network to open at most 10 TCP connections.
 The latter can be placed on a server to make sure that a single
 client does not use more than 4 simultaneous connections.
 .Pp
 .Em BEWARE :
 stateful rules can be subject to denial-of-service attacks
 by a SYN-flood which opens a huge number of dynamic rules.
 The effects of such attacks can be partially limited by
 acting on a set of
 .Xr sysctl 8
 variables which control the operation of the firewall.
 .Pp
 Here is a good usage of the
 .Cm list
 command to see accounting records and timestamp information:
 .Pp
 .Dl ipfw -at list
 .Pp
 or in short form without timestamps:
 .Pp
 .Dl ipfw -a list
 .Pp
 which is equivalent to:
 .Pp
 .Dl ipfw show
 .Pp
 The next rule diverts all incoming packets from 192.168.2.0/24
 to divert port 5000:
 .Pp
 .Dl ipfw add divert 5000 ip from 192.168.2.0/24 to any in
 .Pp
 .Ss TRAFFIC SHAPING
 The following rules show some of the applications of
 .Nm
 and
 .Nm dummynet
 for simulations and the like.
 .Pp
 This rule drops random incoming packets with a probability
 of 5%:
 .Pp
 .Dl "ipfw add prob 0.05 deny ip from any to any in"
 .Pp
 A similar effect can be achieved making use of
 .Nm dummynet
 pipes:
 .Pp
 .Dl "ipfw add pipe 10 ip from any to any"
 .Dl "ipfw pipe 10 config plr 0.05"
 .Pp
 We can use pipes to artificially limit bandwidth, e.g.\& on a
 machine acting as a router, if we want to limit traffic from
 local clients on 192.168.2.0/24 we do:
 .Pp
 .Dl "ipfw add pipe 1 ip from 192.168.2.0/24 to any out"
 .Dl "ipfw pipe 1 config bw 300Kbit/s queue 50KBytes"
 .Pp
 note that we use the
 .Cm out
 modifier so that the rule is not used twice.
 Remember in fact that
 .Nm
 rules are checked both on incoming and outgoing packets.
 .Pp
 Should we want to simulate a bidirectional link with bandwidth
 limitations, the correct way is the following:
 .Pp
 .Dl "ipfw add pipe 1 ip from any to any out"
 .Dl "ipfw add pipe 2 ip from any to any in"
 .Dl "ipfw pipe 1 config bw 64Kbit/s queue 10Kbytes"
 .Dl "ipfw pipe 2 config bw 64Kbit/s queue 10Kbytes"
 .Pp
 The above can be very useful, e.g.\& if you want to see how
 your fancy Web page will look for a residential user who
 is connected only through a slow link.
 You should not use only one pipe for both directions, unless
 you want to simulate a half-duplex medium (e.g.\& AppleTalk,
 Ethernet, IRDA).
 It is not necessary that both pipes have the same configuration,
 so we can also simulate asymmetric links.
 .Pp
 Should we want to verify network performance with the RED queue
 management algorithm:
 .Pp
 .Dl "ipfw add pipe 1 ip from any to any"
 .Dl "ipfw pipe 1 config bw 500Kbit/s queue 100 red 0.002/30/80/0.1"
 .Pp
 Another typical application of the traffic shaper is to
 introduce some delay in the communication.
 This can significantly affect applications which do a lot of Remote
 Procedure Calls, and where the round-trip-time of the
 connection often becomes a limiting factor much more than
 bandwidth:
 .Pp
 .Dl "ipfw add pipe 1 ip from any to any out"
 .Dl "ipfw add pipe 2 ip from any to any in"
 .Dl "ipfw pipe 1 config delay 250ms bw 1Mbit/s"
 .Dl "ipfw pipe 2 config delay 250ms bw 1Mbit/s"
 .Pp
 Per-flow queueing can be useful for a variety of purposes.
 A very simple one is counting traffic:
 .Pp
 .Dl "ipfw add pipe 1 tcp from any to any"
 .Dl "ipfw add pipe 1 udp from any to any"
 .Dl "ipfw add pipe 1 ip from any to any"
 .Dl "ipfw pipe 1 config mask all"
 .Pp
 The above set of rules will create queues (and collect
 statistics) for all traffic.
 Because the pipes have no limitations, the only effect is
 collecting statistics.
 Note that we need 3 rules, not just the last one, because
 when
 .Nm
 tries to match IP packets it will not consider ports, so we
 would not see connections on separate ports as different
 ones.
 .Pp
 A more sophisticated example is limiting the outbound traffic
 on a net with per-host limits, rather than per-network limits:
 .Pp
 .Dl "ipfw add pipe 1 ip from 192.168.2.0/24 to any out"
 .Dl "ipfw add pipe 2 ip from any to 192.168.2.0/24 in"
 .Dl "ipfw pipe 1 config mask src-ip 0x000000ff bw 200Kbit/s queue 20Kbytes"
 .Dl "ipfw pipe 2 config mask dst-ip 0x000000ff bw 200Kbit/s queue 20Kbytes"
 .Ss LOOKUP TABLES
 In the following example, we need to create several traffic bandwidth
 classes and we need different hosts/networks to fall into different classes.
 We create one pipe for each class and configure them accordingly.
 Then we create a single table and fill it with IP subnets and addresses.
 For each subnet/host we set the argument equal to the number of the pipe
 that it should use.
 Then we classify traffic using a single rule:
 .Pp
 .Dl "ipfw pipe 1 config bw 1000Kbyte/s"
 .Dl "ipfw pipe 4 config bw 4000Kbyte/s"
 .Dl "..."
 .Dl "ipfw table 1 add 192.168.2.0/24 1"
 .Dl "ipfw table 1 add 192.168.0.0/27 4"
 .Dl "ipfw table 1 add 192.168.0.2 1"
 .Dl "..."
 .Dl "ipfw add pipe tablearg ip from table(1) to any"
 .Pp
 Using the
 .Cm fwd
 action, the table entries may include hostnames and IP addresses.
 .Pp
 .Dl "ipfw table 1 add 192.168.2.0/24 10.23.2.1"
 .Dl "ipfw table 1 add 192.168.0.0/27 router1.dmz"
 .Dl "..."
 .Dl "ipfw add 100 fwd tablearg ip from any to table(1)"
 .Ss SETS OF RULES
 To add a set of rules atomically, e.g.\& set 18:
 .Pp
 .Dl "ipfw set disable 18"
 .Dl "ipfw add NN set 18 ...         # repeat as needed"
 .Dl "ipfw set enable 18"
 .Pp
 To delete a set of rules atomically the command is simply:
 .Pp
 .Dl "ipfw delete set 18"
 .Pp
 To test a ruleset and disable it and regain control if something goes wrong:
 .Pp
 .Dl "ipfw set disable 18"
 .Dl "ipfw add NN set 18 ...         # repeat as needed"
 .Dl "ipfw set enable 18; echo done; sleep 30 && ipfw set disable 18"
 .Pp
 Here if everything goes well, you press control-C before the "sleep"
 terminates, and your ruleset will be left active.
 Otherwise, e.g.\& if
 you cannot access your box, the ruleset will be disabled after
 the sleep terminates thus restoring the previous situation.
 .Pp
 To show rules of the specific set:
 .Pp
 .Dl "ipfw set 18 show"
 .Pp
 To show rules of the disabled set:
 .Pp
 .Dl "ipfw -S set 18 show"
 .Pp
 To clear the counters of a specific rule of the specific set:
 .Pp
 .Dl "ipfw set 18 zero NN"
 .Pp
 To delete a specific rule of the specific set:
 .Pp
 .Dl "ipfw set 18 delete NN"
 .Ss NAT, REDIRECT AND LSNAT
 First redirect all the traffic to nat instance 123:
 .Pp
 .Dl "ipfw add nat 123 all from any to any"
 .Pp
 Then to configure nat instance 123 to alias all the outgoing traffic with ip
 192.168.0.123, blocking all incoming connections, trying to keep
 same ports on both sides, clearing aliasing table on address change 
 and keeping a log of traffic/link statistics:
 .Pp
 .Dl "ipfw nat 123 config ip 192.168.0.123 log deny_in reset same_ports"
 .Pp
 Or to change address of instance 123, aliasing table will be cleared (see
 reset option):
 .Pp
 .Dl "ipfw nat 123 config ip 10.0.0.1"
 .Pp
 To see configuration of nat instance 123:
 .Pp
 .Dl "ipfw nat 123 show config"
 .Pp
 To show logs of all the instances in range 111-999:
 .Pp
 .Dl "ipfw nat 111-999 show"
 .Pp
 To see configurations of all instances:
 .Pp
 .Dl "ipfw nat show config"
 .Pp
 Or a redirect rule with mixed modes could look like:
 .Pp
 .Dl "ipfw nat 123 config redirect_addr 10.0.0.1 10.0.0.66"
 .Dl "			 redirect_port tcp 192.168.0.1:80 500"
 .Dl "			 redirect_proto udp 192.168.1.43 192.168.1.1"
 .Dl "			 redirect_addr 192.168.0.10,192.168.0.11"
 .Dl "			 	    10.0.0.100	# LSNAT"
 .Dl "			 redirect_port tcp 192.168.0.1:80,192.168.0.10:22" 
 .Dl "			 	    500		# LSNAT"
 .Pp
 or it could be split into:
 .Pp
 .Dl "ipfw nat 1 config redirect_addr 10.0.0.1 10.0.0.66"
 .Dl "ipfw nat 2 config redirect_port tcp 192.168.0.1:80 500"
 .Dl "ipfw nat 3 config redirect_proto udp 192.168.1.43 192.168.1.1"
 .Dl "ipfw nat 4 config redirect_addr 192.168.0.10,192.168.0.11,192.168.0.12" 
 .Dl "				         10.0.0.100"
 .Dl "ipfw nat 5 config redirect_port tcp"
 .Dl "			192.168.0.1:80,192.168.0.10:22,192.168.0.20:25 500"
 .Pp
 .Sh SEE ALSO
 .Xr cpp 1 ,
 .Xr m4 1 ,
 .Xr altq 4 ,
 .Xr divert 4 ,
 .Xr dummynet 4 ,
 .Xr if_bridge 4 ,
 .Xr ip 4 ,
 .Xr ipfirewall 4 ,
 .Xr ng_ipfw 4 ,
 .Xr protocols 5 ,
 .Xr services 5 ,
 .Xr init 8 ,
 .Xr kldload 8 ,
 .Xr reboot 8 ,
 .Xr sysctl 8 ,
 .Xr syslogd 8
 .Sh HISTORY
 The
 .Nm
 utility first appeared in
 .Fx 2.0 .
 .Nm dummynet
 was introduced in
 .Fx 2.2.8 .
 Stateful extensions were introduced in
 .Fx 4.0 .
 .Nm ipfw2
 was introduced in Summer 2002.
 .Sh AUTHORS
 .An Ugen J. S. Antsilevich ,
 .An Poul-Henning Kamp ,
 .An Alex Nash ,
 .An Archie Cobbs ,
 .An Luigi Rizzo .
 .Pp
 .An -nosplit
 API based upon code written by
 .An Daniel Boulet
 for BSDI.
 .Pp
 .An -nosplit
 In-kernel NAT support written by
 .An Paolo Pisati Aq piso@FreeBSD.org
 as part of a Summer of Code 2005 project.
 .Pp
 Work on
 .Nm dummynet
 traffic shaper supported by Akamba Corp.
 .Sh BUGS
 The syntax has grown over the years and sometimes it might be confusing.
 Unfortunately, backward compatibility prevents cleaning up mistakes
 made in the definition of the syntax.
 .Pp
 .Em !!! WARNING !!!
 .Pp
 Misconfiguring the firewall can put your computer in an unusable state,
 possibly shutting down network services and requiring console access to
 regain control of it.
 .Pp
 Incoming packet fragments diverted by
 .Cm divert
 are reassembled before delivery to the socket.
 The action used on those packets is the one from the
 rule which matches the first fragment of the packet.
 .Pp
 Packets diverted to userland, and then reinserted by a userland process
 may lose various packet attributes.
 The packet source interface name
 will be preserved if it is shorter than 8 bytes and the userland process
 saves and reuses the sockaddr_in
 (as does
 .Xr natd 8 ) ;
 otherwise, it may be lost.
 If a packet is reinserted in this manner, later rules may be incorrectly
 applied, making the order of
 .Cm divert
 rules in the rule sequence very important.
 .Pp
 Dummynet drops all packets with IPv6 link-local addresses.
 .Pp
 Rules using
 .Cm uid
 or
 .Cm gid
 may not behave as expected.
 In particular, incoming SYN packets may
 have no uid or gid associated with them since they do not yet belong
 to a TCP connection, and the uid/gid associated with a packet may not
 be as expected if the associated process calls
 .Xr setuid 2
 or similar system calls.
 .Pp
 Rule syntax is subject to the command line environment and some patterns
 may need to be escaped with the backslash character
 or quoted appropriately.
 .Pp
 Due to the architecture of
 .Xr libalias 3 ,
 ipfw nat is not compatible with the TCP segmentation offloading
 (TSO).
 Thus, to reliably nat your network traffic, please disable TSO
 on your NICs using
 .Xr ifconfig 8 .
 .Pp
 ICMP error messages are not implicitly matched by dynamic rules
 for the respective conversations.
 To avoid failures of network error detection and path MTU discovery,
 ICMP error messages may need to be allowed explicitly through static
 rules.
Index: head/sbin/ipfw/ipfw2.c
===================================================================
--- head/sbin/ipfw/ipfw2.c	(revision 178887)
+++ head/sbin/ipfw/ipfw2.c	(revision 178888)
@@ -1,6428 +1,6463 @@
 /*
  * Copyright (c) 2002-2003 Luigi Rizzo
  * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp
  * Copyright (c) 1994 Ugen J.S.Antsilevich
  *
  * Idea and grammar partially left from:
  * Copyright (c) 1993 Daniel Boulet
  *
  * Redistribution and use in source forms, with and without modification,
  * are permitted provided that this entire comment appears intact.
  *
  * Redistribution in binary form may occur without any restrictions.
  * Obviously, it would be nice if you gave credit where credit is due
  * but requiring it would be too onerous.
  *
  * This software is provided ``AS IS'' without any warranties of any kind.
  *
  * NEW command line interface for IP firewall facility
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
 #include <sys/wait.h>
 #include <sys/queue.h>
 
 #include <ctype.h>
 #include <err.h>
 #include <errno.h>
 #include <grp.h>
 #include <limits.h>
 #include <netdb.h>
 #include <pwd.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdarg.h>
 #include <string.h>
 #include <timeconv.h>	/* XXX do we need this ? */
 #include <unistd.h>
 #include <sysexits.h>
 #include <unistd.h>
 #include <fcntl.h>
 
 #define IPFW_INTERNAL	/* Access to protected structures in ip_fw.h. */
 
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/pfvar.h>
 #include <net/route.h> /* def. of struct route */
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/icmp6.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_dummynet.h>
 #include <netinet/tcp.h>
 #include <arpa/inet.h>
 #include <alias.h>
 
 int
 		do_value_as_ip,		/* show table value as IP */
 		do_resolv,		/* Would try to resolve all */
 		do_time,		/* Show time stamps */
 		do_quiet,		/* Be quiet in add and flush */
 		do_pipe,		/* this cmd refers to a pipe */
 	        do_nat, 		/* Nat configuration. */
 		do_sort,		/* field to sort results (0 = no) */
 		do_dynamic,		/* display dynamic rules */
 		do_expired,		/* display expired dynamic rules */
 		do_compact,		/* show rules in compact mode */
 		do_force,		/* do not ask for confirmation */
 		use_set,		/* work with specified set number */
 		show_sets,		/* display rule sets */
 		test_only,		/* only check syntax */
 		comment_only,		/* only print action and comment */
 		verbose;
 
 #define	IP_MASK_ALL	0xffffffff
 /*
  * the following macro exits with a usage error, printing msg, if we
  * run out of arguments.
  */
 #define NEED1(msg)      {if (!ac) errx(EX_USAGE, msg);}
 
 /*
  * Parse the current command-line word (*av, with ac words remaining in
  * the enclosing scope) as an unsigned decimal number and store it in arg.
  *
  * The keyword "tablearg" (compared with _substrcmp()) stores
  * IP_FW_TABLEARG instead of a number.  Otherwise the word must start with
  * a digit, be entirely numeric, lie within min..max and must not be equal
  * to IP_FW_TABLEARG.  Any violation terminates the program through errx();
  * tok and s_x are only used to look up the option name for the message.
  */
 #define GET_UINT_ARG(arg, min, max, tok, s_x) do {			\
 	if (!ac)							\
 		errx(EX_USAGE, "%s: missing argument", match_value(s_x, tok)); \
 	if (_substrcmp(*av, "tablearg") == 0) {				\
 		arg = IP_FW_TABLEARG;					\
 		break;							\
 	}								\
 									\
 	{								\
 	long val;							\
 	char *end;							\
 									\
 	/* strtol() only sets errno on failure; clear stale values. */	\
 	errno = 0;							\
 	val = strtol(*av, &end, 10);					\
 									\
 	/* isdigit() requires an unsigned char value (or EOF). */	\
 	if (!isdigit((unsigned char)**av) || *end != '\0' ||		\
 	    (val == 0 && errno == EINVAL))				\
 		errx(EX_DATAERR, "%s: invalid argument: %s",		\
 		    match_value(s_x, tok), *av);			\
 									\
 	if (errno == ERANGE || val < (min) || val > (max))		\
 		errx(EX_DATAERR, "%s: argument is out of range (%u..%u): %s", \
 		    match_value(s_x, tok), min, max, *av);		\
 									\
 	if (val == IP_FW_TABLEARG)					\
 		errx(EX_DATAERR, "%s: illegal argument value: %s",	\
 		    match_value(s_x, tok), *av);			\
 	arg = val;							\
 	}								\
 } while (0)
 
 /*
  * Print an optional prefix string followed by a rule argument: the word
  * "tablearg" when arg equals IP_FW_TABLEARG, otherwise arg as an unsigned
  * decimal number.  str may be NULL to print no prefix.
  * Both arguments are parenthesized so that compound expressions are
  * compared and cast as a whole.
  */
 #define PRINT_UINT_ARG(str, arg) do {					\
 	if ((str) != NULL)						\
 		printf("%s", (str));					\
 	if ((arg) == IP_FW_TABLEARG)					\
 		printf("tablearg");					\
 	else								\
 		printf("%u", (uint32_t)(arg));				\
 } while (0)
 
 /*
  * _s_x is a structure that stores string <-> token pairs, used in
  * various places in the parser. Entries are stored in arrays,
  * with an entry with s=NULL as terminator.
  * The search routines are match_token() and match_value().
  * Often, an element with x=0 contains an error string.
  *
  */
 struct _s_x {
 	char const *s;
 	int x;
 };
 
 static struct _s_x f_tcpflags[] = {
 	{ "syn", TH_SYN },
 	{ "fin", TH_FIN },
 	{ "ack", TH_ACK },
 	{ "psh", TH_PUSH },
 	{ "rst", TH_RST },
 	{ "urg", TH_URG },
 	{ "tcp flag", 0 },
 	{ NULL,	0 }
 };
 
 static struct _s_x f_tcpopts[] = {
 	{ "mss",	IP_FW_TCPOPT_MSS },
 	{ "maxseg",	IP_FW_TCPOPT_MSS },
 	{ "window",	IP_FW_TCPOPT_WINDOW },
 	{ "sack",	IP_FW_TCPOPT_SACK },
 	{ "ts",		IP_FW_TCPOPT_TS },
 	{ "timestamp",	IP_FW_TCPOPT_TS },
 	{ "cc",		IP_FW_TCPOPT_CC },
 	{ "tcp option",	0 },
 	{ NULL,	0 }
 };
 
 /*
  * IP options span the range 0 to 255 so we need to remap them
  * (though in fact only the low 5 bits are significant).
  */
 static struct _s_x f_ipopts[] = {
 	{ "ssrr",	IP_FW_IPOPT_SSRR},
 	{ "lsrr",	IP_FW_IPOPT_LSRR},
 	{ "rr",		IP_FW_IPOPT_RR},
 	{ "ts",		IP_FW_IPOPT_TS},
 	{ "ip option",	0 },
 	{ NULL,	0 }
 };
 
 static struct _s_x f_iptos[] = {
 	{ "lowdelay",	IPTOS_LOWDELAY},
 	{ "throughput",	IPTOS_THROUGHPUT},
 	{ "reliability", IPTOS_RELIABILITY},
 	{ "mincost",	IPTOS_MINCOST},
 	{ "congestion",	IPTOS_ECN_CE},
 	{ "ecntransport", IPTOS_ECN_ECT0},
 	{ "ip tos option", 0},
 	{ NULL,	0 }
 };
 
 static struct _s_x limit_masks[] = {
 	{"all",		DYN_SRC_ADDR|DYN_SRC_PORT|DYN_DST_ADDR|DYN_DST_PORT},
 	{"src-addr",	DYN_SRC_ADDR},
 	{"src-port",	DYN_SRC_PORT},
 	{"dst-addr",	DYN_DST_ADDR},
 	{"dst-port",	DYN_DST_PORT},
 	{NULL,		0}
 };
 
 /*
  * we use IPPROTO_ETHERTYPE as a fake protocol id to call the print routines
  * This is only used in this code.
  */
 #define IPPROTO_ETHERTYPE	0x1000
 static struct _s_x ether_types[] = {
     /*
      * Note, we cannot use "-:&/" in the names because they are field
      * separators in the type specifications. Also, we use s = NULL as
      * end-delimiter, because a type of 0 can be legal.
      */
 	{ "ip",		0x0800 },
 	{ "ipv4",	0x0800 },
 	{ "ipv6",	0x86dd },
 	{ "arp",	0x0806 },
 	{ "rarp",	0x8035 },
 	{ "vlan",	0x8100 },
 	{ "loop",	0x9000 },
 	{ "trail",	0x1000 },
 	{ "at",		0x809b },
 	{ "atalk",	0x809b },
 	{ "aarp",	0x80f3 },
 	{ "pppoe_disc",	0x8863 },
 	{ "pppoe_sess",	0x8864 },
 	{ "ipx_8022",	0x00E0 },
 	{ "ipx_8023",	0x0000 },
 	{ "ipx_ii",	0x8137 },
 	{ "ipx_snap",	0x8137 },
 	{ "ipx",	0x8137 },
 	{ "ns",		0x0600 },
 	{ NULL,		0 }
 };
 
 static void show_usage(void);
 
 enum tokens {
 	TOK_NULL=0,
 
 	TOK_OR,
 	TOK_NOT,
 	TOK_STARTBRACE,
 	TOK_ENDBRACE,
 
 	TOK_ACCEPT,
 	TOK_COUNT,
 	TOK_PIPE,
 	TOK_QUEUE,
 	TOK_DIVERT,
 	TOK_TEE,
 	TOK_NETGRAPH,
 	TOK_NGTEE,
 	TOK_FORWARD,
 	TOK_SKIPTO,
 	TOK_DENY,
 	TOK_REJECT,
 	TOK_RESET,
 	TOK_UNREACH,
 	TOK_CHECKSTATE,
 	TOK_NAT,
 
 	TOK_ALTQ,
 	TOK_LOG,
 	TOK_TAG,
 	TOK_UNTAG,
 
 	TOK_TAGGED,
 	TOK_UID,
 	TOK_GID,
 	TOK_JAIL,
 	TOK_IN,
 	TOK_LIMIT,
 	TOK_KEEPSTATE,
 	TOK_LAYER2,
 	TOK_OUT,
 	TOK_DIVERTED,
 	TOK_DIVERTEDLOOPBACK,
 	TOK_DIVERTEDOUTPUT,
 	TOK_XMIT,
 	TOK_RECV,
 	TOK_VIA,
 	TOK_FRAG,
 	TOK_IPOPTS,
 	TOK_IPLEN,
 	TOK_IPID,
 	TOK_IPPRECEDENCE,
 	TOK_IPTOS,
 	TOK_IPTTL,
 	TOK_IPVER,
 	TOK_ESTAB,
 	TOK_SETUP,
 	TOK_TCPDATALEN,
 	TOK_TCPFLAGS,
 	TOK_TCPOPTS,
 	TOK_TCPSEQ,
 	TOK_TCPACK,
 	TOK_TCPWIN,
 	TOK_ICMPTYPES,
 	TOK_MAC,
 	TOK_MACTYPE,
 	TOK_VERREVPATH,
 	TOK_VERSRCREACH,
 	TOK_ANTISPOOF,
 	TOK_IPSEC,
 	TOK_COMMENT,
 
 	TOK_PLR,
 	TOK_NOERROR,
 	TOK_BUCKETS,
 	TOK_DSTIP,
 	TOK_SRCIP,
 	TOK_DSTPORT,
 	TOK_SRCPORT,
 	TOK_ALL,
 	TOK_MASK,
 	TOK_BW,
 	TOK_DELAY,
 	TOK_RED,
 	TOK_GRED,
 	TOK_DROPTAIL,
 	TOK_PROTO,
 	TOK_WEIGHT,
 	TOK_IP,
 	TOK_IF,
  	TOK_ALOG,
  	TOK_DENY_INC,
  	TOK_SAME_PORTS,
  	TOK_UNREG_ONLY,
  	TOK_RESET_ADDR,
  	TOK_ALIAS_REV,
  	TOK_PROXY_ONLY,
 	TOK_REDIR_ADDR,
 	TOK_REDIR_PORT,
 	TOK_REDIR_PROTO,	
 
 	TOK_IPV6,
 	TOK_FLOWID,
 	TOK_ICMP6TYPES,
 	TOK_EXT6HDR,
 	TOK_DSTIP6,
 	TOK_SRCIP6,
 
 	TOK_IPV4,
 	TOK_UNREACH6,
 	TOK_RESET6,
+
+	TOK_FIB,
+	TOK_SETFIB,
 };
 
 struct _s_x dummynet_params[] = {
 	{ "plr",		TOK_PLR },
 	{ "noerror",		TOK_NOERROR },
 	{ "buckets",		TOK_BUCKETS },
 	{ "dst-ip",		TOK_DSTIP },
 	{ "src-ip",		TOK_SRCIP },
 	{ "dst-port",		TOK_DSTPORT },
 	{ "src-port",		TOK_SRCPORT },
 	{ "proto",		TOK_PROTO },
 	{ "weight",		TOK_WEIGHT },
 	{ "all",		TOK_ALL },
 	{ "mask",		TOK_MASK },
 	{ "droptail",		TOK_DROPTAIL },
 	{ "red",		TOK_RED },
 	{ "gred",		TOK_GRED },
 	{ "bw",			TOK_BW },
 	{ "bandwidth",		TOK_BW },
 	{ "delay",		TOK_DELAY },
 	{ "pipe",		TOK_PIPE },
 	{ "queue",		TOK_QUEUE },
 	{ "flow-id",		TOK_FLOWID},
 	{ "dst-ipv6",		TOK_DSTIP6},
 	{ "dst-ip6",		TOK_DSTIP6},
 	{ "src-ipv6",		TOK_SRCIP6},
 	{ "src-ip6",		TOK_SRCIP6},
 	{ "dummynet-params",	TOK_NULL },
 	{ NULL, 0 }	/* terminator */
 };
 
 struct _s_x nat_params[] = {
 	{ "ip",	                TOK_IP },
 	{ "if",	                TOK_IF },
  	{ "log",                TOK_ALOG },
  	{ "deny_in",	        TOK_DENY_INC },
  	{ "same_ports",	        TOK_SAME_PORTS },
  	{ "unreg_only",	        TOK_UNREG_ONLY },
  	{ "reset",	        TOK_RESET_ADDR },
  	{ "reverse",	        TOK_ALIAS_REV },	
  	{ "proxy_only",	        TOK_PROXY_ONLY },
 	{ "redirect_addr",	TOK_REDIR_ADDR },
 	{ "redirect_port",	TOK_REDIR_PORT },
 	{ "redirect_proto",	TOK_REDIR_PROTO },
  	{ NULL, 0 }	/* terminator */
 };
 
 struct _s_x rule_actions[] = {
 	{ "accept",		TOK_ACCEPT },
 	{ "pass",		TOK_ACCEPT },
 	{ "allow",		TOK_ACCEPT },
 	{ "permit",		TOK_ACCEPT },
 	{ "count",		TOK_COUNT },
 	{ "pipe",		TOK_PIPE },
 	{ "queue",		TOK_QUEUE },
 	{ "divert",		TOK_DIVERT },
 	{ "tee",		TOK_TEE },
 	{ "netgraph",		TOK_NETGRAPH },
 	{ "ngtee",		TOK_NGTEE },
 	{ "fwd",		TOK_FORWARD },
 	{ "forward",		TOK_FORWARD },
 	{ "skipto",		TOK_SKIPTO },
 	{ "deny",		TOK_DENY },
 	{ "drop",		TOK_DENY },
 	{ "reject",		TOK_REJECT },
 	{ "reset6",		TOK_RESET6 },
 	{ "reset",		TOK_RESET },
 	{ "unreach6",		TOK_UNREACH6 },
 	{ "unreach",		TOK_UNREACH },
 	{ "check-state",	TOK_CHECKSTATE },
 	{ "//",			TOK_COMMENT },
 	{ "nat",                TOK_NAT },
+	{ "setfib",		TOK_SETFIB },
 	{ NULL, 0 }	/* terminator */
 };
 
 struct _s_x rule_action_params[] = {
 	{ "altq",		TOK_ALTQ },
 	{ "log",		TOK_LOG },
 	{ "tag",		TOK_TAG },
 	{ "untag",		TOK_UNTAG },
 	{ NULL, 0 }	/* terminator */
 };
 
 struct _s_x rule_options[] = {
 	{ "tagged",		TOK_TAGGED },
 	{ "uid",		TOK_UID },
 	{ "gid",		TOK_GID },
 	{ "jail",		TOK_JAIL },
 	{ "in",			TOK_IN },
 	{ "limit",		TOK_LIMIT },
 	{ "keep-state",		TOK_KEEPSTATE },
 	{ "bridged",		TOK_LAYER2 },
 	{ "layer2",		TOK_LAYER2 },
 	{ "out",		TOK_OUT },
 	{ "diverted",		TOK_DIVERTED },
 	{ "diverted-loopback",	TOK_DIVERTEDLOOPBACK },
 	{ "diverted-output",	TOK_DIVERTEDOUTPUT },
 	{ "xmit",		TOK_XMIT },
 	{ "recv",		TOK_RECV },
 	{ "via",		TOK_VIA },
 	{ "fragment",		TOK_FRAG },
 	{ "frag",		TOK_FRAG },
+	{ "fib",		TOK_FIB },
 	{ "ipoptions",		TOK_IPOPTS },
 	{ "ipopts",		TOK_IPOPTS },
 	{ "iplen",		TOK_IPLEN },
 	{ "ipid",		TOK_IPID },
 	{ "ipprecedence",	TOK_IPPRECEDENCE },
 	{ "iptos",		TOK_IPTOS },
 	{ "ipttl",		TOK_IPTTL },
 	{ "ipversion",		TOK_IPVER },
 	{ "ipver",		TOK_IPVER },
 	{ "estab",		TOK_ESTAB },
 	{ "established",	TOK_ESTAB },
 	{ "setup",		TOK_SETUP },
 	{ "tcpdatalen",		TOK_TCPDATALEN },
 	{ "tcpflags",		TOK_TCPFLAGS },
 	{ "tcpflgs",		TOK_TCPFLAGS },
 	{ "tcpoptions",		TOK_TCPOPTS },
 	{ "tcpopts",		TOK_TCPOPTS },
 	{ "tcpseq",		TOK_TCPSEQ },
 	{ "tcpack",		TOK_TCPACK },
 	{ "tcpwin",		TOK_TCPWIN },
 	{ "icmptype",		TOK_ICMPTYPES },
 	{ "icmptypes",		TOK_ICMPTYPES },
 	{ "dst-ip",		TOK_DSTIP },
 	{ "src-ip",		TOK_SRCIP },
 	{ "dst-port",		TOK_DSTPORT },
 	{ "src-port",		TOK_SRCPORT },
 	{ "proto",		TOK_PROTO },
 	{ "MAC",		TOK_MAC },
 	{ "mac",		TOK_MAC },
 	{ "mac-type",		TOK_MACTYPE },
 	{ "verrevpath",		TOK_VERREVPATH },
 	{ "versrcreach",	TOK_VERSRCREACH },
 	{ "antispoof",		TOK_ANTISPOOF },
 	{ "ipsec",		TOK_IPSEC },
 	{ "icmp6type",		TOK_ICMP6TYPES },
 	{ "icmp6types",		TOK_ICMP6TYPES },
 	{ "ext6hdr",		TOK_EXT6HDR},
 	{ "flow-id",		TOK_FLOWID},
 	{ "ipv6",		TOK_IPV6},
 	{ "ip6",		TOK_IPV6},
 	{ "ipv4",		TOK_IPV4},
 	{ "ip4",		TOK_IPV4},
 	{ "dst-ipv6",		TOK_DSTIP6},
 	{ "dst-ip6",		TOK_DSTIP6},
 	{ "src-ipv6",		TOK_SRCIP6},
 	{ "src-ip6",		TOK_SRCIP6},
 	{ "//",			TOK_COMMENT },
 
 	{ "not",		TOK_NOT },		/* pseudo option */
 	{ "!", /* escape ? */	TOK_NOT },		/* pseudo option */
 	{ "or",			TOK_OR },		/* pseudo option */
 	{ "|", /* escape */	TOK_OR },		/* pseudo option */
 	{ "{",			TOK_STARTBRACE },	/* pseudo option */
 	{ "(",			TOK_STARTBRACE },	/* pseudo option */
 	{ "}",			TOK_ENDBRACE },		/* pseudo option */
 	{ ")",			TOK_ENDBRACE },		/* pseudo option */
 	{ NULL, 0 }	/* terminator */
 };
 
 #define	TABLEARG	"tablearg"
 
 /*
  * Fetch the 64-bit value stored at pll by copying its bytes into a local
  * instead of dereferencing the pointer directly (presumably because pll
  * may not be suitably aligned -- confirm against callers).
  */
 static __inline uint64_t
 align_uint64(uint64_t *pll)
 {
 	uint64_t value;
 
 	memcpy(&value, pll, sizeof(value));
 	return (value);
 }
 
 /*
  * Conditionally runs the command: issue one socket option request,
  * unless test_only is set, in which case nothing is done and 0 is returned.
  *
  * The raw socket is created on the first call and kept in a static for
  * later calls; the program exits if it cannot be created.
  * For the options listed below the request is made with getsockopt() and
  * optlen is cast to a (socklen_t *); every other option is sent with
  * setsockopt() and optlen is passed as the length itself.
  * Returns whatever getsockopt()/setsockopt() returned.
  */
 static int
 do_cmd(int optname, void *optval, uintptr_t optlen)
 {
 	static int s = -1;	/* the socket */
 	int via_getsockopt;
 
 	if (test_only)
 		return 0;
 
 	if (s == -1)
 		s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 	if (s < 0)
 		err(EX_UNAVAILABLE, "socket");
 
 	via_getsockopt =
 	    (optname == IP_FW_GET || optname == IP_DUMMYNET_GET ||
 	    optname == IP_FW_ADD || optname == IP_FW_TABLE_LIST ||
 	    optname == IP_FW_TABLE_GETSIZE ||
 	    optname == IP_FW_NAT_GET_CONFIG ||
 	    optname == IP_FW_NAT_GET_LOG);
 
 	if (!via_getsockopt)
 		return setsockopt(s, IPPROTO_IP, optname, optval, optlen);
 	return getsockopt(s, IPPROTO_IP, optname, optval,
 	    (socklen_t *)optlen);
 }
 
 /**
  * match_token takes a NULL-terminated table and a string, and returns the
  * value of the first entry whose name is exactly equal to the string.
  * Returns -1 if no entry matches or if the string is empty.
  */
 static int
 match_token(struct _s_x *table, char *string)
 {
 	struct _s_x *entry;
 
 	/* an empty string never matches anything */
 	if (*string == '\0')
 		return -1;
 
 	for (entry = table; entry->s != NULL; entry++) {
 		if (strcmp(entry->s, string) == 0)
 			return entry->x;
 	}
 	return -1;
 }
 
 /**
  * match_value takes a NULL-terminated table and a value, and returns the
  * name of the first entry carrying that value (NULL if there is none).
  */
 static char const *
 match_value(struct _s_x *p, int value)
 {
 	while (p->s != NULL) {
 		if (p->x == value)
 			return p->s;
 		p++;
 	}
 	return NULL;
 }
 
 /*
  * _substrcmp takes two strings and returns 0 if str1 is equal to str2 or
  * is a leading part (prefix) of str2, and 1 otherwise.  When str1 matches
  * only as a prefix, i.e. the lengths differ, a deprecation warning is
  * printed to stderr.
  *
  * This function will be removed in the future through the usual
  * deprecation process.
  */
 static int
 _substrcmp(const char *str1, const char* str2)
 {
 	const size_t len1 = strlen(str1);
 
 	if (strncmp(str1, str2, len1) == 0) {
 		/* same leading bytes: different lengths means prefix only */
 		if (len1 != strlen(str2))
 			warnx("DEPRECATED: '%s' matched '%s' as a sub-string",
 			    str1, str2);
 		return 0;
 	}
 	return 1;
 }
 
 /*
  * _substrcmp2 takes three strings and returns 1 if str1 does not begin
  * with str2, and 0 if it does (str1 equal to str2, or str2 a leading part
  * of str1).  In the matching case a warning is printed to stderr when
  * str1 is not exactly equal to str3.
  *
  * This function exists to warn about the bizarre construction
  * strncmp(str, "by", 2) which is used to allow people to use a shortcut
  * for "bytes".  The problem is that in addition to accepting "by",
  * "byt", "byte", and "bytes", it also accepts "by_rabid_dogs" and any
  * other string beginning with "by".
  *
  * This function will be removed in the future through the usual
  * deprecation process.
  */
 static int
 _substrcmp2(const char *str1, const char* str2, const char* str3)
 {
 
 	if (strncmp(str1, str2, strlen(str2)) == 0) {
 		/* accepted; complain unless it is the canonical spelling */
 		if (strcmp(str1, str3) != 0)
 			warnx("DEPRECATED: '%s' matched '%s'",
 			    str1, str3);
 		return 0;
 	}
 	return 1;
 }
 
 /*
  * Prints one port, symbolic or numeric.
  *
  * For proto == IPPROTO_ETHERTYPE the value is looked up in ether_types
  * and printed as 0x%04x when no name is found; for any other proto the
  * service database is consulted and the value is printed in decimal when
  * no name is found.  No lookups are done unless do_resolv is set.
  */
 static void
 print_port(int proto, uint16_t port)
 {
 	struct servent *se = NULL;
 
 	if (proto == IPPROTO_ETHERTYPE) {
 		char const *name = NULL;
 
 		if (do_resolv)
 			name = match_value(ether_types, port);
 		if (name != NULL)
 			printf("%s", name);
 		else
 			printf("0x%04x", port);
 		return;
 	}
 
 	if (do_resolv) {
 		struct protoent *pe = getprotobynumber(proto);
 
 		se = getservbyport(htons(port), pe ? pe->p_name : NULL);
 	}
 	if (se != NULL)
 		printf("%s", se->s_name);
 	else
 		printf("%d", port);
 }
 
 /*
  * Option names for the instructions that carry a list of 16-bit values,
  * keyed by opcode.  print_newports() looks the opcode up here to obtain
  * the keyword it prints in front of the list.
  */
 struct _s_x _port_name[] = {
 	{"dst-port",	O_IP_DSTPORT},
 	{"src-port",	O_IP_SRCPORT},
 	{"ipid",	O_IPID},
 	{"iplen",	O_IPLEN},
 	{"ipttl",	O_IPTTL},
 	{"mac-type",	O_MAC_TYPE},
 	{"tcpdatalen",	O_TCPDATALEN},
 	{"tagged",	O_TAGGED},
 	{NULL,		0}
 };
 
 /*
  * Print the list of 16-bit values/ranges stored in cmd->ports.
  *
  * If opcode is non-zero the list is preceded by the keyword that
  * _port_name associates with it ("???" if the opcode is not in the table).
  * Each entry is a pair of values; a pair whose two values are equal is
  * printed as a single value, otherwise as "low-high".  Individual values
  * are printed through print_port() using proto.
  * XXX todo: add support for mask.
  */
 static void
 print_newports(ipfw_insn_u16 *cmd, int proto, int opcode)
 {
 	uint16_t *p = cmd->ports;
 	int i;
 	char const *sep;
 
 	if (opcode != 0) {
 		sep = match_value(_port_name, opcode);
 		if (sep == NULL)
 			sep = "???";
 		printf (" %s", sep);
 	}
 	sep = " ";
 	/* the instruction length minus the header word is the pair count */
 	for (i = F_LEN((ipfw_insn *)cmd) - 1; i > 0; i--, p += 2) {
 		/* never hand a variable to printf as the format string */
 		printf("%s", sep);
 		print_port(proto, p[0]);
 		if (p[0] != p[1]) {
 			printf("-");
 			print_port(proto, p[1]);
 		}
 		sep = ",";
 	}
 }
 
 /*
  * Like strtol, but also translates service names into port numbers
  * for some protocols.
  *
  * If s starts with a digit it is converted with strtol(s, end, base).
  * Otherwise the leading run of alphanumeric characters is taken as a
  * name ('\\' escapes the next character and is itself dropped) and
  * looked up:
  *	proto == IPPROTO_ETHERTYPE looks up the internal ether_types table;
  *	any other proto uses getservbyname(), restricted to the protocol
  *	name getprotobynumber(proto) returns; when proto is 0 or the
  *	protocol is unknown the lookup is not restricted.
  * Returns the value found and sets *end past the consumed text.
  * Returns 0 with *end == s in case the parameter is not found (or the
  * temporary buffer cannot be allocated).
  */
 static int
 strtoport(char *s, char **end, int base, int proto)
 {
 	char *p, *buf;
 	char *s1;
 	int i;
 
 	*end = s;		/* default - not found */
 	if (*s == '\0')
 		return 0;	/* not found */
 
 	/*
 	 * The <ctype.h> classifiers require a value representable as
 	 * unsigned char; a plain char with the high bit set would be
 	 * negative where char is signed.
 	 */
 	if (isdigit((unsigned char)*s))
 		return strtol(s, end, base);
 
 	/*
 	 * find separator. '\\' escapes the next char.
 	 */
 	for (s1 = s; *s1 && (isalnum((unsigned char)*s1) || *s1 == '\\') ; s1++)
 		if (*s1 == '\\' && s1[1] != '\0')
 			s1++;
 
 	buf = malloc(s1 - s + 1);
 	if (buf == NULL)
 		return 0;
 
 	/*
 	 * copy into a buffer skipping backslashes
 	 */
 	for (p = s, i = 0; p != s1 ; p++)
 		if (*p != '\\')
 			buf[i++] = *p;
 	buf[i++] = '\0';
 
 	if (proto == IPPROTO_ETHERTYPE) {
 		i = match_token(ether_types, buf);
 		free(buf);
 		if (i != -1) {	/* found */
 			*end = s1;
 			return i;
 		}
 	} else {
 		struct protoent *pe = NULL;
 		struct servent *se;
 
 		if (proto != 0)
 			pe = getprotobynumber(proto);
 		setservent(1);
 		se = getservbyname(buf, pe ? pe->p_name : NULL);
 		free(buf);
 		if (se != NULL) {
 			*end = s1;
 			return ntohs(se->s_port);
 		}
 	}
 	return 0;	/* not found */
 }
 
 /*
  * Map between current altq queue id numbers and names.
  *
  * altq_fetched is set by altq_fetch() the first time it runs, whether or
  * not the queue list could be read, so the list is loaded at most once.
  * altq_entries is the list of queues altq_fetch() obtained.
  */
 static int altq_fetched = 0;
 static TAILQ_HEAD(, pf_altq) altq_entries = 
 	TAILQ_HEAD_INITIALIZER(altq_entries);
 
 /*
  * Start (enabled != 0) or stop (enabled == 0) altq through the pf(4)
  * control device.  EEXIST when starting and ENOENT when stopping are
  * not treated as errors; any other failure terminates the program.
  */
 static void
 altq_set_enabled(int enabled)
 {
 	int fd, rc;
 
 	fd = open("/dev/pf", O_RDWR);
 	if (fd == -1)
 		err(EX_UNAVAILABLE,
 		    "altq support opening pf(4) control device");
 
 	if (enabled) {
 		rc = ioctl(fd, DIOCSTARTALTQ);
 		if (rc != 0 && errno != EEXIST)
 			err(EX_UNAVAILABLE, "enabling altq");
 	} else {
 		rc = ioctl(fd, DIOCSTOPALTQ);
 		if (rc != 0 && errno != ENOENT)
 			err(EX_UNAVAILABLE, "disabling altq");
 	}
 	close(fd);
 }
 
 /*
  * Load the list of altq queues from the pf(4) control device into
  * altq_entries.  Only the first call does any work; later calls return
  * immediately, even if the first attempt failed.
  *
  * Failures to open the device or to query it are reported with warn()
  * and leave the list with whatever was read so far.  An EBUSY while
  * reading individual queues ends the scan without a warning.  Entries
  * whose qid is 0 are not stored.
  */
 static void
 altq_fetch()
 {
 	struct pfioc_altq pfioc;
 	struct pf_altq *entry;
 	int pffd, total;
 
 	if (altq_fetched)
 		return;
 	altq_fetched = 1;
 
 	pffd = open("/dev/pf", O_RDONLY);
 	if (pffd == -1) {
 		warn("altq support opening pf(4) control device");
 		return;
 	}
 
 	bzero(&pfioc, sizeof(pfioc));
 	if (ioctl(pffd, DIOCGETALTQS, &pfioc) != 0) {
 		warn("altq support getting queue list");
 		goto done;
 	}
 
 	total = pfioc.nr;
 	pfioc.nr = 0;
 	while (pfioc.nr < total) {
 		if (ioctl(pffd, DIOCGETALTQ, &pfioc) != 0) {
 			if (errno != EBUSY)
 				warn("altq support getting queue list");
 			break;
 		}
 		if (pfioc.altq.qid != 0) {
 			entry = malloc(sizeof(*entry));
 			if (entry == NULL)
 				err(EX_OSERR, "malloc");
 			*entry = pfioc.altq;
 			TAILQ_INSERT_TAIL(&altq_entries, entry, entries);
 		}
 		pfioc.nr++;
 	}
 done:
 	close(pffd);
 }
 
 /*
  * Return the queue id of the altq queue called name, loading the queue
  * list first if necessary.  Terminates the program with EX_DATAERR if no
  * queue has that name.
  */
 static u_int32_t
 altq_name_to_qid(const char *name)
 {
 	struct pf_altq *entry;
 
 	altq_fetch();
 	TAILQ_FOREACH(entry, &altq_entries, entries) {
 		if (strcmp(name, entry->qname) == 0)
 			return entry->qid;
 	}
 	errx(EX_DATAERR, "altq has no queue named `%s'", name);
 }
 
 /*
  * Return the name of the altq queue whose id is qid, loading the queue
  * list first if necessary, or NULL if no queue has that id.
  */
 static const char *
 altq_qid_to_name(u_int32_t qid)
 {
 	struct pf_altq *entry;
 
 	altq_fetch();
 	TAILQ_FOREACH(entry, &altq_entries, entries) {
 		if (entry->qid == qid)
 			return entry->qname;
 	}
 	return NULL;
 }
 
 /*
  * Store in *qid the id of the altq queue named av.  The lookup is done
  * by altq_name_to_qid(), which does not return for an unknown name.
  */
 static void
 fill_altq_qid(u_int32_t *qid, const char *av)
 {
 	const u_int32_t id = altq_name_to_qid(av);
 
 	*qid = id;
 }
 
 /*
  * Fill the body of the command with the list of port ranges.
  *
  * av is a comma separated list whose items are either a single value or
  * a "low-high" range; each value is parsed with strtoport() using proto.
  * Every item is stored as a pair of 16-bit values in cmd->ports (a single
  * value is stored twice).
  *
  * Returns the number of items stored, or 0 if av is empty or malformed.
  * On success the instruction length (number of items + 1) is or-ed into
  * cmd->o.len.  Terminates the program if the list would not fit in the
  * instruction length field.
  */
 static int
 fill_newports(ipfw_insn_u16 *cmd, char *av, int proto)
 {
 	uint16_t a, b, *p = cmd->ports;
 	int i = 0;
 	char *s = av;
 
 	while (*s) {
 		a = strtoport(av, &s, 0, proto);
 		if (s == av) 			/* empty or invalid argument */
 			return (0);
 
 		switch (*s) {
 		case '-':			/* a range */
 			av = s + 1;
 			b = strtoport(av, &s, 0, proto);
 			/* Reject expressions like '1-abc' or '1-2-3'. */
 			if (s == av || (*s != ',' && *s != '\0'))
 				return (0);
 			break;
 		case ',':			/* comma separated list */
 		case '\0':
 			b = a;
 			break;
 		default:
 			warnx("port list: invalid separator <%c> in <%s>",
 				*s, av);
 			return (0);
 		}
 
 		/*
 		 * Storing this item makes i + 1 items, i.e. a length of
 		 * i + 2.  Check before writing so that an over-long list
 		 * is refused instead of being written first and rejected
 		 * afterwards.  errx() supplies the trailing newline.
 		 */
 		if (i + 2 > F_LEN_MASK)
 			errx(EX_DATAERR, "too many ports/ranges");
 		p[0] = a;
 		p[1] = b;
 
 		i++;
 		p += 2;
 		av = s + 1;
 	}
 	if (i > 0)
 		cmd->o.len |= i + 1;	/* leave F_NOT and F_OR untouched */
 	return (i);
 }
 
 /*
  * Symbolic names for ICMP unreachable codes.  Used by fill_reject_code()
  * to translate a name into a code and by print_reject_code() for the
  * reverse mapping.
  */
 static struct _s_x icmpcodes[] = {
       { "net",			ICMP_UNREACH_NET },
       { "host",			ICMP_UNREACH_HOST },
       { "protocol",		ICMP_UNREACH_PROTOCOL },
       { "port",			ICMP_UNREACH_PORT },
       { "needfrag",		ICMP_UNREACH_NEEDFRAG },
       { "srcfail",		ICMP_UNREACH_SRCFAIL },
       { "net-unknown",		ICMP_UNREACH_NET_UNKNOWN },
       { "host-unknown",		ICMP_UNREACH_HOST_UNKNOWN },
       { "isolated",		ICMP_UNREACH_ISOLATED },
       { "net-prohib",		ICMP_UNREACH_NET_PROHIB },
       { "host-prohib",		ICMP_UNREACH_HOST_PROHIB },
       { "tosnet",		ICMP_UNREACH_TOSNET },
       { "toshost",		ICMP_UNREACH_TOSHOST },
       { "filter-prohib",	ICMP_UNREACH_FILTER_PROHIB },
       { "host-precedence",	ICMP_UNREACH_HOST_PRECEDENCE },
       { "precedence-cutoff",	ICMP_UNREACH_PRECEDENCE_CUTOFF },
       { NULL, 0 }
 };
 
 /*
  * Store in *codep the ICMP unreachable code described by str.
  *
  * str is accepted as a number when it parses completely with strtoul()
  * (base auto-detected) and the result is below 0x100; otherwise it is
  * looked up by name in icmpcodes.  Terminates the program if neither
  * yields a non-negative code.
  */
 static void
 fill_reject_code(u_short *codep, char *str)
 {
 	char *end;
 	int code;
 	int numeric;
 
 	code = strtoul(str, &end, 0);
 	numeric = (end != str && *end == '\0' && code < 0x100);
 	if (!numeric)
 		code = match_token(icmpcodes, str);
 	if (code < 0)
 		errx(EX_DATAERR, "unknown ICMP unreachable code ``%s''", str);
 	*codep = code;
 }
 
 /*
  * Print "unreach " followed by the name icmpcodes gives for code, or by
  * the code in decimal when it has no name.
  */
 static void
 print_reject_code(uint16_t code)
 {
 	char const *name = match_value(icmpcodes, code);
 
 	if (name == NULL) {
 		printf("unreach %u", code);
 		return;
 	}
 	printf("unreach %s", name);
 }
 
 /*
  * Symbolic names for ICMPv6 destination unreachable codes.  Used by
  * fill_unreach6_code() to translate a name into a code and by
  * print_unreach6_code() for the reverse mapping.
  */
 static struct _s_x icmp6codes[] = {
       { "no-route",		ICMP6_DST_UNREACH_NOROUTE },
       { "admin-prohib",		ICMP6_DST_UNREACH_ADMIN },
       { "address",		ICMP6_DST_UNREACH_ADDR },
       { "port",			ICMP6_DST_UNREACH_NOPORT },
       { NULL, 0 }
 };
 
 /*
  * Store in *codep the ICMPv6 unreachable code described by str.
  *
  * str is accepted as a number when it parses completely with strtoul()
  * (base auto-detected) and the result is below 0x100; otherwise it is
  * looked up by name in icmp6codes.  Terminates the program if neither
  * yields a non-negative code.
  */
 static void
 fill_unreach6_code(u_short *codep, char *str)
 {
 	char *end;
 	int code;
 	int numeric;
 
 	code = strtoul(str, &end, 0);
 	numeric = (end != str && *end == '\0' && code < 0x100);
 	if (!numeric)
 		code = match_token(icmp6codes, str);
 	if (code < 0)
 		errx(EX_DATAERR, "unknown ICMPv6 unreachable code ``%s''", str);
 	*codep = code;
 }
 
 /*
  * Print "unreach6 " followed by the name icmp6codes gives for code, or by
  * the code in decimal when it has no name.
  */
 static void
 print_unreach6_code(uint16_t code)
 {
 	char const *name = match_value(icmp6codes, code);
 
 	if (name == NULL) {
 		printf("unreach6 %u", code);
 		return;
 	}
 	printf("unreach6 %s", name);
 }
 
 /*
  * Returns the number of bits set (from left) in a contiguous bitmask,
  * or -1 if the mask is not contiguous, i.e. some bit is set after the
  * first clear one.
  * XXX this needs a proper fix.
  * The mask is read byte by byte, so it is effectively treated as being
  * in big-endian (network) format regardless of the host byte order.
  *
  * First bit is bit 7 of the first byte -- note, for MAC addresses,
  * the first bit on the wire is bit 0 of the first byte.
  * len is the max length in bits.
  */
 static int
 contigmask(uint8_t *p, int len)
 {
 	int ones = 0;
 	int pos;
 
 	/* count the leading run of set bits */
 	while (ones < len && (p[ones / 8] & (0x80 >> (ones % 8))) != 0)
 		ones++;
 
 	/* bit 'ones' is clear (or past the end): nothing after it may be set */
 	for (pos = ones + 1; pos < len; pos++) {
 		if ((p[pos / 8] & (0x80 >> (pos % 8))) != 0)
 			return -1; /* mask not contiguous */
 	}
 	return ones;
 }
 
 /*
  * Print the flags set/clear in the two bitmasks packed in cmd->arg1:
  * the low byte holds the flags that must be set, the high byte those
  * that must be clear (printed with a leading '!').  Flag names come from
  * list, which ends at the first entry whose value is 0.
  * There is a specialized check for f_tcpflags: SYN set with ACK clear is
  * printed as " setup".
  */
 static void
 print_flags(char const *name, ipfw_insn *cmd, struct _s_x *list)
 {
 	struct _s_x *entry;
 	char const *comma = "";
 	uint8_t set = cmd->arg1 & 0xff;
 	uint8_t clear = (cmd->arg1 >> 8) & 0xff;
 
 	if (list == f_tcpflags && set == TH_SYN && clear == TH_ACK) {
 		printf(" setup");
 		return;
 	}
 
 	printf(" %s ", name);
 	for (entry = list; entry->x != 0; entry++) {
 		if (set & entry->x) {
 			set &= ~entry->x;
 			printf("%s%s", comma, entry->s);
 			comma = ",";
 		}
 		if (clear & entry->x) {
 			clear &= ~entry->x;
 			printf("%s!%s", comma, entry->s);
 			comma = ",";
 		}
 	}
 }
 
 /*
  * Print the ip address contained in a command.
  *
  * Output starts with " not" if F_NOT is set in the instruction, followed
  * by the prefix s and a space.  Then, depending on the opcode:
  *   - O_IP_{SRC,DST}_ME:     "me";
  *   - O_IP_{SRC,DST}_LOOKUP: "table(N)" or "table(N,value)";
  *   - O_IP_{SRC,DST}_SET:    "base/masklen{a,b,c-d,...}";
  *   - anything else:         a comma separated list of host names,
  *     "any", or addresses with an optional "/len" or ":mask" suffix.
  * Host names are only looked up when do_resolv is set and the entry is
  * a full 32-bit match.
  */
 static void
 print_ip(ipfw_insn_ip *cmd, char const *s)
 {
 	struct hostent *he;
 	int len = F_LEN((ipfw_insn *)cmd);
 	uint32_t *a = ((ipfw_insn_u32 *)cmd)->d;
 
 	printf("%s%s ", cmd->o.len & F_NOT ? " not": "", s);
 
 	if (cmd->o.opcode == O_IP_SRC_ME || cmd->o.opcode == O_IP_DST_ME) {
 		printf("me");
 		return;
 	}
 	if (cmd->o.opcode == O_IP_SRC_LOOKUP ||
 	    cmd->o.opcode == O_IP_DST_LOOKUP) {
 		printf("table(%u", ((ipfw_insn *)cmd)->arg1);
 		if (len == F_INSN_SIZE(ipfw_insn_u32))
 			printf(",%u", *a);
 		printf(")");
 		return;
 	}
 	if (cmd->o.opcode == O_IP_SRC_SET || cmd->o.opcode == O_IP_DST_SET) {
 		uint32_t x, *map = (uint32_t *)&(cmd->mask);
 		int i, j;
 		char comma = '{';
 
 		x = cmd->o.arg1 - 1;
 		x = htonl( ~x );
 		/* byte-swap the address for printing, then swap it back */
 		cmd->addr.s_addr = htonl(cmd->addr.s_addr);
 		printf("%s/%d", inet_ntoa(cmd->addr),
 			contigmask((uint8_t *)&x, 32));
 		x = cmd->addr.s_addr = htonl(cmd->addr.s_addr);
 		x &= 0xff; /* base */
 		/*
 		 * Print bits and ranges.
 		 * Locate first bit set (i), then locate first bit unset (j).
 		 * If we have 3+ consecutive bits set, then print them as a
 		 * range, otherwise only print the initial bit and rescan.
 		 */
 		for (i=0; i < cmd->o.arg1; i++)
 			if (map[i/32] & (1U<<(i & 31))) {
 				for (j=i+1; j < cmd->o.arg1; j++)
 					if (!(map[ j/32] & (1U<<(j & 31))))
 						break;
 				printf("%c%d", comma, i+x);
 				if (j>i+2) { /* range has at least 3 elements */
 					printf("-%d", j-1+x);
 					i = j-1;
 				}
 				comma = ',';
 			}
 		printf("}");
 		return;
 	}
 	/*
 	 * len == 2 indicates a single IP, whereas lists of 1 or more
 	 * addr/mask pairs have len = (2n+1). We convert len to n so we
 	 * use that to count the number of entries.
 	 */
 	for (len = len / 2; len > 0; len--, a += 2) {
 		int mb =	/* mask length */
 		    (cmd->o.opcode == O_IP_SRC || cmd->o.opcode == O_IP_DST) ?
 			32 : contigmask((uint8_t *)&(a[1]), 32);
 
 		/*
 		 * Start every entry without a name, otherwise an entry that
 		 * is not looked up would print the name found for an
 		 * earlier one.
 		 */
 		he = NULL;
 		/*
 		 * The length must be that of an IPv4 address (4 bytes);
 		 * sizeof(u_long) is 8 on LP64 platforms.
 		 */
 		if (mb == 32 && do_resolv)
 			he = gethostbyaddr((char *)&(a[0]), sizeof(a[0]),
 			    AF_INET);
 		if (he != NULL)		/* resolved to name */
 			printf("%s", he->h_name);
 		else if (mb == 0)	/* any */
 			printf("any");
 		else {		/* numeric IP followed by some kind of mask */
 			printf("%s", inet_ntoa( *((struct in_addr *)&a[0]) ) );
 			if (mb < 0)
 				printf(":%s",
 				    inet_ntoa( *((struct in_addr *)&a[1]) ) );
 			else if (mb < 32)
 				printf("/%d", mb);
 		}
 		if (len > 1)
 			printf(",");
 	}
 }
 
 /*
  * Prints a MAC address/mask pair.
  *
  * An all-zero mask prints " any".  Otherwise the address is printed as
  * six colon separated hex bytes, followed by "&mask" when the mask is
  * not contiguous or "/len" when it is contiguous but shorter than 48
  * bits; a full 48-bit mask adds nothing.
  */
 static void
 print_mac(uint8_t *addr, uint8_t *mask)
 {
 	const int masklen = contigmask(mask, 48);
 
 	if (masklen == 0) {
 		printf(" any");
 		return;
 	}
 
 	printf(" %02x:%02x:%02x:%02x:%02x:%02x",
 	    addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
 	if (masklen == -1) {
 		printf("&%02x:%02x:%02x:%02x:%02x:%02x",
 		    mask[0], mask[1], mask[2],
 		    mask[3], mask[4], mask[5]);
 	} else if (masklen < 48) {
 		printf("/%d", masklen);
 	}
 }
 
 /*
  * Build an O_ICMPTYPE instruction from av, a comma separated list of
  * ICMP type numbers (parsed with strtoul(), base auto-detected).
  *
  * Bit N of cmd->d[0] is set for every type N in the list, so only types
  * 0..31 can be represented.  Terminates the program on a malformed list
  * or a type above 31.  The instruction size is or-ed into cmd->o.len.
  */
 static void
 fill_icmptypes(ipfw_insn_u32 *cmd, char *av)
 {
 	/*
 	 * Keep the full strtoul() result: narrowing it before the range
 	 * check would let e.g. 256 wrap around and be accepted as type 0.
 	 */
 	unsigned long type;
 
 	cmd->d[0] = 0;
 	while (*av) {
 		if (*av == ',')
 			av++;
 
 		type = strtoul(av, &av, 0);
 
 		if (*av != ',' && *av != '\0')
 			errx(EX_DATAERR, "invalid ICMP type");
 
 		if (type > 31)
 			errx(EX_DATAERR, "ICMP type out of range");
 
 		/* unsigned constant: shifting a signed 1 into bit 31 is UB */
 		cmd->d[0] |= 1U << type;
 	}
 	cmd->o.opcode = O_ICMPTYPE;
 	cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32);
 }
 
 /*
  * Print " icmptypes" followed by the comma separated list of the bit
  * numbers (0..31) that are set in cmd->d[0].
  */
 static void
 print_icmptypes(ipfw_insn_u32 *cmd)
 {
 	int i;
 	char sep= ' ';
 
 	printf(" icmptypes");
 	for (i = 0; i < 32; i++) {
 		/* unsigned constant: shifting a signed 1 into bit 31 is UB */
 		if ( (cmd->d[0] & (1U << i)) == 0)
 			continue;
 		printf("%c%d", sep, i);
 		sep = ',';
 	}
 }
 
 /*
  * Print the IPv6 address(es) contained in a command.
  *
  * Output starts with " not" if F_NOT is set in the instruction, followed
  * by the prefix s and a space.  Then, depending on the opcode:
  *   - O_IP6_{SRC,DST}_ME: "me6";
  *   - O_IP6:              " ip6";
  *   - anything else:      a comma separated list of host names, "any",
  *     or addresses with an optional "/len" or ":mask" suffix.
  * Host names are only looked up when do_resolv is set and the entry is
  * a full 128-bit match.
  */
 static void
 print_ip6(ipfw_insn_ip6 *cmd, char const *s)
 {
 	struct hostent *he;
 	int len = F_LEN((ipfw_insn *) cmd) - 1;
 	struct in6_addr *a = &(cmd->addr6);
 	char trad[255];
 
 	printf("%s%s ", cmd->o.len & F_NOT ? " not": "", s);
 
 	if (cmd->o.opcode == O_IP6_SRC_ME || cmd->o.opcode == O_IP6_DST_ME) {
 		printf("me6");
 		return;
 	}
 	if (cmd->o.opcode == O_IP6) {
 		printf(" ip6");
 		return;
 	}
 
 	/*
 	 * len is now the payload size in 32-bit words, so len / 4 is the
 	 * number of in6_addr items that follow the header: 1 for a single
 	 * address, 2 for every addr/mask pair.  Each pass consumes one
 	 * pair (or the single address).
 	 */
 	for (len = len / 4; len > 0; len -= 2, a += 2) {
 		int mb =        /* mask length */
 		    (cmd->o.opcode == O_IP6_SRC || cmd->o.opcode == O_IP6_DST) ?
 		    128 : contigmask((uint8_t *)&(a[1]), 128);
 
 		/*
 		 * Start every entry without a name, otherwise an entry that
 		 * is not looked up would print the name found for an
 		 * earlier one.
 		 */
 		he = NULL;
 		if (mb == 128 && do_resolv)
 			he = gethostbyaddr((char *)a, sizeof(*a), AF_INET6);
 		if (he != NULL)             /* resolved to name */
 			printf("%s", he->h_name);
 		else if (mb == 0)           /* any */
 			printf("any");
 		else {          /* numeric IP followed by some kind of mask */
 			if (inet_ntop(AF_INET6,  a, trad, sizeof( trad ) ) == NULL) {
 				printf("Error ntop in print_ip6\n");
 				/* do not print an uninitialised buffer */
 				trad[0] = '\0';
 			}
 			printf("%s",  trad );
 			if (mb < 0)     /* XXX not really legal... */
 				printf(":%s",
 				    inet_ntop(AF_INET6, &a[1], trad, sizeof(trad)));
 			else if (mb < 128)
 				printf("/%d", mb);
 		}
 		if (len > 2)
 			printf(",");
 	}
 }
 
 /*
  * Build an O_ICMP6TYPE instruction from av, a comma separated list of
  * ICMPv6 type numbers (parsed with strtoul(), base auto-detected).
  *
  * The instruction is zeroed first, then bit (N % 32) of cmd->d[N / 32]
  * is set for every type N in the list.  Terminates the program on a
  * malformed list or a type above ICMP6_MAXTYPE.  The instruction size is
  * or-ed into cmd->o.len.
  * NOTE(review): the bzero() also clears whatever was already stored in
  * cmd->o.len -- confirm no caller sets flag bits there beforehand.
  */
 static void
 fill_icmp6types(ipfw_insn_icmp6 *cmd, char *av)
 {
 	/*
 	 * Keep the full strtoul() result: narrowing it before the range
 	 * check would let e.g. 256 wrap around and be accepted as type 0.
 	 */
 	unsigned long type;
 
 	bzero(cmd, sizeof(*cmd));
 	while (*av) {
 		if (*av == ',')
 			av++;
 		type = strtoul(av, &av, 0);
 		if (*av != ',' && *av != '\0')
 			errx(EX_DATAERR, "invalid ICMP6 type");
 		/*
 		 * XXX: shouldn't this be 0xFF?  I can't see any reason why
 		 * we shouldn't be able to filter all possible values
 		 * regardless of the ability of the rest of the kernel to do
 		 * anything useful with them.
 		 */
 		if (type > ICMP6_MAXTYPE)
 			errx(EX_DATAERR, "ICMP6 type out of range");
 		/* unsigned constant: shifting a signed 1 into bit 31 is UB */
 		cmd->d[type / 32] |= ( 1U << (type % 32));
 	}
 	cmd->o.opcode = O_ICMP6TYPE;
 	cmd->o.len |= F_INSN_SIZE(ipfw_insn_icmp6);
 }
 
 
 /*
  * Print " ip6 icmp6types" followed by the comma separated list of the
  * type numbers whose bit is set in cmd->d[0..6]; bit j of word i stands
  * for type i * 32 + j.
  * NOTE(review): seven words are read although the parameter is declared
  * as ipfw_insn_u32 -- presumably the caller passes an ipfw_insn_icmp6;
  * confirm.
  */
 static void
 print_icmp6types(ipfw_insn_u32 *cmd)
 {
 	int i, j;
 	char sep= ' ';
 
 	printf(" ip6 icmp6types");
 	for (i = 0; i < 7; i++)
 		for (j=0; j < 32; ++j) {
 			/* unsigned constant: 1 << 31 on a signed int is UB */
 			if ( (cmd->d[i] & (1U << (j))) == 0)
 				continue;
 			printf("%c%d", sep, (i*32 + j));
 			sep = ',';
 		}
 }
 
 /*
  * Print " flow-id " followed by the cmd->o.arg1 values stored in cmd->d,
  * separated by commas; the last value is followed by a space instead.
  */
 static void
 print_flow6id( ipfw_insn_u32 *cmd)
 {
 	const uint16_t count = cmd->o.arg1;
 	uint16_t idx;
 
 	printf(" flow-id ");
 	for (idx = 0; idx < count; idx++)
 		printf("%d%c", cmd->d[idx], (idx == count - 1) ? ' ' : ',');
 }
 
 /*
  * Keywords accepted for IPv6 extension header matching and the flag each
  * one stands for.  fill_ext6hdr() looks the keywords up here and ors the
  * flags together into the instruction argument.
  */
 static struct _s_x ext6hdrcodes[] = {
        { "frag",       EXT_FRAGMENT },
        { "hopopt",     EXT_HOPOPTS },
        { "route",      EXT_ROUTING },
        { "dstopt",     EXT_DSTOPTS },
        { "ah",         EXT_AH },
        { "esp",        EXT_ESP },
        { "rthdr0",     EXT_RTHDR0 },
        { "rthdr2",     EXT_RTHDR2 },
        { NULL,         0 }
 };
 
 /*
  * Fills command for the extension header filtering.
  *
  * av is a comma separated list of keywords from ext6hdrcodes; it is split
  * in place with strsep().  The flag of every keyword is or-ed into
  * cmd->arg1.  An unknown keyword terminates the program.
  * Returns 0, leaving opcode and length alone, if no flag ended up set;
  * otherwise sets the opcode to O_EXT_HDR, ors the instruction size into
  * cmd->len and returns 1.
  */
 int
 fill_ext6hdr( ipfw_insn *cmd, char *av)
 {
 	char *rest = av;
 	int tok;
 
 	cmd->arg1 = 0;
 
 	while (rest != NULL) {
 		av = strsep(&rest, ",");
 		tok = match_token(ext6hdrcodes, av);
 		switch (tok) {
 		case EXT_FRAGMENT:
 		case EXT_HOPOPTS:
 		case EXT_ROUTING:
 		case EXT_DSTOPTS:
 		case EXT_AH:
 		case EXT_ESP:
 		case EXT_RTHDR0:
 		case EXT_RTHDR2:
 			/* the token value is the flag itself */
 			cmd->arg1 |= tok;
 			break;
 
 		default:
 			errx( EX_DATAERR, "invalid option for ipv6 exten header" );
 			break;
 		}
 	}
 	if (cmd->arg1 == 0)
 		return 0;
 
 	cmd->opcode = O_EXT_HDR;
 	cmd->len |= F_INSN_SIZE( ipfw_insn );
 	return 1;
 }
 
 /*
  * Print " extension header:" followed by a description of every
  * extension header flag set in cmd->arg1.  The first description is
  * preceded by a space, later ones by a comma.
  */
 void
 print_ext6hdr( ipfw_insn *cmd )
 {
 	const unsigned int hdrs = cmd->arg1;
 	char sep = ' ';
 
 	printf(" extension header:");
 	if ((hdrs & EXT_FRAGMENT) != 0) {
 		printf("%cfragmentation", sep);
 		sep = ',';
 	}
 	if ((hdrs & EXT_HOPOPTS) != 0) {
 		printf("%chop options", sep);
 		sep = ',';
 	}
 	if ((hdrs & EXT_ROUTING) != 0) {
 		printf("%crouting options", sep);
 		sep = ',';
 	}
 	if ((hdrs & EXT_RTHDR0) != 0) {
 		printf("%crthdr0", sep);
 		sep = ',';
 	}
 	if ((hdrs & EXT_RTHDR2) != 0) {
 		printf("%crthdr2", sep);
 		sep = ',';
 	}
 	if ((hdrs & EXT_DSTOPTS) != 0) {
 		printf("%cdestination options", sep);
 		sep = ',';
 	}
 	if ((hdrs & EXT_AH) != 0) {
 		printf("%cauthentication header", sep);
 		sep = ',';
 	}
 	if ((hdrs & EXT_ESP) != 0)
 		printf("%cencapsulated security payload", sep);
 }
 
 /*
  * show_ipfw() prints the body of an ipfw rule.
  * Because the standard rule has at least proto src_ip dst_ip, we use
  * a helper function to produce these entries if not provided explicitly.
  * The first argument is the list of fields we have, the second is
  * the list of fields we want to be printed.
  *
  * Special cases if we have provided a MAC header:
  *   + if the rule does not contain IP addresses/ports, do not print them;
  *   + if the rule does not contain an IP proto, print "all" instead of "ip";
  *
  * Once we have 'have_options', IP header fields are printed as options.
  */
 #define	HAVE_PROTO	0x0001
 #define	HAVE_SRCIP	0x0002
 #define	HAVE_DSTIP	0x0004
 #define	HAVE_PROTO4	0x0008
 #define	HAVE_PROTO6	0x0010
 #define	HAVE_OPTIONS	0x8000
 
 #define	HAVE_IP		(HAVE_PROTO | HAVE_SRCIP | HAVE_DSTIP)
 /*
  * Print default text for the rule fields in 'want' that have not been
  * printed yet according to *flags, then record them as present.
  *
  * Nothing is done when comment_only is set.  Once protocol, source and
  * destination are all present, HAVE_OPTIONS is turned on and no more
  * defaults are printed.  The defaults are " ip4", " ip6" or " ip" for
  * the protocol (chosen from HAVE_PROTO4/HAVE_PROTO6), " from any" for the
  * source and " to any" for the destination.  The cmd argument is unused.
  */
 static void
 show_prerequisites(int *flags, int want, int cmd)
 {
 	(void)cmd;
 
 	if (comment_only)
 		return;
 	if ((*flags & HAVE_IP) == HAVE_IP)
 		*flags |= HAVE_OPTIONS;
 
 	if ((*flags & HAVE_OPTIONS) == 0) {
 		/* fields that are wanted but not there yet */
 		const int missing = want & ~*flags;
 
 		if (missing & HAVE_PROTO) {
 			if (*flags & HAVE_PROTO4)
 				printf(" ip4");
 			else if (*flags & HAVE_PROTO6)
 				printf(" ip6");
 			else
 				printf(" ip");
 		}
 		if (missing & HAVE_SRCIP)
 			printf(" from any");
 		if (missing & HAVE_DSTIP)
 			printf(" to any");
 	}
 	*flags |= want;
 }
 
 static void
 show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth)
 {
 	static int twidth = 0;
 	int l;
 	ipfw_insn *cmd, *tagptr = NULL;
 	char *comment = NULL;	/* ptr to comment if we have one */
 	int proto = 0;		/* default */
 	int flags = 0;	/* prerequisites */
 	ipfw_insn_log *logptr = NULL; /* set if we find an O_LOG */
 	ipfw_insn_altq *altqptr = NULL; /* set if we find an O_ALTQ */
 	int or_block = 0;	/* we are in an or block */
 	uint32_t set_disable;
 
 	bcopy(&rule->next_rule, &set_disable, sizeof(set_disable));
 
 	if (set_disable & (1 << rule->set)) { /* disabled */
 		if (!show_sets)
 			return;
 		else
 			printf("# DISABLED ");
 	}
 	printf("%05u ", rule->rulenum);
 
 	if (pcwidth>0 || bcwidth>0)
 		printf("%*llu %*llu ", pcwidth, align_uint64(&rule->pcnt),
 		    bcwidth, align_uint64(&rule->bcnt));
 
 	if (do_time == 2)
 		printf("%10u ", rule->timestamp);
 	else if (do_time == 1) {
 		char timestr[30];
 		time_t t = (time_t)0;
 
 		if (twidth == 0) {
 			strcpy(timestr, ctime(&t));
 			*strchr(timestr, '\n') = '\0';
 			twidth = strlen(timestr);
 		}
 		if (rule->timestamp) {
 			t = _long_to_time(rule->timestamp);
 
 			strcpy(timestr, ctime(&t));
 			*strchr(timestr, '\n') = '\0';
 			printf("%s ", timestr);
 		} else {
 			printf("%*s", twidth, " ");
 		}
 	}
 
 	if (show_sets)
 		printf("set %d ", rule->set);
 
 	/*
 	 * print the optional "match probability"
 	 */
 	if (rule->cmd_len > 0) {
 		cmd = rule->cmd ;
 		if (cmd->opcode == O_PROB) {
 			ipfw_insn_u32 *p = (ipfw_insn_u32 *)cmd;
 			double d = 1.0 * p->d[0];
 
 			d = (d / 0x7fffffff);
 			printf("prob %f ", d);
 		}
 	}
 
 	/*
 	 * first print actions
 	 */
         for (l = rule->cmd_len - rule->act_ofs, cmd = ACTION_PTR(rule);
 			l > 0 ; l -= F_LEN(cmd), cmd += F_LEN(cmd)) {
 		switch(cmd->opcode) {
 		case O_CHECK_STATE:
 			printf("check-state");
 			flags = HAVE_IP; /* avoid printing anything else */
 			break;
 
 		case O_ACCEPT:
 			printf("allow");
 			break;
 
 		case O_COUNT:
 			printf("count");
 			break;
 
 		case O_DENY:
 			printf("deny");
 			break;
 
 		case O_REJECT:
 			if (cmd->arg1 == ICMP_REJECT_RST)
 				printf("reset");
 			else if (cmd->arg1 == ICMP_UNREACH_HOST)
 				printf("reject");
 			else
 				print_reject_code(cmd->arg1);
 			break;
 
 		case O_UNREACH6:
 			if (cmd->arg1 == ICMP6_UNREACH_RST)
 				printf("reset6");
 			else
 				print_unreach6_code(cmd->arg1);
 			break;
 
 		case O_SKIPTO:
 			PRINT_UINT_ARG("skipto ", cmd->arg1);
 			break;
 
 		case O_PIPE:
 			PRINT_UINT_ARG("pipe ", cmd->arg1);
 			break;
 
 		case O_QUEUE:
 			PRINT_UINT_ARG("queue ", cmd->arg1);
 			break;
 
 		case O_DIVERT:
 			PRINT_UINT_ARG("divert ", cmd->arg1);
 			break;
 
 		case O_TEE:
 			PRINT_UINT_ARG("tee ", cmd->arg1);
 			break;
 
 		case O_NETGRAPH:
 			PRINT_UINT_ARG("netgraph ", cmd->arg1);
 			break;
 
 		case O_NGTEE:
 			PRINT_UINT_ARG("ngtee ", cmd->arg1);
 			break;
 
 		case O_FORWARD_IP:
 		    {
 			ipfw_insn_sa *s = (ipfw_insn_sa *)cmd;
 
 			if (s->sa.sin_addr.s_addr == INADDR_ANY) {
 				printf("fwd tablearg");
 			} else {
 				printf("fwd %s", inet_ntoa(s->sa.sin_addr));
 			}
 			if (s->sa.sin_port)
 				printf(",%d", s->sa.sin_port);
 		    }
 			break;
 
 		case O_LOG: /* O_LOG is printed last */
 			logptr = (ipfw_insn_log *)cmd;
 			break;
 
 		case O_ALTQ: /* O_ALTQ is printed after O_LOG */
 			altqptr = (ipfw_insn_altq *)cmd;
 			break;
 
 		case O_TAG:
 			tagptr = cmd;
 			break;
 
 		case O_NAT:
 			PRINT_UINT_ARG("nat ", cmd->arg1);
  			break;
 			
+		case O_SETFIB:
+			PRINT_UINT_ARG("setfib ", cmd->arg1);
+ 			break;
+			
 		default:
 			printf("** unrecognized action %d len %d ",
 				cmd->opcode, cmd->len);
 		}
 	}
 	if (logptr) {
 		if (logptr->max_log > 0)
 			printf(" log logamount %d", logptr->max_log);
 		else
 			printf(" log");
 	}
 	if (altqptr) {
 		const char *qname;
 
 		qname = altq_qid_to_name(altqptr->qid);
 		if (qname == NULL)
 			printf(" altq ?<%u>", altqptr->qid);
 		else
 			printf(" altq %s", qname);
 	}
 	if (tagptr) {
 		if (tagptr->len & F_NOT)
 			PRINT_UINT_ARG(" untag ", tagptr->arg1);
 		else
 			PRINT_UINT_ARG(" tag ", tagptr->arg1);
 	}
 
 	/*
 	 * then print the body.
 	 */
         for (l = rule->act_ofs, cmd = rule->cmd ;
 			l > 0 ; l -= F_LEN(cmd) , cmd += F_LEN(cmd)) {
 		if ((cmd->len & F_OR) || (cmd->len & F_NOT))
 			continue;
 		if (cmd->opcode == O_IP4) {
 			flags |= HAVE_PROTO4;
 			break;
 		} else if (cmd->opcode == O_IP6) {
 			flags |= HAVE_PROTO6;
 			break;
 		}			
 	}
 	if (rule->_pad & 1) {	/* empty rules before options */
 		if (!do_compact) {
 			show_prerequisites(&flags, HAVE_PROTO, 0);
 			printf(" from any to any");
 		}
 		flags |= HAVE_IP | HAVE_OPTIONS;
 	}
 
 	if (comment_only)
 		comment = "...";
 
         for (l = rule->act_ofs, cmd = rule->cmd ;
 			l > 0 ; l -= F_LEN(cmd) , cmd += F_LEN(cmd)) {
 		/* useful alias */
 		ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd;
 
 		if (comment_only) {
 			if (cmd->opcode != O_NOP)
 				continue;
 			printf(" // %s\n", (char *)(cmd + 1));
 			return;
 		}
 
 		show_prerequisites(&flags, 0, cmd->opcode);
 
 		switch(cmd->opcode) {
 		case O_PROB:
 			break;	/* done already */
 
 		case O_PROBE_STATE:
 			break; /* no need to print anything here */
 
 		case O_IP_SRC:
 		case O_IP_SRC_LOOKUP:
 		case O_IP_SRC_MASK:
 		case O_IP_SRC_ME:
 		case O_IP_SRC_SET:
 			show_prerequisites(&flags, HAVE_PROTO, 0);
 			if (!(flags & HAVE_SRCIP))
 				printf(" from");
 			if ((cmd->len & F_OR) && !or_block)
 				printf(" {");
 			print_ip((ipfw_insn_ip *)cmd,
 				(flags & HAVE_OPTIONS) ? " src-ip" : "");
 			flags |= HAVE_SRCIP;
 			break;
 
 		case O_IP_DST:
 		case O_IP_DST_LOOKUP:
 		case O_IP_DST_MASK:
 		case O_IP_DST_ME:
 		case O_IP_DST_SET:
 			show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP, 0);
 			if (!(flags & HAVE_DSTIP))
 				printf(" to");
 			if ((cmd->len & F_OR) && !or_block)
 				printf(" {");
 			print_ip((ipfw_insn_ip *)cmd,
 				(flags & HAVE_OPTIONS) ? " dst-ip" : "");
 			flags |= HAVE_DSTIP;
 			break;
 
 		case O_IP6_SRC:
 		case O_IP6_SRC_MASK:
 		case O_IP6_SRC_ME:
 			show_prerequisites(&flags, HAVE_PROTO, 0);
 			if (!(flags & HAVE_SRCIP))
 				printf(" from");
 			if ((cmd->len & F_OR) && !or_block)
 				printf(" {");
 			print_ip6((ipfw_insn_ip6 *)cmd,
 			    (flags & HAVE_OPTIONS) ? " src-ip6" : "");
 			flags |= HAVE_SRCIP | HAVE_PROTO;
 			break;
 
 		case O_IP6_DST:
 		case O_IP6_DST_MASK:
 		case O_IP6_DST_ME:
 			show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP, 0);
 			if (!(flags & HAVE_DSTIP))
 				printf(" to");
 			if ((cmd->len & F_OR) && !or_block)
 				printf(" {");
 			print_ip6((ipfw_insn_ip6 *)cmd,
 			    (flags & HAVE_OPTIONS) ? " dst-ip6" : "");
 			flags |= HAVE_DSTIP;
 			break;
 
 		case O_FLOW6ID:
 		print_flow6id( (ipfw_insn_u32 *) cmd );
 		flags |= HAVE_OPTIONS;
 		break;
 
 		case O_IP_DSTPORT:
 			show_prerequisites(&flags, HAVE_IP, 0);
 		case O_IP_SRCPORT:
 			show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP, 0);
 			if ((cmd->len & F_OR) && !or_block)
 				printf(" {");
 			if (cmd->len & F_NOT)
 				printf(" not");
 			print_newports((ipfw_insn_u16 *)cmd, proto,
 				(flags & HAVE_OPTIONS) ? cmd->opcode : 0);
 			break;
 
 		case O_PROTO: {
 			struct protoent *pe = NULL;
 
 			if ((cmd->len & F_OR) && !or_block)
 				printf(" {");
 			if (cmd->len & F_NOT)
 				printf(" not");
 			proto = cmd->arg1;
 			pe = getprotobynumber(cmd->arg1);
 			if ((flags & (HAVE_PROTO4 | HAVE_PROTO6)) &&
 			    !(flags & HAVE_PROTO))
 				show_prerequisites(&flags,
 				    HAVE_IP | HAVE_OPTIONS, 0);
 			if (flags & HAVE_OPTIONS)
 				printf(" proto");
 			if (pe)
 				printf(" %s", pe->p_name);
 			else
 				printf(" %u", cmd->arg1);
 			}
 			flags |= HAVE_PROTO;
 			break;
 
 		default: /*options ... */
 			if (!(cmd->len & (F_OR|F_NOT)))
 				if (((cmd->opcode == O_IP6) &&
 				    (flags & HAVE_PROTO6)) ||
 				    ((cmd->opcode == O_IP4) &&
 				    (flags & HAVE_PROTO4)))
 					break;
 			show_prerequisites(&flags, HAVE_IP | HAVE_OPTIONS, 0);
 			if ((cmd->len & F_OR) && !or_block)
 				printf(" {");
 			if (cmd->len & F_NOT && cmd->opcode != O_IN)
 				printf(" not");
 			switch(cmd->opcode) {
 			case O_MACADDR2: {
 				ipfw_insn_mac *m = (ipfw_insn_mac *)cmd;
 
 				printf(" MAC");
 				print_mac(m->addr, m->mask);
 				print_mac(m->addr + 6, m->mask + 6);
 				}
 				break;
 
 			case O_MAC_TYPE:
 				print_newports((ipfw_insn_u16 *)cmd,
 						IPPROTO_ETHERTYPE, cmd->opcode);
 				break;
 
 
 			case O_FRAG:
 				printf(" frag");
 				break;
 
+			case O_FIB:
+				printf(" fib %u", cmd->arg1 );
+				break;
+
 			case O_IN:
 				printf(cmd->len & F_NOT ? " out" : " in");
 				break;
 
 			case O_DIVERTED:
 				switch (cmd->arg1) {
 				case 3:
 					printf(" diverted");
 					break;
 				case 1:
 					printf(" diverted-loopback");
 					break;
 				case 2:
 					printf(" diverted-output");
 					break;
 				default:
 					printf(" diverted-?<%u>", cmd->arg1);
 					break;
 				}
 				break;
 
 			case O_LAYER2:
 				printf(" layer2");
 				break;
 			case O_XMIT:
 			case O_RECV:
 			case O_VIA:
 			    {
 				char const *s;
 				ipfw_insn_if *cmdif = (ipfw_insn_if *)cmd;
 
 				if (cmd->opcode == O_XMIT)
 					s = "xmit";
 				else if (cmd->opcode == O_RECV)
 					s = "recv";
 				else /* if (cmd->opcode == O_VIA) */
 					s = "via";
 				if (cmdif->name[0] == '\0')
 					printf(" %s %s", s,
 					    inet_ntoa(cmdif->p.ip));
 				else
 					printf(" %s %s", s, cmdif->name);
 
 				break;
 			    }
 			case O_IPID:
 				if (F_LEN(cmd) == 1)
 				    printf(" ipid %u", cmd->arg1 );
 				else
 				    print_newports((ipfw_insn_u16 *)cmd, 0,
 					O_IPID);
 				break;
 
 			case O_IPTTL:
 				if (F_LEN(cmd) == 1)
 				    printf(" ipttl %u", cmd->arg1 );
 				else
 				    print_newports((ipfw_insn_u16 *)cmd, 0,
 					O_IPTTL);
 				break;
 
 			case O_IPVER:
 				printf(" ipver %u", cmd->arg1 );
 				break;
 
 			case O_IPPRECEDENCE:
 				printf(" ipprecedence %u", (cmd->arg1) >> 5 );
 				break;
 
 			case O_IPLEN:
 				if (F_LEN(cmd) == 1)
 				    printf(" iplen %u", cmd->arg1 );
 				else
 				    print_newports((ipfw_insn_u16 *)cmd, 0,
 					O_IPLEN);
 				break;
 
 			case O_IPOPT:
 				print_flags("ipoptions", cmd, f_ipopts);
 				break;
 
 			case O_IPTOS:
 				print_flags("iptos", cmd, f_iptos);
 				break;
 
 			case O_ICMPTYPE:
 				print_icmptypes((ipfw_insn_u32 *)cmd);
 				break;
 
 			case O_ESTAB:
 				printf(" established");
 				break;
 
 			case O_TCPDATALEN:
 				if (F_LEN(cmd) == 1)
 				    printf(" tcpdatalen %u", cmd->arg1 );
 				else
 				    print_newports((ipfw_insn_u16 *)cmd, 0,
 					O_TCPDATALEN);
 				break;
 
 			case O_TCPFLAGS:
 				print_flags("tcpflags", cmd, f_tcpflags);
 				break;
 
 			case O_TCPOPTS:
 				print_flags("tcpoptions", cmd, f_tcpopts);
 				break;
 
 			case O_TCPWIN:
 				printf(" tcpwin %d", ntohs(cmd->arg1));
 				break;
 
 			case O_TCPACK:
 				printf(" tcpack %d", ntohl(cmd32->d[0]));
 				break;
 
 			case O_TCPSEQ:
 				printf(" tcpseq %d", ntohl(cmd32->d[0]));
 				break;
 
 			case O_UID:
 			    {
 				struct passwd *pwd = getpwuid(cmd32->d[0]);
 
 				if (pwd)
 					printf(" uid %s", pwd->pw_name);
 				else
 					printf(" uid %u", cmd32->d[0]);
 			    }
 				break;
 
 			case O_GID:
 			    {
 				struct group *grp = getgrgid(cmd32->d[0]);
 
 				if (grp)
 					printf(" gid %s", grp->gr_name);
 				else
 					printf(" gid %u", cmd32->d[0]);
 			    }
 				break;
 
 			case O_JAIL:
 				printf(" jail %d", cmd32->d[0]);
 				break;
 
 			case O_VERREVPATH:
 				printf(" verrevpath");
 				break;
 
 			case O_VERSRCREACH:
 				printf(" versrcreach");
 				break;
 
 			case O_ANTISPOOF:
 				printf(" antispoof");
 				break;
 
 			case O_IPSEC:
 				printf(" ipsec");
 				break;
 
 			case O_NOP:
 				comment = (char *)(cmd + 1);
 				break;
 
 			case O_KEEP_STATE:
 				printf(" keep-state");
 				break;
 
 			case O_LIMIT: {
 				struct _s_x *p = limit_masks;
 				ipfw_insn_limit *c = (ipfw_insn_limit *)cmd;
 				uint8_t x = c->limit_mask;
 				char const *comma = " ";
 
 				printf(" limit");
 				for (; p->x != 0 ; p++)
 					if ((x & p->x) == p->x) {
 						x &= ~p->x;
 						printf("%s%s", comma, p->s);
 						comma = ",";
 					}
 				PRINT_UINT_ARG(" ", c->conn_limit);
 				break;
 			}
 
 			case O_IP6:
 				printf(" ip6");
 				break;
 
 			case O_IP4:
 				printf(" ip4");
 				break;
 
 			case O_ICMP6TYPE:
 				print_icmp6types((ipfw_insn_u32 *)cmd);
 				break;
 
 			case O_EXT_HDR:
 				print_ext6hdr( (ipfw_insn *) cmd );
 				break;
 
 			case O_TAGGED:
 				if (F_LEN(cmd) == 1)
 					PRINT_UINT_ARG(" tagged ", cmd->arg1);
 				else
 					print_newports((ipfw_insn_u16 *)cmd, 0,
 					    O_TAGGED);
 				break;
 
 			default:
 				printf(" [opcode %d len %d]",
 				    cmd->opcode, cmd->len);
 			}
 		}
 		if (cmd->len & F_OR) {
 			printf(" or");
 			or_block = 1;
 		} else if (or_block) {
 			printf(" }");
 			or_block = 0;
 		}
 	}
 	show_prerequisites(&flags, HAVE_IP, 0);
 	if (comment)
 		printf(" // %s", comment);
 	printf("\n");
 }
 
 /*
  * Print one dynamic rule on a single output line.
  *
  *	d	 dynamic rule to display
  *	pcwidth	 field width used for the packet counter
  *	bcwidth	 field width used for the byte counter
  *
  * The counters and the expire value are printed only when at least one
  * of the two widths is positive.  When do_expired is not set, entries
  * whose expire field is zero are skipped unless they are of type
  * O_LIMIT_PARENT.
  */
 static void
 show_dyn_ipfw(ipfw_dyn_rule *d, int pcwidth, int bcwidth)
 {
 	struct protoent *pe;
 	struct in_addr a;
 	uint16_t rulenum;
 	char buf[INET6_ADDRSTRLEN];
 
 	if (!do_expired) {
 		if (!d->expire && !(d->dyn_type == O_LIMIT_PARENT))
 			return;
 	}
 	/* the first 16 bits stored at d->rule are used as the rule number */
 	bcopy(&d->rule, &rulenum, sizeof(rulenum));
 	printf("%05d", rulenum);
 	if (pcwidth>0 || bcwidth>0)
 	    printf(" %*llu %*llu (%ds)", pcwidth,
 		align_uint64(&d->pcnt), bcwidth,
 		align_uint64(&d->bcnt), d->expire);
 	switch (d->dyn_type) {
 	case O_LIMIT_PARENT:
 		printf(" PARENT %d", d->count);
 		break;
 	case O_LIMIT:
 		printf(" LIMIT");
 		break;
 	case O_KEEP_STATE: /* bidir, no mask */
 		printf(" STATE");
 		break;
 	}
 
 	/* protocol by name when known, otherwise by number */
 	if ((pe = getprotobynumber(d->id.proto)) != NULL)
 		printf(" %s", pe->p_name);
 	else
 		printf(" proto %u", d->id.proto);
 
 	if (d->id.addr_type == 4) {
 		/* addresses are converted with htonl() before inet_ntoa() */
 		a.s_addr = htonl(d->id.src_ip);
 		printf(" %s %d", inet_ntoa(a), d->id.src_port);
 
 		a.s_addr = htonl(d->id.dst_ip);
 		printf(" <-> %s %d", inet_ntoa(a), d->id.dst_port);
 	} else if (d->id.addr_type == 6) {
 		printf(" %s %d", inet_ntop(AF_INET6, &d->id.src_ip6, buf,
 		    sizeof(buf)), d->id.src_port);
 		printf(" <-> %s %d", inet_ntop(AF_INET6, &d->id.dst_ip6, buf,
 		    sizeof(buf)), d->id.dst_port);
 	} else {
 		/*
 		 * No newline here: the line is terminated by the common
 		 * printf() below, so an unknown address type no longer
 		 * produces a stray empty line.
 		 */
 		printf(" UNKNOWN <-> UNKNOWN");
 	}
 
 	printf("\n");
 }
 
 /*
  * Comparison callback used to sort an array of struct dn_flow_queue.
  *
  * The global do_sort selects the ordering: its absolute value picks the
  * field (1 = len, 2 = len_bytes, 3 = tot_pkts, 4 = tot_bytes; any other
  * value compares everything as equal) and a negative value reverses the
  * direction.
  *
  * Returns -1, 0 or 1.
  *
  * The fields are compared explicitly instead of being subtracted: the
  * counters are unsigned, so a difference such as a->len - b->len can
  * never become negative and would report "greater" whenever a < b.
  */
 static int
 sort_q(const void *pa, const void *pb)
 {
 	int rev = (do_sort < 0);
 	int field = rev ? -do_sort : do_sort;
 	int res = 0;
 	const struct dn_flow_queue *a = pa;
 	const struct dn_flow_queue *b = pb;
 
 /* three-way comparison that is safe for unsigned operands */
 #define	SORT_Q_CMP(x, y)	(((x) > (y)) - ((x) < (y)))
 	switch (field) {
 	case 1: /* pkts */
 		res = SORT_Q_CMP(a->len, b->len);
 		break;
 	case 2: /* bytes */
 		res = SORT_Q_CMP(a->len_bytes, b->len_bytes);
 		break;
 
 	case 3: /* tot pkts */
 		res = SORT_Q_CMP(a->tot_pkts, b->tot_pkts);
 		break;
 
 	case 4: /* tot bytes */
 		res = SORT_Q_CMP(a->tot_bytes, b->tot_bytes);
 		break;
 	}
 #undef SORT_Q_CMP
 	return (rev ? res : -res);
 }
 
 static void
 list_queues(struct dn_flow_set *fs, struct dn_flow_queue *q)
 {
 	int l;
 	int index_printed, indexes = 0;
 	char buff[255];
 	struct protoent *pe;
 
 	if (fs->rq_elements == 0)
 		return;
 
 	if (do_sort != 0)
 		heapsort(q, fs->rq_elements, sizeof *q, sort_q);
 
 	/* Print IPv4 flows */
 	index_printed = 0;
 	for (l = 0; l < fs->rq_elements; l++) {
 		struct in_addr ina;
 
 		/* XXX: Should check for IPv4 flows */
 		if (IS_IP6_FLOW_ID(&(q[l].id)))
 			continue;
 
 		if (!index_printed) {
 			index_printed = 1;
 			if (indexes > 0)	/* currently a no-op */
 				printf("\n");
 			indexes++;
 			printf("    "
 			    "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n",
 			    fs->flow_mask.proto,
 			    fs->flow_mask.src_ip, fs->flow_mask.src_port,
 			    fs->flow_mask.dst_ip, fs->flow_mask.dst_port);
 
 			printf("BKT Prot ___Source IP/port____ "
 			    "____Dest. IP/port____ "
 			    "Tot_pkt/bytes Pkt/Byte Drp\n");
 		}
 
 		printf("%3d ", q[l].hash_slot);
 		pe = getprotobynumber(q[l].id.proto);
 		if (pe)
 			printf("%-4s ", pe->p_name);
 		else
 			printf("%4u ", q[l].id.proto);
 		ina.s_addr = htonl(q[l].id.src_ip);
 		printf("%15s/%-5d ",
 		    inet_ntoa(ina), q[l].id.src_port);
 		ina.s_addr = htonl(q[l].id.dst_ip);
 		printf("%15s/%-5d ",
 		    inet_ntoa(ina), q[l].id.dst_port);
 		printf("%4qu %8qu %2u %4u %3u\n",
 		    q[l].tot_pkts, q[l].tot_bytes,
 		    q[l].len, q[l].len_bytes, q[l].drops);
 		if (verbose)
 			printf("   S %20qd  F %20qd\n",
 			    q[l].S, q[l].F);
 	}
 
 	/* Print IPv6 flows */
 	index_printed = 0;
 	for (l = 0; l < fs->rq_elements; l++) {
 		if (!IS_IP6_FLOW_ID(&(q[l].id)))
 			continue;
 
 		if (!index_printed) {
 			index_printed = 1;
 			if (indexes > 0)
 				printf("\n");
 			indexes++;
 			printf("\n        mask: proto: 0x%02x, flow_id: 0x%08x,  ",
 			    fs->flow_mask.proto, fs->flow_mask.flow_id6);
 			inet_ntop(AF_INET6, &(fs->flow_mask.src_ip6),
 			    buff, sizeof(buff));
 			printf("%s/0x%04x -> ", buff, fs->flow_mask.src_port);
 			inet_ntop( AF_INET6, &(fs->flow_mask.dst_ip6),
 			    buff, sizeof(buff) );
 			printf("%s/0x%04x\n", buff, fs->flow_mask.dst_port);
 
 			printf("BKT ___Prot___ _flow-id_ "
 			    "______________Source IPv6/port_______________ "
 			    "_______________Dest. IPv6/port_______________ "
 			    "Tot_pkt/bytes Pkt/Byte Drp\n");
 		}
 		printf("%3d ", q[l].hash_slot);
 		pe = getprotobynumber(q[l].id.proto);
 		if (pe != NULL)
 			printf("%9s ", pe->p_name);
 		else
 			printf("%9u ", q[l].id.proto);
 		printf("%7d  %39s/%-5d ", q[l].id.flow_id6,
 		    inet_ntop(AF_INET6, &(q[l].id.src_ip6), buff, sizeof(buff)),
 		    q[l].id.src_port);
 		printf(" %39s/%-5d ",
 		    inet_ntop(AF_INET6, &(q[l].id.dst_ip6), buff, sizeof(buff)),
 		    q[l].id.dst_port);
 		printf(" %4qu %8qu %2u %4u %3u\n",
 		    q[l].tot_pkts, q[l].tot_bytes,
 		    q[l].len, q[l].len_bytes, q[l].drops);
 		if (verbose)
 			printf("   S %20qd  F %20qd\n", q[l].S, q[l].F);
 	}
 }
 
 /*
  * Print a one-line summary of a flow set's parameters.
  *
  *	fs	flow set to describe
  *	prefix	text printed at the start of the line
  *
  * The line shows the queue size (in bytes/KB when DN_QSIZE_IS_BYTES is
  * set, otherwise in slots), the packet loss rate when fs->plr is
  * non-zero, the number of queues and buckets, and either the RED
  * parameters (when DN_IS_RED is set) or "droptail".
  *
  * All intermediate strings are built with snprintf() so that unusual
  * values cannot overrun the fixed-size buffers.
  */
 static void
 print_flowset_parms(struct dn_flow_set *fs, char *prefix)
 {
 	int l;
 	char qs[30];
 	char plr[30];
 	char red[90];	/* Display RED parameters */
 
 	l = fs->qsize;
 	if (fs->flags_fs & DN_QSIZE_IS_BYTES) {
 		if (l >= 8192)
 			snprintf(qs, sizeof(qs), "%d KB", l / 1024);
 		else
 			snprintf(qs, sizeof(qs), "%d B", l);
 	} else
 		snprintf(qs, sizeof(qs), "%3d sl.", l);
 	if (fs->plr)
 		/* plr is stored scaled by 0x7fffffff */
 		snprintf(plr, sizeof(plr), "plr %f",
 		    1.0 * fs->plr / (double)(0x7fffffff));
 	else
 		plr[0] = '\0';
 	if (fs->flags_fs & DN_IS_RED)	/* RED parameters */
 		snprintf(red, sizeof(red),
 		    "\n\t  %cRED w_q %f min_th %d max_th %d max_p %f",
 		    (fs->flags_fs & DN_IS_GENTLE_RED) ? 'G' : ' ',
 		    1.0 * fs->w_q / (double)(1 << SCALE_RED),
 		    SCALE_VAL(fs->min_th),
 		    SCALE_VAL(fs->max_th),
 		    1.0 * fs->max_p / (double)(1 << SCALE_RED));
 	else
 		snprintf(red, sizeof(red), "droptail");
 
 	printf("%s %s%s %d queues (%d buckets) %s\n",
 	    prefix, qs, plr, fs->rq_elements, fs->rq_size, red);
 }
 
 /*
  * Print the dummynet configuration contained in a buffer.
  *
  *	data	buffer to walk
  *	nbytes	number of bytes available in data
  *	ac, av	remaining command-line arguments; when ac > 0 the first
  *		one is parsed as a number and used to restrict the output
  *
  * The buffer is walked in two phases: first the records whose list
  * link carries the DN_IS_PIPE marker, then those carrying the
  * DN_IS_QUEUE marker.  Every record is followed by rq_elements
  * struct dn_flow_queue entries, which are printed by list_queues().
  */
 static void
 list_pipes(void *data, uint nbytes, int ac, char *av[])
 {
 	int rulenum;
 	void *next = data;
 	struct dn_pipe *p = (struct dn_pipe *) data;
 	struct dn_flow_set *fs;
 	struct dn_flow_queue *q;
 	int l;
 
 	/* optional numeric filter; 0 means "show everything" */
 	if (ac > 0)
 		rulenum = strtoul(*av++, NULL, 10);
 	else
 		rulenum = 0;
 	for (; nbytes >= sizeof *p; p = (struct dn_pipe *)next) {
 		double b = p->bandwidth;
 		char buf[30];
 		char prefix[80];
 
 		if (SLIST_NEXT(p, next) != (struct dn_pipe *)DN_IS_PIPE)
 			break;	/* done with pipes, now queues */
 
 		/*
 		 * compute length, as pipe have variable size
 		 */
 		l = sizeof(*p) + p->fs.rq_elements * sizeof(*q);
 		next = (char *)p + l;
 		nbytes -= l;
 
 		/*
 		 * Skip pipes that do not match the requested number, and
 		 * all pipes when do_pipe == 2 (presumably the "queue"
 		 * command -- confirm against the caller).
 		 */
 		if ((rulenum != 0 && rulenum != p->pipe_nr) || do_pipe == 2)
 			continue;
 
 		/*
 		 * Print rate (or clocking interface)
 		 */
 		if (p->if_name[0] != '\0')
 			sprintf(buf, "%s", p->if_name);
 		else if (b == 0)
 			sprintf(buf, "unlimited");
 		else if (b >= 1000000)
 			sprintf(buf, "%7.3f Mbit/s", b/1000000);
 		else if (b >= 1000)
 			sprintf(buf, "%7.3f Kbit/s", b/1000);
 		else
 			sprintf(buf, "%7.3f bit/s ", b);
 
 		sprintf(prefix, "%05d: %s %4d ms ",
 		    p->pipe_nr, buf, p->delay);
 		print_flowset_parms(&(p->fs), prefix);
 		if (verbose)
 			printf("   V %20qd\n", p->V >> MY_M);
 
 		/* the queue descriptors start right after the pipe record */
 		q = (struct dn_flow_queue *)(p+1);
 		list_queues(&(p->fs), q);
 	}
 	/* second phase: continue from where the pipe walk stopped */
 	for (fs = next; nbytes >= sizeof *fs; fs = next) {
 		char prefix[80];
 
 		if (SLIST_NEXT(fs, next) != (struct dn_flow_set *)DN_IS_QUEUE)
 			break;
 		/* same variable-size layout as above */
 		l = sizeof(*fs) + fs->rq_elements * sizeof(*q);
 		next = (char *)fs + l;
 		nbytes -= l;
 
 		/*
 		 * With a number given, match it against the flow set number
 		 * when do_pipe == 2 and against the parent pipe number when
 		 * do_pipe == 1.
 		 */
 		if (rulenum != 0 && ((rulenum != fs->fs_nr && do_pipe == 2) ||
 		    (rulenum != fs->parent_nr && do_pipe == 1))) {
 			continue;
 		}
 
 		q = (struct dn_flow_queue *)(fs+1);
 		sprintf(prefix, "q%05d: weight %d pipe %d ",
 		    fs->fs_nr, fs->weight, fs->parent_nr);
 		print_flowset_parms(fs, prefix);
 		list_queues(fs, q);
 	}
 }
 
 /*
  * This one handles all set-related commands
  * 	ipfw set { show | enable | disable }
  * 	ipfw set swap X Y
  * 	ipfw set move X to Y
  * 	ipfw set move rule X to Y
  *
  * ac/av are the command-line arguments starting at the "set" keyword,
  * which is skipped first.  Malformed commands terminate the program
  * through errx().  "show" reads the mask of disabled sets with
  * IP_FW_GET; every other subcommand packs its request into masks[] and
  * sends it with IP_FW_DEL.
  */
 static void
 sets_handler(int ac, char *av[])
 {
 	uint32_t set_disable, masks[2];
 	int i, nbytes;
 	uint16_t rulenum;
 	uint8_t cmd, new_set;
 
 	ac--;
 	av++;
 
 	if (!ac)
 		errx(EX_USAGE, "set needs command");
 	if (_substrcmp(*av, "show") == 0) {
 		void *data;
 		char const *msg;
 
 		nbytes = sizeof(struct ip_fw);
 		if ((data = calloc(1, nbytes)) == NULL)
 			err(EX_OSERR, "calloc");
 		if (do_cmd(IP_FW_GET, data, (uintptr_t)&nbytes) < 0)
 			err(EX_OSERR, "getsockopt(IP_FW_GET)");
 		/* the mask of disabled sets is read from the next_rule field */
 		bcopy(&((struct ip_fw *)data)->next_rule,
 			&set_disable, sizeof(set_disable));
 		/* the buffer is no longer needed once the mask is copied */
 		free(data);
 
 		/* the keyword is printed only in front of the first number */
 		for (i = 0, msg = "disable" ; i < RESVD_SET; i++)
 			if ((set_disable & (1<<i))) {
 				printf("%s %d", msg, i);
 				msg = "";
 			}
 		msg = (set_disable) ? " enable" : "enable";
 		for (i = 0; i < RESVD_SET; i++)
 			if (!(set_disable & (1<<i))) {
 				printf("%s %d", msg, i);
 				msg = "";
 			}
 		printf("\n");
 	} else if (_substrcmp(*av, "swap") == 0) {
 		ac--; av++;
 		if (ac != 2)
 			errx(EX_USAGE, "set swap needs 2 set numbers\n");
 		rulenum = atoi(av[0]);
 		new_set = atoi(av[1]);
 		if (!isdigit((unsigned char)*(av[0])) || rulenum > RESVD_SET)
 			errx(EX_DATAERR, "invalid set number %s\n", av[0]);
 		if (!isdigit((unsigned char)*(av[1])) || new_set > RESVD_SET)
 			errx(EX_DATAERR, "invalid set number %s\n", av[1]);
 		/* command code 4 in the top byte, then the two set numbers */
 		masks[0] = (4 << 24) | (new_set << 16) | (rulenum);
 		i = do_cmd(IP_FW_DEL, masks, sizeof(uint32_t));
 	} else if (_substrcmp(*av, "move") == 0) {
 		ac--; av++;
 		/* command code 2 moves a single rule, 3 moves a whole set */
 		if (ac && _substrcmp(*av, "rule") == 0) {
 			cmd = 2;
 			ac--; av++;
 		} else
 			cmd = 3;
 		if (ac != 3 || _substrcmp(av[1], "to") != 0)
 			errx(EX_USAGE, "syntax: set move [rule] X to Y\n");
 		rulenum = atoi(av[0]);
 		new_set = atoi(av[2]);
 		if (!isdigit((unsigned char)*(av[0])) ||
 			(cmd == 3 && rulenum > RESVD_SET) ||
 			(cmd == 2 && rulenum == 65535) )
 			errx(EX_DATAERR, "invalid source number %s\n", av[0]);
 		/* av[1] is the "to" keyword: the destination set is av[2] */
 		if (!isdigit((unsigned char)*(av[2])) || new_set > RESVD_SET)
 			errx(EX_DATAERR, "invalid dest. set %s\n", av[2]);
 		masks[0] = (cmd << 24) | (new_set << 16) | (rulenum);
 		i = do_cmd(IP_FW_DEL, masks, sizeof(uint32_t));
 	} else if (_substrcmp(*av, "disable") == 0 ||
 		   _substrcmp(*av, "enable") == 0 ) {
 		/* masks[0] collects sets to disable, masks[1] sets to enable */
 		int which = _substrcmp(*av, "enable") == 0 ? 1 : 0;
 
 		ac--; av++;
 		masks[0] = masks[1] = 0;
 
 		while (ac) {
 			if (isdigit((unsigned char)**av)) {
 				i = atoi(*av);
 				if (i < 0 || i > RESVD_SET)
 					errx(EX_DATAERR,
 					    "invalid set number %d\n", i);
 				masks[which] |= (1<<i);
 			} else if (_substrcmp(*av, "disable") == 0)
 				which = 0;
 			else if (_substrcmp(*av, "enable") == 0)
 				which = 1;
 			else
 				errx(EX_DATAERR,
 					"invalid set command %s\n", *av);
 			av++; ac--;
 		}
 		if ( (masks[0] & masks[1]) != 0 )
 			errx(EX_DATAERR,
 			    "cannot enable and disable the same set\n");
 
 		i = do_cmd(IP_FW_DEL, masks, sizeof(masks));
 		if (i)
 			warn("set enable/disable: setsockopt(IP_FW_DEL)");
 	} else
 		errx(EX_USAGE, "invalid set command %s\n", *av);
 }
 
 /*
  * Handle "ipfw enable ..." and "ipfw disable ...".
  *
  *	ac, av	arguments starting at the enable/disable word itself
  *	which	value written to the selected knob
  *
  * The keyword following the command selects either one of the
  * net.inet.ip.fw.* sysctl variables listed below or, for "altq",
  * a call to altq_set_enabled().  A missing or unknown keyword only
  * produces a warning.
  */
 static void
 sysctl_handler(int ac, char *av[], int which)
 {
 	/* keywords are tried in this order; "altq" is checked last */
 	static const struct {
 		const char *keyword;
 		const char *mib;
 	} knobs[] = {
 		{ "firewall",		"net.inet.ip.fw.enable" },
 		{ "one_pass",		"net.inet.ip.fw.one_pass" },
 		{ "debug",		"net.inet.ip.fw.debug" },
 		{ "verbose",		"net.inet.ip.fw.verbose" },
 		{ "dyn_keepalive",	"net.inet.ip.fw.dyn_keepalive" },
 	};
 	const size_t nknobs = sizeof(knobs) / sizeof(knobs[0]);
 	char *word;
 	size_t k;
 
 	if (ac - 1 == 0) {
 		warnx("missing keyword to enable/disable\n");
 		return;
 	}
 	word = av[1];
 
 	for (k = 0; k < nknobs; k++) {
 		if (_substrcmp(word, knobs[k].keyword) != 0)
 			continue;
 		sysctlbyname(knobs[k].mib, NULL, 0,
 		    &which, sizeof(which));
 		return;
 	}
 
 	if (_substrcmp(word, "altq") == 0)
 		altq_set_enabled(which);
 	else
 		warnx("unrecognize enable/disable keyword: %s\n", word);
 }
 
 /*
  * Fetch the rule list from the kernel (or the dummynet configuration
  * when do_pipe is set) and print it.
  *
  *	ac, av		arguments starting at the list/show keyword, which
  *			is skipped; the remaining ones are rule numbers
  *			or ranges written as first-last
  *	show_counters	when non-zero, column widths for the packet and
  *			byte counters are computed from the static rules
  *
  * Nothing is listed when test_only is set.  With no rule numbers every
  * static rule is shown, followed by the dynamic rules when do_dynamic
  * is set.  Otherwise only the requested rules are shown; an invalid
  * number yields EX_USAGE and a range matching no rule EX_UNAVAILABLE,
  * and the process exits with that status once all arguments have been
  * handled.  When use_set is non-zero only rules of set use_set - 1 are
  * considered.
  */
 static void
 list(int ac, char *av[], int show_counters)
 {
 	struct ip_fw *r;
 	ipfw_dyn_rule *dynrules, *d;
 
 #define NEXT(r)	((struct ip_fw *)((char *)r + RULESIZE(r)))
 	char *lim;
 	void *data = NULL;
 	int bcwidth, n, nbytes, nstat, ndyn, pcwidth, width;
 	int exitval = EX_OK;
 	int lac;
 	char **lav;
 	u_long rnum, last;
 	char *endptr;
 	int seen = 0;
 	uint8_t set;
 
 	const int ocmd = do_pipe ? IP_DUMMYNET_GET : IP_FW_GET;
 	int nalloc = 1024;	/* start somewhere... */
 
 	last = 0;
 
 	if (test_only) {
 		fprintf(stderr, "Testing only, list disabled\n");
 		return;
 	}
 
 	ac--;
 	av++;
 
 	/* get rules or pipes from kernel, resizing array as necessary */
 	nbytes = nalloc;
 
 	/* a reply that fills the whole buffer triggers a retry with a larger one */
 	while (nbytes >= nalloc) {
 		nalloc = nalloc * 2 + 200;
 		nbytes = nalloc;
 		if ((data = realloc(data, nbytes)) == NULL)
 			err(EX_OSERR, "realloc");
 		if (do_cmd(ocmd, data, (uintptr_t)&nbytes) < 0)
 			err(EX_OSERR, "getsockopt(IP_%s_GET)",
 				do_pipe ? "DUMMYNET" : "FW");
 	}
 
 	if (do_pipe) {
 		list_pipes(data, nbytes, ac, av);
 		goto done;
 	}
 
 	/*
 	 * Count static rules. They have variable size so we
 	 * need to scan the list to count them.
 	 * The bounds check comes first so that r is never read at or
 	 * past the end of the data returned by the kernel.
 	 */
 	for (nstat = 1, r = data, lim = (char *)data + nbytes;
 		    (char *)r < lim && r->rulenum < 65535;
 		    ++nstat, r = NEXT(r) )
 		; /* nothing */
 
 	/*
 	 * Count dynamic rules. This is easier as they have
 	 * fixed size.
 	 */
 	r = NEXT(r);
 	dynrules = (ipfw_dyn_rule *)r ;
 	n = (char *)r - (char *)data;
 	ndyn = (nbytes - n) / sizeof *dynrules;
 
 	/* if showing stats, figure out column widths ahead of time */
 	bcwidth = pcwidth = 0;
 	if (show_counters) {
 		for (n = 0, r = data; n < nstat; n++, r = NEXT(r)) {
 			/* skip rules from another set */
 			if (use_set && r->set != use_set - 1)
 				continue;
 
 			/* packet counter */
 			width = snprintf(NULL, 0, "%llu",
 			    align_uint64(&r->pcnt));
 			if (width > pcwidth)
 				pcwidth = width;
 
 			/* byte counter */
 			width = snprintf(NULL, 0, "%llu",
 			    align_uint64(&r->bcnt));
 			if (width > bcwidth)
 				bcwidth = width;
 		}
 	}
 	if (do_dynamic && ndyn) {
 		for (n = 0, d = dynrules; n < ndyn; n++, d++) {
 			if (use_set) {
 				/* skip rules from another set */
 				bcopy((char *)&d->rule + sizeof(uint16_t),
 				      &set, sizeof(uint8_t));
 				if (set != use_set - 1)
 					continue;
 			}
 			width = snprintf(NULL, 0, "%llu",
 			    align_uint64(&d->pcnt));
 			if (width > pcwidth)
 				pcwidth = width;
 
 			width = snprintf(NULL, 0, "%llu",
 			    align_uint64(&d->bcnt));
 			if (width > bcwidth)
 				bcwidth = width;
 		}
 	}
 	/* if no rule numbers were specified, list all rules */
 	if (ac == 0) {
 		for (n = 0, r = data; n < nstat; n++, r = NEXT(r)) {
 			if (use_set && r->set != use_set - 1)
 				continue;
 			show_ipfw(r, pcwidth, bcwidth);
 		}
 
 		if (do_dynamic && ndyn) {
 			printf("## Dynamic rules (%d):\n", ndyn);
 			for (n = 0, d = dynrules; n < ndyn; n++, d++) {
 				if (use_set) {
 					bcopy((char *)&d->rule + sizeof(uint16_t),
 					      &set, sizeof(uint8_t));
 					if (set != use_set - 1)
 						continue;
 				}
 				show_dyn_ipfw(d, pcwidth, bcwidth);
 			}
 		}
 		goto done;
 	}
 
 	/* display specific rules requested on command line */
 
 	for (lac = ac, lav = av; lac != 0; lac--) {
 		/* convert command line rule # */
 		last = rnum = strtoul(*lav++, &endptr, 10);
 		if (*endptr == '-')
 			last = strtoul(endptr+1, &endptr, 10);
 		if (*endptr) {
 			exitval = EX_USAGE;
 			warnx("invalid rule number: %s", *(lav - 1));
 			continue;
 		}
 		for (n = seen = 0, r = data; n < nstat; n++, r = NEXT(r) ) {
 			if (r->rulenum > last)
 				break;
 			if (use_set && r->set != use_set - 1)
 				continue;
 			if (r->rulenum >= rnum && r->rulenum <= last) {
 				show_ipfw(r, pcwidth, bcwidth);
 				seen = 1;
 			}
 		}
 		if (!seen) {
 			/* give precedence to other error(s) */
 			if (exitval == EX_OK)
 				exitval = EX_UNAVAILABLE;
 			warnx("rule %lu does not exist", rnum);
 		}
 	}
 
 	if (do_dynamic && ndyn) {
 		printf("## Dynamic rules:\n");
 		for (lac = ac, lav = av; lac != 0; lac--) {
 			last = rnum = strtoul(*lav++, &endptr, 10);
 			if (*endptr == '-')
 				last = strtoul(endptr+1, &endptr, 10);
 			if (*endptr)
 				/* already warned */
 				continue;
 			for (n = 0, d = dynrules; n < ndyn; n++, d++) {
 				uint16_t rulenum;
 
 				bcopy(&d->rule, &rulenum, sizeof(rulenum));
 				/*
 				 * Filter on the dynamic rule's own number.
 				 * No ordering of the dynamic rules is
 				 * assumed, so every entry is examined.
 				 */
 				if (rulenum < rnum || rulenum > last)
 					continue;
 				if (use_set) {
 					bcopy((char *)&d->rule + sizeof(uint16_t),
 					      &set, sizeof(uint8_t));
 					if (set != use_set - 1)
 						continue;
 				}
 				show_dyn_ipfw(d, pcwidth, bcwidth);
 			}
 		}
 	}
 
 done:
 	free(data);
 
 	if (exitval != EX_OK)
 		exit(exitval);
 #undef NEXT
 }
 
 /*
  * Write a two-line usage reminder to stderr and terminate the program
  * with exit status EX_USAGE.
  */
 static void
 show_usage(void)
 {
 	fputs("usage: ipfw [options]\n", stderr);
 	fputs("do \"ipfw -h\" or see ipfw manpage for details\n", stderr);
 	exit(EX_USAGE);
 }
 
 /*
  * Write the command syntax summary to stderr and terminate the program
  * with exit status 0.
  */
 static void
 help(void)
 {
 	fprintf(stderr,
 "ipfw syntax summary (but please do read the ipfw(8) manpage):\n"
 "ipfw [-abcdefhnNqStTv] <command> where <command> is one of:\n"
 "add [num] [set N] [prob x] RULE-BODY\n"
 "{pipe|queue} N config PIPE-BODY\n"
 "[pipe|queue] {zero|delete|show} [N{,N}]\n"
 "nat N config {ip IPADDR|if IFNAME|log|deny_in|same_ports|unreg_only|reset|\n"
 "		reverse|proxy_only|redirect_addr linkspec|\n"
 "		redirect_port linkspec|redirect_proto linkspec}\n"
 "set [disable N... enable N...] | move [rule] X to Y | swap X Y | show\n"
 "set N {show|list|zero|resetlog|delete} [N{,N}] | flush\n"
 "table N {add ip[/bits] [value] | delete ip[/bits] | flush | list}\n"
 "\n"
 "RULE-BODY:	check-state [PARAMS] | ACTION [PARAMS] ADDR [OPTION_LIST]\n"
 "ACTION:	check-state | allow | count | deny | unreach{,6} CODE |\n"
 "               skipto N | {divert|tee} PORT | forward ADDR |\n"
-"               pipe N | queue N | nat N\n"
+"               pipe N | queue N | nat N | setfib FIB\n"
 "PARAMS: 	[log [logamount LOGLIMIT]] [altq QUEUE_NAME]\n"
 "ADDR:		[ MAC dst src ether_type ] \n"
 "		[ ip from IPADDR [ PORT ] to IPADDR [ PORTLIST ] ]\n"
 "		[ ipv6|ip6 from IP6ADDR [ PORT ] to IP6ADDR [ PORTLIST ] ]\n"
 "IPADDR:	[not] { any | me | ip/bits{x,y,z} | table(t[,v]) | IPLIST }\n"
 "IP6ADDR:	[not] { any | me | me6 | ip6/bits | IP6LIST }\n"
 "IP6LIST:	{ ip6 | ip6/bits }[,IP6LIST]\n"
 "IPLIST:	{ ip | ip/bits | ip:mask }[,IPLIST]\n"
 "OPTION_LIST:	OPTION [OPTION_LIST]\n"
 "OPTION:	bridged | diverted | diverted-loopback | diverted-output |\n"
 "	{dst-ip|src-ip} IPADDR | {dst-ip6|src-ip6|dst-ipv6|src-ipv6} IP6ADDR |\n"
 "	{dst-port|src-port} LIST |\n"
 "	estab | frag | {gid|uid} N | icmptypes LIST | in | out | ipid LIST |\n"
 "	iplen LIST | ipoptions SPEC | ipprecedence | ipsec | iptos SPEC |\n"
 "	ipttl LIST | ipversion VER | keep-state | layer2 | limit ... |\n"
-"	icmp6types LIST | ext6hdr LIST | flow-id N[,N] |\n"
+"	icmp6types LIST | ext6hdr LIST | flow-id N[,N] | fib FIB |\n"
 "	mac ... | mac-type LIST | proto LIST | {recv|xmit|via} {IF|IPADDR} |\n"
 "	setup | {tcpack|tcpseq|tcpwin} NN | tcpflags SPEC | tcpoptions SPEC |\n"
 "	tcpdatalen LIST | verrevpath | versrcreach | antispoof\n"
 );
 exit(0);
 }
 
 
 /*
  * Convert a host specification to an IPv4 address.
  *
  *	host	address in a form accepted by inet_aton(), or a host
  *		name resolved with gethostbyname()
  *	ipaddr	receives the address
  *
  * Returns 0 on success and -1 when the name cannot be resolved.
  */
 static int
 lookup_host (char *host, struct in_addr *ipaddr)
 {
 	struct hostent *he;
 
 	if (!inet_aton(host, ipaddr)) {
 		if ((he = gethostbyname(host)) == NULL)
 			return(-1);
 		/*
 		 * Copy the bytes instead of dereferencing a cast pointer:
 		 * h_addr_list[0] is a char pointer and carries no
 		 * alignment guarantee for struct in_addr.
 		 */
 		memcpy(ipaddr, he->h_addr_list[0], sizeof(struct in_addr));
 	}
 	return(0);
 }
 
 /*
  * fills the addr and mask fields in the instruction as appropriate from av.
  * Update length as appropriate.
  * The following formats are allowed:
  *	me	returns O_IP_*_ME
  *	1.2.3.4		single IP address
  *	1.2.3.4:5.6.7.8	address:mask
  *	1.2.3.4/24	address/mask
  *	1.2.3.4/26{1,6,5,4,23}	set of addresses in a subnet
  * We can have multiple comma-separated address/mask entries.
  *
  * "any" leaves the instruction with a zero length, and "table(t[,v])"
  * stores a table lookup.  The string av is modified temporarily while
  * it is parsed.  Malformed input terminates the program through errx().
  */
 static void
 fill_ip(ipfw_insn_ip *cmd, char *av)
 {
 	int len = 0;
 	uint32_t *d = ((ipfw_insn_u32 *)cmd)->d;
 
 	cmd->o.len &= ~F_LEN_MASK;	/* zero len */
 
 	if (_substrcmp(av, "any") == 0)
 		return;
 
 	if (_substrcmp(av, "me") == 0) {
 		cmd->o.len |= F_INSN_SIZE(ipfw_insn);
 		return;
 	}
 
 	if (strncmp(av, "table(", 6) == 0) {
 		char *p = strchr(av + 6, ',');
 
 		if (p)
 			*p++ = '\0';
 		cmd->o.opcode = O_IP_DST_LOOKUP;
 		cmd->o.arg1 = strtoul(av + 6, NULL, 0);
 		if (p) {
 			/* optional value after the comma */
 			cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32);
 			d[0] = strtoul(p, NULL, 0);
 		} else
 			cmd->o.len |= F_INSN_SIZE(ipfw_insn);
 		return;
 	}
 
     while (av) {
 	/*
 	 * After the address we can have '/' or ':' indicating a mask,
 	 * ',' indicating another address follows, '{' indicating a
 	 * set of addresses of unspecified size.
 	 */
 	char *t = NULL, *p = strpbrk(av, "/:,{");
 	int masklen;
 	char md, nd = '\0';
 
 	if (p) {
 		md = *p;
 		*p++ = '\0';
 		/* temporarily cut the string after the mask as well */
 		if ((t = strpbrk(p, ",{")) != NULL) {
 			nd = *t;
 			*t = '\0';
 		}
 	} else
 		md = '\0';
 
 	if (lookup_host(av, (struct in_addr *)&d[0]) != 0)
 		errx(EX_NOHOST, "hostname ``%s'' unknown", av);
 	switch (md) {
 	case ':':
 		if (!inet_aton(p, (struct in_addr *)&d[1]))
 			errx(EX_DATAERR, "bad netmask ``%s''", p);
 		break;
 	case '/':
 		masklen = atoi(p);
 		if (masklen == 0)
 			d[1] = htonl(0);	/* mask */
 		else if (masklen < 0 || masklen > 32)
 			/* a negative width would shift by more than 32 bits */
 			errx(EX_DATAERR, "bad width ``%s''", p);
 		else
 			d[1] = htonl(~0U << (32 - masklen));
 		break;
 	case '{':	/* no mask, assume /24 and put back the '{' */
 		d[1] = htonl(~0U << (32 - 24));
 		*(--p) = md;
 		break;
 
 	case ',':	/* single address plus continuation */
 		*(--p) = md;
 		/* FALLTHROUGH */
 	case 0:		/* initialization value */
 	default:
 		d[1] = htonl(~0U);	/* force /32 */
 		break;
 	}
 	d[0] &= d[1];		/* mask base address with mask */
 	if (t)
 		*t = nd;	/* restore the separator removed above */
 	/* find next separator */
 	if (p)
 		p = strpbrk(p, ",{");
 	if (p && *p == '{') {
 		/*
 		 * We have a set of addresses. They are stored as follows:
 		 *   arg1	is the set size (powers of 2, 2..256)
 		 *   addr	is the base address IN HOST FORMAT
 		 *   mask..	is an array of arg1 bits (rounded up to
 		 *		the next multiple of 32) with bits set
 		 *		for each host in the map.
 		 */
 		uint32_t *map = (uint32_t *)&cmd->mask;
 		int low, high;
 		int i = contigmask((uint8_t *)&(d[1]), 32);
 
 		if (len > 0)
 			errx(EX_DATAERR, "address set cannot be in a list");
 		if (i < 24 || i > 31)
 			errx(EX_DATAERR, "invalid set with mask %d\n", i);
 		cmd->o.arg1 = 1<<(32-i);	/* map length		*/
 		d[0] = ntohl(d[0]);		/* base addr in host format */
 		cmd->o.opcode = O_IP_DST_SET;	/* default */
 		cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32) + (cmd->o.arg1+31)/32;
 		for (i = 0; i < (cmd->o.arg1+31)/32 ; i++)
 			map[i] = 0;	/* clear map */
 
 		av = p + 1;
 		low = d[0] & 0xff;
 		high = low + cmd->o.arg1 - 1;
 		/*
 		 * Here, i stores the previous value when we specify a range
 		 * of addresses within a mask, e.g. 45-63. i = -1 means we
 		 * have no previous value.
 		 */
 		i = -1;	/* previous value in a range */
 		while (isdigit((unsigned char)*av)) {
 			char *s;
 			int a = strtol(av, &s, 0);
 
 			if (s == av) { /* no parameter */
 			    if (*av != '}')
 				errx(EX_DATAERR, "set not closed\n");
 			    if (i != -1)
 				errx(EX_DATAERR, "incomplete range %d-", i);
 			    break;
 			}
 			if (a < low || a > high)
 			    errx(EX_DATAERR, "addr %d out of range [%d-%d]\n",
 				a, low, high);
 			a -= low;
 			if (i == -1)	/* no previous in range */
 			    i = a;
 			else {		/* check that range is valid */
 			    if (i > a)
 				errx(EX_DATAERR, "invalid range %d-%d",
 					i+low, a+low);
 			    if (*s == '-')
 				errx(EX_DATAERR, "double '-' in range");
 			}
 			/* unsigned constant: bit 31 must not overflow an int */
 			for (; i <= a; i++)
 			    map[i/32] |= 1U<<(i & 31);
 			i = -1;
 			if (*s == '-')
 			    i = a;
 			else if (*s == '}')
 			    break;
 			else if (*s == '\0')
 			    break;	/* never step past the end of the string */
 			av = s+1;
 		}
 		return;
 	}
 	av = p;
 	if (av)			/* then *av must be a ',' */
 		av++;
 
 	/* Check this entry */
 	if (d[1] == 0) { /* "any", specified as x.x.x.x/0 */
 		/*
 		 * 'any' turns the entire list into a NOP.
 		 * 'not any' never matches, so it is removed from the
 		 * list unless it is the only item, in which case we
 		 * report an error.
 		 */
 		if (cmd->o.len & F_NOT) {	/* "not any" never matches */
 			if (av == NULL && len == 0) /* only this entry */
 				errx(EX_DATAERR, "not any never matches");
 		}
 		/* else do nothing and skip this entry */
 		return;
 	}
 	/* A single IP can be stored in an optimized format */
 	if (d[1] == IP_MASK_ALL && av == NULL && len == 0) {
 		cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32);
 		return;
 	}
 	len += 2;	/* two words... */
 	d += 2;
     } /* end while */
     if (len + 1 > F_LEN_MASK)
 	errx(EX_DATAERR, "address list too long");
     cmd->o.len |= len+1;
 }
 
 
 /*
  * Try to find ipv6 address by hostname.
  *
  *	host	 textual IPv6 address, or a host name resolved with
  *		 gethostbyname2(host, AF_INET6)
  *	ip6addr	 receives the address
  *
  * Returns 0 on success and -1 when the name cannot be resolved.
  */
 static int
 lookup_host6 (char *host, struct in6_addr *ip6addr)
 {
 	struct hostent *he;
 
 	/*
 	 * inet_pton() returns 1 only when the text was converted; both 0
 	 * (not a valid address) and -1 (error) leave *ip6addr unusable,
 	 * so either one falls back to a name lookup.
 	 */
 	if (inet_pton(AF_INET6, host, ip6addr) != 1) {
 		if ((he = gethostbyname2(host, AF_INET6)) == NULL)
 			return(-1);
 		memcpy(ip6addr, he->h_addr_list[0], sizeof( struct in6_addr));
 	}
 	return(0);
 }
 
 
 /*
  * n2mask sets n bits of the mask.
  *
  *	mask	receives the result; it is cleared first
  *	n	number of leading bits to set
  *
  * A value of n that is zero or negative yields an all-zero mask.
  * Values above 128 are clamped to 128 so that nothing is written
  * past the 16 bytes of *mask.
  */
 static void
 n2mask(struct in6_addr *mask, int n)
 {
 	/* minimask[k] has the k most significant bits of a byte set */
 	static const unsigned char minimask[9] =
 	    { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff };
 	unsigned char	*p;
 
 	memset(mask, 0, sizeof(struct in6_addr));
 	if (n > 128)
 		n = 128;
 	p = (unsigned char *) mask;
 	/* whole bytes first, then the remaining bits of the last byte */
 	for (; n > 0; p++, n -= 8) {
 		if (n >= 8)
 			*p = 0xff;
 		else
 			*p = minimask[n];
 	}
 }
  
 
 /*
  * fill the addr and mask fields in the instruction as appropriate from av.
  * Update length as appropriate.
  * The following formats are allowed:
  *     any     matches any IP6. Actually returns an empty instruction.
  *     me      returns O_IP6_*_ME
  *
  *     03f1::234:123:0342                single IP6 address
  *     03f1::234:123:0342/24            address/mask
  *     03f1::234:123:0342/24,03f1::234:123:0343/               List of address
  *
  * Set of address (as in ipv6) not supported because ipv6 address
  * are typically random past the initial prefix.
  *
  * The caller's string is not modified: parsing is done on a private
  * copy, which is released before returning.
  * Always returns 1; malformed input terminates the program through
  * errx().
  */
 static int
 fill_ip6(ipfw_insn_ip6 *cmd, char *av)
 {
 	int len = 0;
 	struct in6_addr *d = &(cmd->addr6);
 	char *copy;	/* start of the private copy of av, for free() */
 	/*
 	 * Needed for multiple address.
 	 * Note d[1] points to struct in6_addr mask6 of cmd
 	 */
 
 	cmd->o.len &= ~F_LEN_MASK;	/* zero len */
 
 	if (strcmp(av, "any") == 0)
 		return (1);
 
 
 	if (strcmp(av, "me") == 0) {	/* Set the data for "me" opt*/
 		cmd->o.len |= F_INSN_SIZE(ipfw_insn);
 		return (1);
 	}
 
 	if (strcmp(av, "me6") == 0) {	/* Set the data for "me" opt*/
 		cmd->o.len |= F_INSN_SIZE(ipfw_insn);
 		return (1);
 	}
 
 	/*
 	 * av is advanced while parsing and is NULL once the list has
 	 * been consumed, so the copy is remembered separately in order
 	 * to release it at the end.
 	 */
 	if ((copy = strdup(av)) == NULL)
 		err(EX_OSERR, "strdup");
 	av = copy;
 	while (av) {
 		/*
 		 * After the address we can have '/' indicating a mask,
 		 * or ',' indicating another address follows.
 		 */
 
 		char *p;
 		int masklen;
 		char md = '\0';
 
 		if ((p = strpbrk(av, "/,")) ) {
 			md = *p;	/* save the separator */
 			*p = '\0';	/* terminate address string */
 			p++;		/* and skip past it */
 		}
 		/* now p points to NULL, mask or next entry */
 
 		/* lookup stores address in *d as a side effect */
 		if (lookup_host6(av, d) != 0) {
 			/* XXX: failed. Free memory and go */
 			errx(EX_DATAERR, "bad address \"%s\"", av);
 		}
 		/* next, look at the mask, if any */
 		masklen = (md == '/') ? atoi(p) : 128;
 		if (masklen > 128 || masklen < 0)
 			errx(EX_DATAERR, "bad width \"%s\''", p);
 		else
 			n2mask(&d[1], masklen);
 
 		APPLY_MASK(d, &d[1])   /* mask base address with mask */
 
 		/* find next separator */
 
 		if (md == '/') {	/* find separator past the mask */
 			p = strpbrk(p, ",");
 			if (p != NULL)
 				p++;
 		}
 		av = p;
 
 		/* Check this entry */
 		if (masklen == 0) {
 			/*
 			 * 'any' turns the entire list into a NOP.
 			 * 'not any' never matches, so it is removed from the
 			 * list unless it is the only item, in which case we
 			 * report an error.
 			 */
 			if (cmd->o.len & F_NOT && av == NULL && len == 0)
 				errx(EX_DATAERR, "not any never matches");
 			continue;
 		}
 
 		/*
 		 * A single IP can be stored alone
 		 */
 		if (masklen == 128 && av == NULL && len == 0) {
 			len = F_INSN_SIZE(struct in6_addr);
 			break;
 		}
 
 		/* Update length and pointer to arguments */
 		len += F_INSN_SIZE(struct in6_addr)*2;
 		d += 2;
 	} /* end while */
 
 	/*
 	 * Total length of the command, remember that 1 is the size of
 	 * the base command.
 	 */
 	if (len + 1 > F_LEN_MASK)
 		errx(EX_DATAERR, "address list too long");
 	cmd->o.len |= len+1;
 	free(copy);
 	return (1);
 }
 
 /*
  * Fills the command for ipv6 flow-id filtering from a comma separated
  * list of flow numbers in av (av is split in place by strsep()).
  * Each 20 bit flow number is stored in one element of the u_int32_t
  * array cmd->d[]; o.arg1 receives the number of flow-ids stored.
  * Any malformed, empty or out of range element terminates the program
  * through errx().
  */
 void
 fill_flow6( ipfw_insn_u32 *cmd, char *av )
 {
 	u_int32_t type;	 /* Current flow number */
 	u_int16_t nflow = 0;    /* Current flow index */
 	char *s = av;
 	char *tok;		/* element being parsed */
 	char *end;		/* first character strtoul() did not consume */
 
 	cmd->d[0] = 0;	  /* Initializing the base number*/
 
 	while (s) {
 		tok = strsep( &s, ",") ;
 		type = strtoul(tok, &end, 0);
 		/* reject empty elements and trailing garbage alike */
 		if (end == tok || *end != '\0')
 			errx(EX_DATAERR, "invalid ipv6 flow number %s", tok);
 		if (type > 0xfffff)
 			errx(EX_DATAERR, "flow number out of range %s", tok);
 		/*
 		 * Plain assignment: only d[0] was cleared above, so OR-ing
 		 * into later slots would mix in whatever they held before.
 		 */
 		cmd->d[nflow] = type;
 		nflow++;
 	}
 	if( nflow > 0 ) {
 		cmd->o.opcode = O_FLOW6ID;
 		cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32) + nflow;
 		cmd->o.arg1 = nflow;
 	}
 	else {
 		/* only reachable when av was NULL on entry */
 		errx(EX_DATAERR, "invalid ipv6 flow number %s",
 		    av != NULL ? av : "(null)");
 	}
 }
 
 /*
  * Parse the IPv6 source address specification in av into cmd using
  * fill_ip6(), then choose the opcode from the resulting length:
  *   length 0                        "any": the instruction is left alone
  *   bare ipfw_insn                  "me"          -> O_IP6_SRC_ME
  *   ipfw_insn + one in6_addr        single address -> O_IP6_SRC
  *   anything else                   addr/mask list -> O_IP6_SRC_MASK
  * Returns cmd.
  */
 static ipfw_insn *
 add_srcip6(ipfw_insn *cmd, char *av)
 {
 
 	fill_ip6((ipfw_insn_ip6 *)cmd, av);
 	if (F_LEN(cmd) == 0) {				/* any */
 		/* empty instruction: must not be given a mask opcode */
 	} else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) {	/* "me" */
 		cmd->opcode = O_IP6_SRC_ME;
 	} else if (F_LEN(cmd) ==
 	    (F_INSN_SIZE(struct in6_addr) + F_INSN_SIZE(ipfw_insn))) {
 		/* single IP, no mask*/
 		cmd->opcode = O_IP6_SRC;
 	} else {					/* addr/mask opt */
 		cmd->opcode = O_IP6_SRC_MASK;
 	}
 	return cmd;
 }
 
 /*
  * Parse the IPv6 destination address specification in av into cmd using
  * fill_ip6(), then choose the opcode from the resulting length:
  *   length 0                        "any": the instruction is left alone
  *   bare ipfw_insn                  "me"          -> O_IP6_DST_ME
  *   ipfw_insn + one in6_addr        single address -> O_IP6_DST
  *   anything else                   addr/mask list -> O_IP6_DST_MASK
  * Returns cmd.
  */
 static ipfw_insn *
 add_dstip6(ipfw_insn *cmd, char *av)
 {
 
 	fill_ip6((ipfw_insn_ip6 *)cmd, av);
 	if (F_LEN(cmd) == 0) {				/* any */
 		/* empty instruction: must not be given a mask opcode */
 	} else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) {	/* "me" */
 		cmd->opcode = O_IP6_DST_ME;
 	} else if (F_LEN(cmd) ==
 	    (F_INSN_SIZE(struct in6_addr) + F_INSN_SIZE(ipfw_insn))) {
 		/* single IP, no mask*/
 		cmd->opcode = O_IP6_DST;
 	} else {					/* addr/mask opt */
 		cmd->opcode = O_IP6_DST_MASK;
 	}
 	return cmd;
 }
 
 
 /*
  * Helper that turns a comma separated list of flag names (p, split in
  * place) into a pair of 8 bit masks.  A name prefixed with '!' goes into
  * the "clear" mask, any other name into the "set" mask; names are
  * translated with match_token() against the flags table.
  * The result is stored in cmd->arg1 ("set" in the low byte, "clear" in
  * the high byte), cmd->opcode is set to opcode and the length to 1,
  * keeping only the F_NOT and F_OR bits of the old length.
  */
 static void
 fill_flags(ipfw_insn *cmd, enum ipfw_opcodes opcode,
 	struct _s_x *flags, char *p)
 {
 	uint8_t set = 0, clear = 0;
 	char *next;		/* start of the following name, if any */
 
 	for (; p && *p; p = next) {
 		uint8_t *target = &set;	/* mask this name contributes to */
 		int bit;
 
 		if (*p == '!') {
 			target = &clear;
 			p++;
 		}
 		next = strchr(p, ',');
 		if (next != NULL)
 			*next++ = '\0';
 		bit = match_token(flags, p);
 		if (bit <= 0)
 			errx(EX_DATAERR, "invalid flag %s", p);
 		*target |= (uint8_t)bit;
 	}
 	cmd->opcode = opcode;
 	cmd->len = (cmd->len & (F_NOT | F_OR)) | 1;
 	cmd->arg1 = set | (clear << 8);
 }
 
 
 /*
  * Delete the objects whose numbers are listed in av[1..]: nat instances
  * when do_nat is set, pipes or queues when do_pipe is set, firewall
  * rules (or whole sets, with the "set" keyword) otherwise.
  * Every number is attempted even after a failure; a warning is printed
  * for each failure and the process exits with a non-zero status at the
  * end if any deletion failed.
  */
 static void
 delete(int ac, char *av[])
 {
 	uint32_t rulenum;
 	struct dn_pipe p;
 	int i;
 	int rc;			/* result of the last do_cmd() call */
 	int exitval = EX_OK;
 	int do_set = 0;
 
 	memset(&p, 0, sizeof p);
 
 	av++; ac--;
 	NEED1("missing rule specification");
 	if (ac > 0 && _substrcmp(*av, "set") == 0) {
 		/* Do not allow using the following syntax:
 		 *	ipfw set N delete set M
 		 */
 		if (use_set)
 			errx(EX_DATAERR, "invalid syntax");
 		do_set = 1;	/* delete set */
 		ac--; av++;
 	}
 
 	/* Rule number */
 	while (ac && isdigit((unsigned char)**av)) {
 		i = atoi(*av); av++; ac--;
 		if (do_nat) {
 			/*
 			 * Keep the result in rc: storing it in exitval
 			 * would let a later success erase an earlier
 			 * failure.
 			 */
 			rc = do_cmd(IP_FW_NAT_DEL, &i, sizeof i);
 			if (rc) {
 				exitval = EX_UNAVAILABLE;
 				warn("rule %u not available", (unsigned)i);
 			}
  		} else if (do_pipe) {
 			if (do_pipe == 1)
 				p.pipe_nr = i;
 			else
 				p.fs.fs_nr = i;
 			rc = do_cmd(IP_DUMMYNET_DEL, &p, sizeof p);
 			if (rc) {
 				exitval = 1;
 				warn("rule %u: setsockopt(IP_DUMMYNET_DEL)",
 				    do_pipe == 1 ? p.pipe_nr : p.fs.fs_nr);
 			}
 		} else {
 			/*
 			 * The request word carries the number in the low 16
 			 * bits, a set number in bits 16..23 and a command
 			 * code from bit 24 up.
 			 */
 			if (use_set)
 				rulenum = (i & 0xffff) | (5 << 24) |
 				    ((use_set - 1) << 16);
 			else
 				rulenum = (i & 0xffff) | (do_set << 24);
 			rc = do_cmd(IP_FW_DEL, &rulenum, sizeof rulenum);
 			if (rc) {
 				exitval = EX_UNAVAILABLE;
 				warn("rule %u: setsockopt(IP_FW_DEL)",
 				    rulenum);
 			}
 		}
 	}
 	if (exitval != EX_OK)
 		exit(exitval);
 }
 
 
 /*
  * Fill the interface instruction from arg.  The name is not validated
  * because interfaces can be created dynamically, so checking at insert
  * time makes relatively little sense.
  *   "any"                    the instruction length is zeroed
  *   starts with a digit      parsed as an IPv4 address into cmd->p.ip
  *   anything else            copied into cmd->name; names containing
  *                            '*', '?' or '[' are flagged as shell
  *                            patterns through cmd->p.glob
  */
 static void
 fill_iface(ipfw_insn_if *cmd, char *arg)
 {
 	cmd->name[0] = '\0';
 	cmd->o.len |= F_INSN_SIZE(ipfw_insn_if);
 
 	if (strcmp(arg, "any") == 0) {
 		cmd->o.len = 0;		/* effectively ignore this command */
 		return;
 	}
 
 	if (isdigit(*arg)) {
 		/* numeric start: must be a dotted IPv4 address */
 		if (!inet_aton(arg, &cmd->p.ip))
 			errx(EX_DATAERR, "bad ip address ``%s''", arg);
 		return;
 	}
 
 	strlcpy(cmd->name, arg, sizeof(cmd->name));
 	cmd->p.glob = (strpbrk(arg, "*?[") != NULL);
 }
 
 /* 
  * Search for interface with name "ifn", and fill n accordingly:
  *
  * n->ip        ip address of interface "ifn"
  * n->if_name   copy of interface name "ifn" (always NUL terminated)
  *
  * The interface list is fetched with the NET_RT_IFLIST sysctl.  The
  * program exits through err()/errx() if the list cannot be read, the
  * interface is unknown, or no AF_INET address is found for it.
  */
 static void
 set_addr_dynamic(const char *ifn, struct cfg_nat *n)
 {
 	size_t needed;
 	int mib[6];
 	char *buf, *lim, *next;
 	struct if_msghdr *ifm;
 	struct ifa_msghdr *ifam;
 	struct sockaddr_dl *sdl;
 	struct sockaddr_in *sin;
 	int ifIndex;
 
 	mib[0] = CTL_NET;
 	mib[1] = PF_ROUTE;
 	mib[2] = 0;
 	mib[3] = AF_INET;	
 	mib[4] = NET_RT_IFLIST;
 	mib[5] = 0;		
 /*
  * Get interface data: the first sysctl() call only reports the size
  * needed, the second one fills the buffer.
  */
 	if (sysctl(mib, 6, NULL, &needed, NULL, 0) == -1)
 		err(1, "iflist-sysctl-estimate");
 	if ((buf = malloc(needed)) == NULL)
 		errx(1, "malloc failed");
 	if (sysctl(mib, 6, buf, &needed, NULL, 0) == -1)
 		err(1, "iflist-sysctl-get");
 	lim = buf + needed;
 /*
  * Loop through interfaces until one with
  * given name is found. This is done to
  * find correct interface index for routing
  * message processing.
  */
 	ifIndex	= 0;
 	next = buf;
 	while (next < lim) {
 		ifm = (struct if_msghdr *)next;
 		next += ifm->ifm_msglen;
 		if (ifm->ifm_version != RTM_VERSION) {
 			if (verbose)
 				warnx("routing message version %d "
 				    "not understood", ifm->ifm_version);
 			continue;
 		}
 		if (ifm->ifm_type == RTM_IFINFO) {
 			sdl = (struct sockaddr_dl *)(ifm + 1);
 			if (strlen(ifn) == sdl->sdl_nlen &&
 			    strncmp(ifn, sdl->sdl_data, sdl->sdl_nlen) == 0) {
 				ifIndex = ifm->ifm_index;
 				break;
 			}
 		}
 	}
 	if (!ifIndex)
 		errx(1, "unknown interface name %s", ifn);
 /*
  * Get interface address: "next" now points just past the matching
  * RTM_IFINFO message, so keep reading messages from there and stop at
  * the first one that is not an RTM_NEWADDR.
  */
 	sin = NULL;
 	while (next < lim) {
 		ifam = (struct ifa_msghdr *)next;
 		next += ifam->ifam_msglen;
 		if (ifam->ifam_version != RTM_VERSION) {
 			if (verbose)
 				warnx("routing message version %d "
 				    "not understood", ifam->ifam_version);
 			continue;
 		}
 		if (ifam->ifam_type != RTM_NEWADDR)
 			break;
 		if (ifam->ifam_addrs & RTA_IFA) {
 			int i;
 			char *cp = (char *)(ifam + 1);
 
 			/* skip the sockaddrs stored ahead of RTA_IFA */
 			for (i = 1; i < RTA_IFA; i <<= 1) {
 				if (ifam->ifam_addrs & i)
 					cp += SA_SIZE((struct sockaddr *)cp);
 			}
 			if (((struct sockaddr *)cp)->sa_family == AF_INET) {
 				sin = (struct sockaddr_in *)cp;
 				break;
 			}
 		}
 	}
 	if (sin == NULL)
 		errx(1, "%s: cannot get interface address", ifn);
 
 	n->ip = sin->sin_addr;
 	/*
 	 * strncpy() does not terminate the destination when ifn is
 	 * IF_NAMESIZE characters or longer, so terminate it explicitly.
 	 */
 	strncpy(n->if_name, ifn, IF_NAMESIZE);
 	n->if_name[IF_NAMESIZE - 1] = '\0';
 
 	free(buf);
 }
 
 /* 
  * XXX - The following functions, macros and definitions come from natd.c:
  * it would be better to move them outside natd.c, in a file 
  * (redirect_support.[ch]?) shared by ipfw and natd, but for now i can live 
  * with it.
  */
 
 /*
  * Definition of a port range, and macros to deal with values.
  * FORMAT:  HI 16-bits == first port in range, 0 == all ports.
  *          LO 16-bits == number of ports in range
  * NOTES:   - Port values are not stored in network byte order.
  */
 
 #define port_range unsigned long
 
 #define GETLOPORT(x)     ((x) >> 0x10)
 #define GETNUMPORTS(x)   ((x) & 0x0000ffff)
 #define GETHIPORT(x)     (GETLOPORT((x)) + GETNUMPORTS((x)))
 
 /*
  * Set y to be the low-port value in port_range variable x.
  * y is converted to port_range and truncated to 16 bits before the
  * shift, so a signed int argument of 0x8000 or more cannot overflow.
  */
 #define SETLOPORT(x,y)   ((x) = ((x) & 0x0000ffff) | \
 	(((port_range)(y) & 0x0000ffff) << 0x10))
 
 /*
  * Set y to be the number of ports in port_range variable x.
  * y is truncated to 16 bits so that it cannot spill into the port field.
  */
 #define SETNUMPORTS(x,y) ((x) = ((x) & 0xffff0000) | \
 	((port_range)(y) & 0x0000ffff))
 
 /*
  * Convert str into an IPv4 address stored in *addr.  A dotted address
  * is converted directly; anything else is looked up as a host name and
  * the program exits if the name is unknown.
  */
 static void 
 StrToAddr (const char* str, struct in_addr* addr)
 {
 	struct hostent* hp;
 
 	if (!inet_aton (str, addr)) {
 		hp = gethostbyname (str);
 		if (hp == NULL)
 			errx (1, "unknown host %s", str);
 
 		memcpy (addr, hp->h_addr, sizeof (struct in_addr));
 	}
 }
 
 /*
  * Parse str into *portRange.  str may be a service name known for
  * proto, a single decimal port, or a "low-high" range.
  * Always returns 0; invalid input terminates the program through
  * errx().  *portRange is fully overwritten, so callers may pass an
  * uninitialised variable.
  */
 static int 
 StrToPortRange (const char* str, const char* proto, port_range *portRange)
 {
 	char*           sep;
 	struct servent*	sp;
 	char*		end;
 	long		port;
 	u_short         loPort;
 	u_short         hiPort;
 
 	/* The SET* macros read-modify-write, so start from a known value. */
 	*portRange = 0;
 	
 	/* First see if this is a service, return corresponding port if so. */
 	sp = getservbyname (str,proto);
 	if (sp) {
 		/* cast first so that the shift inside SETLOPORT is unsigned */
 	        SETLOPORT(*portRange, (port_range)ntohs(sp->s_port));
 		SETNUMPORTS(*portRange, 1);
 		return 0;
 	}
 	        
 	/* Not a service, see if it's a single port or port range. */
 	sep = strchr (str, '-');
 	if (sep == NULL) {
 		port = strtol(str, &end, 10);
 		/* No digits, or trailing garbage: not a port number. */
 		if (end == str || *end != '\0')
 			errx (EX_DATAERR, "%s/%s: unknown service", str, proto);
 		if (port < 0 || port > 65535)
 			errx (EX_DATAERR, "%s/%s: port out of range", str,
 			    proto);
 
 		/* Single port. */
 		SETLOPORT(*portRange, (port_range)port);
 		SETNUMPORTS(*portRange, 1);
 		return 0;
 	}
 
 	/*
 	 * Port range, get the values and sanity check.  Both numbers must
 	 * be converted, otherwise loPort/hiPort would be used
 	 * uninitialised.
 	 */
 	if (sscanf (str, "%hu-%hu", &loPort, &hiPort) != 2)
 	        errx (EX_DATAERR, "invalid port range %s", str);
 
 	/* The count is a 16 bit field: 0-65535 (65536 ports) cannot fit. */
 	if (loPort > hiPort || hiPort - loPort + 1 > 0xffff)
 	        errx (EX_DATAERR, "invalid port range %s", str);
 
 	SETLOPORT(*portRange, (port_range)loPort);
 	SETNUMPORTS(*portRange, hiPort - loPort + 1);
 
 	return 0;
 }
 
 /*
  * Map the protocol name str to its IPPROTO_* value.  Only "tcp" and
  * "udp" are accepted; any other name terminates the program.
  */
 static int 
 StrToProto (const char* str)
 {
 	int is_tcp = (strcmp (str, "tcp") == 0);
 
 	if (!is_tcp && strcmp (str, "udp") != 0)
 		errx (EX_DATAERR, "unknown protocol %s. Expected tcp or udp", str);
 
 	return (is_tcp ? IPPROTO_TCP : IPPROTO_UDP);
 }
 
 /*
  * Parse an "address:port" or "address:low-high" specification.
  * The ':' in str is overwritten with a NUL so that the two halves can
  * be handed to StrToAddr() and StrToPortRange(); the program exits if
  * there is no ':'.  Returns whatever StrToPortRange() returns.
  */
 static int 
 StrToAddrAndPortRange (const char* str, struct in_addr* addr, char* proto, 
 		       port_range *portRange)
 {
 	char*	colon = strchr (str, ':');
 
 	if (colon == NULL)
 		errx (EX_DATAERR, "%s is missing port number", str);
 
 	/* split in place: str becomes the address, colon the port part */
 	*colon++ = '\0';
 
 	StrToAddr (str, addr);
 	return StrToPortRange (colon, proto, portRange);
 }
 
 /* End of stuff taken from natd.c. */
 
 /*
  * Consume one argument: advance the caller's argument vector (*_av),
  * decrement its count (*_ac) and refresh the local av/ac copies.
  */
 #define INC_ARGCV() do {        \
 	av = ++(*_av);          \
 	ac = --(*_ac);          \
 } while(0)
 
 /* 
  * The next 3 functions add support for the addr, port and proto redirect and 
  * their logic is loosely based on SetupAddressRedirect(), SetupPortRedirect() 
  * and SetupProtoRedirect() from natd.c.
  *
  * Every setup_* function fills at least one redirect entry 
  * (struct cfg_redir) and zero or more server pool entry (struct cfg_spool) 
  * in buf.
  * 
  * The format of data in buf is:
  * 
  *
  *     cfg_nat    cfg_redir    cfg_spool    ......  cfg_spool 
  *
  *    -------------------------------------        ------------
  *   |          | .....X ... |          |         |           |  .....
  *    ------------------------------------- ...... ------------
  *                     ^          
  *                spool_cnt       n=0       ......   n=(X-1)
  *
  * len points to the amount of available space in buf
  * space counts the memory consumed by every function
  *
  * XXX - Every function gets the whole remaining argv, so when parsing
  * optional parameters it has to check that the next argument really
  * belongs to the current redir entry and is not another token.
  * Only redir_port and redir_proto are affected by this.
  */
 
 /*
  * Parse a "redirect_addr" entry from the argument vector into spool_buf:
  *
  *     localIP publicIP
  *     localIP[,localIP[,...]] publicIP        (LSNAT server pool)
  *
  * One struct cfg_redir is written at the start of spool_buf, followed by
  * one struct cfg_spool per pool address.  len is the space available in
  * spool_buf; *_ac / *_av are advanced past the consumed arguments.
  * Returns the number of bytes used.  The program exits if an argument
  * is missing or the data does not fit.
  */
 static int
 setup_redir_addr(char *spool_buf, int len,
 		 int *_ac, char ***_av) 
 {
 	char **av, *sep; /* Token separator. */
 	/* Temporary buffer used to hold server pool ip's. */
 	char tmp_spool_buf[NAT_BUF_LEN]; 
 	int ac, space;
 	struct cfg_redir *r;	
 	struct cfg_spool *tmp;		
 
 	av = *_av;
 	ac = *_ac;
 	space = 0;
 	if (len >= SOF_REDIR) {
 		r = (struct cfg_redir *)spool_buf;
 		/* Skip cfg_redir at beginning of buf. */
 		spool_buf = &spool_buf[SOF_REDIR];
 		space = SOF_REDIR;
 		len -= SOF_REDIR;
 	} else 
 		goto nospace; 
 	r->mode = REDIR_ADDR;
 	/* Extract local address. */
 	if (ac == 0) 
 		errx(EX_DATAERR, "redirect_addr: missing local address");
 	sep = strchr(*av, ',');
 	if (sep) {		/* LSNAT redirection syntax. */
 		r->laddr.s_addr = INADDR_NONE;
 		/*
 		 * Preserve av, copy spool servers to tmp_spool_buf.
 		 * The argument length is user controlled, so the copy is
 		 * bounded by the size of the temporary buffer.
 		 */
 		if (strlcpy(tmp_spool_buf, *av, sizeof(tmp_spool_buf)) >=
 		    sizeof(tmp_spool_buf))
 			errx(EX_DATAERR,
 			    "redirect_addr: server pool list too long");
 	} else 
 		StrToAddr(*av, &r->laddr);		
 	INC_ARGCV();
 
 	/* Extract public address. */
 	if (ac == 0) 
 		errx(EX_DATAERR, "redirect_addr: missing public address");
 	StrToAddr(*av, &r->paddr);
 	INC_ARGCV();
 
 	/* Setup LSNAT server pool. */
 	if (sep) {
 		sep = strtok(tmp_spool_buf, ",");		
 		while (sep != NULL) {
 			tmp = (struct cfg_spool *)spool_buf;		
 			if (len < SOF_SPOOL)
 				goto nospace;
 			len -= SOF_SPOOL;
 			space += SOF_SPOOL;			
 			StrToAddr(sep, &tmp->addr);
 			tmp->port = ~0;
 			r->spool_cnt++;
 			/* Point to the next possible cfg_spool. */
 			spool_buf = &spool_buf[SOF_SPOOL];
 			sep = strtok(NULL, ",");
 		}
 	}
 	return(space);
 nospace:
 	errx(EX_DATAERR, "redirect_addr: buf is too small\n");
 }
 
 /*
  * Parse a "redirect_port" entry from the argument vector into spool_buf:
  *
  *     proto localIP:port[-port] [publicIP:]port[-port]
  *         [remoteIP[:port[-port]]]
  *     proto localIP:port[,localIP:port[,...]] [publicIP:]port
  *         [remoteIP[:port]]                    (LSNAT server pool)
  *
  * One struct cfg_redir is written at the start of spool_buf, followed by
  * one struct cfg_spool per pool entry.  len is the space available in
  * spool_buf; *_ac / *_av are advanced past the consumed arguments.
  * Returns the number of bytes used.  The program exits on any missing
  * or inconsistent argument, or if the data does not fit.
  */
 static int
 setup_redir_port(char *spool_buf, int len,
 		 int *_ac, char ***_av) 
 {
 	char **av, *sep, *protoName;
 	char tmp_spool_buf[NAT_BUF_LEN];
 	int ac, space, lsnat;
 	struct cfg_redir *r;
 	struct cfg_spool *tmp;
 	u_short numLocalPorts;
 	port_range portRange;	
 
 	av = *_av;
 	ac = *_ac;
 	space = 0;
 	lsnat = 0;
 	numLocalPorts = 0;	
 
 	if (len >= SOF_REDIR) {
 		r = (struct cfg_redir *)spool_buf;
 		/* Skip cfg_redir at beginning of buf. */
 		spool_buf = &spool_buf[SOF_REDIR];
 		space = SOF_REDIR;
 		len -= SOF_REDIR;
 	} else 
 		goto nospace; 
 	r->mode = REDIR_PORT;
 	/*
 	 * Extract protocol.
 	 */
 	if (ac == 0)
 		errx (EX_DATAERR, "redirect_port: missing protocol");
 	r->proto = StrToProto(*av);
 	protoName = *av;	
 	INC_ARGCV();
 
 	/*
 	 * Extract local address.
 	 */
 	if (ac == 0)
 		errx (EX_DATAERR, "redirect_port: missing local address");
 
 	sep = strchr(*av, ',');
 	/* LSNAT redirection syntax. */
 	if (sep) {
 		r->laddr.s_addr = INADDR_NONE;
 		r->lport = ~0;
 		numLocalPorts = 1;
 		/*
 		 * Preserve av, copy spool servers to tmp_spool_buf.
 		 * The argument length is user controlled, so the copy is
 		 * bounded by the size of the temporary buffer.
 		 */
 		if (strlcpy(tmp_spool_buf, *av, sizeof(tmp_spool_buf)) >=
 		    sizeof(tmp_spool_buf))
 			errx(EX_DATAERR,
 			    "redirect_port: server pool list too long");
 		lsnat = 1;
 	} else {
 		if (StrToAddrAndPortRange (*av, &r->laddr, protoName, 
 		    &portRange) != 0)
 			errx(EX_DATAERR, "redirect_port: "
 			    "invalid local port range");
 
 		r->lport = GETLOPORT(portRange);
 		numLocalPorts = GETNUMPORTS(portRange);
 	}
 	INC_ARGCV();	
 
 	/*
 	 * Extract public port and optionally address.
 	 */
 	if (ac == 0)
 		errx (EX_DATAERR, "redirect_port: missing public port");
 
 	sep = strchr (*av, ':');
 	if (sep) {
 	        if (StrToAddrAndPortRange (*av, &r->paddr, protoName, 
 		    &portRange) != 0)
 		        errx(EX_DATAERR, "redirect_port: " 
 			    "invalid public port range");
 	} else {
 		r->paddr.s_addr = INADDR_ANY;
 		if (StrToPortRange (*av, protoName, &portRange) != 0)
 		        errx(EX_DATAERR, "redirect_port: "
 			    "invalid public port range");
 	}
 
 	r->pport = GETLOPORT(portRange);
 	r->pport_cnt = GETNUMPORTS(portRange);
 	INC_ARGCV();
 
 	/*
 	 * Extract remote address and optionally port.
 	 */	
 	/* 
 	 * NB: isalpha(**av) => we've to check that next parameter is really an
 	 * option for this redirect entry, else stop here processing arg[cv].
 	 */
 	if (ac != 0 && !isalpha((unsigned char)**av)) { 
 		sep = strchr (*av, ':');
 		if (sep) {
 		        if (StrToAddrAndPortRange (*av, &r->raddr, protoName, 
 			    &portRange) != 0)
 				errx(EX_DATAERR, "redirect_port: "
 				    "invalid remote port range");
 		} else {
 		        SETLOPORT(portRange, 0);
 			SETNUMPORTS(portRange, 1);
 			StrToAddr (*av, &r->raddr);
 		}
 		INC_ARGCV();
 	} else {
 		SETLOPORT(portRange, 0);
 		SETNUMPORTS(portRange, 1);
 		r->raddr.s_addr = INADDR_ANY;
 	}
 	r->rport = GETLOPORT(portRange);
 	r->rport_cnt = GETNUMPORTS(portRange);
 
 	/* 
 	 * Make sure port ranges match up, then add the redirect ports.
 	 */
 	if (numLocalPorts != r->pport_cnt)
 	        errx(EX_DATAERR, "redirect_port: "
 		    "port ranges must be equal in size");
 
 	/* Remote port range is allowed to be '0' which means all ports. */
 	if (r->rport_cnt != numLocalPorts && 
 	    (r->rport_cnt != 1 || r->rport != 0))
 	        errx(EX_DATAERR, "redirect_port: remote port must "
 		    "be 0 or equal to local port range in size");
 
 	/*
 	 * Setup LSNAT server pool.
 	 */
 	if (lsnat) {
 		sep = strtok(tmp_spool_buf, ",");
 		while (sep != NULL) {
 			tmp = (struct cfg_spool *)spool_buf;
 			if (len < SOF_SPOOL)
 				goto nospace;
 			len -= SOF_SPOOL;
 			space += SOF_SPOOL;
 			if (StrToAddrAndPortRange(sep, &tmp->addr, protoName, 
 			    &portRange) != 0)
 				errx(EX_DATAERR, "redirect_port: "
 				    "invalid local port range");
 			if (GETNUMPORTS(portRange) != 1)
 				errx(EX_DATAERR, "redirect_port: local port "
 				    "must be single in this context");
 			tmp->port = GETLOPORT(portRange);
 			r->spool_cnt++;	
 			/* Point to the next possible cfg_spool. */
 			spool_buf = &spool_buf[SOF_SPOOL];
 			sep = strtok(NULL, ",");
 		}
 	}
 	return (space);
 nospace:
 	errx(EX_DATAERR, "redirect_port: buf is too small\n");
 }
 
 /*
  * Parse a "redirect_proto" entry from the argument vector into spool_buf:
  *
  *     proto localIP [publicIP [remoteIP]]
  *
  * A single struct cfg_redir is written at the start of spool_buf; len is
  * the space available there and *_ac / *_av are advanced past the
  * consumed arguments.  Returns the number of bytes used.  The program
  * exits on a missing or unknown argument, or if the entry does not fit.
  */
 static int
 setup_redir_proto(char *spool_buf, int len,
 		 int *_ac, char ***_av) 
 {
 	char **av;
 	int ac, space;
 	struct protoent *protoent;
 	struct cfg_redir *r;
 
 	av = *_av;
 	ac = *_ac;
 	if (len < SOF_REDIR)
 		goto nospace;
 	r = (struct cfg_redir *)spool_buf;
 	space = SOF_REDIR;
 	r->mode = REDIR_PROTO;
 
 	/* Protocol: mandatory, looked up by name. */
 	if (ac == 0)
 		errx(EX_DATAERR, "redirect_proto: missing protocol");
 	protoent = getprotobyname(*av);
 	if (protoent == NULL)
 		errx(EX_DATAERR, "redirect_proto: unknown protocol %s", *av);
 	r->proto = protoent->p_proto;
 	INC_ARGCV();
 
 	/* Local address: mandatory. */
 	if (ac == 0)
 		errx(EX_DATAERR, "redirect_proto: missing local address");
 	StrToAddr(*av, &r->laddr);
 	INC_ARGCV();
 
 	/* Nothing left: both optional addresses are wildcards. */
 	if (ac == 0) {
 		r->paddr.s_addr = INADDR_ANY;
 		r->raddr.s_addr = INADDR_ANY;
 		return (space);
 	}
 
 	/*
 	 * An argument starting with a letter is taken to be the next
 	 * token rather than an address for this entry (same test as in
 	 * setup_redir_port()).
 	 */
 	if (isalpha(**av))
 		return (space);
 
 	/* Optional public address. */
 	StrToAddr(*av, &r->paddr);
 	INC_ARGCV();
 
 	/* Optional remote address. */
 	if (ac != 0 && !isalpha(**av)) {
 		StrToAddr(*av, &r->raddr);
 		INC_ARGCV();
 	}
 	return (space);
 nospace:
 	errx(EX_DATAERR, "redirect_proto: buf is too small\n");
 }
 
 /*
  * Forward declaration: config_nat() calls show_nat() to print the
  * resulting configuration after every modification.
  */
 static void
 show_nat(int ac, char **av);
 
 /*
  * Print the nat configuration serialized in buf as an
  * "ipfw nat N config ..." command line.  buf starts with a struct
  * cfg_nat, followed by redir_cnt struct cfg_redir records, each of them
  * followed by its own spool_cnt struct cfg_spool records.
  *
  * The mode bits that get printed are cleared from n->mode as a side
  * effect; bits this function does not know about are left untouched
  * and not printed.
  */
 static void
 print_nat_config(char *buf) {
 	struct cfg_nat *n;
 	int i, cnt, off;
 	struct cfg_redir *t;
 	struct cfg_spool *s;
 	struct protoent *p;
 
 	n = (struct cfg_nat *)buf;
 	off  = sizeof(*n);
 	printf("ipfw nat %u config", n->id);
 	if (strlen(n->if_name) != 0)
 		printf(" if %s", n->if_name);
 	else if (n->ip.s_addr != 0)
 		printf(" ip %s", inet_ntoa(n->ip));
 
 	/*
 	 * One pass over the known flags, in a fixed order.  Looping until
 	 * n->mode becomes zero would never terminate if an unknown bit
 	 * were set.
 	 */
 	if (n->mode & PKT_ALIAS_LOG) {
 		printf(" log");
 		n->mode &= ~PKT_ALIAS_LOG;
 	}
 	if (n->mode & PKT_ALIAS_DENY_INCOMING) {
 		printf(" deny_in");
 		n->mode &= ~PKT_ALIAS_DENY_INCOMING;
 	}
 	if (n->mode & PKT_ALIAS_SAME_PORTS) {
 		printf(" same_ports");
 		n->mode &= ~PKT_ALIAS_SAME_PORTS;
 	}
 	if (n->mode & PKT_ALIAS_UNREGISTERED_ONLY) {
 		printf(" unreg_only");
 		n->mode &= ~PKT_ALIAS_UNREGISTERED_ONLY;
 	}
 	if (n->mode & PKT_ALIAS_RESET_ON_ADDR_CHANGE) {
 		printf(" reset");
 		n->mode &= ~PKT_ALIAS_RESET_ON_ADDR_CHANGE;
 	}
 	if (n->mode & PKT_ALIAS_REVERSE) {
 		printf(" reverse");
 		n->mode &= ~PKT_ALIAS_REVERSE;
 	}
 	if (n->mode & PKT_ALIAS_PROXY_ONLY) {
 		printf(" proxy_only");
 		n->mode &= ~PKT_ALIAS_PROXY_ONLY;
 	}
 
 	/* Print all the redirect's data configuration. */
 	for (cnt = 0; cnt < n->redir_cnt; cnt++) {
 		t = (struct cfg_redir *)&buf[off];
 		off += SOF_REDIR;
 		switch (t->mode) {
 		case REDIR_ADDR:
 			printf(" redirect_addr");
 			if (t->spool_cnt == 0)
 				printf(" %s", inet_ntoa(t->laddr));
 			else
 				for (i = 0; i < t->spool_cnt; i++) {
 					s = (struct cfg_spool *)&buf[off];
 					if (i)
 						printf(",");
 					else 
 						printf(" ");
 					printf("%s", inet_ntoa(s->addr));
 					off += SOF_SPOOL;
 				}
 			printf(" %s", inet_ntoa(t->paddr));
 			break;
 		case REDIR_PORT:
 			/* fall back to the number if the name is unknown */
 			p = getprotobynumber(t->proto);
 			if (p != NULL)
 				printf(" redirect_port %s ", p->p_name);
 			else
 				printf(" redirect_port %u ",
 				    (unsigned)t->proto);
 			if (!t->spool_cnt) {
 				printf("%s:%u", inet_ntoa(t->laddr), t->lport);
 				if (t->pport_cnt > 1)
 					printf("-%u", t->lport + 
 					    t->pport_cnt - 1);
 			} else
 				for (i=0; i < t->spool_cnt; i++) {
 					s = (struct cfg_spool *)&buf[off];
 					if (i)
 						printf(",");
 					printf("%s:%u", inet_ntoa(s->addr), 
 					    s->port);
 					off += SOF_SPOOL;
 				}
 
 			printf(" ");
 			if (t->paddr.s_addr)
 				printf("%s:", inet_ntoa(t->paddr)); 
 			printf("%u", t->pport);
 			if (!t->spool_cnt && t->pport_cnt > 1)
 				printf("-%u", t->pport + t->pport_cnt - 1);
 
 			if (t->raddr.s_addr) {
 				printf(" %s", inet_ntoa(t->raddr));
 				if (t->rport) {
 					printf(":%u", t->rport);
 					if (!t->spool_cnt && t->rport_cnt > 1)
 						printf("-%u", t->rport + 
 						    t->rport_cnt - 1);
 				}
 			}
 			break;
 		case REDIR_PROTO:
 			/* fall back to the number if the name is unknown */
 			p = getprotobynumber(t->proto);
 			if (p != NULL)
 				printf(" redirect_proto %s %s", p->p_name, 
 				    inet_ntoa(t->laddr));
 			else
 				printf(" redirect_proto %u %s",
 				    (unsigned)t->proto, inet_ntoa(t->laddr));
 			if (t->paddr.s_addr != 0) {
 				printf(" %s", inet_ntoa(t->paddr));
 				if (t->raddr.s_addr)
 					printf(" %s", inet_ntoa(t->raddr));
 			}
 			break;
 		default:
 			errx(EX_DATAERR, "unknown redir mode");
 			break;
 		}
 	}
 	printf("\n");
 }
 
 /*
  * Handle "ipfw nat N config ...": parse the options in av into a
  * serialized configuration (a struct cfg_nat followed by the redirect
  * entries written by the setup_redir_*() functions), send it to the
  * kernel with IP_FW_NAT_CFG and then show the resulting configuration.
  * The program exits on any syntax error or if the kernel rejects the
  * request.
  */
 static void
 config_nat(int ac, char **av)
 {
 	struct cfg_nat *n;              /* Nat instance configuration. */
 	int i, len, off, tok;
 	char *id, buf[NAT_BUF_LEN]; 	/* Buffer for serialized data. */
 	
 	/* Offset in buf: save space for n at the beginning. */
 	off = sizeof(*n);
 	/*
 	 * Space left for redirect entries.  The bytes reserved for the
 	 * cfg_nat header must not be counted, or the setup_redir_*()
 	 * functions could write past the end of buf.
 	 */
 	len = NAT_BUF_LEN - off;
 	memset(buf, 0, sizeof(buf));
 	n = (struct cfg_nat *)buf;
 
 	av++; ac--;
 	/* Nat id. */
 	if (ac && isdigit((unsigned char)**av)) {
 		id = *av;
 		i = atoi(*av); 
 		ac--; av++;		
 		n->id = i;
 	} else 
 		errx(EX_DATAERR, "missing nat id");
 	if (ac == 0) 
 		errx(EX_DATAERR, "missing option");
 
 	while (ac > 0) {
 		tok = match_token(nat_params, *av);
 		ac--; av++;
 		switch (tok) {
 		case TOK_IP:
 			if (ac == 0) 
 				errx(EX_DATAERR, "missing option");
 			if (!inet_aton(av[0], &(n->ip)))
 				errx(EX_DATAERR, "bad ip address ``%s''", 
 				    av[0]);
 			ac--; av++;
 			break;	    
 		case TOK_IF:
 			if (ac == 0) 
 				errx(EX_DATAERR, "missing option");
 			set_addr_dynamic(av[0], n);
 			ac--; av++;
 			break;
 		case TOK_ALOG:
 			n->mode |= PKT_ALIAS_LOG;
 			break;
 		case TOK_DENY_INC:
 			n->mode |= PKT_ALIAS_DENY_INCOMING;
 			break;
 		case TOK_SAME_PORTS:
 			n->mode |= PKT_ALIAS_SAME_PORTS;
 			break;
 		case TOK_UNREG_ONLY:
 			n->mode |= PKT_ALIAS_UNREGISTERED_ONLY;
 			break;
 		case TOK_RESET_ADDR:
 			n->mode |= PKT_ALIAS_RESET_ON_ADDR_CHANGE;
 			break;
 		case TOK_ALIAS_REV:
 			n->mode |= PKT_ALIAS_REVERSE;
 			break;
 		case TOK_PROXY_ONLY:
 			n->mode |= PKT_ALIAS_PROXY_ONLY;
 			break;
 			/* 
 			 * All the setup_redir_* functions work directly in the final 
 			 * buffer, see above for details.
 			 */
 		case TOK_REDIR_ADDR:
 		case TOK_REDIR_PORT:
 		case TOK_REDIR_PROTO:
 			switch (tok) {
 			case TOK_REDIR_ADDR:
 				i = setup_redir_addr(&buf[off], len, &ac, &av);
 				break;			  
 			case TOK_REDIR_PORT:
 				i = setup_redir_port(&buf[off], len, &ac, &av);
 				break;			  
 			case TOK_REDIR_PROTO:
 				i = setup_redir_proto(&buf[off], len, &ac, &av);
 				break;
 			}
 			/* i is the number of bytes the entry consumed */
 			n->redir_cnt++;
 			off += i;
 			len -= i;
 			break;
 		default:
 			errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]);
 		}
 	}
 
 	/* Only the first "off" bytes of buf carry data. */
 	i = do_cmd(IP_FW_NAT_CFG, buf, off);
 	if (i)
 		err(1, "setsockopt(%s)", "IP_FW_NAT_CFG");
 
 	/* After every modification, we show the resultant rule. */
 	int _ac = 3;
 	char *_av[] = {"show", "config", id};
 	show_nat(_ac, _av);
 }
 
 static void
 config_pipe(int ac, char **av)
 {
 	struct dn_pipe p;
 	int i;
 	char *end;
 	void *par = NULL;
 
 	memset(&p, 0, sizeof p);
 
 	av++; ac--;
 	/* Pipe number */
 	if (ac && isdigit(**av)) {
 		i = atoi(*av); av++; ac--;
 		if (do_pipe == 1)
 			p.pipe_nr = i;
 		else
 			p.fs.fs_nr = i;
 	}
 	while (ac > 0) {
 		double d;
 		int tok = match_token(dummynet_params, *av);
 		ac--; av++;
 
 		switch(tok) {
 		case TOK_NOERROR:
 			p.fs.flags_fs |= DN_NOERROR;
 			break;
 
 		case TOK_PLR:
 			NEED1("plr needs argument 0..1\n");
 			d = strtod(av[0], NULL);
 			if (d > 1)
 				d = 1;
 			else if (d < 0)
 				d = 0;
 			p.fs.plr = (int)(d*0x7fffffff);
 			ac--; av++;
 			break;
 
 		case TOK_QUEUE:
 			NEED1("queue needs queue size\n");
 			end = NULL;
 			p.fs.qsize = strtoul(av[0], &end, 0);
 			if (*end == 'K' || *end == 'k') {
 				p.fs.flags_fs |= DN_QSIZE_IS_BYTES;
 				p.fs.qsize *= 1024;
 			} else if (*end == 'B' ||
 			    _substrcmp2(end, "by", "bytes") == 0) {
 				p.fs.flags_fs |= DN_QSIZE_IS_BYTES;
 			}
 			ac--; av++;
 			break;
 
 		case TOK_BUCKETS:
 			NEED1("buckets needs argument\n");
 			p.fs.rq_size = strtoul(av[0], NULL, 0);
 			ac--; av++;
 			break;
 
 		case TOK_MASK:
 			NEED1("mask needs mask specifier\n");
 			/*
 			 * per-flow queue, mask is dst_ip, dst_port,
 			 * src_ip, src_port, proto measured in bits
 			 */
 			par = NULL;
 
 			bzero(&p.fs.flow_mask, sizeof(p.fs.flow_mask));
 			end = NULL;
 
 			while (ac >= 1) {
 			    uint32_t *p32 = NULL;
 			    uint16_t *p16 = NULL;
 			    uint32_t *p20 = NULL;
 			    struct in6_addr *pa6 = NULL;
 			    uint32_t a;
 
 			    tok = match_token(dummynet_params, *av);
 			    ac--; av++;
 			    switch(tok) {
 			    case TOK_ALL:
 				    /*
 				     * special case, all bits significant
 				     */
 				    p.fs.flow_mask.dst_ip = ~0;
 				    p.fs.flow_mask.src_ip = ~0;
 				    p.fs.flow_mask.dst_port = ~0;
 				    p.fs.flow_mask.src_port = ~0;
 				    p.fs.flow_mask.proto = ~0;
 				    n2mask(&(p.fs.flow_mask.dst_ip6), 128);
 				    n2mask(&(p.fs.flow_mask.src_ip6), 128);
 				    p.fs.flow_mask.flow_id6 = ~0;
 				    p.fs.flags_fs |= DN_HAVE_FLOW_MASK;
 				    goto end_mask;
 
 			    case TOK_DSTIP:
 				    p32 = &p.fs.flow_mask.dst_ip;
 				    break;
 
 			    case TOK_SRCIP:
 				    p32 = &p.fs.flow_mask.src_ip;
 				    break;
 
 			    case TOK_DSTIP6:
 				    pa6 = &(p.fs.flow_mask.dst_ip6);
 				    break;
 			    
 			    case TOK_SRCIP6:
 				    pa6 = &(p.fs.flow_mask.src_ip6);
 				    break;
 
 			    case TOK_FLOWID:
 				    p20 = &p.fs.flow_mask.flow_id6;
 				    break;
 
 			    case TOK_DSTPORT:
 				    p16 = &p.fs.flow_mask.dst_port;
 				    break;
 
 			    case TOK_SRCPORT:
 				    p16 = &p.fs.flow_mask.src_port;
 				    break;
 
 			    case TOK_PROTO:
 				    break;
 
 			    default:
 				    ac++; av--; /* backtrack */
 				    goto end_mask;
 			    }
 			    if (ac < 1)
 				    errx(EX_USAGE, "mask: value missing");
 			    if (*av[0] == '/') {
 				    a = strtoul(av[0]+1, &end, 0);
 				    if (pa6 == NULL)
 					    a = (a == 32) ? ~0 : (1 << a) - 1;
 			    } else
 				    a = strtoul(av[0], &end, 0);
 			    if (p32 != NULL)
 				    *p32 = a;
 			    else if (p16 != NULL) {
 				    if (a > 0xFFFF)
 					    errx(EX_DATAERR,
 						"port mask must be 16 bit");
 				    *p16 = (uint16_t)a;
 			    } else if (p20 != NULL) {
 				    if (a > 0xfffff)
 					errx(EX_DATAERR,
 					    "flow_id mask must be 20 bit");
 				    *p20 = (uint32_t)a;
 			    } else if (pa6 != NULL) {
 				    if (a < 0 || a > 128)
 					errx(EX_DATAERR,
 					    "in6addr invalid mask len");
 				    else
 					n2mask(pa6, a);
 			    } else {
 				    if (a > 0xFF)
 					    errx(EX_DATAERR,
 						"proto mask must be 8 bit");
 				    p.fs.flow_mask.proto = (uint8_t)a;
 			    }
 			    if (a != 0)
 				    p.fs.flags_fs |= DN_HAVE_FLOW_MASK;
 			    ac--; av++;
 			} /* end while, config masks */
 end_mask:
 			break;
 
 		case TOK_RED:
 		case TOK_GRED:
 			NEED1("red/gred needs w_q/min_th/max_th/max_p\n");
 			p.fs.flags_fs |= DN_IS_RED;
 			if (tok == TOK_GRED)
 				p.fs.flags_fs |= DN_IS_GENTLE_RED;
 			/*
 			 * the format for parameters is w_q/min_th/max_th/max_p
 			 */
 			if ((end = strsep(&av[0], "/"))) {
 			    double w_q = strtod(end, NULL);
 			    if (w_q > 1 || w_q <= 0)
 				errx(EX_DATAERR, "0 < w_q <= 1");
 			    p.fs.w_q = (int) (w_q * (1 << SCALE_RED));
 			}
 			if ((end = strsep(&av[0], "/"))) {
 			    p.fs.min_th = strtoul(end, &end, 0);
 			    if (*end == 'K' || *end == 'k')
 				p.fs.min_th *= 1024;
 			}
 			if ((end = strsep(&av[0], "/"))) {
 			    p.fs.max_th = strtoul(end, &end, 0);
 			    if (*end == 'K' || *end == 'k')
 				p.fs.max_th *= 1024;
 			}
 			if ((end = strsep(&av[0], "/"))) {
 			    double max_p = strtod(end, NULL);
 			    if (max_p > 1 || max_p <= 0)
 				errx(EX_DATAERR, "0 < max_p <= 1");
 			    p.fs.max_p = (int)(max_p * (1 << SCALE_RED));
 			}
 			ac--; av++;
 			break;
 
 		case TOK_DROPTAIL:
 			p.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED);
 			break;
 
 		case TOK_BW:
 			NEED1("bw needs bandwidth or interface\n");
 			if (do_pipe != 1)
 			    errx(EX_DATAERR, "bandwidth only valid for pipes");
 			/*
 			 * set clocking interface or bandwidth value
 			 */
 			if (av[0][0] >= 'a' && av[0][0] <= 'z') {
 			    int l = sizeof(p.if_name)-1;
 			    /* interface name */
 			    strncpy(p.if_name, av[0], l);
 			    p.if_name[l] = '\0';
 			    p.bandwidth = 0;
 			} else {
 			    p.if_name[0] = '\0';
 			    p.bandwidth = strtoul(av[0], &end, 0);
 			    if (*end == 'K' || *end == 'k') {
 				end++;
 				p.bandwidth *= 1000;
 			    } else if (*end == 'M') {
 				end++;
 				p.bandwidth *= 1000000;
 			    }
 			    if ((*end == 'B' &&
 				  _substrcmp2(end, "Bi", "Bit/s") != 0) ||
 			        _substrcmp2(end, "by", "bytes") == 0)
 				p.bandwidth *= 8;
 			    if (p.bandwidth < 0)
 				errx(EX_DATAERR, "bandwidth too large");
 			}
 			ac--; av++;
 			break;
 
 		case TOK_DELAY:
 			if (do_pipe != 1)
 				errx(EX_DATAERR, "delay only valid for pipes");
 			NEED1("delay needs argument 0..10000ms\n");
 			p.delay = strtoul(av[0], NULL, 0);
 			ac--; av++;
 			break;
 
 		case TOK_WEIGHT:
 			if (do_pipe == 1)
 				errx(EX_DATAERR,"weight only valid for queues");
 			NEED1("weight needs argument 0..100\n");
 			p.fs.weight = strtoul(av[0], &end, 0);
 			ac--; av++;
 			break;
 
 		case TOK_PIPE:
 			if (do_pipe == 1)
 				errx(EX_DATAERR,"pipe only valid for queues");
 			NEED1("pipe needs pipe_number\n");
 			p.fs.parent_nr = strtoul(av[0], &end, 0);
 			ac--; av++;
 			break;
 
 		default:
 			errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]);
 		}
 	}
 	if (do_pipe == 1) {
 		if (p.pipe_nr == 0)
 			errx(EX_DATAERR, "pipe_nr must be > 0");
 		if (p.delay > 10000)
 			errx(EX_DATAERR, "delay must be < 10000");
 	} else { /* do_pipe == 2, queue */
 		if (p.fs.parent_nr == 0)
 			errx(EX_DATAERR, "pipe must be > 0");
 		if (p.fs.weight >100)
 			errx(EX_DATAERR, "weight must be <= 100");
 	}
 	if (p.fs.flags_fs & DN_QSIZE_IS_BYTES) {
 		size_t len;
 		long limit;
 
 		len = sizeof(limit);
 		if (sysctlbyname("net.inet.ip.dummynet.pipe_byte_limit",
 			&limit, &len, NULL, 0) == -1)
 			limit = 1024*1024;
 		if (p.fs.qsize > limit)
 			errx(EX_DATAERR, "queue size must be < %ldB", limit);
 	} else {
 		size_t len;
 		long limit;
 
 		len = sizeof(limit);
 		if (sysctlbyname("net.inet.ip.dummynet.pipe_slot_limit",
 			&limit, &len, NULL, 0) == -1)
 			limit = 100;
 		if (p.fs.qsize > limit)
 			errx(EX_DATAERR, "2 <= queue size <= %ld", limit);
 	}
 	if (p.fs.flags_fs & DN_IS_RED) {
 		size_t len;
 		int lookup_depth, avg_pkt_size;
 		double s, idle, weight, w_q;
 		struct clockinfo ck;
 		int t;
 
 		if (p.fs.min_th >= p.fs.max_th)
 		    errx(EX_DATAERR, "min_th %d must be < than max_th %d",
 			p.fs.min_th, p.fs.max_th);
 		if (p.fs.max_th == 0)
 		    errx(EX_DATAERR, "max_th must be > 0");
 
 		len = sizeof(int);
 		if (sysctlbyname("net.inet.ip.dummynet.red_lookup_depth",
 			&lookup_depth, &len, NULL, 0) == -1)
 		    errx(1, "sysctlbyname(\"%s\")",
 			"net.inet.ip.dummynet.red_lookup_depth");
 		if (lookup_depth == 0)
 		    errx(EX_DATAERR, "net.inet.ip.dummynet.red_lookup_depth"
 			" must be greater than zero");
 
 		len = sizeof(int);
 		if (sysctlbyname("net.inet.ip.dummynet.red_avg_pkt_size",
 			&avg_pkt_size, &len, NULL, 0) == -1)
 
 		    errx(1, "sysctlbyname(\"%s\")",
 			"net.inet.ip.dummynet.red_avg_pkt_size");
 		if (avg_pkt_size == 0)
 			errx(EX_DATAERR,
 			    "net.inet.ip.dummynet.red_avg_pkt_size must"
 			    " be greater than zero");
 
 		len = sizeof(struct clockinfo);
 		if (sysctlbyname("kern.clockrate", &ck, &len, NULL, 0) == -1)
 			errx(1, "sysctlbyname(\"%s\")", "kern.clockrate");
 
 		/*
 		 * Ticks needed for sending a medium-sized packet.
 		 * Unfortunately, when we are configuring a WF2Q+ queue, we
 		 * do not have bandwidth information, because that is stored
 		 * in the parent pipe, and also we have multiple queues
 		 * competing for it. So we set s=0, which is not very
 		 * correct. But on the other hand, why do we want RED with
 		 * WF2Q+ ?
 		 */
 		if (p.bandwidth==0) /* this is a WF2Q+ queue */
 			s = 0;
 		else
 			s = (double)ck.hz * avg_pkt_size * 8 / p.bandwidth;
 
 		/*
 		 * max idle time (in ticks) before avg queue size becomes 0.
 		 * NOTA:  (3/w_q) is approx the value x so that
 		 * (1-w_q)^x < 10^-3.
 		 */
 		w_q = ((double)p.fs.w_q) / (1 << SCALE_RED);
 		idle = s * 3. / w_q;
 		p.fs.lookup_step = (int)idle / lookup_depth;
 		if (!p.fs.lookup_step)
 			p.fs.lookup_step = 1;
 		weight = 1 - w_q;
 		for (t = p.fs.lookup_step; t > 1; --t)
 			weight *= 1 - w_q;
 		p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED));
 	}
 	i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p);
 	if (i)
 		err(1, "setsockopt(%s)", "IP_DUMMYNET_CONFIGURE");
 }
 
 static void
 get_mac_addr_mask(const char *p, uint8_t *addr, uint8_t *mask)
 {
 	int i, l;
 	char *ap, *ptr, *optr;
 	struct ether_addr *mac;
 	const char *macset = "0123456789abcdefABCDEF:";
 
 	if (strcmp(p, "any") == 0) {
 		for (i = 0; i < ETHER_ADDR_LEN; i++)
 			addr[i] = mask[i] = 0;
 		return;
 	}
 
 	optr = ptr = strdup(p);
 	if ((ap = strsep(&ptr, "&/")) != NULL && *ap != 0) {
 		l = strlen(ap);
 		if (strspn(ap, macset) != l || (mac = ether_aton(ap)) == NULL)
 			errx(EX_DATAERR, "Incorrect MAC address");
 		bcopy(mac, addr, ETHER_ADDR_LEN);
 	} else
 		errx(EX_DATAERR, "Incorrect MAC address");
 
 	if (ptr != NULL) { /* we have mask? */
 		if (p[ptr - optr - 1] == '/') { /* mask len */
 			l = strtol(ptr, &ap, 10);
 			if (*ap != 0 || l > ETHER_ADDR_LEN * 8 || l < 0)
 				errx(EX_DATAERR, "Incorrect mask length");
 			for (i = 0; l > 0 && i < ETHER_ADDR_LEN; l -= 8, i++)
 				mask[i] = (l >= 8) ? 0xff: (~0) << (8 - l);
 		} else { /* mask */
 			l = strlen(ptr);
 			if (strspn(ptr, macset) != l ||
 			    (mac = ether_aton(ptr)) == NULL)
 				errx(EX_DATAERR, "Incorrect mask");
 			bcopy(mac, mask, ETHER_ADDR_LEN);
 		}
 	} else { /* default mask: ff:ff:ff:ff:ff:ff */
 		for (i = 0; i < ETHER_ADDR_LEN; i++)
 			mask[i] = 0xff;
 	}
 	for (i = 0; i < ETHER_ADDR_LEN; i++)
 		addr[i] &= mask[i];
 
 	free(optr);
 }
 
 /*
  * Step past the instruction at 'cmd' (F_LEN(cmd) slots) and return a
  * pointer to the slot that follows.  The first word of that slot is
  * zeroed before returning, in case an earlier pass left data there.
  */
 static ipfw_insn *
 next_cmd(ipfw_insn *cmd)
 {
 	ipfw_insn *following = cmd + F_LEN(cmd);
 
 	bzero(following, sizeof(*following));
 	return following;
 }
 
 /*
  * Build an O_NOP instruction whose payload is the remaining arguments
  * joined by single spaces.  F_NOT/F_OR already present in cmd->len are
  * kept.  With no arguments the instruction is left with a zero length.
  */
 static void
 fill_comment(ipfw_insn *cmd, int ac, char **av)
 {
 	char *out = (char *)(cmd + 1);	/* text starts after the header */
 	int idx, nbytes, nwords;
 	size_t wlen;
 
 	cmd->opcode = O_NOP;
 	cmd->len &= (F_NOT | F_OR);
 
 	/* Bytes needed: every word plus one separator/terminator each. */
 	nbytes = 0;
 	idx = 0;
 	while (idx < ac) {
 		nbytes += strlen(av[idx]) + 1;
 		idx++;
 	}
 	if (nbytes == 0)
 		return;
 	if (nbytes > 84)
 		errx(EX_DATAERR,
 		    "comment too long (max 80 chars)");
 
 	/* One word for the header plus the text rounded up to 32 bits. */
 	nwords = 1 + (nbytes + 3) / 4;
 	cmd->len = (cmd->len & (F_NOT | F_OR)) | nwords;
 
 	for (idx = 0; idx < ac; idx++) {
 		wlen = strlen(av[idx]);
 		memcpy(out, av[idx], wlen);
 		out[wlen] = ' ';
 		out += wlen + 1;
 	}
 	/* The last separator becomes the string terminator. */
 	out[-1] = '\0';
 }
 
 /*
  * Set up a one-word instruction: store the opcode and argument and set
  * the length to 1.  Only the F_NOT/F_OR bits survive in cmd->len, taken
  * from both the current value and 'flags'.
  */
 static void
 fill_cmd(ipfw_insn *cmd, enum ipfw_opcodes opcode, int flags, uint16_t arg)
 {
 	int modifiers = (cmd->len | flags) & (F_NOT | F_OR);
 
 	cmd->arg1 = arg;
 	cmd->opcode = opcode;
 	cmd->len = modifiers | 1;
 }
 
 /*
  * Fill 'cmd' as an O_MACADDR2 instruction from two arguments: av[0] is
  * the destination MAC specification and av[1] the source one.  Each is
  * parsed into an address/mask pair; the destination pair occupies the
  * first ETHER_ADDR_LEN bytes of addr[]/mask[] and the source pair the
  * next ETHER_ADDR_LEN.  Exits if fewer than two arguments are given.
  * Returns cmd.
  */
 static ipfw_insn *
 add_mac(ipfw_insn *cmd, int ac, char *av[])
 {
 	ipfw_insn_mac *m = (ipfw_insn_mac *)cmd;
 
 	if (ac < 2)
 		errx(EX_DATAERR, "MAC dst src");
 
 	cmd->len = (cmd->len & (F_NOT | F_OR)) | F_INSN_SIZE(ipfw_insn_mac);
 	cmd->opcode = O_MACADDR2;
 
 	/* dst */
 	get_mac_addr_mask(av[0], &m->addr[0], &m->mask[0]);
 	/* src */
 	get_mac_addr_mask(av[1], &m->addr[ETHER_ADDR_LEN],
 	    &m->mask[ETHER_ADDR_LEN]);
 
 	return cmd;
 }
 
 /*
  * Fill 'cmd' as an O_MAC_TYPE instruction from the type list in 'av'.
  * Returns NULL, leaving cmd untouched, when the type is "any";
  * otherwise returns cmd.  Exits if no argument is available.
  */
 static ipfw_insn *
 add_mactype(ipfw_insn *cmd, int ac, char *av)
 {
 	if (ac < 1)
 		errx(EX_DATAERR, "missing MAC type");
 
 	/* "any" means no restriction, so no instruction is produced */
 	if (strcmp(av, "any") == 0)
 		return NULL;
 
 	fill_newports((ipfw_insn_u16 *)cmd, av, IPPROTO_ETHERTYPE);
 	cmd->opcode = O_MAC_TYPE;
 	return cmd;
 }
 
 /*
  * Fill 'cmd' as an O_PROTO instruction for the protocol named by 'av',
  * which is either a positive decimal number or a name known to
  * getprotobyname().  The protocol number is also stored in *protop.
  *
  * Returns cmd on success.  Returns NULL, without touching cmd or
  * *protop, if 'av' is neither form or if the resulting number does not
  * fit the 8-bit protocol value that *protop can hold.
  */
 static ipfw_insn *
 add_proto0(ipfw_insn *cmd, char *av, u_char *protop)
 {
 	struct protoent *pe;
 	char *ep;
 	long proto;
 
 	proto = strtol(av, &ep, 10);
 	if (*ep != '\0' || proto <= 0) {
 		/* not a positive number, try it as a protocol name */
 		if ((pe = getprotobyname(av)) == NULL)
 			return NULL;
 		proto = pe->p_proto;
 	}
 
 	/*
 	 * Reject anything that would be truncated when stored in the
 	 * u_char: the instruction argument and *protop must agree.
 	 */
 	if (proto < 0 || proto > 255)
 		return NULL;
 
 	fill_cmd(cmd, O_PROTO, 0, proto);
 	*protop = proto;
 	return cmd;
 }
 
 /*
  * Translate the protocol keyword in 'av' and record the protocol
  * number in *protop:
  *	"all" (matched via _substrcmp) or "ip"	no instruction, IPPROTO_IP
  *	"ip4"					O_IP4, IPPROTO_IP
  *	"ip6"					O_IP6, IPPROTO_IPV6
  * Anything else is handed to add_proto0(), whose result is returned.
  */
 static ipfw_insn *
 add_proto(ipfw_insn *cmd, char *av, u_char *protop)
 {
 	if (_substrcmp(av, "all") == 0 || strcmp(av, "ip") == 0) {
 		/* do not set O_IP4 nor O_IP6 */
 		*protop = IPPROTO_IP;
 		return cmd;
 	}
 	if (strcmp(av, "ip4") == 0) {
 		/* explicit "just IPv4" rule */
 		fill_cmd(cmd, O_IP4, 0, 0);
 		*protop = IPPROTO_IP;
 		return cmd;
 	}
 	if (strcmp(av, "ip6") == 0) {
 		/* explicit "just IPv6" rule */
 		fill_cmd(cmd, O_IP6, 0, 0);
 		*protop = IPPROTO_IPV6;
 		return cmd;
 	}
 	return add_proto0(cmd, av, protop);
 }
 
 /*
  * Same as add_proto(), but additionally accepts the spellings "ipv4"
  * and "ipv6" for the IPv4-only and IPv6-only keywords.
  */
 static ipfw_insn *
 add_proto_compat(ipfw_insn *cmd, char *av, u_char *protop)
 {
 	if (_substrcmp(av, "all") == 0 || strcmp(av, "ip") == 0) {
 		/* do not set O_IP4 nor O_IP6 */
 		*protop = IPPROTO_IP;
 		return cmd;
 	}
 	if (strcmp(av, "ipv4") == 0 || strcmp(av, "ip4") == 0) {
 		/* explicit "just IPv4" rule */
 		fill_cmd(cmd, O_IP4, 0, 0);
 		*protop = IPPROTO_IP;
 		return cmd;
 	}
 	if (strcmp(av, "ipv6") == 0 || strcmp(av, "ip6") == 0) {
 		/* explicit "just IPv6" rule */
 		fill_cmd(cmd, O_IP6, 0, 0);
 		*protop = IPPROTO_IPV6;
 		return cmd;
 	}
 	return add_proto0(cmd, av, protop);
 }
 
 /*
  * Parse 'av' with fill_ip() and turn the result into the matching
  * source-address opcode.  fill_ip() leaves a destination-flavoured
  * opcode for sets and table lookups; for the other forms the opcode is
  * chosen from the instruction length it produced.  Returns cmd.
  */
 static ipfw_insn *
 add_srcip(ipfw_insn *cmd, char *av)
 {
 	int nwords;
 
 	fill_ip((ipfw_insn_ip *)cmd, av);
 
 	switch (cmd->opcode) {
 	case O_IP_DST_SET:			/* set */
 		cmd->opcode = O_IP_SRC_SET;
 		return cmd;
 	case O_IP_DST_LOOKUP:			/* table */
 		cmd->opcode = O_IP_SRC_LOOKUP;
 		return cmd;
 	default:
 		break;
 	}
 
 	nwords = F_LEN(cmd);
 	if (nwords == F_INSN_SIZE(ipfw_insn))		/* me */
 		cmd->opcode = O_IP_SRC_ME;
 	else if (nwords == F_INSN_SIZE(ipfw_insn_u32))	/* one IP */
 		cmd->opcode = O_IP_SRC;
 	else						/* addr/mask */
 		cmd->opcode = O_IP_SRC_MASK;
 	return cmd;
 }
 
 /*
  * Parse 'av' with fill_ip() and settle on the destination-address
  * opcode.  Set and table-lookup opcodes produced by fill_ip() are kept
  * as they are; for the other forms the opcode is chosen from the
  * instruction length it produced.  Returns cmd.
  */
 static ipfw_insn *
 add_dstip(ipfw_insn *cmd, char *av)
 {
 	int nwords;
 
 	fill_ip((ipfw_insn_ip *)cmd, av);
 
 	/* set and table: fill_ip() already stored the right opcode */
 	if (cmd->opcode == O_IP_DST_SET || cmd->opcode == O_IP_DST_LOOKUP)
 		return cmd;
 
 	nwords = F_LEN(cmd);
 	if (nwords == F_INSN_SIZE(ipfw_insn))		/* me */
 		cmd->opcode = O_IP_DST_ME;
 	else if (nwords == F_INSN_SIZE(ipfw_insn_u32))	/* one IP */
 		cmd->opcode = O_IP_DST;
 	else						/* addr/mask */
 		cmd->opcode = O_IP_DST_MASK;
 	return cmd;
 }
 
 /*
  * Parse the list in 'av' into 'cmd' with fill_newports() and tag the
  * instruction with 'opcode'.  Returns cmd on success; returns NULL when
  * 'av' is "any" (matched via _substrcmp) or fill_newports() reports
  * failure by returning zero.
  */
 static ipfw_insn *
 add_ports(ipfw_insn *cmd, char *av, u_char proto, int opcode)
 {
 	if (_substrcmp(av, "any") == 0)
 		return NULL;
 	if (!fill_newports((ipfw_insn_u16 *)cmd, av, proto))
 		return NULL;
 
 	/* XXX todo: check that we have a protocol with ports */
 	cmd->opcode = opcode;
 	return cmd;
 }
 
 /*
  * Build the source-address instruction for 'av', choosing between the
  * IPv6 and IPv4 parsers.
  *
  * The IPv6 parser is tried when the protocol is IPPROTO_IPV6, when 'av'
  * is "me6", or when the text before the last '/' is accepted by
  * inet_pton(AF_INET6).  If that yields nothing, the IPv4 parser is
  * tried when the protocol is IPPROTO_IP, when 'av' is "me", or when
  * inet_pton(AF_INET6) does not accept that text.  If there is still no
  * result and 'av' is not "any", cmd itself is returned.
  *
  * Returns NULL if the working copy of 'av' cannot be allocated.
  */
 static ipfw_insn *
 add_src(ipfw_insn *cmd, char *av, u_char proto)
 {
 	struct in6_addr v6;
 	char *bare, *slash;
 	ipfw_insn *res = NULL;
 	int try_v6, try_v4;
 
 	bare = strdup(av);
 	if (bare == NULL)
 		return NULL;
 
 	/* drop a trailing "/masklen" from the copy, if present */
 	slash = strrchr(bare, '/');
 	if (slash != NULL)
 		*slash = '\0';
 
 	try_v6 = proto == IPPROTO_IPV6 || strcmp(av, "me6") == 0 ||
 	    inet_pton(AF_INET6, bare, &v6);
 	if (try_v6)
 		res = add_srcip6(cmd, av);
 
 	/* XXX: should check for IPv4, not !IPv6 */
 	if (res == NULL) {
 		try_v4 = proto == IPPROTO_IP || strcmp(av, "me") == 0 ||
 		    !inet_pton(AF_INET6, bare, &v6);
 		if (try_v4)
 			res = add_srcip(cmd, av);
 	}
 
 	if (res == NULL && strcmp(av, "any") != 0)
 		res = cmd;
 
 	free(bare);
 	return res;
 }
 
 /*
  * Build the destination-address instruction for 'av', choosing between
  * the IPv6 and IPv4 parsers.
  *
  * The IPv6 parser is tried when the protocol is IPPROTO_IPV6, when 'av'
  * is "me6", or when the text before the last '/' is accepted by
  * inet_pton(AF_INET6).  If that yields nothing, the IPv4 parser is
  * tried when the protocol is IPPROTO_IP, when 'av' is "me", or when
  * inet_pton(AF_INET6) does not accept that text.  If there is still no
  * result and 'av' is not "any", cmd itself is returned.
  *
  * Returns NULL if the working copy of 'av' cannot be allocated.
  */
 static ipfw_insn *
 add_dst(ipfw_insn *cmd, char *av, u_char proto)
 {
 	struct in6_addr v6;
 	char *bare, *slash;
 	ipfw_insn *res = NULL;
 	int try_v6, try_v4;
 
 	bare = strdup(av);
 	if (bare == NULL)
 		return NULL;
 
 	/* drop a trailing "/masklen" from the copy, if present */
 	slash = strrchr(bare, '/');
 	if (slash != NULL)
 		*slash = '\0';
 
 	try_v6 = proto == IPPROTO_IPV6 || strcmp(av, "me6") == 0 ||
 	    inet_pton(AF_INET6, bare, &v6);
 	if (try_v6)
 		res = add_dstip6(cmd, av);
 
 	/* XXX: should check for IPv4, not !IPv6 */
 	if (res == NULL) {
 		try_v4 = proto == IPPROTO_IP || strcmp(av, "me") == 0 ||
 		    !inet_pton(AF_INET6, bare, &v6);
 		if (try_v4)
 			res = add_dstip(cmd, av);
 	}
 
 	if (res == NULL && strcmp(av, "any") != 0)
 		res = cmd;
 
 	free(bare);
 	return res;
 }
 
 /*
  * Parse arguments and assemble the microinstructions which make up a rule.
  * Rules are added into the 'rulebuf' and then copied in the correct order
  * into the actual rule.
  *
  * The syntax for a rule starts with the action, followed by
  * optional action parameters, and the various match patterns.
  * In the assembled microcode, the first opcode must be an O_PROBE_STATE
  * (generated if the rule includes a keep-state option), then the
  * various match patterns, log/altq actions, and the actual action.
  *
  */
 static void
 add(int ac, char *av[])
 {
 	/*
 	 * rules are added into the 'rulebuf' and then copied in
 	 * the correct order into the actual rule.
 	 * Some things that need to go out of order (prob, action etc.)
 	 * go into actbuf[].
 	 */
 	static uint32_t rulebuf[255], actbuf[255], cmdbuf[255];
 
 	ipfw_insn *src, *dst, *cmd, *action, *prev=NULL;
 	ipfw_insn *first_cmd;	/* first match pattern */
 
 	struct ip_fw *rule;
 
 	/*
 	 * various flags used to record that we entered some fields.
 	 */
 	ipfw_insn *have_state = NULL;	/* check-state or keep-state */
 	ipfw_insn *have_log = NULL, *have_altq = NULL, *have_tag = NULL;
 	size_t len;
 
 	int i;
 
 	int open_par = 0;	/* open parenthesis ( */
 
 	/* proto is here because it is used to fetch ports */
 	u_char proto = IPPROTO_IP;	/* default protocol */
 
 	double match_prob = 1; /* match probability, default is always match */
 
 	bzero(actbuf, sizeof(actbuf));		/* actions go here */
 	bzero(cmdbuf, sizeof(cmdbuf));
 	bzero(rulebuf, sizeof(rulebuf));
 
 	rule = (struct ip_fw *)rulebuf;
 	cmd = (ipfw_insn *)cmdbuf;
 	action = (ipfw_insn *)actbuf;
 
 	av++; ac--;
 
 	/* [rule N]	-- Rule number optional */
 	if (ac && isdigit(**av)) {
 		rule->rulenum = atoi(*av);
 		av++;
 		ac--;
 	}
 
 	/* [set N]	-- set number (0..RESVD_SET), optional */
 	if (ac > 1 && _substrcmp(*av, "set") == 0) {
 		int set = strtoul(av[1], NULL, 10);
 		if (set < 0 || set > RESVD_SET)
 			errx(EX_DATAERR, "illegal set %s", av[1]);
 		rule->set = set;
 		av += 2; ac -= 2;
 	}
 
 	/* [prob D]	-- match probability, optional */
 	if (ac > 1 && _substrcmp(*av, "prob") == 0) {
 		match_prob = strtod(av[1], NULL);
 
 		if (match_prob <= 0 || match_prob > 1)
 			errx(EX_DATAERR, "illegal match prob. %s", av[1]);
 		av += 2; ac -= 2;
 	}
 
 	/* action	-- mandatory */
 	NEED1("missing action");
 	i = match_token(rule_actions, *av);
 	ac--; av++;
 	action->len = 1;	/* default */
 	switch(i) {
 	case TOK_CHECKSTATE:
 		have_state = action;
 		action->opcode = O_CHECK_STATE;
 		break;
 
 	case TOK_ACCEPT:
 		action->opcode = O_ACCEPT;
 		break;
 
 	case TOK_DENY:
 		action->opcode = O_DENY;
 		action->arg1 = 0;
 		break;
 
 	case TOK_REJECT:
 		action->opcode = O_REJECT;
 		action->arg1 = ICMP_UNREACH_HOST;
 		break;
 
 	case TOK_RESET:
 		action->opcode = O_REJECT;
 		action->arg1 = ICMP_REJECT_RST;
 		break;
 
 	case TOK_RESET6:
 		action->opcode = O_UNREACH6;
 		action->arg1 = ICMP6_UNREACH_RST;
 		break;
 
 	case TOK_UNREACH:
 		action->opcode = O_REJECT;
 		NEED1("missing reject code");
 		fill_reject_code(&action->arg1, *av);
 		ac--; av++;
 		break;
 
 	case TOK_UNREACH6:
 		action->opcode = O_UNREACH6;
 		NEED1("missing unreach code");
 		fill_unreach6_code(&action->arg1, *av);
 		ac--; av++;
 		break;
 
 	case TOK_COUNT:
 		action->opcode = O_COUNT;
 		break;
 
 	case TOK_NAT:
  		action->opcode = O_NAT;
  		action->len = F_INSN_SIZE(ipfw_insn_nat);
 		goto chkarg;
+
 	case TOK_QUEUE:
 		action->opcode = O_QUEUE;
 		goto chkarg;
 	case TOK_PIPE:
 		action->opcode = O_PIPE;
 		goto chkarg;
 	case TOK_SKIPTO:
 		action->opcode = O_SKIPTO;
 		goto chkarg;
 	case TOK_NETGRAPH:
 		action->opcode = O_NETGRAPH;
 		goto chkarg;
 	case TOK_NGTEE:
 		action->opcode = O_NGTEE;
 		goto chkarg;
 	case TOK_DIVERT:
 		action->opcode = O_DIVERT;
 		goto chkarg;
 	case TOK_TEE:
 		action->opcode = O_TEE;
 chkarg:	
 		if (!ac)
 			errx(EX_USAGE, "missing argument for %s", *(av - 1));
 		if (isdigit(**av)) {
 			action->arg1 = strtoul(*av, NULL, 10);
 			if (action->arg1 <= 0 || action->arg1 >= IP_FW_TABLEARG)
 				errx(EX_DATAERR, "illegal argument for %s",
 				    *(av - 1));
 		} else if (_substrcmp(*av, TABLEARG) == 0) {
 			action->arg1 = IP_FW_TABLEARG;
 		} else if (i == TOK_DIVERT || i == TOK_TEE) {
 			struct servent *s;
 			setservent(1);
 			s = getservbyname(av[0], "divert");
 			if (s != NULL)
 				action->arg1 = ntohs(s->s_port);
 			else
 				errx(EX_DATAERR, "illegal divert/tee port");
 		} else
 			errx(EX_DATAERR, "illegal argument for %s", *(av - 1));
 		ac--; av++;
 		break;
 
 	case TOK_FORWARD: {
 		ipfw_insn_sa *p = (ipfw_insn_sa *)action;
 		char *s, *end;
 
 		NEED1("missing forward address[:port]");
 
 		action->opcode = O_FORWARD_IP;
 		action->len = F_INSN_SIZE(ipfw_insn_sa);
 
 		p->sa.sin_len = sizeof(struct sockaddr_in);
 		p->sa.sin_family = AF_INET;
 		p->sa.sin_port = 0;
 		/*
 		 * locate the address-port separator (':' or ',')
 		 */
 		s = strchr(*av, ':');
 		if (s == NULL)
 			s = strchr(*av, ',');
 		if (s != NULL) {
 			*(s++) = '\0';
 			i = strtoport(s, &end, 0 /* base */, 0 /* proto */);
 			if (s == end)
 				errx(EX_DATAERR,
 				    "illegal forwarding port ``%s''", s);
 			p->sa.sin_port = (u_short)i;
 		}
 		if (_substrcmp(*av, "tablearg") == 0) 
 			p->sa.sin_addr.s_addr = INADDR_ANY;
 		else
 			lookup_host(*av, &(p->sa.sin_addr));
 		ac--; av++;
 		break;
 	    }
 	case TOK_COMMENT:
 		/* pretend it is a 'count' rule followed by the comment */
 		action->opcode = O_COUNT;
 		ac++; av--;	/* go back... */
 		break;
+
+	case TOK_SETFIB:
+	    {
+		int numfibs;
+
+		action->opcode = O_SETFIB;
+ 		NEED1("missing fib number");
+ 	        action->arg1 = strtoul(*av, NULL, 10);
+		if (sysctlbyname("net.fibs", &numfibs, &i, NULL, 0) == -1)
+			errx(EX_DATAERR, "fibs not suported.\n");
+		if (action->arg1 >= numfibs)  /* Temporary */
+			errx(EX_DATAERR, "fib too large.\n");
+ 		ac--; av++;
+ 		break;
+	    }
 		
 	default:
 		errx(EX_DATAERR, "invalid action %s\n", av[-1]);
 	}
 	action = next_cmd(action);
 
 	/*
 	 * [altq queuename] -- altq tag, optional
 	 * [log [logamount N]]	-- log, optional
 	 *
 	 * If they exist, it go first in the cmdbuf, but then it is
 	 * skipped in the copy section to the end of the buffer.
 	 */
 	while (ac != 0 && (i = match_token(rule_action_params, *av)) != -1) {
 		ac--; av++;
 		switch (i) {
 		case TOK_LOG:
 		    {
 			ipfw_insn_log *c = (ipfw_insn_log *)cmd;
 			int l;
 
 			if (have_log)
 				errx(EX_DATAERR,
 				    "log cannot be specified more than once");
 			have_log = (ipfw_insn *)c;
 			cmd->len = F_INSN_SIZE(ipfw_insn_log);
 			cmd->opcode = O_LOG;
 			if (ac && _substrcmp(*av, "logamount") == 0) {
 				ac--; av++;
 				NEED1("logamount requires argument");
 				l = atoi(*av);
 				if (l < 0)
 					errx(EX_DATAERR,
 					    "logamount must be positive");
 				c->max_log = l;
 				ac--; av++;
 			} else {
 				len = sizeof(c->max_log);
 				if (sysctlbyname("net.inet.ip.fw.verbose_limit",
 				    &c->max_log, &len, NULL, 0) == -1)
 					errx(1, "sysctlbyname(\"%s\")",
 					    "net.inet.ip.fw.verbose_limit");
 			}
 		    }
 			break;
 
 		case TOK_ALTQ:
 		    {
 			ipfw_insn_altq *a = (ipfw_insn_altq *)cmd;
 
 			NEED1("missing altq queue name");
 			if (have_altq)
 				errx(EX_DATAERR,
 				    "altq cannot be specified more than once");
 			have_altq = (ipfw_insn *)a;
 			cmd->len = F_INSN_SIZE(ipfw_insn_altq);
 			cmd->opcode = O_ALTQ;
 			fill_altq_qid(&a->qid, *av);
 			ac--; av++;
 		    }
 			break;
 
 		case TOK_TAG:
 		case TOK_UNTAG: {
 			uint16_t tag;
 
 			if (have_tag)
 				errx(EX_USAGE, "tag and untag cannot be "
 				    "specified more than once");
 			GET_UINT_ARG(tag, 1, 65534, i, rule_action_params);
 			have_tag = cmd;
 			fill_cmd(cmd, O_TAG, (i == TOK_TAG) ? 0: F_NOT, tag);
 			ac--; av++;
 			break;
 		}
 
 		default:
 			abort();
 		}
 		cmd = next_cmd(cmd);
 	}
 
 	if (have_state)	/* must be a check-state, we are done */
 		goto done;
 
 #define OR_START(target)					\
 	if (ac && (*av[0] == '(' || *av[0] == '{')) {		\
 		if (open_par)					\
 			errx(EX_USAGE, "nested \"(\" not allowed\n"); \
 		prev = NULL;					\
 		open_par = 1;					\
 		if ( (av[0])[1] == '\0') {			\
 			ac--; av++;				\
 		} else						\
 			(*av)++;				\
 	}							\
 	target:							\
 
 
 #define	CLOSE_PAR						\
 	if (open_par) {						\
 		if (ac && (					\
 		    strcmp(*av, ")") == 0 ||			\
 		    strcmp(*av, "}") == 0)) {			\
 			prev = NULL;				\
 			open_par = 0;				\
 			ac--; av++;				\
 		} else						\
 			errx(EX_USAGE, "missing \")\"\n");	\
 	}
 
 #define NOT_BLOCK						\
 	if (ac && _substrcmp(*av, "not") == 0) {		\
 		if (cmd->len & F_NOT)				\
 			errx(EX_USAGE, "double \"not\" not allowed\n"); \
 		cmd->len |= F_NOT;				\
 		ac--; av++;					\
 	}
 
 #define OR_BLOCK(target)					\
 	if (ac && _substrcmp(*av, "or") == 0) {		\
 		if (prev == NULL || open_par == 0)		\
 			errx(EX_DATAERR, "invalid OR block");	\
 		prev->len |= F_OR;				\
 		ac--; av++;					\
 		goto target;					\
 	}							\
 	CLOSE_PAR;
 
 	first_cmd = cmd;
 
 #if 0
 	/*
 	 * MAC addresses, optional.
 	 * If we have this, we skip the part "proto from src to dst"
 	 * and jump straight to the option parsing.
 	 */
 	NOT_BLOCK;
 	NEED1("missing protocol");
 	if (_substrcmp(*av, "MAC") == 0 ||
 	    _substrcmp(*av, "mac") == 0) {
 		ac--; av++;	/* the "MAC" keyword */
 		add_mac(cmd, ac, av); /* exits in case of errors */
 		cmd = next_cmd(cmd);
 		ac -= 2; av += 2;	/* dst-mac and src-mac */
 		NOT_BLOCK;
 		NEED1("missing mac type");
 		if (add_mactype(cmd, ac, av[0]))
 			cmd = next_cmd(cmd);
 		ac--; av++;	/* any or mac-type */
 		goto read_options;
 	}
 #endif
 
 	/*
 	 * protocol, mandatory
 	 */
     OR_START(get_proto);
 	NOT_BLOCK;
 	NEED1("missing protocol");
 	if (add_proto_compat(cmd, *av, &proto)) {
 		av++; ac--;
 		if (F_LEN(cmd) != 0) {
 			prev = cmd;
 			cmd = next_cmd(cmd);
 		}
 	} else if (first_cmd != cmd) {
 		errx(EX_DATAERR, "invalid protocol ``%s''", *av);
 	} else
 		goto read_options;
     OR_BLOCK(get_proto);
 
 	/*
 	 * "from", mandatory
 	 */
 	if (!ac || _substrcmp(*av, "from") != 0)
 		errx(EX_USAGE, "missing ``from''");
 	ac--; av++;
 
 	/*
 	 * source IP, mandatory
 	 */
     OR_START(source_ip);
 	NOT_BLOCK;	/* optional "not" */
 	NEED1("missing source address");
 	if (add_src(cmd, *av, proto)) {
 		ac--; av++;
 		if (F_LEN(cmd) != 0) {	/* ! any */
 			prev = cmd;
 			cmd = next_cmd(cmd);
 		}
 	} else
 		errx(EX_USAGE, "bad source address %s", *av);
     OR_BLOCK(source_ip);
 
 	/*
 	 * source ports, optional
 	 */
 	NOT_BLOCK;	/* optional "not" */
 	if (ac) {
 		if (_substrcmp(*av, "any") == 0 ||
 		    add_ports(cmd, *av, proto, O_IP_SRCPORT)) {
 			ac--; av++;
 			if (F_LEN(cmd) != 0)
 				cmd = next_cmd(cmd);
 		}
 	}
 
 	/*
 	 * "to", mandatory
 	 */
 	if (!ac || _substrcmp(*av, "to") != 0)
 		errx(EX_USAGE, "missing ``to''");
 	av++; ac--;
 
 	/*
 	 * destination, mandatory
 	 */
     OR_START(dest_ip);
 	NOT_BLOCK;	/* optional "not" */
 	NEED1("missing dst address");
 	if (add_dst(cmd, *av, proto)) {
 		ac--; av++;
 		if (F_LEN(cmd) != 0) {	/* ! any */
 			prev = cmd;
 			cmd = next_cmd(cmd);
 		}
 	} else
 		errx( EX_USAGE, "bad destination address %s", *av);
     OR_BLOCK(dest_ip);
 
 	/*
 	 * dest. ports, optional
 	 */
 	NOT_BLOCK;	/* optional "not" */
 	if (ac) {
 		if (_substrcmp(*av, "any") == 0 ||
 		    add_ports(cmd, *av, proto, O_IP_DSTPORT)) {
 			ac--; av++;
 			if (F_LEN(cmd) != 0)
 				cmd = next_cmd(cmd);
 		}
 	}
 
 read_options:
 	if (ac && first_cmd == cmd) {
 		/*
 		 * nothing specified so far, store in the rule to ease
 		 * printout later.
 		 */
 		 rule->_pad = 1;
 	}
 	prev = NULL;
 	while (ac) {
 		char *s;
 		ipfw_insn_u32 *cmd32;	/* alias for cmd */
 
 		s = *av;
 		cmd32 = (ipfw_insn_u32 *)cmd;
 
 		if (*s == '!') {	/* alternate syntax for NOT */
 			if (cmd->len & F_NOT)
 				errx(EX_USAGE, "double \"not\" not allowed\n");
 			cmd->len = F_NOT;
 			s++;
 		}
 		i = match_token(rule_options, s);
 		ac--; av++;
 		switch(i) {
 		case TOK_NOT:
 			if (cmd->len & F_NOT)
 				errx(EX_USAGE, "double \"not\" not allowed\n");
 			cmd->len = F_NOT;
 			break;
 
 		case TOK_OR:
 			if (open_par == 0 || prev == NULL)
 				errx(EX_USAGE, "invalid \"or\" block\n");
 			prev->len |= F_OR;
 			break;
 
 		case TOK_STARTBRACE:
 			if (open_par)
 				errx(EX_USAGE, "+nested \"(\" not allowed\n");
 			open_par = 1;
 			break;
 
 		case TOK_ENDBRACE:
 			if (!open_par)
 				errx(EX_USAGE, "+missing \")\"\n");
 			open_par = 0;
 			prev = NULL;
         		break;
 
 		case TOK_IN:
 			fill_cmd(cmd, O_IN, 0, 0);
 			break;
 
 		case TOK_OUT:
 			cmd->len ^= F_NOT; /* toggle F_NOT */
 			fill_cmd(cmd, O_IN, 0, 0);
 			break;
 
 		case TOK_DIVERTED:
 			fill_cmd(cmd, O_DIVERTED, 0, 3);
 			break;
 
 		case TOK_DIVERTEDLOOPBACK:
 			fill_cmd(cmd, O_DIVERTED, 0, 1);
 			break;
 
 		case TOK_DIVERTEDOUTPUT:
 			fill_cmd(cmd, O_DIVERTED, 0, 2);
 			break;
 
 		case TOK_FRAG:
 			fill_cmd(cmd, O_FRAG, 0, 0);
 			break;
 
 		case TOK_LAYER2:
 			fill_cmd(cmd, O_LAYER2, 0, 0);
 			break;
 
 		case TOK_XMIT:
 		case TOK_RECV:
 		case TOK_VIA:
 			NEED1("recv, xmit, via require interface name"
 				" or address");
 			fill_iface((ipfw_insn_if *)cmd, av[0]);
 			ac--; av++;
 			if (F_LEN(cmd) == 0)	/* not a valid address */
 				break;
 			if (i == TOK_XMIT)
 				cmd->opcode = O_XMIT;
 			else if (i == TOK_RECV)
 				cmd->opcode = O_RECV;
 			else if (i == TOK_VIA)
 				cmd->opcode = O_VIA;
 			break;
 
 		case TOK_ICMPTYPES:
 			NEED1("icmptypes requires list of types");
 			fill_icmptypes((ipfw_insn_u32 *)cmd, *av);
 			av++; ac--;
 			break;
 		
 		case TOK_ICMP6TYPES:
 			NEED1("icmptypes requires list of types");
 			fill_icmp6types((ipfw_insn_icmp6 *)cmd, *av);
 			av++; ac--;
 			break;
 
 		case TOK_IPTTL:
 			NEED1("ipttl requires TTL");
 			if (strpbrk(*av, "-,")) {
 			    if (!add_ports(cmd, *av, 0, O_IPTTL))
 				errx(EX_DATAERR, "invalid ipttl %s", *av);
 			} else
 			    fill_cmd(cmd, O_IPTTL, 0, strtoul(*av, NULL, 0));
 			ac--; av++;
 			break;
 
 		case TOK_IPID:
 			NEED1("ipid requires id");
 			if (strpbrk(*av, "-,")) {
 			    if (!add_ports(cmd, *av, 0, O_IPID))
 				errx(EX_DATAERR, "invalid ipid %s", *av);
 			} else
 			    fill_cmd(cmd, O_IPID, 0, strtoul(*av, NULL, 0));
 			ac--; av++;
 			break;
 
 		case TOK_IPLEN:
 			NEED1("iplen requires length");
 			if (strpbrk(*av, "-,")) {
 			    if (!add_ports(cmd, *av, 0, O_IPLEN))
 				errx(EX_DATAERR, "invalid ip len %s", *av);
 			} else
 			    fill_cmd(cmd, O_IPLEN, 0, strtoul(*av, NULL, 0));
 			ac--; av++;
 			break;
 
 		case TOK_IPVER:
 			NEED1("ipver requires version");
 			fill_cmd(cmd, O_IPVER, 0, strtoul(*av, NULL, 0));
 			ac--; av++;
 			break;
 
 		case TOK_IPPRECEDENCE:
 			NEED1("ipprecedence requires value");
 			fill_cmd(cmd, O_IPPRECEDENCE, 0,
 			    (strtoul(*av, NULL, 0) & 7) << 5);
 			ac--; av++;
 			break;
 
 		case TOK_IPOPTS:
 			NEED1("missing argument for ipoptions");
 			fill_flags(cmd, O_IPOPT, f_ipopts, *av);
 			ac--; av++;
 			break;
 
 		case TOK_IPTOS:
 			NEED1("missing argument for iptos");
 			fill_flags(cmd, O_IPTOS, f_iptos, *av);
 			ac--; av++;
 			break;
 
 		case TOK_UID:
 			NEED1("uid requires argument");
 		    {
 			char *end;
 			uid_t uid;
 			struct passwd *pwd;
 
 			cmd->opcode = O_UID;
 			uid = strtoul(*av, &end, 0);
 			pwd = (*end == '\0') ? getpwuid(uid) : getpwnam(*av);
 			if (pwd == NULL)
 				errx(EX_DATAERR, "uid \"%s\" nonexistent", *av);
 			cmd32->d[0] = pwd->pw_uid;
 			cmd->len |= F_INSN_SIZE(ipfw_insn_u32);
 			ac--; av++;
 		    }
 			break;
 
 		case TOK_GID:
 			NEED1("gid requires argument");
 		    {
 			char *end;
 			gid_t gid;
 			struct group *grp;
 
 			cmd->opcode = O_GID;
 			gid = strtoul(*av, &end, 0);
 			grp = (*end == '\0') ? getgrgid(gid) : getgrnam(*av);
 			if (grp == NULL)
 				errx(EX_DATAERR, "gid \"%s\" nonexistent", *av);
 			cmd32->d[0] = grp->gr_gid;
 			cmd->len |= F_INSN_SIZE(ipfw_insn_u32);
 			ac--; av++;
 		    }
 			break;
 
 		case TOK_JAIL:
 			NEED1("jail requires argument");
 		    {
 			char *end;
 			int jid;
 
 			cmd->opcode = O_JAIL;
 			jid = (int)strtol(*av, &end, 0);
 			if (jid < 0 || *end != '\0')
 				errx(EX_DATAERR, "jail requires prison ID");
 			cmd32->d[0] = (uint32_t)jid;
 			cmd->len |= F_INSN_SIZE(ipfw_insn_u32);
 			ac--; av++;
 		    }
 			break;
 
 		case TOK_ESTAB:
 			fill_cmd(cmd, O_ESTAB, 0, 0);
 			break;
 
 		case TOK_SETUP:
 			fill_cmd(cmd, O_TCPFLAGS, 0,
 				(TH_SYN) | ( (TH_ACK) & 0xff) <<8 );
 			break;
 
 		case TOK_TCPDATALEN:
 			NEED1("tcpdatalen requires length");
 			if (strpbrk(*av, "-,")) {
 			    if (!add_ports(cmd, *av, 0, O_TCPDATALEN))
 				errx(EX_DATAERR, "invalid tcpdata len %s", *av);
 			} else
 			    fill_cmd(cmd, O_TCPDATALEN, 0,
 				    strtoul(*av, NULL, 0));
 			ac--; av++;
 			break;
 
 		case TOK_TCPOPTS:
 			NEED1("missing argument for tcpoptions");
 			fill_flags(cmd, O_TCPOPTS, f_tcpopts, *av);
 			ac--; av++;
 			break;
 
 		case TOK_TCPSEQ:
 		case TOK_TCPACK:
 			NEED1("tcpseq/tcpack requires argument");
 			cmd->len = F_INSN_SIZE(ipfw_insn_u32);
 			cmd->opcode = (i == TOK_TCPSEQ) ? O_TCPSEQ : O_TCPACK;
 			cmd32->d[0] = htonl(strtoul(*av, NULL, 0));
 			ac--; av++;
 			break;
 
 		case TOK_TCPWIN:
 			NEED1("tcpwin requires length");
 			fill_cmd(cmd, O_TCPWIN, 0,
 			    htons(strtoul(*av, NULL, 0)));
 			ac--; av++;
 			break;
 
 		case TOK_TCPFLAGS:
 			NEED1("missing argument for tcpflags");
 			cmd->opcode = O_TCPFLAGS;
 			fill_flags(cmd, O_TCPFLAGS, f_tcpflags, *av);
 			ac--; av++;
 			break;
 
 		case TOK_KEEPSTATE:
 			if (open_par)
 				errx(EX_USAGE, "keep-state cannot be part "
 				    "of an or block");
 			if (have_state)
 				errx(EX_USAGE, "only one of keep-state "
 					"and limit is allowed");
 			have_state = cmd;
 			fill_cmd(cmd, O_KEEP_STATE, 0, 0);
 			break;
 
 		case TOK_LIMIT: {
 			ipfw_insn_limit *c = (ipfw_insn_limit *)cmd;
 			int val;
 
 			if (open_par)
 				errx(EX_USAGE,
 				    "limit cannot be part of an or block");
 			if (have_state)
 				errx(EX_USAGE, "only one of keep-state and "
 				    "limit is allowed");
 			have_state = cmd;
 
 			cmd->len = F_INSN_SIZE(ipfw_insn_limit);
 			cmd->opcode = O_LIMIT;
 			c->limit_mask = c->conn_limit = 0;
 
 			while (ac > 0) {
 				if ((val = match_token(limit_masks, *av)) <= 0)
 					break;
 				c->limit_mask |= val;
 				ac--; av++;
 			}
 
 			if (c->limit_mask == 0)
 				errx(EX_USAGE, "limit: missing limit mask");
 
 			GET_UINT_ARG(c->conn_limit, 1, 65534, TOK_LIMIT,
 			    rule_options);
 
 			ac--; av++;
 			break;
 		}
 
 		case TOK_PROTO:
 			NEED1("missing protocol");
 			if (add_proto(cmd, *av, &proto)) {
 				ac--; av++;
 			} else
 				errx(EX_DATAERR, "invalid protocol ``%s''",
 				    *av);
 			break;
 
 		case TOK_SRCIP:
 			NEED1("missing source IP");
 			if (add_srcip(cmd, *av)) {
 				ac--; av++;
 			}
 			break;
 
 		case TOK_DSTIP:
 			NEED1("missing destination IP");
 			if (add_dstip(cmd, *av)) {
 				ac--; av++;
 			}
 			break;
 
 		case TOK_SRCIP6:
 			NEED1("missing source IP6");
 			if (add_srcip6(cmd, *av)) {
 				ac--; av++;
 			}
 			break;
 				
 		case TOK_DSTIP6:
 			NEED1("missing destination IP6");
 			if (add_dstip6(cmd, *av)) {
 				ac--; av++;
 			}
 			break;
 
 		case TOK_SRCPORT:
 			NEED1("missing source port");
 			if (_substrcmp(*av, "any") == 0 ||
 			    add_ports(cmd, *av, proto, O_IP_SRCPORT)) {
 				ac--; av++;
 			} else
 				errx(EX_DATAERR, "invalid source port %s", *av);
 			break;
 
 		case TOK_DSTPORT:
 			NEED1("missing destination port");
 			if (_substrcmp(*av, "any") == 0 ||
 			    add_ports(cmd, *av, proto, O_IP_DSTPORT)) {
 				ac--; av++;
 			} else
 				errx(EX_DATAERR, "invalid destination port %s",
 				    *av);
 			break;
 
 		case TOK_MAC:
 			if (add_mac(cmd, ac, av)) {
 				ac -= 2; av += 2;
 			}
 			break;
 
 		case TOK_MACTYPE:
 			NEED1("missing mac type");
 			if (!add_mactype(cmd, ac, *av))
 				errx(EX_DATAERR, "invalid mac type %s", *av);
 			ac--; av++;
 			break;
 
 		case TOK_VERREVPATH:
 			fill_cmd(cmd, O_VERREVPATH, 0, 0);
 			break;
 
 		case TOK_VERSRCREACH:
 			fill_cmd(cmd, O_VERSRCREACH, 0, 0);
 			break;
 
 		case TOK_ANTISPOOF:
 			fill_cmd(cmd, O_ANTISPOOF, 0, 0);
 			break;
 
 		case TOK_IPSEC:
 			fill_cmd(cmd, O_IPSEC, 0, 0);
 			break;
 
 		case TOK_IPV6:
 			fill_cmd(cmd, O_IP6, 0, 0);
 			break;
 
 		case TOK_IPV4:
 			fill_cmd(cmd, O_IP4, 0, 0);
 			break;
 
 		case TOK_EXT6HDR:
 			fill_ext6hdr( cmd, *av );
 			ac--; av++;
 			break;
 
 		case TOK_FLOWID:
 			if (proto != IPPROTO_IPV6 )
 				errx( EX_USAGE, "flow-id filter is active "
 				    "only for ipv6 protocol\n");
 			fill_flow6( (ipfw_insn_u32 *) cmd, *av );
 			ac--; av++;
 			break;
 
 		case TOK_COMMENT:
 			fill_comment(cmd, ac, av);
 			av += ac;
 			ac = 0;
 			break;
 
 		case TOK_TAGGED:
 			if (ac > 0 && strpbrk(*av, "-,")) {
 				if (!add_ports(cmd, *av, 0, O_TAGGED))
 					errx(EX_DATAERR, "tagged: invalid tag"
 					    " list: %s", *av);
 			}
 			else {
 				uint16_t tag;
 
 				GET_UINT_ARG(tag, 1, 65534, TOK_TAGGED,
 				    rule_options);
 				fill_cmd(cmd, O_TAGGED, 0, tag);
 			}
+			ac--; av++;
+			break;
+
+		case TOK_FIB:
+			NEED1("fib requires fib number");
+			fill_cmd(cmd, O_FIB, 0, strtoul(*av, NULL, 0));
 			ac--; av++;
 			break;
 
 		default:
 			errx(EX_USAGE, "unrecognised option [%d] %s\n", i, s);
 		}
 		if (F_LEN(cmd) > 0) {	/* prepare to advance */
 			prev = cmd;
 			cmd = next_cmd(cmd);
 		}
 	}
 
 done:
 	/*
 	 * Now copy stuff into the rule.
 	 * If we have a keep-state option, the first instruction
 	 * must be a PROBE_STATE (which is generated here).
 	 * If we have a LOG option, it was stored as the first command,
 	 * and now must be moved to the top of the action part.
 	 */
 	dst = (ipfw_insn *)rule->cmd;
 
 	/*
 	 * First thing to write into the command stream is the match probability.
 	 */
 	if (match_prob != 1) { /* 1 means always match */
 		dst->opcode = O_PROB;
 		dst->len = 2;
 		*((int32_t *)(dst+1)) = (int32_t)(match_prob * 0x7fffffff);
 		dst += dst->len;
 	}
 
 	/*
 	 * generate O_PROBE_STATE if necessary
 	 */
 	if (have_state && have_state->opcode != O_CHECK_STATE) {
 		fill_cmd(dst, O_PROBE_STATE, 0, 0);
 		dst = next_cmd(dst);
 	}
 
 	/* copy all commands but O_LOG, O_KEEP_STATE, O_LIMIT, O_ALTQ, O_TAG */
 	for (src = (ipfw_insn *)cmdbuf; src != cmd; src += i) {
 		i = F_LEN(src);
 
 		switch (src->opcode) {
 		case O_LOG:
 		case O_KEEP_STATE:
 		case O_LIMIT:
 		case O_ALTQ:
 		case O_TAG:
 			break;
 		default:
 			bcopy(src, dst, i * sizeof(uint32_t));
 			dst += i;
 		}
 	}
 
 	/*
 	 * put back the have_state command as last opcode
 	 */
 	if (have_state && have_state->opcode != O_CHECK_STATE) {
 		i = F_LEN(have_state);
 		bcopy(have_state, dst, i * sizeof(uint32_t));
 		dst += i;
 	}
 	/*
 	 * start action section
 	 */
 	rule->act_ofs = dst - rule->cmd;
 
 	/* put back O_LOG, O_ALTQ, O_TAG if necessary */
 	if (have_log) {
 		i = F_LEN(have_log);
 		bcopy(have_log, dst, i * sizeof(uint32_t));
 		dst += i;
 	}
 	if (have_altq) {
 		i = F_LEN(have_altq);
 		bcopy(have_altq, dst, i * sizeof(uint32_t));
 		dst += i;
 	}
 	if (have_tag) {
 		i = F_LEN(have_tag);
 		bcopy(have_tag, dst, i * sizeof(uint32_t));
 		dst += i;
 	}
 	/*
 	 * copy all other actions
 	 */
 	for (src = (ipfw_insn *)actbuf; src != action; src += i) {
 		i = F_LEN(src);
 		bcopy(src, dst, i * sizeof(uint32_t));
 		dst += i;
 	}
 
 	rule->cmd_len = (uint32_t *)dst - (uint32_t *)(rule->cmd);
 	i = (char *)dst - (char *)rule;
 	if (do_cmd(IP_FW_ADD, rule, (uintptr_t)&i) == -1)
 		err(EX_UNAVAILABLE, "getsockopt(%s)", "IP_FW_ADD");
 	if (!do_quiet)
 		show_ipfw(rule, 0, 0);
 }
 
 /*
  * Clear accounting counters (IP_FW_ZERO) or reset log counts
  * (IP_FW_RESETLOG).
  *
  * av[0] is the command word itself and is skipped.  With no further
  * arguments the request is issued once with a NULL argument; otherwise
  * every remaining argument must be a rule number in the range 0..0xffff
  * and one request is issued per rule.  When use_set is non-zero, bit 24
  * is set in the request and (use_set - 1) is stored starting at bit 16.
  *
  * A failure on an individual rule is reported with warn() and processing
  * continues; the function then exits with EX_UNAVAILABLE after the last
  * argument.
  */
 static void
 zero(int ac, char *av[], int optname /* IP_FW_ZERO or IP_FW_RESETLOG */)
 {
 	uint32_t arg, saved_arg;
 	int failed = EX_OK;
 	char const *name = optname == IP_FW_ZERO ?  "ZERO" : "RESETLOG";
 	char const *errstr;
 
 	av++; ac--;
 
 	if (!ac) {
 		/* clear all entries */
 		if (do_cmd(optname, NULL, 0) < 0)
 			err(EX_UNAVAILABLE, "setsockopt(IP_FW_%s)", name);
 		if (!do_quiet)
 			printf("%s.\n", optname == IP_FW_ZERO ?
 			    "Accounting cleared":"Logging counts reset");
 
 		return;
 	}
 
 	while (ac) {
 		/*
 		 * Rule number.  The cast keeps isdigit() defined for
 		 * bytes with the high bit set.
 		 */
 		if (isdigit((unsigned char)**av)) {
 			arg = strtonum(*av, 0, 0xffff, &errstr);
 			if (errstr)
 				errx(EX_DATAERR,
 				    "invalid rule number %s\n", *av);
 			/* Keep the bare rule number for the messages below. */
 			saved_arg = arg;
 			if (use_set)
 				arg |= (1 << 24) | ((use_set - 1) << 16);
 			av++;
 			ac--;
 			if (do_cmd(optname, &arg, sizeof(arg))) {
 				warn("rule %u: setsockopt(IP_FW_%s)",
 				    saved_arg, name);
 				failed = EX_UNAVAILABLE;
 			} else if (!do_quiet)
 				/* saved_arg is unsigned: print it with %u. */
 				printf("Entry %u %s.\n", saved_arg,
 				    optname == IP_FW_ZERO ?
 					"cleared" : "logging count reset");
 		} else {
 			errx(EX_USAGE, "invalid rule number ``%s''", *av);
 		}
 	}
 	if (failed != EX_OK)
 		exit(failed);
 }
 
 /*
  * Flush all rules (or all pipes when do_pipe is set), or only the rules
  * of one set when use_set is non-zero.
  *
  * Unless `force' or do_quiet is set, the user is asked for confirmation
  * on stdin first; anything other than a line starting with 'y'/'Y' or
  * 'n'/'N' repeats the question, and EOF aborts without flushing.
  */
 static void
 flush(int force)
 {
 	if (!(force || do_quiet)) {
 		int answer;
 
 		printf("Are you sure? [yn] ");
 		fflush(stdout);
 		for (;;) {
 			answer = toupper(getc(stdin));
 			/* Swallow the rest of the line; bail out on EOF. */
 			while (answer != '\n' && getc(stdin) != '\n') {
 				if (feof(stdin))
 					return;	/* nothing gets flushed */
 			}
 			if (answer == 'Y' || answer == 'N')
 				break;
 		}
 		printf("\n");
 		if (answer == 'N')
 			return;		/* user declined */
 	}
 
 	if (use_set) {
 		/* `ipfw set N flush` is handled as `ipfw delete set N` */
 		uint32_t set_arg = (1 << 24) | ((use_set - 1) & 0xffff);
 
 		if (do_cmd(IP_FW_DEL, &set_arg, sizeof(set_arg)) < 0)
 			err(EX_UNAVAILABLE, "setsockopt(IP_FW_DEL)");
 	} else {
 		int opt = do_pipe ? IP_DUMMYNET_FLUSH : IP_FW_FLUSH;
 
 		if (do_cmd(opt, NULL, 0) < 0)
 			err(EX_UNAVAILABLE, "setsockopt(IP_%s_FLUSH)",
 			    do_pipe ? "DUMMYNET" : "FW");
 	}
 
 	if (!do_quiet)
 		printf("Flushed all %s.\n", do_pipe ? "pipes" : "rules");
 }
 
 /*
  * Release a heap-allocated argument vector: each of the first `ac'
  * strings in `av', and then the vector itself.
  */
 static void
 free_args(int ac, char **av)
 {
 	char **slot = av;
 
 	while (ac-- > 0)
 		free(*slot++);
 	free(av);
 }
 
 /*
  * This one handles all table-related commands
  * 	ipfw table N add addr[/masklen] [value]
  * 	ipfw table N delete addr[/masklen]
  * 	ipfw table N flush
  * 	ipfw table N list
  *
  * av[0] is the "table" keyword and is skipped.  Usage errors and
  * failed kernel requests terminate the program through errx()/err().
  * In quiet mode an "add" that fails with EEXIST is retried after
  * deleting the existing entry, and a "delete" that fails with ESRCH
  * is silently ignored.
  */
 static void
 table_handler(int ac, char *av[])
 {
 	ipfw_table_entry ent;
 	ipfw_table *tbl;
 	int do_add;
 	char *p;
 	socklen_t l;
 	uint32_t a;
 
 	ac--; av++;
 	/* Cast keeps isdigit() defined for bytes with the high bit set. */
 	if (ac && isdigit((unsigned char)**av)) {
 		ent.tbl = atoi(*av);
 		ac--; av++;
 	} else
 		errx(EX_USAGE, "table number required");
 	NEED1("table needs command");
 	if (_substrcmp(*av, "add") == 0 ||
 	    _substrcmp(*av, "delete") == 0) {
 		do_add = **av == 'a';
 		ac--; av++;
 		if (!ac)
 			errx(EX_USAGE, "IP address required");
 		p = strchr(*av, '/');
 		if (p) {
 			int masklen;
 
 			*p++ = '\0';
 			/*
 			 * Range-check in a plain int before storing, so
 			 * that negative or oversized input cannot slip
 			 * through by being converted to the field's type.
 			 */
 			masklen = atoi(p);
 			if (masklen < 0 || masklen > 32)
 				errx(EX_DATAERR, "bad width ``%s''", p);
 			ent.masklen = masklen;
 		} else
 			ent.masklen = 32;
 		if (lookup_host(*av, (struct in_addr *)&ent.addr) != 0)
 			errx(EX_NOHOST, "hostname ``%s'' unknown", *av);
 		ac--; av++;
 		if (do_add && ac) {
 			unsigned int tval;
 			/* isdigit is a bit of a hack here.. */
 			if (strchr(*av, (int)'.') == NULL &&
 			    isdigit((unsigned char)**av))  {
 				ent.value = strtoul(*av, NULL, 0);
 			} else {
 				if (lookup_host(*av, (struct in_addr *)&tval) == 0) {
 					/*
 					 * The value must be stored in host
 					 * order so that the values < 65k can
 					 * be distinguished.
 					 */
 					ent.value = ntohl(tval);
 				} else {
 					errx(EX_NOHOST, "hostname ``%s'' unknown", *av);
 				}
 			}
 		} else
 			ent.value = 0;
 		if (do_cmd(do_add ? IP_FW_TABLE_ADD : IP_FW_TABLE_DEL,
 		    &ent, sizeof(ent)) < 0) {
 			/* If running silent, don't bomb out on these errors. */
 			if (!(do_quiet && (errno == (do_add ? EEXIST : ESRCH))))
 				err(EX_OSERR, "setsockopt(IP_FW_TABLE_%s)",
 				    do_add ? "ADD" : "DEL");
 			/* In silent mode, react to a failed add by deleting */
 			if (do_add) {
 				do_cmd(IP_FW_TABLE_DEL, &ent, sizeof(ent));
 				if (do_cmd(IP_FW_TABLE_ADD,
 				    &ent, sizeof(ent)) < 0)
 					err(EX_OSERR,
 				            "setsockopt(IP_FW_TABLE_ADD)");
 			}
 		}
 	} else if (_substrcmp(*av, "flush") == 0) {
 		if (do_cmd(IP_FW_TABLE_FLUSH, &ent.tbl, sizeof(ent.tbl)) < 0)
 			err(EX_OSERR, "setsockopt(IP_FW_TABLE_FLUSH)");
 	} else if (_substrcmp(*av, "list") == 0) {
 		/* Ask for the entry count first, then size the buffer. */
 		a = ent.tbl;
 		l = sizeof(a);
 		if (do_cmd(IP_FW_TABLE_GETSIZE, &a, (uintptr_t)&l) < 0)
 			err(EX_OSERR, "getsockopt(IP_FW_TABLE_GETSIZE)");
 		l = sizeof(*tbl) + a * sizeof(ipfw_table_entry);
 		tbl = malloc(l);
 		if (tbl == NULL)
 			err(EX_OSERR, "malloc");
 		tbl->tbl = ent.tbl;
 		if (do_cmd(IP_FW_TABLE_LIST, tbl, (uintptr_t)&l) < 0)
 			err(EX_OSERR, "getsockopt(IP_FW_TABLE_LIST)");
 		for (a = 0; a < tbl->cnt; a++) {
 			unsigned int tval;
 			tval = tbl->ent[a].value;
 			if (do_value_as_ip) {
 			    char tbuf[128];
 			    /*
 			     * inet_ntoa() is called again below, so the
 			     * first result is copied out before that.
 			     */
 			    snprintf(tbuf, sizeof(tbuf), "%s",
 				inet_ntoa(*(struct in_addr *)
 				&tbl->ent[a].addr));
 			    /* inet_ntoa expects network order */
 			    tval = htonl(tval);
 			    printf("%s/%u %s\n", tbuf, tbl->ent[a].masklen,
 			        inet_ntoa(*(struct in_addr *)&tval));
 			} else {
 			    printf("%s/%u %u\n",
 			        inet_ntoa(*(struct in_addr *)&tbl->ent[a].addr),
 			        tbl->ent[a].masklen, tval);
 			}
 		}
 		free(tbl);
 	} else
 		errx(EX_USAGE, "invalid table command %s", *av);
 }
 
 /*
  * Print NAT logs (default) or NAT configurations ("config" keyword).
  *
  * av[0] is the command word and is skipped.  Each remaining argument is
  * either a prefix of "config" or a number / "first-last" range limiting
  * which entries are printed.  The data is fetched with do_cmd() into a
  * buffer that is doubled in size until the reply is smaller than the
  * buffer.
  */
 static void
 show_nat(int ac, char **av) {
 	struct cfg_nat *n;
 	struct cfg_redir *e;
 	int cmd, i, nbytes, do_cfg, do_rule, frule, lrule, nalloc;
 	int nat_cnt, redir_cnt, r;
 	uint8_t *data, *p;
 	char *endptr;
 
 	do_rule = 0;
 	nalloc = 1024;
 	data = NULL;
 	frule = 0;
 	lrule = 65535; /* max ipfw rule number */
 	ac--; av++;
 
 	/* Parse parameters. */
 	for (cmd = IP_FW_NAT_GET_LOG, do_cfg = 0; ac != 0; ac--, av++) {
 		if (!strncmp(av[0], "config", strlen(av[0]))) {
 			cmd = IP_FW_NAT_GET_CONFIG, do_cfg = 1; 
 			continue;
 		}
 		/* Convert command line rule #. */
 		frule = lrule = strtoul(av[0], &endptr, 10);
 		if (*endptr == '-')
 			lrule = strtoul(endptr+1, &endptr, 10);
 		/*
 		 * This is a usage error, not a failed system call, so use
 		 * errx(): err() would append an unrelated errno string.
 		 */
 		if (lrule == 0)			
 			errx(EX_USAGE, "invalid rule number: %s", av[0]);
 		do_rule = 1;
 	}
 
 	/* Grow the buffer until the reply no longer fills it. */
 	nbytes = nalloc;
 	while (nbytes >= nalloc) {
 		nalloc = nalloc * 2;
 		nbytes = nalloc;
 		if ((data = realloc(data, nbytes)) == NULL)
 			err(EX_OSERR, "realloc");
 		if (do_cmd(cmd, data, (uintptr_t)&nbytes) < 0)
 			err(EX_OSERR, "getsockopt(IP_FW_GET_%s)",
 			    (cmd == IP_FW_NAT_GET_LOG) ? "LOG" : "CONFIG");
 	}
 	if (nbytes == 0)
 		exit(0);
 	if (do_cfg) {
 		/* The reply starts with the number of cfg_nat records. */
 		nat_cnt = *((int *)data);
 		for (i = sizeof(nat_cnt); nat_cnt; nat_cnt--) {
 			n = (struct cfg_nat *)&data[i];
 			if (frule <= n->id && lrule >= n->id)
 				print_nat_config(&data[i]);
 			/* Step over this record and its redirect entries. */
 			i += sizeof(struct cfg_nat);
 			for (redir_cnt = 0; redir_cnt < n->redir_cnt; redir_cnt++) {
 				e = (struct cfg_redir *)&data[i];
 				i += sizeof(struct cfg_redir) + e->spool_cnt * 
 				    sizeof(struct cfg_spool);
 			}
 		}
 	} else {
 		/*
 		 * Each log record is an int id followed by
 		 * LIBALIAS_BUF_SIZE bytes of text.  Stop as soon as no
 		 * complete id fits, instead of relying on the reply
 		 * length being an exact multiple of the record size.
 		 */
 		for (i = 0; i + (int)sizeof(int) <= nbytes;
 		    i += LIBALIAS_BUF_SIZE + sizeof(int)) {
 			p = &data[i];
 			bcopy(p, &r, sizeof(int));
 			if (do_rule) {
 				if (!(frule <= r && lrule >= r))
 					continue;
 			}
 			printf("nat %u: %s\n", (unsigned int)r, p+sizeof(int));
 		}
 	}
 	free(data);
 }
 
 /*
  * Called with the arguments (excluding program name).
  * Returns 0 if successful, 1 if empty command, errx() in case of errors.
  *
  * The arguments are first copied into a private, heap-allocated vector
  * (splitting a single string into words, or joining arguments that end
  * with ','), then the option flags are parsed with getopt(), and
  * finally the command word is dispatched to its handler.  The private
  * vector is released with free_args() before returning.
  */
 static int
 ipfw_main(int oldac, char **oldav)
 {
 	int ch, ac, save_ac;
 	const char *errstr;
 	char **av, **save_av;
 	int do_acct = 0;		/* Show packet/byte count */
 
 #define WHITESP		" \t\f\v\n\r"
 	if (oldac == 0)
 		return 1;
 	else if (oldac == 1) {
 		/*
 		 * If we are called with a single string, try to split it into
 		 * arguments for subsequent parsing.
 		 * But first, remove spaces after a ',', by copying the string
 		 * in-place.
 		 */
 		char *arg = oldav[0];	/* The string... */
 		int l = strlen(arg);
 		int copy = 0;		/* 1 if we need to copy, 0 otherwise */
 		int i, j;
 		for (i = j = 0; i < l; i++) {
 			if (arg[i] == '#')	/* comment marker */
 				break;
 			if (copy) {
 				arg[j++] = arg[i];
 				copy = !index("," WHITESP, arg[i]);
 			} else {
 				copy = !index(WHITESP, arg[i]);
 				if (copy)
 					arg[j++] = arg[i];
 			}
 		}
 		if (!copy && j > 0)	/* last char was a 'blank', remove it */
 			j--;
 		l = j;			/* the new argument length */
 		arg[j++] = '\0';
 		if (l == 0)		/* empty string! */
 			return 1;
 
 		/*
 		 * First, count number of arguments. Because of the previous
 		 * processing, this is just the number of blanks plus 1.
 		 */
 		for (i = 0, ac = 1; i < l; i++)
 			if (index(WHITESP, arg[i]) != NULL)
 				ac++;
 
 		av = calloc(ac, sizeof(char *));
 		if (av == NULL)
 			err(EX_OSERR, "calloc");
 
 		/*
 		 * Second, copy arguments from cmd[] to av[]. For each one,
 		 * j is the initial character, i is the one past the end.
 		 */
 		for (ac = 0, i = j = 0; i < l; i++)
 			if (index(WHITESP, arg[i]) != NULL || i == l-1) {
 				if (i == l-1)
 					i++;
 				av[ac] = calloc(i-j+1, 1);
 				if (av[ac] == NULL)
 					err(EX_OSERR, "calloc");
 				bcopy(arg+j, av[ac], i-j);
 				ac++;
 				j = i + 1;
 			}
 	} else {
 		/*
 		 * If an argument ends with ',' join with the next one.
 		 */
 		int first, i, l;
 
 		av = calloc(oldac, sizeof(char *));
 		if (av == NULL)
 			err(EX_OSERR, "calloc");
 		for (first = i = ac = 0, l = 0; i < oldac; i++) {
 			char *arg = oldav[i];
 			int k = strlen(arg);
 
 			l += k;
 			/*
 			 * An empty argument has no last character to
 			 * inspect; treat it as not ending with ','.
 			 */
 			if (k == 0 || arg[k-1] != ',' || i == oldac-1) {
 				/* Time to copy. */
 				av[ac] = calloc(l+1, 1);
 				if (av[ac] == NULL)
 					err(EX_OSERR, "calloc");
 				for (l=0; first <= i; first++) {
 					strcat(av[ac]+l, oldav[first]);
 					l += strlen(oldav[first]);
 				}
 				ac++;
 				l = 0;
 				first = i+1;
 			}
 		}
 	}
 
 	/* Set the force flag for non-interactive processes */
 	if (!do_force)
 		do_force = !isatty(STDIN_FILENO);
 
 	/* Save arguments for final freeing of memory. */
 	save_ac = ac;
 	save_av = av;
 
 	optind = optreset = 0;
 	while ((ch = getopt(ac, av, "abcdefhinNqs:STtv")) != -1)
 		switch (ch) {
 		case 'a':
 			do_acct = 1;
 			break;
 
 		case 'b':
 			comment_only = 1;
 			do_compact = 1;
 			break;
 
 		case 'c':
 			do_compact = 1;
 			break;
 
 		case 'd':
 			do_dynamic = 1;
 			break;
 
 		case 'e':
 			do_expired = 1;
 			break;
 
 		case 'f':
 			do_force = 1;
 			break;
 
 		case 'h': /* help */
 			free_args(save_ac, save_av);
 			help();
 			break;	/* NOTREACHED */
 
 		case 'i':
 			do_value_as_ip = 1;
 			break;
 
 		case 'n':
 			test_only = 1;
 			break;
 
 		case 'N':
 			do_resolv = 1;
 			break;
 
 		case 'q':
 			do_quiet = 1;
 			break;
 
 		case 's': /* sort */
 			do_sort = atoi(optarg);
 			break;
 
 		case 'S':
 			show_sets = 1;
 			break;
 
 		case 't':
 			do_time = 1;
 			break;
 
 		case 'T':
 			do_time = 2;	/* numeric timestamp */
 			break;
 
 		case 'v': /* verbose */
 			verbose = 1;
 			break;
 
 		default:
 			free_args(save_ac, save_av);
 			return 1;
 		}
 
 	ac -= optind;
 	av += optind;
 	NEED1("bad arguments, for usage summary ``ipfw''");
 
 	/*
 	 * An undocumented behaviour of ipfw1 was to allow rule numbers first,
 	 * e.g. "100 add allow ..." instead of "add 100 allow ...".
 	 * In case, swap first and second argument to get the normal form.
 	 * (The cast keeps isdigit() defined for bytes with the high bit set.)
 	 */
 	if (ac > 1 && isdigit((unsigned char)*av[0])) {
 		char *p = av[0];
 
 		av[0] = av[1];
 		av[1] = p;
 	}
 
 	/*
 	 * Optional: pipe, queue or nat.
 	 */
 	do_nat = 0;
 	do_pipe = 0;
 	if (!strncmp(*av, "nat", strlen(*av)))
  	        do_nat = 1;
  	else if (!strncmp(*av, "pipe", strlen(*av)))
 		do_pipe = 1;
 	else if (_substrcmp(*av, "queue") == 0)
 		do_pipe = 2;
 	else if (!strncmp(*av, "set", strlen(*av))) {
 		if (ac > 1 && isdigit((unsigned char)av[1][0])) {
 			use_set = strtonum(av[1], 0, RESVD_SET, &errstr);
 			if (errstr)
 				errx(EX_DATAERR,
 				    "invalid set number %s\n", av[1]);
 			/* use_set holds set number + 1; 0 means "no set". */
 			ac -= 2; av += 2; use_set++;
 		}
 	}
 
 	if (do_pipe || do_nat) {
 		ac--;
 		av++;
 	}
 	NEED1("missing command");
 
 	/*
 	 * For pipes, queues and nats we normally say 'nat|pipe NN config'
 	 * but the code is easier to parse as 'nat|pipe config NN'
 	 * so we swap the two arguments.
 	 */
 	if ((do_pipe || do_nat) && ac > 1 &&
 	    isdigit((unsigned char)*av[0])) {
 		char *p = av[0];
 
 		av[0] = av[1];
 		av[1] = p;
 	}
 
 	/* Commands in this first group are not tried after "set N". */
 	int try_next = 0;
 	if (use_set == 0) {
 		if (_substrcmp(*av, "add") == 0)
 			add(ac, av);
 		else if (do_nat && _substrcmp(*av, "show") == 0)
  			show_nat(ac, av);
 		else if (do_pipe && _substrcmp(*av, "config") == 0)
 			config_pipe(ac, av);
 		else if (do_nat && _substrcmp(*av, "config") == 0)
  			config_nat(ac, av);
 		else if (_substrcmp(*av, "set") == 0)
 			sets_handler(ac, av);
 		else if (_substrcmp(*av, "table") == 0)
 			table_handler(ac, av);
 		else if (_substrcmp(*av, "enable") == 0)
 			sysctl_handler(ac, av, 1);
 		else if (_substrcmp(*av, "disable") == 0)
 			sysctl_handler(ac, av, 0);
 		else
 			try_next = 1;
 	}
 
 	if (use_set || try_next) {
 		if (_substrcmp(*av, "delete") == 0)
 			delete(ac, av);
 		else if (_substrcmp(*av, "flush") == 0)
 			flush(do_force);
 		else if (_substrcmp(*av, "zero") == 0)
 			zero(ac, av, IP_FW_ZERO);
 		else if (_substrcmp(*av, "resetlog") == 0)
 			zero(ac, av, IP_FW_RESETLOG);
 		else if (_substrcmp(*av, "print") == 0 ||
 		         _substrcmp(*av, "list") == 0)
 			list(ac, av, do_acct);
 		else if (_substrcmp(*av, "show") == 0)
 			list(ac, av, 1 /* show counters */);
 		else
 			errx(EX_USAGE, "bad command `%s'", *av);
 	}
 
 	/* Free memory allocated in the argument parsing. */
 	free_args(save_ac, save_av);
 	return 0;
 }
 
 
 /*
  * Read commands from the file named by the last argument, one command
  * per line, and hand each line to ipfw_main().
  *
  * With "-p preproc [preproc-flags]" the file is connected to the
  * standard input of the preprocessor and the commands are read from the
  * preprocessor's standard output instead.  While lines are processed
  * the program name is set to "Line N" via setprogname(), which
  * presumably makes err()/warn() messages carry the line number
  * (NOTE(review): confirm against err(3)).
  */
 static void
 ipfw_readfile(int ac, char *av[])
 {
 #define MAX_ARGS	32
 	char	buf[BUFSIZ];
 	/*
 	 * Kept at function scope: the name passed to setprogname() must
 	 * stay valid for the errx() calls made after the read loop.
 	 * 32 bytes hold "Line " plus any int.
 	 */
 	char	linename[32];
 	char	*cmd = NULL, *filename = av[ac-1];
 	int	c, lineno=0;
 	FILE	*f = NULL;
 	pid_t	preproc = 0;
 
 	while ((c = getopt(ac, av, "cfNnp:qS")) != -1) {
 		switch(c) {
 		case 'c':
 			do_compact = 1;
 			break;
 
 		case 'f':
 			do_force = 1;
 			break;
 
 		case 'N':
 			do_resolv = 1;
 			break;
 
 		case 'n':
 			test_only = 1;
 			break;
 
 		case 'p':
 			cmd = optarg;
 			/*
 			 * Skip previous args and delete last one, so we
 			 * pass all but the last argument to the preprocessor
 			 * via av[optind-1]
 			 */
 			av += optind - 1;
 			ac -= optind - 1;
 			if (ac < 2)
 				errx(EX_USAGE, "no filename argument");
 			av[ac-1] = NULL;
 			fprintf(stderr, "command is %s\n", av[0]);
 			break;
 
 		case 'q':
 			do_quiet = 1;
 			break;
 
 		case 'S':
 			show_sets = 1;
 			break;
 
 		default:
 			errx(EX_USAGE, "bad arguments, for usage"
 			     " summary ``ipfw''");
 		}
 
 		/* Everything after -p belongs to the preprocessor. */
 		if (cmd != NULL)
 			break;
 	}
 
 	if (cmd == NULL && ac != optind + 1) {
 		fprintf(stderr, "ac %d, optind %d\n", ac, optind);
 		errx(EX_USAGE, "extraneous filename arguments");
 	}
 
 	if ((f = fopen(filename, "r")) == NULL)
 		err(EX_UNAVAILABLE, "fopen: %s", filename);
 
 	if (cmd != NULL) {			/* pipe through preprocessor */
 		int pipedes[2];
 
 		if (pipe(pipedes) == -1)
 			err(EX_OSERR, "cannot create pipe");
 
 		preproc = fork();
 		if (preproc == -1)
 			err(EX_OSERR, "cannot fork");
 
 		if (preproc == 0) {
 			/*
 			 * Child, will run the preprocessor with the
 			 * file on stdin and the pipe on stdout.
 			 */
 			if (dup2(fileno(f), 0) == -1
 			    || dup2(pipedes[1], 1) == -1)
 				err(EX_OSERR, "dup2()");
 			fclose(f);
 			close(pipedes[1]);
 			close(pipedes[0]);
 			execvp(cmd, av);
 			err(EX_OSERR, "execvp(%s) failed", cmd);
 		} else { /* parent, will reopen f as the pipe */
 			fclose(f);
 			close(pipedes[1]);
 			if ((f = fdopen(pipedes[0], "r")) == NULL) {
 				int savederrno = errno;
 
 				(void)kill(preproc, SIGTERM);
 				errno = savederrno;
 				err(EX_OSERR, "fdopen()");
 			}
 		}
 	}
 
 	while (fgets(buf, BUFSIZ, f)) {		/* read commands */
 		char *args[1];
 
 		lineno++;
 		/* Bounded write: cannot overflow however large lineno is. */
 		snprintf(linename, sizeof(linename), "Line %d", lineno);
 		setprogname(linename); /* XXX */
 		args[0] = buf;
 		ipfw_main(1, args);
 	}
 	fclose(f);
 	if (cmd != NULL) {
 		int status;
 
 		if (waitpid(preproc, &status, 0) == -1)
 			errx(EX_OSERR, "waitpid()");
 		if (WIFEXITED(status) && WEXITSTATUS(status) != EX_OK)
 			errx(EX_UNAVAILABLE,
 			    "preprocessor exited with status %d",
 			    WEXITSTATUS(status));
 		else if (WIFSIGNALED(status))
 			errx(EX_UNAVAILABLE,
 			    "preprocessor exited with signal %d",
 			    WTERMSIG(status));
 	}
 }
 
 /*
  * Program entry point.  When the last argument is a readable absolute
  * pathname it is treated as a file of commands to be (optionally
  * preprocessed and) executed; otherwise the arguments form a single
  * command, and the usage summary is shown if ipfw_main() reports a
  * non-zero result.
  */
 int
 main(int ac, char *av[])
 {
 	const char *last = (ac > 1) ? av[ac - 1] : NULL;
 
 	if (last != NULL && last[0] == '/' && access(last, R_OK) == 0) {
 		ipfw_readfile(ac, av);
 		return EX_OK;
 	}
 
 	if (ipfw_main(ac - 1, av + 1) != 0)
 		show_usage();
 	return EX_OK;
 }
Index: head/sys/conf/NOTES
===================================================================
--- head/sys/conf/NOTES	(revision 178887)
+++ head/sys/conf/NOTES	(revision 178888)
@@ -1,2716 +1,2718 @@
 # $FreeBSD$
 #
 # NOTES -- Lines that can be cut/pasted into kernel and hints configs.
 #
 # Lines that begin with 'device', 'options', 'machine', 'ident', 'maxusers',
 # 'makeoptions', 'hints', etc. go into the kernel configuration that you
 # run config(8) with.
 #
 # Lines that begin with 'hint.' are NOT for config(8), they go into your
 # hints file.  See /boot/device.hints and/or the 'hints' config(8) directive.
 #
 # Please use ``make LINT'' to create an old-style LINT file if you want to
 # do kernel test-builds.
 #
 # This file contains machine independent kernel configuration notes.  For
 # machine dependent notes, look in /sys/<arch>/conf/NOTES.
 #
 
 #
 # NOTES conventions and style guide:
 #
 # Large block comments should begin and end with a line containing only a
 # comment character.
 #
 # To describe a particular object, a block comment (if it exists) should
 # come first.  Next should come device, options, and hints lines in that
 # order.  All device and option lines must be described by a comment that
 # doesn't just expand the device or option name.  Use only a concise
 # comment on the same line if possible.  Very detailed descriptions of
 # devices and subsystems belong in man pages.
 #
 # A space followed by a tab separates 'options' from an option name.  Two
 # spaces followed by a tab separate 'device' from a device name.  Comments
 # after an option or device should use one space after the comment character.
 # To comment out a negative option that disables code and thus should not be
 # enabled for LINT builds, precede 'options' with "#!".
 #
 
 #
 # This is the ``identification'' of the kernel.  Usually this should
 # be the same as the name of your kernel.
 #
 ident		LINT
 
 #
 # The `maxusers' parameter controls the static sizing of a number of
 # internal system tables by a formula defined in subr_param.c.
 # Omitting this parameter or setting it to 0 will cause the system to
 # auto-size based on physical memory.
 #
 maxusers	10
 
 #
 # The `makeoptions' parameter allows variables to be passed to the
 # generated Makefile in the build area.
 #
 # CONF_CFLAGS gives some extra compiler flags that are added to ${CFLAGS}
 # after most other flags.  Here we use it to inhibit use of non-optimal
 # gcc built-in functions (e.g., memcmp).
 #
 # DEBUG happens to be magic.
 # The following is equivalent to 'config -g KERNELNAME' and creates
 # 'kernel.debug' compiled with -g debugging as well as a normal
 # 'kernel'.  Use 'make install.debug' to install the debug kernel
 # but that isn't normally necessary as the debug symbols are not loaded
 # by the kernel and are not useful there anyway.
 #
 # KERNEL can be overridden so that you can change the default name of your
 # kernel.
 #
 # MODULES_OVERRIDE can be used to limit modules built to a specific list.
 #
 makeoptions	CONF_CFLAGS=-fno-builtin  #Don't allow use of memcmp, etc.
 #makeoptions	DEBUG=-g		#Build kernel with gdb(1) debug symbols
 #makeoptions	KERNEL=foo		#Build kernel "foo" and install "/foo"
 # Only build ext2fs module plus those parts of the sound system I need.
 #makeoptions	MODULES_OVERRIDE="ext2fs sound/sound sound/driver/maestro3"
 makeoptions	DESTDIR=/tmp
 
 #
 # FreeBSD processes are subject to certain limits to their consumption
 # of system resources.  See getrlimit(2) for more details.  Each
 # resource limit has two values, a "soft" limit and a "hard" limit.
 # The soft limits can be modified during normal system operation, but
 # the hard limits are set at boot time.  Their default values are
 # in sys/<arch>/include/vmparam.h.  There are two ways to change them:
 # 
 # 1.  Set the values at kernel build time.  The options below are one
 #     way to allow that limit to grow to 1GB.  They can be increased
 #     further by changing the parameters:
 #	
 # 2.  In /boot/loader.conf, set the tunables kern.maxswzone,
 #     kern.maxbcache, kern.maxtsiz, kern.dfldsiz, kern.maxdsiz,
 #     kern.dflssiz, kern.maxssiz and kern.sgrowsiz.
 #
 # The options in /boot/loader.conf override anything in the kernel
 # configuration file.  See the function init_param1 in
 # sys/kern/subr_param.c for more details.
 #
 
 options 	MAXDSIZ=(1024UL*1024*1024)
 options 	MAXSSIZ=(128UL*1024*1024)
 options 	DFLDSIZ=(1024UL*1024*1024)
 
 #
 # BLKDEV_IOSIZE sets the default block size used in user block
 # device I/O.  Note that this value will be overridden by the label
 # when specifying a block device from a label with a non-0
 # partition blocksize.  The default is PAGE_SIZE.
 #
 options 	BLKDEV_IOSIZE=8192
 
 #
 # MAXPHYS and DFLTPHYS
 #
 # These are the max and default 'raw' I/O block device access sizes.
 # Reads and writes will be split into DFLTPHYS chunks. Some applications
 # have better performance with larger raw I/O access sizes. Typically
 # MAXPHYS should be twice the size of DFLTPHYS. Note that certain VM
 # parameters are derived from these values and making them too large
 # can make an unbootable kernel.
 #
 # The defaults are 64K and 128K respectively.
 options 	DFLTPHYS=(64*1024)
 options 	MAXPHYS=(128*1024)
 
 
 # This allows you to actually store this configuration file into
 # the kernel binary itself. See config(8) for more details.
 #
 options 	INCLUDE_CONFIG_FILE     # Include this file in kernel
 
 options 	GEOM_AES		# Don't use, use GEOM_BDE
 options 	GEOM_BDE		# Disk encryption.
 options 	GEOM_BSD		# BSD disklabels
 options 	GEOM_CACHE		# Disk cache.
 options 	GEOM_CONCAT		# Disk concatenation.
 options 	GEOM_ELI		# Disk encryption.
 options 	GEOM_FOX		# Redundant path mitigation
 options 	GEOM_GATE		# Userland services.
 options 	GEOM_JOURNAL		# Journaling.
 options 	GEOM_LABEL		# Providers labelization.
 options 	GEOM_LINUX_LVM		# Linux LVM2 volumes
 options 	GEOM_MBR		# DOS/MBR partitioning
 options 	GEOM_MIRROR		# Disk mirroring.
 options 	GEOM_MULTIPATH		# Disk multipath
 options 	GEOM_NOP		# Test class.
 options 	GEOM_PART_APM		# Apple partitioning
 options 	GEOM_PART_BSD		# BSD disklabel
 options 	GEOM_PART_GPT		# GPT partitioning
 options 	GEOM_PART_MBR		# MBR partitioning
 options 	GEOM_PART_PC98		# PC-9800 disk partitioning
 options 	GEOM_PART_VTOC8		# SMI VTOC8 disk label
 options 	GEOM_PC98		# NEC PC9800 partitioning
 options 	GEOM_RAID3		# RAID3 functionality.
 options 	GEOM_SHSEC		# Shared secret.
 options 	GEOM_STRIPE		# Disk striping.
 options 	GEOM_SUNLABEL		# Sun/Solaris partitioning
 options 	GEOM_UZIP		# Read-only compressed disks
 options 	GEOM_VIRSTOR		# Virtual storage.
 options 	GEOM_VOL		# Volume names from UFS superblock
 options 	GEOM_ZERO		# Performance testing helper.
 
 #
 # The root device and filesystem type can be compiled in;
 # this provides a fallback option if the root device cannot
 # be correctly guessed by the bootstrap code, or an override if
 # the RB_DFLTROOT flag (-r) is specified when booting the kernel.
 #
 options 	ROOTDEVNAME=\"ufs:da0s2e\"
 
 
 #####################################################################
 # Scheduler options:
 #
 # Specifying one of SCHED_4BSD or SCHED_ULE is mandatory.  These options
 # select which scheduler is compiled in.
 #
 # SCHED_4BSD is the historical, proven, BSD scheduler.  It has a global run
 # queue and no CPU affinity which makes it suboptimal for SMP.  It has very
 # good interactivity and priority selection.
 #
 # SCHED_ULE provides significant performance advantages over 4BSD on many
 # workloads on SMP machines.  It supports cpu-affinity, per-cpu runqueues
 # and scheduler locks.  It also has a stronger notion of interactivity 
 # which leads to better responsiveness even on uniprocessor machines.  This
 # will eventually become the default scheduler.
 #
 # SCHED_STATS is a debugging option which keeps some stats in the sysctl
 # tree at 'kern.sched.stats' and is useful for debugging scheduling decisions.
 #
 options 	SCHED_4BSD
 options		SCHED_STATS
 #options 	SCHED_ULE
 
 #####################################################################
 # SMP OPTIONS:
 #
 # SMP enables building of a Symmetric MultiProcessor Kernel.
 
 # Mandatory:
 options 	SMP			# Symmetric MultiProcessor Kernel
 
 # ADAPTIVE_MUTEXES changes the behavior of blocking mutexes to spin
 # if the thread that currently owns the mutex is executing on another
 # CPU.  This behaviour is enabled by default, so this option can be used
 # to disable it.
 options 	NO_ADAPTIVE_MUTEXES
 
 # ADAPTIVE_RWLOCKS changes the behavior of reader/writer locks to spin
 # if the thread that currently owns the rwlock is executing on another
 # CPU.  This behaviour is enabled by default, so this option can be used
 # to disable it.
 options 	NO_ADAPTIVE_RWLOCKS
 
 # ADAPTIVE_SX changes the behavior of sx locks to spin if the thread
 # that currently owns the lock is executing on another CPU.  Note that
 # in addition to enabling this option, individual sx locks must be
 # initialized with the SX_ADAPTIVESPIN flag.
 options 	ADAPTIVE_SX
 
 # MUTEX_NOINLINE forces mutex operations to call functions to perform each
 # operation rather than inlining the simple cases.  This can be used to
 # shrink the size of the kernel text segment.  Note that this behavior is
 # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING,
 # and WITNESS options.
 options 	MUTEX_NOINLINE
 
 # RWLOCK_NOINLINE forces rwlock operations to call functions to perform each
 # operation rather than inlining the simple cases.  This can be used to
 # shrink the size of the kernel text segment.  Note that this behavior is
 # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING,
 # and WITNESS options.
 options 	RWLOCK_NOINLINE
 
 # SX_NOINLINE forces sx lock operations to call functions to perform each
 # operation rather than inlining the simple cases.  This can be used to
 # shrink the size of the kernel text segment.  Note that this behavior is
 # already implied by the INVARIANT_SUPPORT, INVARIANTS, KTR, LOCK_PROFILING,
 # and WITNESS options.
 options 	SX_NOINLINE
 
 # SMP Debugging Options:
 #
 # PREEMPTION allows the threads that are in the kernel to be preempted
 #	  by higher priority threads.  It helps with interactivity and
 #	  allows interrupt threads to run sooner rather than waiting.
 #	  WARNING! Only tested on amd64 and i386.
 # FULL_PREEMPTION instructs the kernel to preempt non-realtime kernel
 #	  threads.  Its sole use is to expose race conditions and other
 #	  bugs during development.  Enabling this option will reduce
 #	  performance and increase the frequency of kernel panics by
 #	  design.  If you aren't sure that you need it then you don't.
 #	  Relies on the PREEMPTION option.  DON'T TURN THIS ON.
 # MUTEX_DEBUG enables various extra assertions in the mutex code.
 # SLEEPQUEUE_PROFILING enables rudimentary profiling of the hash table
 #	  used to hold active sleep queues as well as sleep wait message
 #	  frequency.
 # TURNSTILE_PROFILING enables rudimentary profiling of the hash table
 #	  used to hold active lock queues.
 # WITNESS enables the witness code which detects deadlocks and cycles
 #         during locking operations.
 # WITNESS_KDB causes the witness code to drop into the kernel debugger if
 #	  a lock hierarchy violation occurs or if locks are held when going to
 #	  sleep.
 # WITNESS_SKIPSPIN disables the witness checks on spin mutexes.
 options 	PREEMPTION
 options 	FULL_PREEMPTION
 options 	MUTEX_DEBUG
 options 	WITNESS
 options 	WITNESS_KDB
 options 	WITNESS_SKIPSPIN
 
 # LOCK_PROFILING - Profiling locks.  See LOCK_PROFILING(9) for details.
 options 	LOCK_PROFILING
 # Set the number of buffers and the hash size.  The hash size MUST be larger
 # than the number of buffers.  Hash size should be prime.
 options 	MPROF_BUFFERS="1536"
 options 	MPROF_HASH_SIZE="1543"
 
 # Profiling for internal hash tables.
 options 	SLEEPQUEUE_PROFILING
 options 	TURNSTILE_PROFILING
 
 
 #####################################################################
 # COMPATIBILITY OPTIONS
 
 #
 # Implement system calls compatible with 4.3BSD and older versions of
 # FreeBSD.  You probably do NOT want to remove this as much current code
 # still relies on the 4.3 emulation.  Note that some architectures that
 # are supported by FreeBSD do not include support for certain important
 # aspects of this compatibility option, namely those related to the
 # signal delivery mechanism.
 #
 options 	COMPAT_43
 
 # Old tty interface.
 options 	COMPAT_43TTY
 
 # Enable FreeBSD4 compatibility syscalls
 options 	COMPAT_FREEBSD4
 
 # Enable FreeBSD5 compatibility syscalls
 options 	COMPAT_FREEBSD5
 
 # Enable FreeBSD6 compatibility syscalls
 options 	COMPAT_FREEBSD6
 
 # Enable FreeBSD7 compatibility syscalls
 options 	COMPAT_FREEBSD7
 
 #
 # These three options provide support for System V Interface
 # Definition-style interprocess communication, in the form of shared
 # memory, semaphores, and message queues, respectively.
 #
 options 	SYSVSHM
 options 	SYSVSEM
 options 	SYSVMSG
 
 
 #####################################################################
 # DEBUGGING OPTIONS
 
 #
 # Compile with kernel debugger related code.
 #
 options 	KDB
 
 #
 # Print a stack trace of the current thread on the console for a panic.
 #
 options 	KDB_TRACE
 
 #
 # Don't enter the debugger for a panic. Intended for unattended operation
 # where you may want to enter the debugger from the console, but still want
 # the machine to recover from a panic.
 #
 options 	KDB_UNATTENDED
 
 #
 # Enable the ddb debugger backend.
 #
 options 	DDB
 
 #
 # Print the numerical value of symbols in addition to the symbolic
 # representation.
 #
 options 	DDB_NUMSYM
 
 #
 # Enable the remote gdb debugger backend.
 #
 options 	GDB
 
 #
 # SYSCTL_DEBUG enables a 'sysctl' debug tree that can be used to dump the
 # contents of the registered sysctl nodes on the console.  It is disabled by
 # default because it generates excessively verbose console output that can
 # interfere with serial console operation.
 #
 options 	SYSCTL_DEBUG
 
 #
 # DEBUG_MEMGUARD builds and enables memguard(9), a replacement allocator
 # for the kernel used to detect modify-after-free scenarios.  See the
 # memguard(9) man page for more information on usage.
 #
 options 	DEBUG_MEMGUARD
 
 #
 # DEBUG_REDZONE enables buffer underflows and buffer overflows detection for
 # malloc(9).
 #
 options 	DEBUG_REDZONE
 
 #
 # KTRACE enables the system-call tracing facility ktrace(2).  To be more
 # SMP-friendly, KTRACE uses a worker thread to process most trace events
 # asynchronously to the thread generating the event.  This requires a
 # pre-allocated store of objects representing trace events.  The
 # KTRACE_REQUEST_POOL option specifies the initial size of this store.
 # The size of the pool can be adjusted both at boottime and runtime via
 # the kern.ktrace_request_pool tunable and sysctl.
 #
 options 	KTRACE			#kernel tracing
 options 	KTRACE_REQUEST_POOL=101
 
 #
 # KTR is a kernel tracing mechanism imported from BSD/OS.  Currently
 # it has no userland interface aside from a few sysctl's.  It is
 # enabled with the KTR option.  KTR_ENTRIES defines the number of
 # entries in the circular trace buffer; it must be a power of two.
 # KTR_COMPILE defines the mask of events to compile into the kernel as
 # defined by the KTR_* constants in <sys/ktr.h>.  KTR_MASK defines the
 # initial value of the ktr_mask variable which determines at runtime
 # what events to trace.  KTR_CPUMASK determines which CPU's log
 # events, with bit X corresponding to CPU X.  KTR_VERBOSE enables
 # dumping of KTR events to the console by default.  This functionality
 # can be toggled via the debug.ktr_verbose sysctl and defaults to off
 # if KTR_VERBOSE is not defined.
 #
 options 	KTR
 options 	KTR_ENTRIES=1024
 options 	KTR_COMPILE=(KTR_INTR|KTR_PROC)
 options 	KTR_MASK=KTR_INTR
 options 	KTR_CPUMASK=0x3
 options 	KTR_VERBOSE
 
 #
 # ALQ(9) is a facility for the asynchronous queuing of records from the kernel
 # to a vnode, and is employed by services such as KTR(4) to produce trace
 # files based on a kernel event stream.  Records are written asynchronously
 # in a worker thread.
 #
 options 	ALQ
 options 	KTR_ALQ
 
 #
 # The INVARIANTS option is used in a number of source files to enable
 # extra sanity checking of internal structures.  This support is not
 # enabled by default because of the extra time it would take to check
 # for these conditions, which can only occur as a result of
 # programming errors.
 #
 options 	INVARIANTS
 
 #
 # The INVARIANT_SUPPORT option makes us compile in support for
 # verifying some of the internal structures.  It is a prerequisite for
 # 'INVARIANTS', as enabling 'INVARIANTS' will make these functions be
 # called.  The intent is that you can set 'INVARIANTS' for single
 # source files (by changing the source file or specifying it on the
 # command line) if you have 'INVARIANT_SUPPORT' enabled.  Also, if you
 # wish to build a kernel module with 'INVARIANTS', then adding
 # 'INVARIANT_SUPPORT' to your kernel will provide all the necessary
 # infrastructure without the added overhead.
 #
 options 	INVARIANT_SUPPORT
 
 #
 # The DIAGNOSTIC option is used to enable extra debugging information
 # from some parts of the kernel.  As this makes everything more noisy,
 # it is disabled by default.
 #
 options 	DIAGNOSTIC
 
 #
 # REGRESSION causes optional kernel interfaces necessary only for regression
 # testing to be enabled.  These interfaces may constitute security risks
 # when enabled, as they permit processes to easily modify aspects of the
 # run-time environment to reproduce unlikely or unusual (possibly normally
 # impossible) scenarios.
 #
 options 	REGRESSION
 
 #
 # RESTARTABLE_PANICS allows one to continue from a panic as if it were
 # a call to the debugger instead.  It is only
 # useful if a kernel debugger is present.  To restart from a panic, reset
 # the panicstr variable to NULL and continue execution.  This option is
 # for development use only and should NOT be used in production systems
 # to "workaround" a panic.
 #
 #options 	RESTARTABLE_PANICS
 
 #
 # This option lets some drivers co-exist that can't co-exist in a running
 # system.  This is used to be able to compile all kernel code in one go for
 # quality assurance purposes (like this file, which the option takes its name
 # from.)
 #
 options 	COMPILING_LINT
 
 #
 # STACK enables the stack(9) facility, allowing the capture of kernel stack
 # for the purpose of procinfo(1), etc.  stack(9) will also be compiled in
 # automatically if DDB(4) is compiled into the kernel.
 #
 options 	STACK
 
 
 #####################################################################
 # PERFORMANCE MONITORING OPTIONS
 
 #
 # The hwpmc driver allows the use of in-CPU performance monitoring
 # counters for performance monitoring.  The base kernel needs to be configured
 # with the 'options' line, while the hwpmc device can be either compiled
 # in or loaded as a loadable kernel module.
 #
 # Additional configuration options may be required on specific architectures,
 # please see hwpmc(4).
 
 device  	hwpmc			# Driver (also a loadable module)
 options 	HWPMC_HOOKS		# Other necessary kernel hooks
 
 
 #####################################################################
 # NETWORKING OPTIONS
 
 #
 # Protocol families
 #
 options 	INET			#Internet communications protocols
 options 	INET6			#IPv6 communications protocols
 
+options		ROUTETABLES=2		# max 16. 1 is back compatible.
+
 # In order to enable IPSEC you MUST also add device crypto to 
 # your kernel configuration
 options 	IPSEC			#IP security (requires device crypto)
 #options 	IPSEC_DEBUG		#debug for IP security
 #
 # Set IPSEC_FILTERTUNNEL to force packets coming through a tunnel
 # to be processed by any configured packet filtering twice.
 # The default is that packets coming out of a tunnel are _not_ processed;
 # they are assumed trusted.
 #
 # IPSEC history is preserved for such packets, and can be filtered
 # using ipfw(8)'s 'ipsec' keyword, when this option is enabled.
 #
 #options 	IPSEC_FILTERTUNNEL	#filter ipsec packets from a tunnel
 
 options 	IPX			#IPX/SPX communications protocols
 
 options 	NCP			#NetWare Core protocol
 
 options 	NETATALK		#Appletalk communications protocols
 options 	NETATALKDEBUG		#Appletalk debugging
 
 #
 # SMB/CIFS requester
 # NETSMB enables support for SMB protocol, it requires LIBMCHAIN and LIBICONV
 # options.
 options 	NETSMB			#SMB/CIFS requester
 
 # mchain library. It can be either loaded as KLD or compiled into kernel
 options 	LIBMCHAIN
 
 # libalias library, performing NAT
 options		LIBALIAS
 
 #
 # SCTP is a NEW transport protocol defined by
 # RFC2960 updated by RFC3309 and RFC3758.. and
 # soon to have a new base RFC and many many more
 # extensions. This release supports all the extensions
 # including many drafts (most about to become RFC's).
 # It is the premier SCTP implementation in the NET
 # and is quite well tested.
 #
 # Note YOU MUST have both INET and INET6 defined.
 # you don't have to enable V6, but SCTP is 
 # dual stacked and so far we have not teased apart
 # the V6 and V4.. since an association can span
 # both a V6 and V4 address at the SAME time :-)
 #
 options         SCTP
 # There are bunches of options:
 # this one turns on all sorts of
 # nasty printing that you can
 # do. It's all controlled by a
 # bit mask (settable by socket opt and
 # by sysctl). Including will not cause
 # logging until you set the bits.. but it
 # can be quite verbose.. so without this
 # option we don't do any of the tests for
 # bits and prints.. which makes the code run
 # faster.. if you are not debugging don't use.
 options SCTP_DEBUG
 #
 # This option turns off the CRC32c checksum. Basically
 # You will not be able to talk to anyone else that
 # has not done this. It's more for experimentation to
 # see how much CPU the CRC32c really takes. Most new
 # cards for TCP support checksum offload.. so this 
 # option gives you a "view" into what SCTP would be
 # like with such an offload (which only exists in
 # high-end iSCSI boards so far). With the new
 # splitting 8's algorithm it's not as bad as it used
 # to be.. but it does speed things up. Try it only
 # in a captured lab environment :-)
 options SCTP_WITH_NO_CSUM
 #
 
 #
 # All the options after that turn on specific types of
 # logging. You can monitor CWND growth, flight size
 # and all sorts of things. Go look at the code and
 # see. I have used this to produce interesting 
 # charts and graphs as well :->
 # 
 # I have not yet committed the tools to get and print
 # the logs, I will do that eventually .. before then
 # if you want them send me an email rrs@freebsd.org
 # You basically must have KTR enabled for these
 # and you then set the sysctl to turn on/off various
 # logging bits. Use ktrdump to pull the log and run
 # it through a display program.. and graphs and other
 # things too.
 #
 options 	SCTP_LOCK_LOGGING
 options 	SCTP_MBUF_LOGGING
 options 	SCTP_MBCNT_LOGGING
 options 	SCTP_PACKET_LOGGING
 options		SCTP_LTRACE_CHUNKS
 options 	SCTP_LTRACE_ERRORS
 
 
 # altq(9). Enable the base part of the hooks with the ALTQ option.
 # Individual disciplines must be built into the base system and can not be
 # loaded as modules at this point. ALTQ requires a stable TSC so if yours is
 # broken or changes with CPU throttling then you must also have the ALTQ_NOPCC
 # option.
 options 	ALTQ
 options 	ALTQ_CBQ	# Class Based Queueing
 options 	ALTQ_RED	# Random Early Detection
 options 	ALTQ_RIO	# RED In/Out
 options 	ALTQ_HFSC	# Hierarchical Packet Scheduler
 options 	ALTQ_CDNR	# Traffic conditioner
 options 	ALTQ_PRIQ	# Priority Queueing
 options 	ALTQ_NOPCC	# Required if the TSC is unusable
 options 	ALTQ_DEBUG
 
 # netgraph(4). Enable the base netgraph code with the NETGRAPH option.
 # Individual node types can be enabled with the corresponding option
 # listed below; however, this is not strictly necessary as netgraph
 # will automatically load the corresponding KLD module if the node type
 # is not already compiled into the kernel. Each type below has a
 # corresponding man page, e.g., ng_async(8).
 options 	NETGRAPH		# netgraph(4) system
 options 	NETGRAPH_DEBUG		# enable extra debugging, this
 					# affects netgraph(4) and nodes
 # Node types
 options 	NETGRAPH_ASYNC
 options 	NETGRAPH_ATMLLC
 options 	NETGRAPH_ATM_ATMPIF
 options 	NETGRAPH_BLUETOOTH		# ng_bluetooth(4)
 options 	NETGRAPH_BLUETOOTH_BT3C		# ng_bt3c(4)
 options 	NETGRAPH_BLUETOOTH_H4		# ng_h4(4)
 options 	NETGRAPH_BLUETOOTH_HCI		# ng_hci(4)
 options 	NETGRAPH_BLUETOOTH_L2CAP	# ng_l2cap(4)
 options 	NETGRAPH_BLUETOOTH_SOCKET	# ng_btsocket(4)
 options 	NETGRAPH_BLUETOOTH_UBT		# ng_ubt(4)
 options 	NETGRAPH_BLUETOOTH_UBTBCMFW	# ubtbcmfw(4)
 options 	NETGRAPH_BPF
 options 	NETGRAPH_BRIDGE
 options 	NETGRAPH_CAR
 options 	NETGRAPH_CISCO
 options 	NETGRAPH_DEFLATE
 options 	NETGRAPH_DEVICE
 options 	NETGRAPH_ECHO
 options 	NETGRAPH_EIFACE
 options 	NETGRAPH_ETHER
 options 	NETGRAPH_FEC
 options 	NETGRAPH_FRAME_RELAY
 options 	NETGRAPH_GIF
 options 	NETGRAPH_GIF_DEMUX
 options 	NETGRAPH_HOLE
 options 	NETGRAPH_IFACE
 options 	NETGRAPH_IP_INPUT
 options 	NETGRAPH_IPFW
 options 	NETGRAPH_KSOCKET
 options 	NETGRAPH_L2TP
 options 	NETGRAPH_LMI
 # MPPC compression requires proprietary files (not included)
 #options 	NETGRAPH_MPPC_COMPRESSION
 options 	NETGRAPH_MPPC_ENCRYPTION
 options 	NETGRAPH_NETFLOW
 options 	NETGRAPH_NAT
 options 	NETGRAPH_ONE2MANY
 options 	NETGRAPH_PPP
 options 	NETGRAPH_PPPOE
 options 	NETGRAPH_PPTPGRE
 options 	NETGRAPH_PRED1
 options 	NETGRAPH_RFC1490
 options 	NETGRAPH_SOCKET
 options 	NETGRAPH_SPLIT
 options 	NETGRAPH_SPPP
 options 	NETGRAPH_TAG
 options 	NETGRAPH_TCPMSS
 options 	NETGRAPH_TEE
 options 	NETGRAPH_TTY
 options 	NETGRAPH_UI
 options 	NETGRAPH_VJC
 
 # NgATM - Netgraph ATM
 options 	NGATM_ATM
 options 	NGATM_ATMBASE
 options 	NGATM_SSCOP
 options 	NGATM_SSCFU
 options 	NGATM_UNI
 options 	NGATM_CCATM
 
 device		mn	# Munich32x/Falc54 Nx64kbit/sec cards.
 
 #
 # Network interfaces:
 #  The `loop' device is MANDATORY when networking is enabled.
 #  The `ether' device provides generic code to handle
 #  Ethernets; it is MANDATORY when an Ethernet device driver is
 #  configured or token-ring is enabled.
 #  The `vlan' device implements the VLAN tagging of Ethernet frames
 #  according to IEEE 802.1Q.  It requires `device miibus'.
 #  The `wlan' device provides generic code to support 802.11
 #  drivers, including host AP mode; it is MANDATORY for the wi,
 #  and ath drivers and will eventually be required by all 802.11 drivers.
 #  The `wlan_wep', `wlan_tkip', and `wlan_ccmp' devices provide
 #  support for WEP, TKIP, and AES-CCMP crypto protocols optionally
 #  used with 802.11 devices that depend on the `wlan' module.
 #  The `wlan_xauth' device provides support for external (i.e. user-mode)
 #  authenticators for use with 802.11 drivers that use the `wlan'
 #  module and support 802.1x and/or WPA security protocols.
 #  The `wlan_acl' device provides a MAC-based access control mechanism
 #  for use with 802.11 drivers operating in ap mode and using the
 #  `wlan' module.
 #  The `fddi' device provides generic code to support FDDI.
 #  The `arcnet' device provides generic code to support Arcnet.
 #  The `sppp' device serves a similar role for certain types
 #  of synchronous PPP links (like `cx', `ar').
 #  The `sl' device implements the Serial Line IP (SLIP) service.
 #  The `ppp' device implements the Point-to-Point Protocol.
 #  The `bpf' device enables the Berkeley Packet Filter.  Be
 #  aware of the legal and administrative consequences of enabling this
 #  option.  The number of devices determines the maximum number of
 #  simultaneous BPF client programs runnable.  DHCP requires bpf.
 #  The `disc' device implements a minimal network interface,
 #  which throws away all packets sent and never receives any.  It is
 #  included for testing and benchmarking purposes.
 #  The `edsc' device implements a minimal Ethernet interface,
 #  which discards all packets sent and receives none.
 #  The `tap' device is a pty-like virtual Ethernet interface
 #  The `tun' device implements (user-)ppp and nos-tun
 #  The `gif' device implements IPv6 over IP4 tunneling,
 #  IPv4 over IPv6 tunneling, IPv4 over IPv4 tunneling and
 #  IPv6 over IPv6 tunneling.
 #  The `gre' device implements two types of IP4 over IP4 tunneling:
 #  GRE and MOBILE, as specified in the RFC1701 and RFC2004.
 #  The XBONEHACK option allows the same pair of addresses to be configured on
 #  multiple gif interfaces.
 #  The `faith' device captures packets sent to it and diverts them
 #  to the IPv4/IPv6 translation daemon.
 #  The `stf' device implements 6to4 encapsulation.
 #  The `ef' device provides support for multiple ethernet frame types
 #  specified via ETHER_* options. See ef(4) for details.
 #
 # The pf packet filter consists of three devices:
 #  The `pf' device provides /dev/pf and the firewall code itself.
 #  The `pflog' device provides the pflog0 interface which logs packets.
 #  The `pfsync' device provides the pfsync0 interface used for
 #   synchronization of firewall state tables (over the net).
 #
 # The PPP_BSDCOMP option enables support for compress(1) style entire
 # packet compression, the PPP_DEFLATE is for zlib/gzip style compression.
 # PPP_FILTER enables code for filtering the ppp data stream and selecting
 # events for resetting the demand dial activity timer - requires bpf.
 # See pppd(8) for more details.
 #
 device		ether			#Generic Ethernet
 device		vlan			#VLAN support (needs miibus)
 device		wlan			#802.11 support
 options		IEEE80211_DEBUG		#enable debugging msgs
 options		IEEE80211_AMPDU_AGE	#age frames in AMPDU reorder q's
 device		wlan_wep		#802.11 WEP support
 device		wlan_ccmp		#802.11 CCMP support
 device		wlan_tkip		#802.11 TKIP support
 device		wlan_xauth		#802.11 external authenticator support
 device		wlan_acl		#802.11 MAC ACL support
 device		wlan_amrr		#AMRR transmit rate control algorithm
 device		token			#Generic TokenRing
 device		fddi			#Generic FDDI
 device		arcnet			#Generic Arcnet
 device		sppp			#Generic Synchronous PPP
 device		loop			#Network loopback device
 device		bpf			#Berkeley packet filter
 device		disc			#Discard device based on loopback
 device		edsc			#Ethernet discard device
 device		tap			#Virtual Ethernet driver
 device		tun			#Tunnel driver (ppp(8), nos-tun(8))
 device		sl			#Serial Line IP
 device		gre			#IP over IP tunneling
 device		if_bridge		#Bridge interface
 device		pf			#PF OpenBSD packet-filter firewall
 device		pflog			#logging support interface for PF
 device		pfsync			#synchronization interface for PF
 device		carp			#Common Address Redundancy Protocol
 device		enc			#IPsec interface
 device		ppp			#Point-to-point protocol
 options 	PPP_BSDCOMP		#PPP BSD-compress support
 options 	PPP_DEFLATE		#PPP zlib/deflate/gzip support
 options 	PPP_FILTER		#enable bpf filtering (needs bpf)
 device		lagg			#Link aggregation interface
 
 device		ef			# Multiple ethernet frames support
 options 	ETHER_II		# enable Ethernet_II frame
 options 	ETHER_8023		# enable Ethernet_802.3 (Novell) frame
 options 	ETHER_8022		# enable Ethernet_802.2 frame
 options 	ETHER_SNAP		# enable Ethernet_802.2/SNAP frame
 
 # for IPv6
 device		gif			#IPv6 and IPv4 tunneling
 options 	XBONEHACK
 device		faith			#for IPv6 and IPv4 translation
 device		stf			#6to4 IPv6 over IPv4 encapsulation
 
 #
 # Internet family options:
 #
 # MROUTING enables the kernel multicast packet forwarder, which works
 # with mrouted and XORP.
 #
 # IPFIREWALL enables support for IP firewall construction, in
 # conjunction with the `ipfw' program.  IPFIREWALL_VERBOSE sends
 # logged packets to the system logger.  IPFIREWALL_VERBOSE_LIMIT
 # limits the number of times a matching entry can be logged.
 #
 # WARNING:  IPFIREWALL defaults to a policy of "deny ip from any to any"
 # and if you do not add other rules during startup to allow access,
 # YOU WILL LOCK YOURSELF OUT.  It is suggested that you set firewall_type=open
 # in /etc/rc.conf when first enabling this feature, then refine the
 # firewall rules in /etc/rc.firewall after you've tested that the new kernel
 # feature works properly.
 #
 # IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to
 # allow everything.  Use with care, if a cracker can crash your
 # firewall machine, they can get to your protected machines.  However,
 # if you are using it as an as-needed filter for specific problems as
 # they arise, then this may be for you.  Changing the default to 'allow'
 # means that you won't get stuck if the kernel and /sbin/ipfw binary get
 # out of sync.
 #
 # IPDIVERT enables the divert IP sockets, used by ``ipfw divert''.  It
 # depends on IPFIREWALL if compiled into the kernel.
 #
 # IPFIREWALL_FORWARD enables changing of the packet destination either
 # to do some sort of policy routing or transparent proxying.  Used by
 # ``ipfw forward''. All  redirections apply to locally generated
 # packets too.  Because of this great care is required when
 # crafting the ruleset.
 #
 # IPFIREWALL_NAT adds support for in kernel nat in ipfw, and it requires
 # LIBALIAS.
 #
 # IPSTEALTH enables code to support stealth forwarding (i.e., forwarding
 # packets without touching the TTL).  This can be useful to hide firewalls
 # from traceroute and similar tools.
 #
 # TCPDEBUG enables code which keeps traces of the TCP state machine
 # for sockets with the SO_DEBUG option set, which can then be examined
 # using the trpt(8) utility.
 #
 options 	MROUTING		# Multicast routing
 options 	IPFIREWALL		#firewall
 options 	IPFIREWALL_VERBOSE	#enable logging to syslogd(8)
 options 	IPFIREWALL_VERBOSE_LIMIT=100	#limit verbosity
 options 	IPFIREWALL_DEFAULT_TO_ACCEPT	#allow everything by default
 options 	IPFIREWALL_FORWARD	#packet destination changes
 options 	IPFIREWALL_NAT		#ipfw kernel nat support
 options 	IPDIVERT		#divert sockets
 options 	IPFILTER		#ipfilter support
 options 	IPFILTER_LOG		#ipfilter logging
 options 	IPFILTER_LOOKUP		#ipfilter pools
 options 	IPFILTER_DEFAULT_BLOCK	#block all packets by default
 options 	IPSTEALTH		#support for stealth forwarding
 options 	TCPDEBUG
 
 # The MBUF_STRESS_TEST option enables options which create
 # various random failures / extreme cases related to mbuf
 # functions.  See mbuf(9) for a list of available test cases.
 # MBUF_PROFILING enables code to profile the mbuf chains
 # exiting the system (via participating interfaces) and
 # return a logarithmic histogram of monitored parameters
 # (e.g. packet size, wasted space, number of mbufs in chain).
 options 	MBUF_STRESS_TEST
 options		MBUF_PROFILING
 
 # Statically Link in accept filters
 options 	ACCEPT_FILTER_DATA
 options 	ACCEPT_FILTER_HTTP
 
 # TCP_SIGNATURE adds support for RFC 2385 (TCP-MD5) digests. These are
 # carried in TCP option 19. This option is commonly used to protect
 # TCP sessions (e.g. BGP) where IPSEC is not available nor desirable.
 # This is enabled on a per-socket basis using the TCP_MD5SIG socket option.
 # This requires the use of 'device crypto', 'options IPSEC'
 # or 'device cryptodev'.
 #options 	TCP_SIGNATURE		#include support for RFC 2385
 
 # DUMMYNET enables the "dummynet" bandwidth limiter.  You need IPFIREWALL
 # as well.  See dummynet(4) and ipfw(8) for more info.  When you run
 # DUMMYNET it is advisable to also have at least "options HZ=1000" to achieve
 # a smooth scheduling of the traffic.
 options 	DUMMYNET
 
 # Zero copy sockets support.  This enables "zero copy" for sending and
 # receiving data via a socket.  The send side works for any type of NIC,
 # the receive side only works for NICs that support MTUs greater than the
 # page size of your architecture and that support header splitting.  See
 # zero_copy(9) for more details.
 options 	ZERO_COPY_SOCKETS
 
 #
 # ATM (HARP version) options
 #
 # XXX: These have been disabled in FreeBSD 7.0 as they are not MPSAFE.
 #
 # ATM_CORE includes the base ATM functionality code.  This must be included
 #	for ATM support.
 #
 # ATM_IP includes support for running IP over ATM.
 #
 # At least one (and usually only one) of the following signalling managers
 # must be included (note that all signalling managers include PVC support):
 # ATM_SIGPVC includes support for the PVC-only signalling manager `sigpvc'.
 # ATM_SPANS includes support for the `spans' signalling manager, which runs
 #	the FORE Systems's proprietary SPANS signalling protocol.
 # ATM_UNI includes support for the `uni30' and `uni31' signalling managers,
 #	which run the ATM Forum UNI 3.x signalling protocols.
 #
 # The `hfa' driver provides support for the FORE Systems, Inc.
 # PCA-200E ATM PCI Adapter.
 #
 # The `harp' pseudo-driver makes all NATM interface drivers available to HARP.
 #
 #options 	ATM_CORE		#core ATM protocol family
 #options 	ATM_IP			#IP over ATM support
 #options 	ATM_SIGPVC		#SIGPVC signalling manager
 #options 	ATM_SPANS		#SPANS signalling manager
 #options 	ATM_UNI			#UNI signalling manager
 
 #device		hfa			#FORE PCA-200E ATM PCI
 #device		harp			#Pseudo-interface for NATM
 
 
 #####################################################################
 # FILESYSTEM OPTIONS
 
 #
 # Only the root, /usr, and /tmp filesystems need be statically
 # compiled; everything else will be automatically loaded at mount
 # time.  (Exception: the UFS family--- FFS --- cannot
 # currently be demand-loaded.)  Some people still prefer to statically
 # compile other filesystems as well.
 #
 # NB: The PORTAL filesystem is known to be buggy, and WILL panic your
 # system if you attempt to do anything with it.  It is included here
 # as an incentive for some enterprising soul to sit down and fix it.
 # The UNION filesystem was known to be buggy in the past.  It is now
 # being actively maintained, although there are still some issues being
 # resolved.
 #
 
 # One of these is mandatory:
 options 	FFS			#Fast filesystem
 options 	NFSCLIENT		#Network File System client
 
 # The rest are optional:
 options 	CD9660			#ISO 9660 filesystem
 options 	FDESCFS			#File descriptor filesystem
 options 	HPFS			#OS/2 File system
 options 	MSDOSFS			#MS DOS File System (FAT, FAT32)
 options 	NFSSERVER		#Network File System server
 options		NFSLOCKD		#Network Lock Manager
 options 	NTFS			#NT File System
 options 	NULLFS			#NULL filesystem
 # Broken (depends on NCP):
 #options 	NWFS			#NetWare filesystem
 options 	PORTALFS		#Portal filesystem
 options 	PROCFS			#Process filesystem (requires PSEUDOFS)
 options 	PSEUDOFS		#Pseudo-filesystem framework
 options 	PSEUDOFS_TRACE		#Debugging support for PSEUDOFS
 options 	SMBFS			#SMB/CIFS filesystem
 options 	UDF			#Universal Disk Format
 options 	UNIONFS			#Union filesystem
 # The xFS_ROOT options REQUIRE the associated ``options xFS''
 options 	NFS_ROOT		#NFS usable as root device
 
 # Soft updates is a technique for improving filesystem speed and
 # making abrupt shutdown less risky.
 #
 options 	SOFTUPDATES
 
 # Extended attributes allow additional data to be associated with files,
 # and is used for ACLs, Capabilities, and MAC labels.
 # See src/sys/ufs/ufs/README.extattr for more information.
 options 	UFS_EXTATTR
 options 	UFS_EXTATTR_AUTOSTART
 
 # Access Control List support for UFS filesystems.  The current ACL
 # implementation requires extended attribute support, UFS_EXTATTR,
 # for the underlying filesystem.
 # See src/sys/ufs/ufs/README.acls for more information.
 options 	UFS_ACL
 
 # Directory hashing improves the speed of operations on very large
 # directories at the expense of some memory.
 options 	UFS_DIRHASH
 
 # Gjournal-based UFS journaling support.
 options 	UFS_GJOURNAL
 
 # Make space in the kernel for a root filesystem on a md device.
 # Define to the number of kilobytes to reserve for the filesystem.
 options 	MD_ROOT_SIZE=10
 
 # Make the md device a potential root device, either with preloaded
 # images of type mfs_root or md_root.
 options 	MD_ROOT
 
 # Disk quotas are supported when this option is enabled.
 options 	QUOTA			#enable disk quotas
 
 # If you are running a machine just as a fileserver for PC and MAC
 # users, using SAMBA or Netatalk, you may consider setting this option
 # and keeping all those users' directories on a filesystem that is
 # mounted with the suiddir option. This gives new files the same
 # ownership as the directory (similar to group). It's a security hole
 # if you let these users run programs, so confine it to file-servers
 # (but it'll save you lots of headaches in those cases). Root owned
 # directories are exempt and X bits are cleared. The suid bit must be
 # set on the directory as well; see chmod(1).  PC owners can't see/set
 # ownerships so they keep getting their toes trodden on. This saves
 # you all the support calls as the filesystem it's used on will act as
 # they expect: "It's my dir so it must be my file".
 #
 options 	SUIDDIR
 
 # NFS options:
 options 	NFS_MINATTRTIMO=3	# VREG attrib cache timeout in sec
 options 	NFS_MAXATTRTIMO=60
 options 	NFS_MINDIRATTRTIMO=30	# VDIR attrib cache timeout in sec
 options 	NFS_MAXDIRATTRTIMO=60
 options 	NFS_GATHERDELAY=10	# Default write gather delay (msec)
 options 	NFS_WDELAYHASHSIZ=16	# and with this
 options 	NFS_DEBUG		# Enable NFS Debugging
 
 # Coda stuff:
 options 	CODA			#CODA filesystem.
 device		vcoda			#coda minicache <-> venus comm.
 # Use the old Coda 5.x venus<->kernel interface instead of the new
 # realms-aware 6.x protocol.
 #options 	CODA_COMPAT_5
 
 #
 # Add support for the EXT2FS filesystem of Linux fame.  Be a bit
 # careful with this - the ext2fs code has a tendency to lag behind
 # changes and not be exercised very much, so mounting read/write could
 # be dangerous (and even mounting read only could result in panics.)
 #
 options 	EXT2FS
 
 #
 # Add support for the ReiserFS filesystem (used in Linux). Currently,
 # this is limited to read-only access.
 #
 options 	REISERFS
 
 #
 # Add support for the SGI XFS filesystem. Currently,
 # this is limited to read-only access.
 #
 options 	XFS
 
 # Use real implementations of the aio_* system calls.  There are numerous
 # stability and security issues in the current aio code that make it
 # unsuitable for inclusion on machines with untrusted local users.
 options 	VFS_AIO
 
 # Cryptographically secure random number generator; /dev/random
 device		random
 
 # The system memory devices; /dev/mem, /dev/kmem
 device		mem
 
 # Optional character code conversion support with LIBICONV.
 # Each option requires its base file system and LIBICONV.
 options 	CD9660_ICONV
 options 	MSDOSFS_ICONV
 options 	NTFS_ICONV
 options 	UDF_ICONV
 
 
 #####################################################################
 # POSIX P1003.1B
 
 # Real time extensions added in the 1993 POSIX
 # _KPOSIX_PRIORITY_SCHEDULING: Build in _POSIX_PRIORITY_SCHEDULING
 
 options 	_KPOSIX_PRIORITY_SCHEDULING
 # p1003_1b_semaphores are very experimental,
 # user should be ready to assist in debugging if problems arise.
 options 	P1003_1B_SEMAPHORES
 
 # POSIX message queue
 options 	P1003_1B_MQUEUE
 
 #####################################################################
 # SECURITY POLICY PARAMETERS
 
 # Support for BSM audit
 options 	AUDIT
 
 # Support for Mandatory Access Control (MAC):
 options 	MAC
 options 	MAC_BIBA
 options 	MAC_BSDEXTENDED
 options 	MAC_IFOFF
 options 	MAC_LOMAC
 options 	MAC_MLS
 options 	MAC_NONE
 options 	MAC_PARTITION
 options 	MAC_PORTACL
 options 	MAC_SEEOTHERUIDS
 options 	MAC_STUB
 options 	MAC_TEST
 
 
 #####################################################################
 # CLOCK OPTIONS
 
 # The granularity of operation is controlled by the kernel option HZ whose
 # default value (1000 on most architectures) means a granularity of 1ms
 # (1s/HZ).  Historically, the default was 100, but finer granularity is
 # required for DUMMYNET and other systems on modern hardware.  There are
 # reasonable arguments that HZ should, in fact, be 100 still; consider
 # that reducing the granularity too much might cause excessive overhead in
 # clock interrupt processing, potentially causing ticks to be missed and thus
 # actually reducing the accuracy of operation.
 
 options 	HZ=100
 
 # Enable support for the kernel PLL to use an external PPS signal,
 # under supervision of [x]ntpd(8)
 # More info in ntpd documentation: http://www.eecis.udel.edu/~ntp
 
 options 	PPS_SYNC
 
 
 #####################################################################
 # SCSI DEVICES
 
 # SCSI DEVICE CONFIGURATION
 
 # The SCSI subsystem consists of the `base' SCSI code, a number of
 # high-level SCSI device `type' drivers, and the low-level host-adapter
 # device drivers.  The host adapters are listed in the ISA and PCI
 # device configuration sections below.
 #
 # It is possible to wire down your SCSI devices so that a given bus,
 # target, and LUN always come on line as the same device unit.  In
 # earlier versions the unit numbers were assigned in the order that
 # the devices were probed on the SCSI bus.  This means that if you
 # removed a disk drive, you may have had to rewrite your /etc/fstab
 # file, and also that you had to be careful when adding a new disk
 # as it may have been probed earlier and moved your device configuration
 # around.  (See also option GEOM_VOL for a different solution to this
 # problem.)
 
 # This old behavior is maintained as the default behavior.  The unit
 # assignment begins with the first non-wired down unit for a device
 # type.  For example, if you wire a disk as "da3" then the first
 # non-wired disk will be assigned da4.
 
 # The syntax for wiring down devices is:
 
 hint.scbus.0.at="ahc0"
 hint.scbus.1.at="ahc1"
 hint.scbus.1.bus="0"
 hint.scbus.3.at="ahc2"
 hint.scbus.3.bus="0"
 hint.scbus.2.at="ahc2"
 hint.scbus.2.bus="1"
 hint.da.0.at="scbus0"
 hint.da.0.target="0"
 hint.da.0.unit="0"
 hint.da.1.at="scbus3"
 hint.da.1.target="1"
 hint.da.2.at="scbus2"
 hint.da.2.target="3"
 hint.sa.1.at="scbus1"
 hint.sa.1.target="6"
 
 # "units" (SCSI logical unit number) that are not specified are
 # treated as if specified as LUN 0.
 
 # All SCSI devices allocate as many units as are required.
 
 # The ch driver drives SCSI Media Changer ("jukebox") devices.
 #
 # The da driver drives SCSI Direct Access ("disk") and Optical Media
 # ("WORM") devices.
 #
 # The sa driver drives SCSI Sequential Access ("tape") devices.
 #
 # The cd driver drives SCSI Read Only Direct Access ("cd") devices.
 #
 # The ses driver drives SCSI Environment Services ("ses") and
 # SAF-TE ("SCSI Accessible Fault-Tolerant Enclosure") devices.
 #
 # The pt driver drives SCSI Processor devices.
 #
 # The sg driver provides a passthrough API that is compatible with the
 # Linux SG driver.  It will work in conjunction with the COMPAT_LINUX
 # option to run Linux SG apps.  It can also stand on its own and provide
 # source level API compatibility for porting apps to FreeBSD.
 #
 # Target Mode support is provided here but also requires that a SIM
 # (SCSI Host Adapter Driver) provide support as well.
 #
 # The targ driver provides target mode support as a Processor type device.
 # It exists to give the minimal context necessary to respond to Inquiry
 # commands. There is a sample user application that shows how the rest
 # of the command support might be done in /usr/share/examples/scsi_target.
 #
 # The targbh driver provides target mode support and exists to respond
 # to incoming commands that do not otherwise have a logical unit assigned
 # to them.
 #
 # The "unknown" device (uk? in pre-2.0.5) is now part of the base SCSI
 # configuration as the "pass" driver.
 
 device		scbus		#base SCSI code
 device		ch		#SCSI media changers
 device		da		#SCSI direct access devices (aka disks)
 device		sa		#SCSI tapes
 device		cd		#SCSI CD-ROMs
 device		ses		#SCSI Environmental Services (and SAF-TE)
 device		pt		#SCSI processor
 device		targ		#SCSI Target Mode Code
 device		targbh		#SCSI Target Mode Blackhole Device
 device		pass		#CAM passthrough driver
 device		sg		#Linux SCSI passthrough
 
 # CAM OPTIONS:
 # debugging options:
 # -- NOTE --  If you specify one of the bus/target/lun options, you must
 #             specify them all!
 # CAMDEBUG: When defined enables debugging macros
 # CAM_DEBUG_BUS:  Debug the given bus.  Use -1 to debug all busses.
 # CAM_DEBUG_TARGET:  Debug the given target.  Use -1 to debug all targets.
 # CAM_DEBUG_LUN:  Debug the given lun.  Use -1 to debug all luns.
 # CAM_DEBUG_FLAGS:  OR together CAM_DEBUG_INFO, CAM_DEBUG_TRACE,
 #                   CAM_DEBUG_SUBTRACE, and CAM_DEBUG_CDB
 #
 # CAM_MAX_HIGHPOWER: Maximum number of concurrent high power (start unit) cmds
 # SCSI_NO_SENSE_STRINGS: When defined disables sense descriptions
 # SCSI_NO_OP_STRINGS: When defined disables opcode descriptions
 # SCSI_DELAY: The number of MILLISECONDS to freeze the SIM (scsi adapter)
 #             queue after a bus reset, and the number of milliseconds to
 #             freeze the device queue after a bus device reset.  This
 #             can be changed at boot and runtime with the
 #             kern.cam.scsi_delay tunable/sysctl.
 options 	CAMDEBUG
 options 	CAM_DEBUG_BUS=-1
 options 	CAM_DEBUG_TARGET=-1
 options 	CAM_DEBUG_LUN=-1
 options 	CAM_DEBUG_FLAGS=(CAM_DEBUG_INFO|CAM_DEBUG_TRACE|CAM_DEBUG_CDB)
 options 	CAM_MAX_HIGHPOWER=4
 options 	SCSI_NO_SENSE_STRINGS
 options 	SCSI_NO_OP_STRINGS
 options 	SCSI_DELAY=5000	# Be pessimistic about Joe SCSI device
 
 # Options for the CAM CDROM driver:
 # CHANGER_MIN_BUSY_SECONDS: Guaranteed minimum time quantum for a changer LUN
 # CHANGER_MAX_BUSY_SECONDS: Maximum time quantum per changer LUN, only
 #                           enforced if there is I/O waiting for another LUN
 # The compiled in defaults for these variables are 2 and 10 seconds,
 # respectively.
 #
 # These can also be changed on the fly with the following sysctl variables:
 # kern.cam.cd.changer.min_busy_seconds
 # kern.cam.cd.changer.max_busy_seconds
 #
 options 	CHANGER_MIN_BUSY_SECONDS=2
 options 	CHANGER_MAX_BUSY_SECONDS=10
 
 # Options for the CAM sequential access driver:
 # SA_IO_TIMEOUT: Timeout for read/write/wfm  operations, in minutes
 # SA_SPACE_TIMEOUT: Timeout for space operations, in minutes
 # SA_REWIND_TIMEOUT: Timeout for rewind operations, in minutes
 # SA_ERASE_TIMEOUT: Timeout for erase operations, in minutes
 # SA_1FM_AT_EOD: Default to model which only has a default one filemark at EOT.
 options 	SA_IO_TIMEOUT=4
 options 	SA_SPACE_TIMEOUT=60
 options 	SA_REWIND_TIMEOUT=(2*60)
 options 	SA_ERASE_TIMEOUT=(4*60)
 options 	SA_1FM_AT_EOD
 
 # Optional timeout for the CAM processor target (pt) device
 # This is specified in seconds.  The default is 60 seconds.
 options 	SCSI_PT_DEFAULT_TIMEOUT=60
 
 # Optional enable of doing SES passthrough on other devices (e.g., disks)
 #
 # Normally disabled because a lot of newer SCSI disks report themselves
 # as having SES capabilities, but this can then clot up attempts to
 # build a topology with the SES device that's on the box these drives
 # are in....
 options 	SES_ENABLE_PASSTHROUGH
 
 
 #####################################################################
 # MISCELLANEOUS DEVICES AND OPTIONS
 
 # The `pty' device usually turns out to be ``effectively mandatory'',
 # as it is required for `telnetd', `rlogind', `screen', `emacs', and
 # `xterm', among others.
 
 device		pty		#Pseudo ttys
 device		nmdm		#back-to-back tty devices
 device		md		#Memory/malloc disk
 device		snp		#Snoop device - to look at pty/vty/etc..
 device		ccd		#Concatenated disk driver
 device		firmware	#firmware(9) support
 
 # Kernel side iconv library
 options 	LIBICONV
 
 # Size of the kernel message buffer.  Should be N * pagesize.
 options 	MSGBUF_SIZE=40960
 
 # Maximum size of a tty or pty input buffer.
 options 	TTYHOG=8193
 
 
 #####################################################################
 # HARDWARE DEVICE CONFIGURATION
 
 # For ISA the required hints are listed.
 # EISA, MCA, PCI, CardBus, SD/MMC and pccard are self identifying buses, so
 # no hints are needed.
 
 #
 # Mandatory devices:
 #
 
 # These options are valid for other keyboard drivers as well.
 options 	KBD_DISABLE_KEYMAP_LOAD	# refuse to load a keymap
 options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
 
 options 	FB_DEBUG		# Frame buffer debugging
 
 device		splash			# Splash screen and screen saver support
 
 # Various screen savers.
 device		blank_saver
 device		daemon_saver
 device		dragon_saver
 device		fade_saver
 device		fire_saver
 device		green_saver
 device		logo_saver
 device		rain_saver
 device		snake_saver
 device		star_saver
 device		warp_saver
 
 # The syscons console driver (SCO color console compatible).
 device		sc
 hint.sc.0.at="isa"
 options 	MAXCONS=16		# number of virtual consoles
 options 	SC_ALT_MOUSE_IMAGE	# simplified mouse cursor in text mode
 options 	SC_DFLT_FONT		# compile font in
 makeoptions	SC_DFLT_FONT=cp850
 options 	SC_DISABLE_KDBKEY	# disable `debug' key
 options 	SC_DISABLE_REBOOT	# disable reboot key sequence
 options 	SC_HISTORY_SIZE=200	# number of history buffer lines
 options 	SC_MOUSE_CHAR=0x3	# char code for text mode mouse cursor
 options 	SC_PIXEL_MODE		# add support for the raster text mode
 
 # The following options will let you change the default colors of syscons.
 options 	SC_NORM_ATTR=(FG_GREEN|BG_BLACK)
 options 	SC_NORM_REV_ATTR=(FG_YELLOW|BG_GREEN)
 options 	SC_KERNEL_CONS_ATTR=(FG_RED|BG_BLACK)
 options 	SC_KERNEL_CONS_REV_ATTR=(FG_BLACK|BG_RED)
 
 # The following options will let you change the default behaviour of
 # cut-n-paste feature
 options 	SC_CUT_SPACES2TABS	# convert leading spaces into tabs
 options 	SC_CUT_SEPCHARS=\"x09\"	# set of characters that delimit words
 					# (default is single space - \"x20\")
 
 # If you have a two button mouse, you may want to add the following option
 # to use the right button of the mouse to paste text.
 options 	SC_TWOBUTTON_MOUSE
 
 # You can selectively disable features in syscons.
 options 	SC_NO_CUTPASTE
 options 	SC_NO_FONT_LOADING
 options 	SC_NO_HISTORY
 options 	SC_NO_MODE_CHANGE
 options 	SC_NO_SYSMOUSE
 options 	SC_NO_SUSPEND_VTYSWITCH
 
 # `flags' for sc
 #	0x80	Put the video card in the VESA 800x600 dots, 16 color mode
 #	0x100	Probe for a keyboard device periodically if one is not present
 
 #
 # Optional devices:
 #
 
 #
 # SCSI host adapters:
 #
 # adv: All Narrow SCSI bus AdvanSys controllers.
 # adw: Second Generation AdvanSys controllers including the ADV940UW.
 # aha: Adaptec 154x/1535/1640
 # ahb: Adaptec 174x EISA controllers
 # ahc: Adaptec 274x/284x/2910/293x/294x/394x/3950x/3960x/398X/4944/
 #      19160x/29160x, aic7770/aic78xx
 # ahd: Adaptec 29320/39320 Controllers.
 # aic: Adaptec 6260/6360, APA-1460 (PC Card), NEC PC9801-100 (C-BUS)
 # amd: Support for the AMD 53C974 SCSI host adapter chip as found on devices
 #      such as the Tekram DC-390(T).
 # bt:  Most Buslogic controllers: including BT-445, BT-54x, BT-64x, BT-74x,
 #      BT-75x, BT-946, BT-948, BT-956, BT-958, SDC3211B, SDC3211F, SDC3222F
 # esp: NCR53c9x.  Only for SBUS hardware right now.
 # isp: Qlogic ISP 1020, 1040 and 1040B PCI SCSI host adapters,
 #      ISP 1240 Dual Ultra SCSI, ISP 1080 and 1280 (Dual) Ultra2,
 #      ISP 12160 Ultra3 SCSI,
 #      Qlogic ISP 2100 and ISP 2200 1Gb Fibre Channel host adapters.
 #      Qlogic ISP 2300 and ISP 2312 2Gb Fibre Channel host adapters.
 #      Qlogic ISP 2322 and ISP 6322 2Gb Fibre Channel host adapters.
 # ispfw: Firmware module for Qlogic host adapters
 # mpt: LSI-Logic MPT/Fusion 53c1020 or 53c1030 Ultra4
 #      or FC9x9 Fibre Channel host adapters.
 # ncr: NCR 53C810, 53C825 self-contained SCSI host adapters.
 # sym: Symbios/Logic 53C8XX family of PCI-SCSI I/O processors:
 #      53C810, 53C810A, 53C815, 53C825,  53C825A, 53C860, 53C875,
 #      53C876, 53C885,  53C895, 53C895A, 53C896,  53C897, 53C1510D,
 #      53C1010-33, 53C1010-66.
 # trm: Tekram DC395U/UW/F DC315U adapters.
 # wds: WD7000
 
 #
 # Note that the order is important in order for Buslogic ISA/EISA cards to be
 # probed correctly.
 #
 device		bt
 hint.bt.0.at="isa"
 hint.bt.0.port="0x330"
 device		adv
 hint.adv.0.at="isa"
 device		adw
 device		aha
 hint.aha.0.at="isa"
 device		aic
 hint.aic.0.at="isa"
 device		ahb
 device		ahc
 device		ahd
 device		amd
 device		esp
 device		iscsi_initiator
 device		isp
 hint.isp.0.disable="1"
 hint.isp.0.role="3"
 hint.isp.0.prefer_iomap="1"
 hint.isp.0.prefer_memmap="1"
 hint.isp.0.fwload_disable="1"
 hint.isp.0.ignore_nvram="1"
 hint.isp.0.fullduplex="1"
 hint.isp.0.topology="lport"
 hint.isp.0.topology="nport"
 hint.isp.0.topology="lport-only"
 hint.isp.0.topology="nport-only"
 # we can't get u_int64_t types, nor can we get strings if it's got
 # a leading 0x, hence this silly dodge.
 hint.isp.0.portwnn="w50000000aaaa0000"
 hint.isp.0.nodewnn="w50000000aaaa0001"
 device		ispfw
 device		mpt
 device		ncr
 device		sym
 device		trm
 device		wds
 hint.wds.0.at="isa"
 hint.wds.0.port="0x350"
 hint.wds.0.irq="11"
 hint.wds.0.drq="6"
 
 # The aic7xxx driver will attempt to use memory mapped I/O for all PCI
 # controllers that have it configured only if this option is set. Unfortunately,
 # this doesn't work on some motherboards, which prevents it from being the
 # default.
 options 	AHC_ALLOW_MEMIO
 
 # Dump the contents of the ahc controller configuration PROM.
 options 	AHC_DUMP_EEPROM
 
 # Bitmap of units to enable targetmode operations.
 options 	AHC_TMODE_ENABLE
 
 # Compile in Aic7xxx Debugging code.
 options 	AHC_DEBUG
 
 # Aic7xxx driver debugging options. See sys/dev/aic7xxx/aic7xxx.h
 options 	AHC_DEBUG_OPTS
 
 # Print register bitfields in debug output.  Adds ~128k to driver
 # See ahc(4).
 options 	AHC_REG_PRETTY_PRINT
 
 # Compile in aic79xx debugging code.
 options 	AHD_DEBUG
 
 # Aic79xx driver debugging options.  Adds ~215k to driver.  See ahd(4).
 options 	AHD_DEBUG_OPTS=0xFFFFFFFF
 
 # Print human-readable register definitions when debugging
 options 	AHD_REG_PRETTY_PRINT
 
 # Bitmap of units to enable targetmode operations.
 options 	AHD_TMODE_ENABLE
 
 # The adw driver will attempt to use memory mapped I/O for all PCI
 # controllers that have it configured only if this option is set.
 options 	ADW_ALLOW_MEMIO
 
 # Options used in dev/iscsi (Software iSCSI stack)
 #
 options		ISCSI_INITIATOR_DEBUG=9
 
 # Options used in dev/isp/ (Qlogic SCSI/FC driver).
 #
 #	ISP_TARGET_MODE		-	enable target mode operation
 #
 options 	ISP_TARGET_MODE=1
 #
 #	ISP_DEFAULT_ROLES	-	default role
 #		none=0
 #		target=1
 #		initiator=2
 #		both=3			(not supported currently)
 #
 options 	ISP_DEFAULT_ROLES=2
 
 # Options used in dev/sym/ (Symbios SCSI driver).
 #options 	SYM_SETUP_LP_PROBE_MAP	#-Low Priority Probe Map (bits)
 					# Allows the ncr to take precedence
 					# 1 (1<<0) -> 810a, 860
 					# 2 (1<<1) -> 825a, 875, 885, 895
 					# 4 (1<<2) -> 895a, 896, 1510d
 #options 	SYM_SETUP_SCSI_DIFF	#-HVD support for 825a, 875, 885
 					# disabled:0 (default), enabled:1
 #options 	SYM_SETUP_PCI_PARITY	#-PCI parity checking
 					# disabled:0, enabled:1 (default)
 #options 	SYM_SETUP_MAX_LUN	#-Number of LUNs supported
 					# default:8, range:[1..64]
 
 # The 'dpt' driver provides support for old DPT controllers (http://www.dpt.com/).
 # These have hardware RAID-{0,1,5} support, and do multi-initiator I/O.
 # The DPT controllers are commonly re-licensed under other brand-names -
 # some controllers by Olivetti, Dec, HP, AT&T, SNI, AST, Alphatronic, NEC and
 # Compaq are actually DPT controllers.
 #
 # See src/sys/dev/dpt for debugging and other subtle options.
 #   DPT_MEASURE_PERFORMANCE Enables a set of (semi)invasive metrics. Various
 #                           instruments are enabled.  The tools in
 #                           /usr/sbin/dpt_* assume these to be enabled.
 #   DPT_HANDLE_TIMEOUTS     Normally device timeouts are handled by the DPT.
 #                           If you want the driver to handle timeouts, enable
 #                           this option.  If your system is very busy, this
 #                           option will create more trouble than it solves.
 #   DPT_TIMEOUT_FACTOR      Used to compute the excessive amount of time to
 #                           wait when timing out with the above option.
 #  DPT_DEBUG_xxxx           These are controllable from sys/dev/dpt/dpt.h
 #  DPT_LOST_IRQ             When enabled, will try, once per second, to catch
 #                           any interrupt that got lost.  Seems to help in some
 #                           DPT-firmware/Motherboard combinations.  Minimal
 #                           cost, great benefit.
 #  DPT_RESET_HBA            Make "reset" actually reset the controller
 #                           instead of fudging it.  Only enable this if you
 #			    are 100% certain you need it.
 
 device		dpt
 
 # DPT options
 #!CAM# options 	DPT_MEASURE_PERFORMANCE
 #!CAM# options 	DPT_HANDLE_TIMEOUTS
 options 	DPT_TIMEOUT_FACTOR=4
 options 	DPT_LOST_IRQ
 options 	DPT_RESET_HBA
 
 #
 # Compaq "CISS" RAID controllers (SmartRAID 5* series)
 # These controllers have a SCSI-like interface, and require the
 # CAM infrastructure.
 #
 device		ciss
 
 #
 # Intel Integrated RAID controllers.
 # This driver was developed and is maintained by Intel.  Contacts
 # at Intel for this driver are
 # "Kannanthanam, Boji T" <boji.t.kannanthanam@intel.com> and
 # "Leubner, Achim" <achim.leubner@intel.com>.
 #
 device		iir
 
 #
 # Mylex AcceleRAID and eXtremeRAID controllers with v6 and later
 # firmware.  These controllers have a SCSI-like interface, and require
 # the CAM infrastructure.
 #
 device		mly
 
 #
 # Compaq Smart RAID, Mylex DAC960 and AMI MegaRAID controllers.  Only
 # one entry is needed; the code will find and configure all supported
 # controllers.
 #
 device		ida		# Compaq Smart RAID
 device		mlx		# Mylex DAC960
 device		amr		# AMI MegaRAID
 device		mfi		# LSI MegaRAID SAS
 device		mfip		# LSI MegaRAID SAS passthrough, requires CAM
 options 	MFI_DEBUG
 
 #
 # 3ware ATA RAID
 #
 device		twe		# 3ware ATA RAID
 
 #
 # The 'ATA' driver supports all ATA and ATAPI devices, including PC Card
 # devices. You only need one "device ata" for it to find all
 # PCI and PC Card ATA/ATAPI devices on modern machines.
 device		ata
 device		atadisk		# ATA disk drives
 device		ataraid		# ATA RAID drives
 device		atapicd		# ATAPI CDROM drives
 device		atapifd		# ATAPI floppy drives
 device		atapist		# ATAPI tape drives
 device		atapicam	# emulate ATAPI devices as SCSI ditto via CAM
 				# needs CAM to be present (scbus & pass)
 #
 # For older non-PCI, non-PnPBIOS systems, these are the hints lines to add:
 hint.ata.0.at="isa"
 hint.ata.0.port="0x1f0"
 hint.ata.0.irq="14"
 hint.ata.1.at="isa"
 hint.ata.1.port="0x170"
 hint.ata.1.irq="15"
 
 #
 # The following options are valid on the ATA driver:
 #
 # ATA_STATIC_ID:	controller numbering is static, i.e. depends on location,
 #			else the device numbers are dynamically allocated.
 
 options 	ATA_STATIC_ID
 
 #
 # Standard floppy disk controllers and floppy tapes, supports
 # the Y-E DATA External FDD (PC Card)
 #
 device		fdc
 hint.fdc.0.at="isa"
 hint.fdc.0.port="0x3F0"
 hint.fdc.0.irq="6"
 hint.fdc.0.drq="2"
 #
 # FDC_DEBUG enables floppy debugging.  Since the debug output is huge,
 # however, you have to actually turn it on by setting the variable
 # fd_debug with DDB.
 options 	FDC_DEBUG
 #
 # Activate this line if you happen to have an Insight floppy tape.
 # Probing them proved to be dangerous for people with floppy disks only,
 # so it's "hidden" behind a flag:
 #hint.fdc.0.flags="1"
 
 # Specify floppy devices
 hint.fd.0.at="fdc0"
 hint.fd.0.drive="0"
 hint.fd.1.at="fdc0"
 hint.fd.1.drive="1"
 
 #
 # uart: newbusified driver for serial interfaces.  It consolidates the sio(4),
 #	sab(4) and zs(4) drivers.
 #
 device		uart
 
 # Options for uart(4)
 options 	UART_PPS_ON_CTS		# Do time pulse capturing using CTS
 					# instead of DCD.
 
 # The following hint should only be used for pure ISA devices.  It is not
 # needed otherwise.  Use of hints is strongly discouraged.
 hint.uart.0.at="isa"
 
 # The following 3 hints are used when the UART is a system device (i.e., a
 # console or debug port), but only on platforms that don't have any other
 # means to pass the information to the kernel.  The unit number of the hint
 # is only used to bundle the hints together.  There is no relation to the
 # unit number of the probed UART.
 hint.uart.0.port="0x3f8"
 hint.uart.0.flags="0x10"
 hint.uart.0.baud="115200"
 
 # `flags' for serial drivers that support consoles like sio(4) and uart(4):
 #	0x10	enable console support for this unit.  Other console flags
 #		(if applicable) are ignored unless this is set.  Enabling
 #		console support does not make the unit the preferred console.
 #		Boot with -h or set boot_serial=YES in the loader.  For sio(4)
 #		specifically, the 0x20 flag can also be set (see above).
 #		Currently, at most one unit can have console support; the
 #		first one (in config file order) with this flag set is
 #		preferred.  Setting this flag for sio0 gives the old behaviour.
 #	0x80	use this port for serial line gdb support in ddb.  Also known
 #		as debug port.
 #
 
 # Options for serial drivers that support consoles:
 options 	BREAK_TO_DEBUGGER	# A BREAK on a serial console goes to
 					# ddb, if available.
 
 # Solaris implements a new BREAK which is initiated by a character
 # sequence CR ~ ^b which is similar to a familiar pattern used on
 # Sun servers by the Remote Console.
 options 	ALT_BREAK_TO_DEBUGGER
 
 # Serial Communications Controller
 # Supports the Siemens SAB 82532 and Zilog Z8530 multi-channel
 # communications controllers.
 device		scc
 
 # PCI Universal Communications driver
 # Supports various multi port PCI I/O cards.
 device		puc
 
 #
 # Network interfaces:
 #
 # MII bus support is required for some PCI 10/100 ethernet NICs,
 # namely those which use MII-compliant transceivers or implement
 # transceiver control interfaces that operate like an MII. Adding
 # "device miibus" to the kernel config pulls in support for
 # the generic miibus API and all of the PHY drivers, including a
 # generic one for PHYs that aren't specifically handled by an
 # individual driver.
 device		miibus
 
 # an:   Aironet 4500/4800 802.11 wireless adapters. Supports the PCMCIA,
 #       PCI and ISA varieties.
 # bce:	Broadcom NetXtreme II (BCM5706/BCM5708) PCI/PCIe Gigabit Ethernet
 #       adapters.
 # bfe:	Broadcom BCM4401 Ethernet adapter.
 # bge:	Support for gigabit ethernet adapters based on the Broadcom
 #	BCM570x family of controllers, including the 3Com 3c996-T,
 #	the Netgear GA302T, the SysKonnect SK-9D21 and SK-9D41, and
 #	the embedded gigE NICs on Dell PowerEdge 2550 servers.
 # cm:	Arcnet SMC COM90c26 / SMC COM90c56
 #	(and SMC COM90c66 in '56 compatibility mode) adapters.
 # cnw:  Xircom CNW/Netware Airsurfer PC Card adapter
 # dc:   Support for PCI fast ethernet adapters based on the DEC/Intel 21143
 #       and various workalikes including:
 #       the ADMtek AL981 Comet and AN985 Centaur, the ASIX Electronics
 #       AX88140A and AX88141, the Davicom DM9100 and DM9102, the Lite-On
 #       82c168 and 82c169 PNIC, the Lite-On/Macronix LC82C115 PNIC II
 #       and the Macronix 98713/98713A/98715/98715A/98725 PMAC. This driver
 #       replaces the old al, ax, dm, pn and mx drivers.  List of brands:
 #       Digital DE500-BA, Kingston KNE100TX, D-Link DFE-570TX, SOHOware SFA110,
 #       SVEC PN102-TX, CNet Pro110B, 120A, and 120B, Compex RL100-TX,
 #       LinkSys LNE100TX, LNE100TX V2.0, Jaton XpressNet, Alfa Inc GFC2204,
 #       KNE110TX.
 # de:   Digital Equipment DC21040
 # em:   Intel Pro/1000 Gigabit Ethernet 82542, 82543, 82544 based adapters.
 # igb:  Intel Pro/1000 PCI Express Gigabit Ethernet: 82575 and later adapters.
 # ep:   3Com 3C509, 3C529, 3C556, 3C562D, 3C563D, 3C572, 3C574X, 3C579, 3C589
 #       and PC Card devices using these chipsets.
 # ex:   Intel EtherExpress Pro/10 and other i82595-based adapters,
 #       Olicom Ethernet PC Card devices.
 # fe:   Fujitsu MB86960A/MB86965A Ethernet
 # fea:  DEC DEFEA EISA FDDI adapter
 # fpa:  Support for the Digital DEFPA PCI FDDI. `device fddi' is also needed.
 # fxp:  Intel EtherExpress Pro/100B
 #	(hint of prefer_iomap can be done to prefer I/O instead of Mem mapping)
 # gem:  Apple GMAC/Sun ERI/Sun GEM
 # hme:  Sun HME (Happy Meal Ethernet)
 # le:   AMD Am7900 LANCE and Am79C9xx PCnet
 # lge:	Support for PCI gigabit ethernet adapters based on the Level 1
 #	LXT1001 NetCellerator chipset. This includes the D-Link DGE-500SX,
 #	SMC TigerCard 1000 (SMC9462SX), and some Addtron cards.
 # msk:	Support for gigabit ethernet adapters based on the Marvell/SysKonnect
 #	Yukon II Gigabit controllers, including 88E8021, 88E8022, 88E8061,
 #	88E8062, 88E8035, 88E8036, 88E8038, 88E8050, 88E8052, 88E8053,
 #	88E8055, 88E8056 and D-Link 560T/550SX.
 # lmc:	Support for the LMC/SBE wide-area network interface cards.
 # my:	Myson Fast Ethernet (MTD80X, MTD89X)
 # nge:	Support for PCI gigabit ethernet adapters based on the National
 #	Semiconductor DP83820 and DP83821 chipset. This includes the
 #	SMC EZ Card 1000 (SMC9462TX), D-Link DGE-500T, Asante FriendlyNet
 #	GigaNIX 1000TA and 1000TPC, the Addtron AEG320T, the Surecom
 #	EP-320G-TX and the Netgear GA622T.
 # pcn:	Support for PCI fast ethernet adapters based on the AMD Am79c97x
 #	PCnet-FAST, PCnet-FAST+, PCnet-FAST III, PCnet-PRO and PCnet-Home
 #	chipsets. These can also be handled by the le(4) driver if the
 #	pcn(4) driver is left out of the kernel. The le(4) driver does not
 #	support the additional features like the MII bus and burst mode of
 #	the PCnet-FAST and greater chipsets though.
 # rl:   Support for PCI fast ethernet adapters based on the RealTek 8129/8139
 #       chipset.  Note that the RealTek driver defaults to using programmed
 #       I/O to do register accesses because memory mapped mode seems to cause
 #       severe lockups on SMP hardware.  This driver also supports the
 #       Accton EN1207D `Cheetah' adapter, which uses a chip called
 #       the MPX 5030/5038, which is either a RealTek in disguise or a
 #       RealTek workalike.  Note that the D-Link DFE-530TX+ uses the RealTek
 #       chipset and is supported by this driver, not the 'vr' driver.
 # sf:   Support for Adaptec Duralink PCI fast ethernet adapters based on the
 #       Adaptec AIC-6915 "starfire" controller.
 #       This includes dual and quad port cards, as well as one 100baseFX card.
 #       Most of these are 64-bit PCI devices, except for one single port
 #       card which is 32-bit.
 # sis:  Support for NICs based on the Silicon Integrated Systems SiS 900,
 #       SiS 7016 and NS DP83815 PCI fast ethernet controller chips.
 # sbsh:	Support for Granch SBNI16 SHDSL modem PCI adapters
 # sk:   Support for the SysKonnect SK-984x series PCI gigabit ethernet NICs.
 #       This includes the SK-9841 and SK-9842 single port cards (single mode
 #       and multimode fiber) and the SK-9843 and SK-9844 dual port cards
 #       (also single mode and multimode).
 #       The driver will autodetect the number of ports on the card and
 #       attach each one as a separate network interface.
 # sn:   Support for ISA and PC Card Ethernet devices using the
 #       SMC91C90/92/94/95 chips.
 # ste:  Sundance Technologies ST201 PCI fast ethernet controller, includes
 #       the D-Link DFE-550TX.
 # stge: Support for gigabit ethernet adapters based on the Sundance/Tamarack
 #       TC9021 family of controllers, including the Sundance ST2021/ST2023,
 #       the Sundance/Tamarack TC9021, the D-Link DL-4000 and ASUS NX1101.
 # ti:   Support for PCI gigabit ethernet NICs based on the Alteon Networks
 #       Tigon 1 and Tigon 2 chipsets.  This includes the Alteon AceNIC, the
 #       3Com 3c985, the Netgear GA620 and various others.  Note that you will
 #       probably want to bump up kern.ipc.nmbclusters a lot to use this driver.
 # tl:   Support for the Texas Instruments TNETE100 series 'ThunderLAN'
 #       cards and integrated ethernet controllers.  This includes several
 #       Compaq Netelligent 10/100 cards and the built-in ethernet controllers
 #       in several Compaq Prosignia, Proliant and Deskpro systems.  It also
 #       supports several Olicom 10Mbps and 10/100 boards.
 # tx:   SMC 9432 TX, BTX and FTX cards. (SMC EtherPower II series)
 # txp:	Support for 3Com 3cR990 cards with the "Typhoon" chipset
 # vr:   Support for various fast ethernet adapters based on the VIA
 #       Technologies VT3043 `Rhine I' and VT86C100A `Rhine II' chips,
 #       including the D-Link DFE530TX (see 'rl' for DFE530TX+), the Hawking
 #       Technologies PN102TX, and the AOpen/Acer ALN-320.
 # vx:   3Com 3C590 and 3C595
 # wb:   Support for fast ethernet adapters based on the Winbond W89C840F chip.
 #       Note: this is not the same as the Winbond W89C940F, which is a
 #       NE2000 clone.
 # wi:   Lucent WaveLAN/IEEE 802.11 PCMCIA adapters. Note: this supports both
 #       the PCMCIA and ISA cards: the ISA card is really a PCMCIA to ISA
 #       bridge with a PCMCIA adapter plugged into it.
 # xe:   Xircom/Intel EtherExpress Pro100/16 PC Card ethernet controller,
 #       Accton Fast EtherCard-16, Compaq Netelligent 10/100 PC Card,
 #       Toshiba 10/100 Ethernet PC Card, Xircom 16-bit Ethernet + Modem 56
 # xl:   Support for the 3Com 3c900, 3c905, 3c905B and 3c905C (Fast)
 #       Etherlink XL cards and integrated controllers.  This includes the
 #       integrated 3c905B-TX chips in certain Dell Optiplex and Dell
 #       Precision desktop machines and the integrated 3c905-TX chips
 #       in Dell Latitude laptop docking stations.
 #       Also supported: 3Com 3c980(C)-TX, 3Com 3cSOHO100-TX, 3Com 3c450-TX
 
 # Order for ISA/EISA devices is important here
 
 device		cm
 hint.cm.0.at="isa"
 hint.cm.0.port="0x2e0"
 hint.cm.0.irq="9"
 hint.cm.0.maddr="0xdc000"
 device		ep
 device		ex
 device		fe
 hint.fe.0.at="isa"
 hint.fe.0.port="0x300"
 device		fea
 device		sn
 hint.sn.0.at="isa"
 hint.sn.0.port="0x300"
 hint.sn.0.irq="10"
 device		an
 device		cnw
 device		wi
 device		xe
 
 # PCI Ethernet NICs that use the common MII bus controller code.
 device		bce		# Broadcom BCM5706/BCM5708 Gigabit Ethernet
 device		bfe		# Broadcom BCM440x 10/100 Ethernet
 device		bge		# Broadcom BCM570xx Gigabit Ethernet
 device		cxgb		# Chelsio T3 10 Gigabit Ethernet
 device		cxgb_t3fw	# Chelsio T3 10 Gigabit Ethernet firmware
 device		dc		# DEC/Intel 21143 and various workalikes
 device		fxp		# Intel EtherExpress PRO/100B (82557, 82558)
 hint.fxp.0.prefer_iomap="0"
 device		gem		# Apple GMAC/Sun ERI/Sun GEM
 device		hme		# Sun HME (Happy Meal Ethernet)
 device		lge		# Level 1 LXT1001 gigabit Ethernet
 device		my		# Myson Fast Ethernet (MTD80X, MTD89X)
 device		nge		# NatSemi DP83820 gigabit Ethernet
 device		rl		# RealTek 8129/8139
 device		pcn		# AMD Am79C97x PCI 10/100 NICs
 device		sf		# Adaptec AIC-6915 (``Starfire'')
 device		sbsh		# Granch SBNI16 SHDSL modem
 device		sis		# Silicon Integrated Systems SiS 900/SiS 7016
 device		sk		# SysKonnect SK-984x & SK-982x gigabit Ethernet
 device		ste		# Sundance ST201 (D-Link DFE-550TX)
 device		ti		# Alteon Networks Tigon I/II gigabit Ethernet
 device		tl		# Texas Instruments ThunderLAN
 device		tx		# SMC EtherPower II (83c170 ``EPIC'')
 device		vr		# VIA Rhine, Rhine II
 device		wb		# Winbond W89C840F
 device		xl		# 3Com 3c90x (``Boomerang'', ``Cyclone'')
 
 # PCI Ethernet NICs.
 device		de		# DEC/Intel DC21x4x (``Tulip'')
 #device		em		# Intel Pro/1000 Gigabit Ethernet
 #device		igb		# Intel Pro/1000 PCIE Gigabit Ethernet
 #device		ixgbe		# Intel Pro/10Gbe PCIE Ethernet
 device		le		# AMD Am7900 LANCE and Am79C9xx PCnet
 device		mxge		# Myricom Myri-10G 10GbE NIC
 device		nxge		# Neterion Xframe 10GbE Server/Storage Adapter
 device		txp		# 3Com 3cR990 (``Typhoon'')
 device		vx		# 3Com 3c590, 3c595 (``Vortex'')
 
 # PCI FDDI NICs.
 device		fpa
 
 # PCI WAN adapters.
 device		lmc
 
 # Use "private" jumbo buffers allocated exclusively for the ti(4) driver.
 # This option is incompatible with the TI_JUMBO_HDRSPLIT option below.
 #options 	TI_PRIVATE_JUMBOS
 # Turn on the header splitting option for the ti(4) driver firmware.  This
 # only works for Tigon II chips, and has no effect for Tigon I chips.
 options 	TI_JUMBO_HDRSPLIT
 
 # These two options allow manipulating the mbuf cluster size and mbuf size,
 # respectively.  Be very careful with NIC driver modules when changing
 # these from their default values, because that can potentially cause a
 # mismatch between the mbuf size assumed by the kernel and the mbuf size
 # assumed by a module.  The only driver that currently has the ability to
 # detect a mismatch is ti(4).
 options 	MCLSHIFT=12	# mbuf cluster shift in bits, 12 == 4KB
 options 	MSIZE=512	# mbuf size in bytes
 
 #
 # ATM related options (Cranor version)
 # (note: this driver cannot be used with the HARP ATM stack)
 #
 # The `en' device provides support for Efficient Networks (ENI)
 # ENI-155 PCI midway cards, and the Adaptec 155Mbps PCI ATM cards (ANA-59x0).
 #
 # The `hatm' device provides support for Fore/Marconi HE155 and HE622
 # ATM PCI cards.
 #
 # The `fatm' device provides support for Fore PCA200E ATM PCI cards.
 #
 # The `patm' device provides support for IDT77252 based cards like
 # ProSum's ProATM-155 and ProATM-25 and IDT's evaluation boards.
 #
 # atm device provides generic atm functions and is required for
 # atm devices.
 # NATM enables the netnatm protocol family that can be used to
 # bypass TCP/IP.
 #
 # utopia provides the access to the ATM PHY chips and is required for en,
 # hatm and fatm.
 #
 # the current driver supports only PVC operations (no atm-arp, no multicast).
 # for more details, please read the original documents at
 # http://www.ccrc.wustl.edu/pub/chuck/tech/bsdatm/bsdatm.html
 #
 device		atm
 device		en
 device		fatm			#Fore PCA200E
 device		hatm			#Fore/Marconi HE155/622
 device		patm			#IDT77252 cards (ProATM and IDT)
 device		utopia			#ATM PHY driver
 options 	NATM			#native ATM
 
 options 	LIBMBPOOL		#needed by patm, iatm
 
 #
 # Sound drivers
 #
 # sound: The generic sound driver.
 #
 
 device		sound
 
 #
 # snd_*: Device-specific drivers.
 #
 # The flags of the device tell the device a bit more info about the
 # device that normally is obtained through the PnP interface.
 #	bit  2..0   secondary DMA channel;
 #	bit  4      set if the board uses two dma channels;
 #	bit 15..8   board type, overrides autodetection; leave it
 #		    zero if you don't know what to put in (and you don't,
 #		    since this is unsupported at the moment...).
 #
 # snd_ad1816:		Analog Devices AD1816 ISA PnP/non-PnP.
 # snd_als4000:		Avance Logic ALS4000 PCI.
 # snd_atiixp:		ATI IXP 200/300/400 PCI.
 # snd_au88x0:		Aureal Vortex 1/2/Advantage PCI. This driver
 #			lacks support for playback and recording.
 # snd_audiocs:		Crystal Semiconductor CS4231 SBus/EBus. Only
 #			for sparc64.
 # snd_cmi:		CMedia CMI8338/CMI8738 PCI.
 # snd_cs4281:		Crystal Semiconductor CS4281 PCI.
 # snd_csa:		Crystal Semiconductor CS461x/428x PCI. (except
 #			4281)
 # snd_ds1:		Yamaha DS-1 PCI.
 # snd_emu10k1:		Creative EMU10K1 PCI and EMU10K2 (Audigy) PCI.
 # snd_emu10kx:		Creative SoundBlaster Live! and Audigy
 # snd_envy24:		VIA Envy24 and compatible, needs snd_spicds.
 # snd_envy24ht:		VIA Envy24HT and compatible, needs snd_spicds.
 # snd_es137x:		Ensoniq AudioPCI ES137x PCI.
 # snd_ess:		Ensoniq ESS ISA PnP/non-PnP, to be used in
 #			conjunction with snd_sbc.
 # snd_fm801:		Forte Media FM801 PCI.
 # snd_gusc:		Gravis UltraSound ISA PnP/non-PnP.
 # snd_hda:		Intel High Definition Audio (Controller) and
 #			compatible.
 # snd_ich:		Intel ICH PCI and some more audio controllers
 #			embedded in a chipset, for example nVidia
 #			nForce controllers.
 # snd_maestro:		ESS Technology Maestro-1/2x PCI.
 # snd_maestro3:		ESS Technology Maestro-3/Allegro PCI.
 # snd_mss:		Microsoft Sound System ISA PnP/non-PnP.
 # snd_neomagic:		Neomagic 256 AV/ZX PCI.
 # snd_sb16:		Creative SoundBlaster16, to be used in
 #			conjunction with snd_sbc.
 # snd_sb8:		Creative SoundBlaster (pre-16), to be used in
 #			conjunction with snd_sbc.
 # snd_sbc:		Creative SoundBlaster ISA PnP/non-PnP.
 #			Supports ESS and Avance ISA chips as well.
 # snd_spicds:		SPI codec driver, needed by Envy24/Envy24HT drivers.
 # snd_solo:		ESS Solo-1x PCI.
 # snd_t4dwave:		Trident 4DWave DX/NX PCI, Sis 7018 PCI and Acer Labs
 #			M5451 PCI.
 # snd_via8233:		VIA VT8233x PCI.
 # snd_via82c686:	VIA VT82C686A PCI.
 # snd_vibes:		S3 Sonicvibes PCI.
 # snd_uaudio:		USB audio.
 
 device		snd_ad1816
 device		snd_als4000
 device		snd_atiixp
 #device		snd_au88x0
 #device		snd_audiocs
 device		snd_cmi
 device		snd_cs4281
 device		snd_csa
 device		snd_ds1
 device		snd_emu10k1
 device		snd_emu10kx
 device		snd_envy24
 device		snd_envy24ht
 device		snd_es137x
 device		snd_ess
 device		snd_fm801
 device		snd_gusc
 device		snd_hda
 device		snd_ich
 device		snd_maestro
 device		snd_maestro3
 device		snd_mss
 device		snd_neomagic
 device		snd_sb16
 device		snd_sb8
 device		snd_sbc
 device		snd_solo
 device		snd_spicds
 device		snd_t4dwave
 device		snd_via8233
 device		snd_via82c686
 device		snd_vibes
 device		snd_uaudio
 
 # For non-PnP sound cards:
 hint.pcm.0.at="isa"
 hint.pcm.0.irq="10"
 hint.pcm.0.drq="1"
 hint.pcm.0.flags="0x0"
 hint.sbc.0.at="isa"
 hint.sbc.0.port="0x220"
 hint.sbc.0.irq="5"
 hint.sbc.0.drq="1"
 hint.sbc.0.flags="0x15"
 hint.gusc.0.at="isa"
 hint.gusc.0.port="0x220"
 hint.gusc.0.irq="5"
 hint.gusc.0.drq="1"
 hint.gusc.0.flags="0x13"
 
 #
 # IEEE-488 hardware:
 # pcii:		PCIIA cards (uPD7210 based isa cards)
 # tnt4882:	National Instruments PCI-GPIB card.
 
 device	pcii
 hint.pcii.0.at="isa"
 hint.pcii.0.port="0x2e1"
 hint.pcii.0.irq="5"
 hint.pcii.0.drq="1"
 
 device	tnt4882
 
 #
 # Miscellaneous hardware:
 #
 # scd: Sony CD-ROM using proprietary (non-ATAPI) interface
 # mcd: Mitsumi CD-ROM using proprietary (non-ATAPI) interface
 # bktr: Brooktree bt848/848a/849a/878/879 video capture and TV Tuner board
 # cy: Cyclades serial driver
 # joy: joystick (including IO DATA PCJOY PC Card joystick)
 # rc: RISCom/8 multiport card
 # rp: Comtrol Rocketport(ISA/PCI) - single card
 # si: Specialix SI/XIO 4-32 port terminal multiplexor
 # cmx: OmniKey CardMan 4040 pccard smartcard reader
 
 # Notes on the Comtrol Rocketport driver:
 #
 # The exact values used for rp0 depend on how many boards you have
 # in the system.  The manufacturer's sample configs are listed as:
 #
 #               device  rp	# core driver support
 #
 #   Comtrol Rocketport ISA single card
 #		hint.rp.0.at="isa"
 #		hint.rp.0.port="0x280"
 #
 #   If instead you have two ISA cards, one installed at 0x100 and the
 #   second installed at 0x180, then you should add the following to
 #   your kernel probe hints:
 #		hint.rp.0.at="isa"
 #		hint.rp.0.port="0x100"
 #		hint.rp.1.at="isa"
 #		hint.rp.1.port="0x180"
 #
 #   For 4 ISA cards, it might be something like this:
 #		hint.rp.0.at="isa"
 #		hint.rp.0.port="0x180"
 #		hint.rp.1.at="isa"
 #		hint.rp.1.port="0x100"
 #		hint.rp.2.at="isa"
 #		hint.rp.2.port="0x340"
 #		hint.rp.3.at="isa"
 #		hint.rp.3.port="0x240"
 #
 #   For PCI cards, you need no hints.
 
 # Mitsumi CD-ROM
 device		mcd
 hint.mcd.0.at="isa"
 hint.mcd.0.port="0x300"
 # for the Sony CDU31/33A CDROM
 device		scd
 hint.scd.0.at="isa"
 hint.scd.0.port="0x230"
 device		joy			# PnP aware, hints for non-PnP only
 hint.joy.0.at="isa"
 hint.joy.0.port="0x201"
 device		rc
 hint.rc.0.at="isa"
 hint.rc.0.port="0x220"
 hint.rc.0.irq="12"
 device		rp
 hint.rp.0.at="isa"
 hint.rp.0.port="0x280"
 device		si
 options 	SI_DEBUG
 hint.si.0.at="isa"
 hint.si.0.maddr="0xd0000"
 hint.si.0.irq="12"
 device		cmx
 
 #
 # The 'bktr' device is a PCI video capture device using the Brooktree
 # bt848/bt848a/bt849a/bt878/bt879 chipset. When used with a TV Tuner it forms a
 # TV card, e.g. Miro PC/TV, Hauppauge WinCast/TV WinTV, VideoLogic Captivator,
 # Intel Smart Video III, AverMedia, IMS Turbo, FlyVideo.
 #
 # options 	OVERRIDE_CARD=xxx
 # options 	OVERRIDE_TUNER=xxx
 # options 	OVERRIDE_MSP=1
 # options 	OVERRIDE_DBX=1
 # These options can be used to override the auto detection.
 # The current values for xxx are found in src/sys/dev/bktr/bktr_card.h.
 # Run-time overrides on a per-card basis can be made using sysctl(8).
 #
 # options 	BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_PAL
 # or
 # options 	BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_NTSC
 # Specifies the default video capture mode.
 # This is required for Dual Crystal (28&35Mhz) boards where PAL is used
 # to prevent hangs during initialisation, e.g. VideoLogic Captivator PCI.
 #
 # options 	BKTR_USE_PLL
 # This is required for PAL or SECAM boards with a 28Mhz crystal and no 35Mhz
 # crystal, e.g. some new Bt878 cards.
 #
 # options 	BKTR_GPIO_ACCESS
 # This enables IOCTLs which give user level access to the GPIO port.
 #
 # options 	BKTR_NO_MSP_RESET
 # Prevents the MSP34xx reset. Good if you initialise the MSP in another OS first
 #
 # options 	BKTR_430_FX_MODE
 # Switch Bt878/879 cards into Intel 430FX chipset compatibility mode.
 #
 # options 	BKTR_SIS_VIA_MODE
 # Switch Bt878/879 cards into SIS/VIA chipset compatibility mode which is
 # needed for some old SiS and VIA chipset motherboards.
 # This also allows Bt878/879 chips to work on old OPTi (<1997) chipset
 # motherboards and motherboards with bad or incomplete PCI 2.1 support.
 # As a rough guess, old = before 1998
 #
 # options 	BKTR_NEW_MSP34XX_DRIVER
 # Use new, more complete initialization scheme for the msp34* soundchip.
 # Should fix stereo autodetection if the old driver only outputs
 # mono sound.
 
 #
 # options 	BKTR_USE_FREEBSD_SMBUS
 # Compile with FreeBSD SMBus implementation
 #
 # Brooktree driver has been ported to the new I2C framework. Thus,
 # you'll need to have the following 4 lines in the kernel config.
 #     device smbus
 #     device iicbus
 #     device iicbb
 #     device iicsmb
 # The iic and smb devices are only needed if you want to control other
 # I2C slaves connected to the external connector of some cards.
 #
 device		bktr
  
 #
 # PC Card/PCMCIA and Cardbus
 #
 # cbb: pci/cardbus bridge implementing YENTA interface
 # pccard: pccard slots
 # cardbus: cardbus slots
 device		cbb
 device		pccard
 device		cardbus
 
 #
 # MMC/SD
 #
 # mmc: mmc bus
 # mmcsd: mmc memory and sd cards.
 #device		mmc
 #device		mmcsd
 
 #
 # SMB bus
 #
 # System Management Bus support is provided by the 'smbus' device.
 # Access to the SMBus device is via the 'smb' device (/dev/smb*),
 # which is a child of the 'smbus' device.
 #
 # Supported devices:
 # smb		standard I/O through /dev/smb*
 #
 # Supported SMB interfaces:
 # iicsmb	I2C to SMB bridge with any iicbus interface
 # bktr		brooktree848 I2C hardware interface
 # intpm		Intel PIIX4 (82371AB, 82443MX) Power Management Unit
 # alpm		Acer Aladdin-IV/V/Pro2 Power Management Unit
 # ichsmb	Intel ICH SMBus controller chips (82801AA, 82801AB, 82801BA)
 # viapm		VIA VT82C586B/596B/686A and VT8233 Power Management Unit
 # amdpm		AMD 756 Power Management Unit
 # amdsmb	AMD 8111 SMBus 2.0 Controller
 # nfpm		NVIDIA nForce Power Management Unit
 # nfsmb		NVIDIA nForce2/3/4 MCP SMBus 2.0 Controller
 #
 device		smbus		# Bus support, required for smb below.
 
 device		intpm
 device		alpm
 device		ichsmb
 device		viapm
 device		amdpm
 device		amdsmb
 device		nfpm
 device		nfsmb
 
 device		smb
 
 #
 # I2C Bus
 #
 # Philips i2c bus support is provided by the `iicbus' device.
 #
 # Supported devices:
 # ic	i2c network interface
 # iic	i2c standard io
 # iicsmb i2c to smb bridge. Allow i2c i/o with smb commands.
 #
 # Supported interfaces:
 # bktr	brooktree848 I2C software interface
 #
 # Other:
 # iicbb	generic I2C bit-banging code (needed by lpbb, bktr)
 #
 device		iicbus		# Bus support, required for ic/iic/iicsmb below.
 device		iicbb
 
 device		ic
 device		iic
 device		iicsmb		# smb over i2c bridge
 
 # Parallel-Port Bus
 #
 # Parallel port bus support is provided by the `ppbus' device.
 # Multiple devices may be attached to the parallel port, devices
 # are automatically probed and attached when found.
 #
 # Supported devices:
 # vpo	Iomega Zip Drive
 #	Requires SCSI disk support ('scbus' and 'da'), best
 #	performance is achieved with ports in EPP 1.9 mode.
 # lpt	Parallel Printer
 # plip	Parallel network interface
 # ppi	General-purpose I/O ("Geek Port") + IEEE1284 I/O
 # pps	Pulse per second Timing Interface
 # lpbb	Philips official parallel port I2C bit-banging interface
 #
 # Supported interfaces:
 # ppc	ISA-bus parallel port interfaces.
 #
 
 options 	PPC_PROBE_CHIPSET # Enable chipset specific detection
 				  # (see flags in ppc(4))
 options 	DEBUG_1284	# IEEE1284 signaling protocol debug
 options 	PERIPH_1284	# Makes your computer act as an IEEE1284
 				# compliant peripheral
 options 	DONTPROBE_1284	# Avoid boot detection of PnP parallel devices
 options 	VP0_DEBUG	# ZIP/ZIP+ debug
 options 	LPT_DEBUG	# Printer driver debug
 options 	PPC_DEBUG	# Parallel chipset level debug
 options 	PLIP_DEBUG	# Parallel network IP interface debug
 options 	PCFCLOCK_VERBOSE         # Verbose pcfclock driver
 options 	PCFCLOCK_MAX_RETRIES=5   # Maximum read tries (default 10)
 
 device		ppc
 hint.ppc.0.at="isa"
 hint.ppc.0.irq="7"
 device		ppbus
 device		vpo
 device		lpt
 device		plip
 device		ppi
 device		pps
 device		lpbb
 device		pcfclock
 
 # Kernel BOOTP support
 
 options 	BOOTP		# Use BOOTP to obtain IP address/hostname
 				# Requires NFSCLIENT and NFS_ROOT
 options 	BOOTP_NFSROOT	# NFS mount root filesystem using BOOTP info
 options 	BOOTP_NFSV3	# Use NFS v3 to NFS mount root
 options 	BOOTP_COMPAT	# Workaround for broken bootp daemons.
 options 	BOOTP_WIRED_TO=fxp0 # Use interface fxp0 for BOOTP
 
 #
 # Add software watchdog routines.
 #
 options 	SW_WATCHDOG
 
 #
 # Disable swapping of stack pages.  This option removes all
 # code which actually performs swapping, so it's not possible to turn
 # it back on at run-time.
 #
 # This is sometimes usable for systems which don't have any swap space
 # (see also sysctls "vm.defer_swapspace_pageouts" and
 # "vm.disable_swapspace_pageouts")
 #
 #options 	NO_SWAPPING
 
 # Set the number of sf_bufs to allocate. sf_bufs are virtual buffers
 # for sendfile(2) that are used to map file VM pages, and normally
 # default to a quantity that is roughly 16*MAXUSERS+512. You would
 # typically want about 4 of these for each simultaneous file send.
 #
 options 	NSFBUFS=1024
 
 #
 # Enable extra debugging code for locks.  This stores the filename and
 # line of whatever acquired the lock in the lock itself, and changes a
 # number of function calls to pass around the relevant data.  This is
 # not at all useful unless you are debugging lock code.  Also note
 # that it is likely to break e.g. fstat(1) unless you recompile your
 # userland with -DDEBUG_LOCKS as well.
 #
 options 	DEBUG_LOCKS
 
 
 #####################################################################
 # USB support
 # UHCI controller
 device		uhci
 # OHCI controller
 device		ohci
 # EHCI controller
 device		ehci
 # SL811 Controller
 device 		slhci
 # General USB code (mandatory for USB)
 device		usb
 #
 # USB Double Bulk Pipe devices
 device		udbp
 # USB Fm Radio
 device		ufm
 # Generic USB device driver
 device		ugen
 # Human Interface Device (anything with buttons and dials)
 device		uhid
 # USB keyboard
 device		ukbd
 # USB printer
 device		ulpt
 # USB Iomega Zip 100 Drive (Requires scbus and da)
 device		umass
 # USB support for Belkin F5U109 and Magic Control Technology serial adapters
 device		umct
 # USB modem support
 device		umodem
 # USB mouse
 device		ums
 # Diamond Rio 500 MP3 player
 device		urio
 # USB scanners
 device		uscanner
 #
 # USB serial support
 device		ucom
 # USB support for Arkmicro Technologies ARK3116 based serial adapters
 device		uark
 # USB support for Belkin F5U103 and compatible serial adapters
 device		ubsa
 # USB support for BWCT console serial adapters
 device		ubser
 # USB support for serial adapters based on the FT8U100AX and FT8U232AM
 device		uftdi
 # USB support for some Windows CE based serial communication.
 device		uipaq
 # USB support for Prolific PL-2303 serial adapters
 device		uplcom
 # USB support for Silicon Laboratories CP2101/CP2102 based USB serial adapters
 device		uslcom
 # USB Visor and Palm devices
 device		uvisor
 # USB serial support for DDI pocket's PHS
 device		uvscom
 #
 # ADMtek USB ethernet. Supports the LinkSys USB100TX,
 # the Billionton USB100, the Melco LU-ATX, the D-Link DSB-650TX
 # and the SMC 2202USB. Also works with the ADMtek AN986 Pegasus
 # eval board.
 device		aue
 
 # ASIX Electronics AX88172 USB 2.0 ethernet driver. Used in the
 # LinkSys USB200M and various other adapters.
 
 device		axe
 
 #
 # Devices which communicate using Ethernet over USB, particularly
 # Communication Device Class (CDC) Ethernet specification. Supports
 # Sharp Zaurus PDAs, some DOCSIS cable modems and so on.
 device		cdce
 #
 # CATC USB-EL1201A USB ethernet. Supports the CATC Netmate
 # and Netmate II, and the Belkin F5U111.
 device		cue
 #
 # Kawasaki LSI ethernet. Supports the LinkSys USB10T,
 # Entrega USB-NET-E45, Peracom Ethernet Adapter, the
 # 3Com 3c19250, the ADS Technologies USB-10BT, the ATen UC10T,
 # the Netgear EA101, the D-Link DSB-650, the SMC 2102USB
 # and 2104USB, and the Corega USB-T.
 device		kue
 #
 # RealTek RTL8150 USB to fast ethernet. Supports the Melco LUA-KTX
 # and the GREEN HOUSE GH-USB100B.
 device		rue
 #
 # Davicom DM9601E USB to fast ethernet. Supports the Corega FEther USB-TXC.
 device		udav
 
 #
 # ZyDas ZD1211/ZD1211B wireless ethernet driver
 device		zyd
 #
 # Ralink Technology RT2500USB chipset driver
 device		ural
 #
 # Ralink Technology RT2501USB/RT2601USB chipset driver
 device		rum
 
 # 
 # debugging options for the USB subsystem
 #
 options 	USB_DEBUG
 
 # options for ukbd:
 options 	UKBD_DFLT_KEYMAP	# specify the built-in keymap
 makeoptions	UKBD_DFLT_KEYMAP=it.iso
 
 # options for uplcom:
 options 	UPLCOM_INTR_INTERVAL=100	# interrupt pipe interval
 						# in milliseconds
 
 # options for uvscom:
 options 	UVSCOM_DEFAULT_OPKTSIZE=8	# default output packet size
 options 	UVSCOM_INTR_INTERVAL=100	# interrupt pipe interval
 						# in milliseconds
 
 #####################################################################
 # FireWire support
 
 device		firewire	# FireWire bus code
 device		sbp		# SCSI over Firewire (Requires scbus and da)
 device		sbp_targ	# SBP-2 Target mode  (Requires scbus and targ)
 device		fwe		# Ethernet over FireWire (non-standard!)
 device		fwip		# IP over FireWire (RFC2734 and RFC3146)
 
 #####################################################################
 # dcons support (Dumb Console Device)
 
 device		dcons			# dumb console driver
 device		dcons_crom		# FireWire attachment
 options 	DCONS_BUF_SIZE=16384	# buffer size
 options 	DCONS_POLL_HZ=100	# polling rate
 options 	DCONS_FORCE_CONSOLE=0	# force to be the primary console
 options 	DCONS_FORCE_GDB=1	# force to be the gdb device
 
 #####################################################################
 # crypto subsystem
 #
 # This is a port of the OpenBSD crypto framework.  Include this when
 # configuring IPSEC and when you have a h/w crypto device to accelerate
 # user applications that link to OpenSSL.
 #
 # Drivers are ports from OpenBSD with some simple enhancements that have
 # been fed back to OpenBSD.
 
 device		crypto		# core crypto support
 device		cryptodev	# /dev/crypto for access to h/w
 
 device		rndtest		# FIPS 140-2 entropy tester
 
 device		hifn		# Hifn 7951, 7781, etc.
 options 	HIFN_DEBUG	# enable debugging support: hw.hifn.debug
 options 	HIFN_RNDTEST	# enable rndtest support
 
 device		ubsec		# Broadcom 5501, 5601, 58xx
 options 	UBSEC_DEBUG	# enable debugging support: hw.ubsec.debug
 options 	UBSEC_RNDTEST	# enable rndtest support
 
 #####################################################################
 
 
 #
 # Embedded system options:
 #
 # An embedded system might want to run something other than init.
 options 	INIT_PATH=/sbin/init:/stand/sysinstall
 
 # Debug options
 options 	BUS_DEBUG	# enable newbus debugging
 options 	DEBUG_VFS_LOCKS	# enable VFS lock debugging
 options 	SOCKBUF_DEBUG	# enable sockbuf last record/mb tail checking
 
 #
 # Verbose SYSINIT
 #
 # Make the SYSINIT process performed by mi_startup() verbose.  This is very
 # useful when porting to a new architecture.  If DDB is also enabled, this
 # will print function names instead of addresses.
 options 	VERBOSE_SYSINIT
 
 #####################################################################
 # SYSV IPC KERNEL PARAMETERS
 #
 # Maximum number of entries in a semaphore map.
 options 	SEMMAP=31
 
 # Maximum number of System V semaphores that can be used on the system at
 # one time.
 options 	SEMMNI=11
 
 # Total number of semaphores system wide
 options 	SEMMNS=61
 
 # Total number of undo structures in system
 options 	SEMMNU=31
 
 # Maximum number of System V semaphores that can be used by a single process
 # at one time.
 options 	SEMMSL=61
 
 # Maximum number of operations that can be outstanding on a single System V
 # semaphore at one time.
 options 	SEMOPM=101
 
 # Maximum number of undo operations that can be outstanding on a single
 # System V semaphore at one time.
 options 	SEMUME=11
 
 # Maximum number of shared memory pages system wide.
 options 	SHMALL=1025
 
 # Maximum size, in bytes, of a single System V shared memory region.
 options 	SHMMAX=(SHMMAXPGS*PAGE_SIZE+1)
 options 	SHMMAXPGS=1025
 
 # Minimum size, in bytes, of a single System V shared memory region.
 options 	SHMMIN=2
 
 # Maximum number of shared memory regions that can be used on the system
 # at one time.
 options 	SHMMNI=33
 
 # Maximum number of System V shared memory regions that can be attached to
 # a single process at one time.
 options 	SHMSEG=9
 
 # Set the amount of time (in seconds) the system will wait before
 # rebooting automatically when a kernel panic occurs.  If set to (-1),
 # the system will wait indefinitely until a key is pressed on the
 # console.
 options 	PANIC_REBOOT_WAIT_TIME=16
 
 # Attempt to bypass the buffer cache and put data directly into the
 # userland buffer for read operation when O_DIRECT flag is set on the
 # file.  Both offset and length of the read operation must be
 # multiples of the physical media sector size.
 #
 options 	DIRECTIO
 
 # Specify a lower limit for the number of swap I/O buffers.  They are
 # (among other things) used when bypassing the buffer cache due to
 # DIRECTIO kernel option enabled and O_DIRECT flag set on file.
 #
 options 	NSWBUF_MIN=120
 
 #####################################################################
 
 # More undocumented options for linting.
 # Note that documenting these is not considered an affront.
 
 options 	CAM_DEBUG_DELAY
 
 # VFS cluster debugging.
 options 	CLUSTERDEBUG
 
 options 	DEBUG
 
 # Kernel filelock debugging.
 options 	LOCKF_DEBUG
 
 # System V compatible message queues
 # Please note that the values provided here are used to test kernel
 # building.  The defaults in the sources provide almost the same numbers.
 # MSGSSZ must be a power of 2 between 8 and 1024.
 options 	MSGMNB=2049	# Max number of chars in queue
 options 	MSGMNI=41	# Max number of message queue identifiers
 options 	MSGSEG=2049	# Max number of message segments
 options 	MSGSSZ=16	# Size of a message segment
 options 	MSGTQL=41	# Max number of messages in system
 
 options 	NBUF=512	# Number of buffer headers
 
 options 	SCSI_NCR_DEBUG
 options 	SCSI_NCR_MAX_SYNC=10000
 options 	SCSI_NCR_MAX_WIDE=1
 options 	SCSI_NCR_MYADDR=7
 
 options 	SC_DEBUG_LEVEL=5	# Syscons debug level
 options 	SC_RENDER_DEBUG	# syscons rendering debugging
 
 options 	SHOW_BUSYBUFS	# List buffers that prevent root unmount
 options 	SLIP_IFF_OPTS
 options 	VFS_BIO_DEBUG	# VFS buffer I/O debugging
 
 options 	KSTACK_MAX_PAGES=32 # Maximum pages to give the kernel stack
 
 # Adaptec Array Controller driver options
 options 	AAC_DEBUG	# Debugging levels:
 				# 0 - quiet, only emit warnings
 				# 1 - noisy, emit major function
 				#     points and things done
 				# 2 - extremely noisy, emit trace
 				#     items in loops, etc.
 
 # Yet more undocumented options for linting.
 # BKTR_ALLOC_PAGES has no effect except to cause warnings, and
 # BROOKTREE_ALLOC_PAGES hasn't actually been superseded by it, since the
 # driver still mostly spells this option BROOKTREE_ALLOC_PAGES.
 ##options 	BKTR_ALLOC_PAGES=(217*4+1)
 options 	BROOKTREE_ALLOC_PAGES=(217*4+1)
 options 	MAXFILES=999
Index: head/sys/conf/options
===================================================================
--- head/sys/conf/options	(revision 178887)
+++ head/sys/conf/options	(revision 178888)
@@ -1,778 +1,779 @@
 # $FreeBSD$
 #
 #        On the handling of kernel options
 #
 # All kernel options should be listed in NOTES, with suitable
 # descriptions.  Negative options (options that make some code not
 # compile) should be commented out; LINT (generated from NOTES) should
 # compile as much code as possible.  Try to structure option-using
 # code so that a single option only switch code on, or only switch
 # code off, to make it possible to have a full compile-test.  If
 # necessary, you can check for COMPILING_LINT to get maximum code
 # coverage.
 #
 # All new options shall also be listed in either "conf/options" or
 # "conf/options.<machine>".  Options that affect a single source-file
 # <xxx>.[c|s] should be directed into "opt_<xxx>.h", while options
 # that affect multiple files should either go in "opt_global.h" if
 # this is a kernel-wide option (used just about everywhere), or in
 # "opt_<option-name-in-lower-case>.h" if it affects only some files.
 # Note that the effect of listing only an option without a
 # header-file-name in conf/options (and cousins) is that the last
 # convention is followed.
 #
 # This handling scheme is not yet fully implemented.
 #
 #
 # Format of this file:
 # Option name	filename
 #
 # If filename is missing, the default is
 # opt_<name-of-option-in-lower-case>.h
 
 AAC_DEBUG		opt_aac.h
 AHC_ALLOW_MEMIO		opt_aic7xxx.h
 AHC_TMODE_ENABLE	opt_aic7xxx.h
 AHC_DUMP_EEPROM		opt_aic7xxx.h
 AHC_DEBUG		opt_aic7xxx.h
 AHC_DEBUG_OPTS		opt_aic7xxx.h
 AHC_REG_PRETTY_PRINT    opt_aic7xxx.h
 AHD_DEBUG		opt_aic79xx.h
 AHD_DEBUG_OPTS		opt_aic79xx.h
 AHD_TMODE_ENABLE	opt_aic79xx.h	
 AHD_REG_PRETTY_PRINT	opt_aic79xx.h
 ADW_ALLOW_MEMIO		opt_adw.h
 
 TWA_DEBUG		opt_twa.h
 TWA_FLASH_FIRMWARE	opt_twa.h
 
 # Debugging options.
 DDB
 DDB_CAPTURE_DEFAULTBUFSIZE	opt_ddb.h
 DDB_CAPTURE_MAXBUFSIZE	opt_ddb.h
 DDB_NUMSYM	opt_ddb.h
 GDB
 KDB		opt_global.h
 KDB_TRACE	opt_kdb.h
 KDB_UNATTENDED	opt_kdb.h
 SYSCTL_DEBUG	opt_sysctl.h
 
 NO_SYSCTL_DESCR	opt_global.h
 
 # Miscellaneous options.
 ADAPTIVE_SX
 ALQ
 AUDIT		opt_global.h
 CODA_COMPAT_5	opt_coda.h
 COMPAT_43	opt_compat.h
 COMPAT_43TTY	opt_compat.h
 COMPAT_FREEBSD4	opt_compat.h
 COMPAT_FREEBSD5	opt_compat.h
 COMPAT_FREEBSD6	opt_compat.h
 COMPAT_FREEBSD7	opt_compat.h
 COMPILING_LINT	opt_global.h
 CY_PCI_FASTINTR
 DIRECTIO
 FULL_PREEMPTION	opt_sched.h
 IPI_PREEMPTION	opt_sched.h
 GEOM_AES	opt_geom.h
 GEOM_BDE	opt_geom.h
 GEOM_BSD	opt_geom.h
 GEOM_CACHE	opt_geom.h
 GEOM_CONCAT	opt_geom.h
 GEOM_ELI	opt_geom.h
 GEOM_FOX	opt_geom.h
 GEOM_GATE	opt_geom.h
 GEOM_JOURNAL	opt_geom.h
 GEOM_LABEL	opt_geom.h
 GEOM_LINUX_LVM	opt_geom.h
 GEOM_MBR	opt_geom.h
 GEOM_MIRROR	opt_geom.h
 GEOM_MULTIPATH	opt_geom.h
 GEOM_NOP	opt_geom.h
 GEOM_PART_APM	opt_geom.h
 GEOM_PART_BSD	opt_geom.h
 GEOM_PART_GPT	opt_geom.h
 GEOM_PART_MBR	opt_geom.h
 GEOM_PART_PC98	opt_geom.h
 GEOM_PART_VTOC8	opt_geom.h
 GEOM_PC98	opt_geom.h
 GEOM_RAID3	opt_geom.h
 GEOM_SHSEC	opt_geom.h
 GEOM_STRIPE	opt_geom.h
 GEOM_SUNLABEL	opt_geom.h
 GEOM_UZIP	opt_geom.h
 GEOM_VIRSTOR	opt_geom.h
 GEOM_VOL	opt_geom.h
 GEOM_ZERO	opt_geom.h
 KSTACK_MAX_PAGES
 KSTACK_PAGES
 KTRACE
 KTRACE_REQUEST_POOL	opt_ktrace.h
 LIBICONV
 MAC
 MAC_ALWAYS_LABEL_MBUF	opt_mac.h
 MAC_BIBA	opt_dontuse.h
 MAC_BSDEXTENDED	opt_dontuse.h
 MAC_IFOFF	opt_dontuse.h
 MAC_LOMAC	opt_dontuse.h
 MAC_MLS		opt_dontuse.h
 MAC_NONE	opt_dontuse.h
 MAC_PARTITION	opt_dontuse.h
 MAC_PORTACL	opt_dontuse.h
 MAC_SEEOTHERUIDS	opt_dontuse.h
 MAC_STATIC	opt_mac.h
 MAC_STUB	opt_dontuse.h
 MAC_TEST	opt_dontuse.h
 MD_ROOT		opt_md.h
 MD_ROOT_SIZE	opt_md.h
 MFI_DEBUG	opt_mfi.h
 MFI_DECODE_LOG	opt_mfi.h
 MPROF_BUFFERS	opt_mprof.h
 MPROF_HASH_SIZE	opt_mprof.h
 NO_ADAPTIVE_MUTEXES	opt_adaptive_mutexes.h
 NO_ADAPTIVE_RWLOCKS
 NSWBUF_MIN	opt_swap.h
 MBUF_PACKET_ZONE_DISABLE	opt_global.h
 PANIC_REBOOT_WAIT_TIME	opt_panic.h
 PPC_DEBUG	opt_ppc.h
 PPC_PROBE_CHIPSET opt_ppc.h
 PPS_SYNC	opt_ntp.h
 PREEMPTION	opt_sched.h
 QUOTA
 SCHED_4BSD	opt_sched.h
 SCHED_STATS	opt_sched.h
 SCHED_ULE	opt_sched.h
 SHOW_BUSYBUFS
 SLEEPQUEUE_PROFILING
 SLHCI_DEBUG	opt_slhci.h
 SPX_HACK
 STACK		opt_stack.h
 SUIDDIR
 MSGMNB		opt_sysvipc.h
 MSGMNI		opt_sysvipc.h
 MSGSEG		opt_sysvipc.h
 MSGSSZ		opt_sysvipc.h
 MSGTQL		opt_sysvipc.h
 SEMMAP		opt_sysvipc.h
 SEMMNI		opt_sysvipc.h
 SEMMNS		opt_sysvipc.h
 SEMMNU		opt_sysvipc.h
 SEMMSL		opt_sysvipc.h
 SEMOPM		opt_sysvipc.h
 SEMUME		opt_sysvipc.h
 SHMALL		opt_sysvipc.h
 SHMMAX		opt_sysvipc.h
 SHMMAXPGS	opt_sysvipc.h
 SHMMIN		opt_sysvipc.h
 SHMMNI		opt_sysvipc.h
 SHMSEG		opt_sysvipc.h
 SYSVMSG		opt_sysvipc.h
 SYSVSEM		opt_sysvipc.h
 SYSVSHM		opt_sysvipc.h
 SW_WATCHDOG	opt_watchdog.h
 TURNSTILE_PROFILING
 TTYHOG		opt_tty.h
 VFS_AIO
 VERBOSE_SYSINIT	opt_global.h
 WLCACHE		opt_wavelan.h
 WLDEBUG		opt_wavelan.h
 
 # POSIX kernel options
 P1003_1B_MQUEUE			opt_posix.h
 P1003_1B_SEMAPHORES		opt_posix.h
 _KPOSIX_PRIORITY_SCHEDULING	opt_posix.h
 
 # Do we want the config file compiled into the kernel?
 INCLUDE_CONFIG_FILE	opt_config.h
 
 # Options for static filesystems.  These should only be used at config
 # time, since the corresponding lkms cannot work if there are any static
 # dependencies.  Unusability is enforced by hiding the defines for the
 # options in a never-included header.
 CD9660		opt_dontuse.h
 CODA		opt_dontuse.h
 EXT2FS		opt_dontuse.h
 FDESCFS		opt_dontuse.h
 HPFS		opt_dontuse.h
 MSDOSFS		opt_dontuse.h
 NTFS		opt_dontuse.h
 NULLFS		opt_dontuse.h
 NWFS		opt_dontuse.h
 PORTALFS	opt_dontuse.h
 PROCFS		opt_dontuse.h
 PSEUDOFS	opt_dontuse.h
 REISERFS	opt_dontuse.h
 SMBFS		opt_dontuse.h
 TMPFS		opt_dontuse.h
 UDF		opt_dontuse.h
 UNIONFS		opt_dontuse.h
 
 # Pseudofs debugging
 PSEUDOFS_TRACE	opt_pseudofs.h
 
 # Broken - ffs_snapshot() dependency from ufs_lookup() :-(
 FFS		opt_ffs_broken_fixme.h
 
 # These static filesystems have one slightly bogus static dependency in
 # sys/i386/i386/autoconf.c.  If any of these filesystems are
 # statically compiled into the kernel, code for mounting them as root
 # filesystems will be enabled - but look below.
 NFSCLIENT	opt_nfs.h
 NFSSERVER	opt_nfs.h
 NFS4CLIENT	opt_nfs.h
 
 # filesystems and libiconv bridge
 CD9660_ICONV	opt_dontuse.h
 MSDOSFS_ICONV	opt_dontuse.h
 NTFS_ICONV	opt_dontuse.h
 UDF_ICONV	opt_dontuse.h
 
 # If you are following the conditions in the copyright,
 # you can enable soft-updates which will speed up a lot of things
 # and make the system safer from crashes at the same time.
 # otherwise a STUB module will be compiled in.
 SOFTUPDATES	opt_ffs.h
 
 # On small, embedded systems, it can be useful to turn off support for
 # snapshots.  It saves about 30-40k for a feature that would be lightly
 # used, if it is used at all.
 NO_FFS_SNAPSHOT	opt_ffs.h
 
 # Enabling this option turns on support for Access Control Lists in UFS,
 # which can be used to support high security configurations.  Depends on
 # UFS_EXTATTR.
 UFS_ACL		opt_ufs.h
 
 # Enabling this option turns on support for extended attributes in UFS-based
 # filesystems, which can be used to support high security configurations
 # as well as new filesystem features.
 UFS_EXTATTR	opt_ufs.h
 UFS_EXTATTR_AUTOSTART	opt_ufs.h
 
 # Enable fast hash lookups for large directories on UFS-based filesystems.
 UFS_DIRHASH	opt_ufs.h
 
 # Enable gjournal-based UFS journal.
 UFS_GJOURNAL	opt_ufs.h
 
 # The below sentence is not in English, and neither is this one.
 # We plan to remove the static dependences above, with a
 # <filesystem>_ROOT option to control if it usable as root.  This list
 # allows these options to be present in config files already (though
 # they won't make any difference yet).
 NFS_ROOT	opt_nfsroot.h
 
 # SMB/CIFS requester
 NETSMB		opt_netsmb.h
 
 # Options used only in subr_param.c.
 HZ		opt_param.h
 MAXFILES	opt_param.h
 NBUF		opt_param.h
 NSFBUFS		opt_param.h
 VM_BCACHE_SIZE_MAX	opt_param.h
 VM_SWZONE_SIZE_MAX	opt_param.h
 MAXUSERS
 DFLDSIZ		opt_param.h
 MAXDSIZ		opt_param.h
 MAXSSIZ		opt_param.h
 
 # Generic SCSI options.
 CAM_MAX_HIGHPOWER	opt_cam.h
 CAMDEBUG		opt_cam.h
 CAM_DEBUG_DELAY		opt_cam.h
 CAM_DEBUG_BUS		opt_cam.h
 CAM_DEBUG_TARGET	opt_cam.h
 CAM_DEBUG_LUN		opt_cam.h
 CAM_DEBUG_FLAGS		opt_cam.h
 SCSI_DELAY		opt_scsi.h
 SCSI_NO_SENSE_STRINGS	opt_scsi.h
 SCSI_NO_OP_STRINGS	opt_scsi.h
 
 # Options used only in cam/scsi/scsi_cd.c
 CHANGER_MIN_BUSY_SECONDS	opt_cd.h
 CHANGER_MAX_BUSY_SECONDS	opt_cd.h
 
 # Options used only in cam/scsi/scsi_sa.c.
 SA_IO_TIMEOUT		opt_sa.h
 SA_SPACE_TIMEOUT	opt_sa.h
 SA_REWIND_TIMEOUT	opt_sa.h
 SA_ERASE_TIMEOUT	opt_sa.h
 SA_1FM_AT_EOD		opt_sa.h
 
 # Options used only in cam/scsi/scsi_pt.c
 SCSI_PT_DEFAULT_TIMEOUT	opt_pt.h
 
 # Options used only in cam/scsi/scsi_ses.c
 SES_ENABLE_PASSTHROUGH	opt_ses.h
 
 # Options used in dev/sym/ (Symbios SCSI driver).
 SYM_SETUP_LP_PROBE_MAP	opt_sym.h	#-Low Priority Probe Map (bits)
 					# Allows the ncr to take precedence
 					# 1 (1<<0) -> 810a, 860
 					# 2 (1<<1) -> 825a, 875, 885, 895
 					# 4 (1<<2) -> 895a, 896, 1510d 
 SYM_SETUP_SCSI_DIFF	opt_sym.h	#-HVD support for 825a, 875, 885
 					# disabled:0 (default), enabled:1
 SYM_SETUP_PCI_PARITY	opt_sym.h	#-PCI parity checking
 					# disabled:0, enabled:1 (default)
 SYM_SETUP_MAX_LUN	opt_sym.h	#-Number of LUNs supported
 					# default:8, range:[1..64]
 
 # Options used only in pci/ncr.c
 SCSI_NCR_DEBUG		opt_ncr.h
 SCSI_NCR_MAX_SYNC	opt_ncr.h
 SCSI_NCR_MAX_WIDE	opt_ncr.h
 SCSI_NCR_MYADDR		opt_ncr.h
 
 # Options used only in dev/isp/*
 ISP_TARGET_MODE		opt_isp.h
 ISP_FW_CRASH_DUMP	opt_isp.h
 ISP_DEFAULT_ROLES	opt_isp.h
 
 # Options used only in dev/iscsi
 ISCSI_INITIATOR_DEBUG	opt_iscsi_initiator.h
 
 # Options used in the 'ata' ATA/ATAPI driver
 ATA_STATIC_ID		opt_ata.h
 ATA_NOPCI		opt_ata.h
 
 # Net stuff.
 ACCEPT_FILTER_DATA
 ACCEPT_FILTER_HTTP
 ALTQ			opt_global.h
 ALTQ_CBQ		opt_altq.h
 ALTQ_RED		opt_altq.h
 ALTQ_RIO		opt_altq.h
 ALTQ_HFSC		opt_altq.h
 ALTQ_CDNR		opt_altq.h
 ALTQ_PRIQ		opt_altq.h
 ALTQ_NOPCC		opt_altq.h
 ALTQ_DEBUG		opt_altq.h
 BOOTP			opt_bootp.h
 BOOTP_COMPAT		opt_bootp.h
 BOOTP_NFSROOT		opt_bootp.h
 BOOTP_NFSV3		opt_bootp.h
 BOOTP_WIRED_TO		opt_bootp.h
 DEVICE_POLLING
 DEV_ENC			opt_enc.h
 DEV_PF			opt_pf.h
 DEV_PFLOG		opt_pf.h
 DEV_PFSYNC		opt_pf.h
 ETHER_II		opt_ef.h
 ETHER_8023		opt_ef.h
 ETHER_8022		opt_ef.h
 ETHER_SNAP		opt_ef.h
+ROUTETABLES		opt_route.h
 MROUTING		opt_mrouting.h
 INET			opt_inet.h
 INET6			opt_inet6.h
 IPSEC			opt_ipsec.h
 IPSEC_DEBUG		opt_ipsec.h
 IPSEC_FILTERTUNNEL	opt_ipsec.h
 IPDIVERT
 DUMMYNET		opt_ipdn.h
 IPFILTER		opt_ipfilter.h
 IPFILTER_LOG		opt_ipfilter.h
 IPFILTER_LOOKUP		opt_ipfilter.h
 IPFILTER_DEFAULT_BLOCK	opt_ipfilter.h
 IPFIREWALL		opt_ipfw.h
 IPFIREWALL_VERBOSE	opt_ipfw.h
 IPFIREWALL_VERBOSE_LIMIT	opt_ipfw.h
 IPFIREWALL_DEFAULT_TO_ACCEPT	opt_ipfw.h
 IPFIREWALL_FORWARD	opt_ipfw.h
 IPFIREWALL_NAT		opt_ipfw.h
 IPSTEALTH
 IPX
 LIBMBPOOL
 LIBMCHAIN
 LIBALIAS
 MBUF_STRESS_TEST
 MBUF_PROFILING
 NCP
 NETATALK		opt_atalk.h
 PPP_BSDCOMP		opt_ppp.h
 PPP_DEFLATE		opt_ppp.h
 PPP_FILTER		opt_ppp.h
 RADIX_MPATH		opt_mpath.h
 SLIP_IFF_OPTS		opt_slip.h
 TCPDEBUG
 TCP_SIGNATURE		opt_inet.h
 DEV_VLAN		opt_vlan.h
 VLAN_ARRAY		opt_vlan.h
 XBONEHACK
 KRPC
 NFSLOCKD
 
 #
 # SCTP
 #
 SCTP			opt_sctp.h
 SCTP_DEBUG		opt_sctp.h # Enable debug printfs
 SCTP_WITH_NO_CSUM       opt_sctp.h # Use this at your peril
 SCTP_LOCK_LOGGING       opt_sctp.h # Log to KTR lock activity
 SCTP_MBUF_LOGGING       opt_sctp.h # Log to KTR general mbuf alloc/free
 SCTP_MBCNT_LOGGING	opt_sctp.h # Log to KTR mbcnt activity
 SCTP_PACKET_LOGGING     opt_sctp.h # Log to a packet buffer last N packets
 SCTP_LTRACE_CHUNKS      opt_sctp.h # Log to KTR chunks processed
 SCTP_LTRACE_ERRORS      opt_sctp.h # Log to KTR error returns.
 #
 #
 #
 
 # Netgraph(4). Use option NETGRAPH to enable the base netgraph code.
 # Each netgraph node type can either be compiled into the kernel
 # or loaded dynamically. To get the former, include the corresponding
 # option below. Each type has its own man page, e.g. ng_async(4).
 NETGRAPH
 NETGRAPH_DEBUG		opt_netgraph.h
 NETGRAPH_ASYNC		opt_netgraph.h
 NETGRAPH_ATMLLC		opt_netgraph.h
 NETGRAPH_ATM_ATMPIF	opt_netgraph.h
 NETGRAPH_BLUETOOTH	opt_netgraph.h
 NETGRAPH_BLUETOOTH_BT3C	opt_netgraph.h
 NETGRAPH_BLUETOOTH_H4	opt_netgraph.h
 NETGRAPH_BLUETOOTH_HCI	opt_netgraph.h
 NETGRAPH_BLUETOOTH_L2CAP	opt_netgraph.h
 NETGRAPH_BLUETOOTH_SOCKET	opt_netgraph.h
 NETGRAPH_BLUETOOTH_UBT	opt_netgraph.h
 NETGRAPH_BLUETOOTH_UBTBCMFW	opt_netgraph.h
 NETGRAPH_BPF		opt_netgraph.h
 NETGRAPH_BRIDGE		opt_netgraph.h
 NETGRAPH_CAR		opt_netgraph.h
 NETGRAPH_CISCO		opt_netgraph.h
 NETGRAPH_DEFLATE	opt_netgraph.h
 NETGRAPH_DEVICE		opt_netgraph.h
 NETGRAPH_ECHO		opt_netgraph.h
 NETGRAPH_EIFACE		opt_netgraph.h
 NETGRAPH_ETHER		opt_netgraph.h
 NETGRAPH_FEC		opt_netgraph.h
 NETGRAPH_FRAME_RELAY	opt_netgraph.h
 NETGRAPH_GIF		opt_netgraph.h
 NETGRAPH_GIF_DEMUX	opt_netgraph.h
 NETGRAPH_HOLE		opt_netgraph.h
 NETGRAPH_IFACE		opt_netgraph.h
 NETGRAPH_IP_INPUT	opt_netgraph.h
 NETGRAPH_IPFW		opt_netgraph.h
 NETGRAPH_KSOCKET	opt_netgraph.h
 NETGRAPH_L2TP		opt_netgraph.h
 NETGRAPH_LMI		opt_netgraph.h
 # MPPC compression requires proprietary files (not included)
 NETGRAPH_MPPC_COMPRESSION	opt_netgraph.h
 NETGRAPH_MPPC_ENCRYPTION	opt_netgraph.h
 NETGRAPH_NAT		opt_netgraph.h
 NETGRAPH_NETFLOW	opt_netgraph.h
 NETGRAPH_ONE2MANY	opt_netgraph.h
 NETGRAPH_PPP		opt_netgraph.h
 NETGRAPH_PPPOE		opt_netgraph.h
 NETGRAPH_PPTPGRE	opt_netgraph.h
 NETGRAPH_PRED1		opt_netgraph.h
 NETGRAPH_RFC1490	opt_netgraph.h
 NETGRAPH_SOCKET		opt_netgraph.h
 NETGRAPH_SPLIT		opt_netgraph.h
 NETGRAPH_SPPP		opt_netgraph.h
 NETGRAPH_TAG		opt_netgraph.h
 NETGRAPH_TCPMSS		opt_netgraph.h
 NETGRAPH_TEE		opt_netgraph.h
 NETGRAPH_TTY		opt_netgraph.h
 NETGRAPH_UI		opt_netgraph.h
 NETGRAPH_VJC		opt_netgraph.h
 
 # NgATM options
 NGATM_ATM		opt_netgraph.h
 NGATM_ATMBASE		opt_netgraph.h
 NGATM_SSCOP		opt_netgraph.h
 NGATM_SSCFU		opt_netgraph.h
 NGATM_UNI		opt_netgraph.h
 NGATM_CCATM		opt_netgraph.h
 
 # DRM options
 DRM_DEBUG		opt_drm.h
 
 ZERO_COPY_SOCKETS	opt_zero.h
 TI_PRIVATE_JUMBOS	opt_ti.h
 TI_JUMBO_HDRSPLIT	opt_ti.h
 
 # ATM (HARP version) - disabled as not MPSAFE in 7.0
 #ATM_CORE		opt_atm.h
 #ATM_IP			opt_atm.h
 #ATM_SIGPVC		opt_atm.h
 #ATM_SPANS		opt_atm.h
 #ATM_UNI			opt_atm.h
 
 # XXX Conflict: # of devices vs network protocol (Native ATM).
 # This makes "atm.h" unusable.
 NATM
 
 # DPT driver debug flags
 DPT_MEASURE_PERFORMANCE	opt_dpt.h
 DPT_HANDLE_TIMEOUTS	opt_dpt.h
 DPT_TIMEOUT_FACTOR	opt_dpt.h
 DPT_LOST_IRQ		opt_dpt.h
 DPT_RESET_HBA		opt_dpt.h
 
 # Misc debug flags.  Most of these should probably be replaced with
 # 'DEBUG', and then let people recompile just the interesting modules
 # with 'make CC="cc -DDEBUG"'.
 CLUSTERDEBUG		opt_debug_cluster.h
 DEBUG_1284		opt_ppb_1284.h
 VP0_DEBUG		opt_vpo.h
 LPT_DEBUG		opt_lpt.h
 PLIP_DEBUG		opt_plip.h
 LOCKF_DEBUG		opt_debug_lockf.h
 NETATALKDEBUG		opt_atalk.h
 SI_DEBUG		opt_debug_si.h
 
 # Fb options
 FB_DEBUG		opt_fb.h
 FB_INSTALL_CDEV		opt_fb.h
 
 # ppbus related options
 PERIPH_1284		opt_ppb_1284.h
 DONTPROBE_1284		opt_ppb_1284.h
 
 # smbus related options
 ENABLE_ALART		opt_intpm.h
 
 # These cause changes all over the kernel
 BLKDEV_IOSIZE		opt_global.h
 MAXPHYS 		opt_global.h
 DFLTPHYS		opt_global.h
 BURN_BRIDGES		opt_global.h
 DEBUG			opt_global.h
 DEBUG_LOCKS		opt_global.h
 DEBUG_VFS_LOCKS		opt_global.h
 DIAGNOSTIC		opt_global.h
 INVARIANT_SUPPORT	opt_global.h
 INVARIANTS		opt_global.h
 MCLSHIFT		opt_global.h
 MUTEX_DEBUG		opt_global.h
 MUTEX_NOINLINE		opt_global.h
 LOCK_PROFILING		opt_global.h
 LOCK_PROFILING_FAST	opt_global.h
 MSIZE			opt_global.h
 REGRESSION		opt_global.h
 RESTARTABLE_PANICS	opt_global.h
 RWLOCK_NOINLINE		opt_global.h
 SX_NOINLINE		opt_global.h
 VFS_BIO_DEBUG		opt_global.h
 
 # These are VM related options
 VM_KMEM_SIZE		opt_vm.h
 VM_KMEM_SIZE_SCALE	opt_vm.h
 VM_KMEM_SIZE_MAX	opt_vm.h
 VM_NRESERVLEVEL		opt_vm.h
 VM_LEVEL_0_ORDER	opt_vm.h
 NO_SWAPPING		opt_vm.h
 MALLOC_MAKE_FAILURES	opt_vm.h
 MALLOC_PROFILE		opt_vm.h
 
 # The MemGuard replacement allocator used for tamper-after-free detection
 DEBUG_MEMGUARD		opt_vm.h
 
 # The RedZone malloc(9) protection
 DEBUG_REDZONE		opt_vm.h
 
 # Standard SMP options
 SMP			opt_global.h
 
 # Size of the kernel message buffer
 MSGBUF_SIZE		opt_msgbuf.h
 
 # NFS options
 NFS_MINATTRTIMO		opt_nfs.h
 NFS_MAXATTRTIMO		opt_nfs.h
 NFS_MINDIRATTRTIMO	opt_nfs.h
 NFS_MAXDIRATTRTIMO	opt_nfs.h
 NFS_GATHERDELAY		opt_nfs.h
 NFS_WDELAYHASHSIZ	opt_nfs.h
 NFS_DEBUG		opt_nfs.h
 
 # For the Bt848/Bt848A/Bt849/Bt878/Bt879 driver
 OVERRIDE_CARD			opt_bktr.h
 OVERRIDE_TUNER			opt_bktr.h
 OVERRIDE_DBX			opt_bktr.h
 OVERRIDE_MSP			opt_bktr.h
 BROOKTREE_SYSTEM_DEFAULT	opt_bktr.h
 BROOKTREE_ALLOC_PAGES		opt_bktr.h
 BKTR_OVERRIDE_CARD		opt_bktr.h
 BKTR_OVERRIDE_TUNER		opt_bktr.h
 BKTR_OVERRIDE_DBX		opt_bktr.h
 BKTR_OVERRIDE_MSP		opt_bktr.h
 BKTR_SYSTEM_DEFAULT		opt_bktr.h
 BKTR_ALLOC_PAGES		opt_bktr.h
 BKTR_USE_PLL			opt_bktr.h	
 BKTR_GPIO_ACCESS		opt_bktr.h
 BKTR_NO_MSP_RESET		opt_bktr.h
 BKTR_430_FX_MODE		opt_bktr.h
 BKTR_SIS_VIA_MODE		opt_bktr.h
 BKTR_USE_FREEBSD_SMBUS		opt_bktr.h
 BKTR_NEW_MSP34XX_DRIVER		opt_bktr.h
 
 BREAK_TO_DEBUGGER	opt_comconsole.h
 ALT_BREAK_TO_DEBUGGER	opt_comconsole.h
 
 # Options to support PPS
 UART_PPS_ON_CTS		opt_uart.h
 
 # options for bus/device framework
 BUS_DEBUG		opt_bus.h
 
 # options for USB support
 USB_DEBUG		opt_usb.h
 USBVERBOSE		opt_usb.h
 UKBD_DFLT_KEYMAP	opt_ukbd.h
 UPLCOM_INTR_INTERVAL	opt_uplcom.h
 UVSCOM_DEFAULT_OPKTSIZE	opt_uvscom.h
 UVSCOM_INTR_INTERVAL	opt_uvscom.h
 
 # Embedded system options
 INIT_PATH
 
 ROOTDEVNAME
 
 FDC_DEBUG		opt_fdc.h
 PCFCLOCK_VERBOSE	opt_pcfclock.h
 PCFCLOCK_MAX_RETRIES	opt_pcfclock.h
 
 KTR			opt_global.h
 KTR_ALQ			opt_ktr.h
 KTR_MASK		opt_ktr.h
 KTR_CPUMASK		opt_ktr.h
 KTR_COMPILE		opt_global.h
 KTR_ENTRIES		opt_global.h
 KTR_VERBOSE		opt_ktr.h
 WITNESS			opt_global.h
 WITNESS_KDB		opt_witness.h
 WITNESS_SKIPSPIN	opt_witness.h
 
 # options for ACPI support
 ACPI_DEBUG		opt_acpi.h
 ACPI_MAX_THREADS	opt_acpi.h
 ACPI_NO_SEMAPHORES	opt_acpi.h
 
 # ISA support
 DEV_ISA			opt_isa.h
 ISAPNP			opt_isa.h
 
 # various 'device presence' options.
 DEV_BPF			opt_bpf.h
 DEV_MCA			opt_mca.h
 DEV_CARP		opt_carp.h
 DEV_SPLASH		opt_splash.h
 
 # EISA support
 DEV_EISA		opt_eisa.h
 EISA_SLOTS		opt_eisa.h
 
 # ed driver
 ED_HPP			opt_ed.h
 ED_3C503		opt_ed.h
 ED_SIC			opt_ed.h
 
 # bce driver
 BCE_DEBUG		opt_bce.h
 
 # wi driver
 WI_SYMBOL_FIRMWARE	opt_wi.h
 
 SOCKBUF_DEBUG		opt_global.h
 
 # options for ubsec driver
 UBSEC_DEBUG		opt_ubsec.h
 UBSEC_RNDTEST		opt_ubsec.h
 UBSEC_NO_RNG		opt_ubsec.h
 
 # options for hifn driver
 HIFN_DEBUG		opt_hifn.h
 HIFN_RNDTEST		opt_hifn.h
 
 # options for safenet driver
 SAFE_DEBUG		opt_safe.h
 SAFE_NO_RNG		opt_safe.h
 SAFE_RNDTEST		opt_safe.h
 
 # syscons options
 MAXCONS			opt_syscons.h
 SC_ALT_MOUSE_IMAGE	opt_syscons.h
 SC_CUT_SPACES2TABS	opt_syscons.h
 SC_CUT_SEPCHARS		opt_syscons.h
 SC_DEBUG_LEVEL		opt_syscons.h
 SC_DFLT_FONT		opt_syscons.h
 SC_DISABLE_KDBKEY	opt_syscons.h
 SC_DISABLE_REBOOT	opt_syscons.h
 SC_HISTORY_SIZE		opt_syscons.h
 SC_KERNEL_CONS_ATTR	opt_syscons.h
 SC_KERNEL_CONS_REV_ATTR	opt_syscons.h
 SC_MOUSE_CHAR		opt_syscons.h
 SC_NO_CUTPASTE		opt_syscons.h
 SC_NO_FONT_LOADING	opt_syscons.h
 SC_NO_HISTORY		opt_syscons.h
 SC_NO_MODE_CHANGE	opt_syscons.h
 SC_NO_SUSPEND_VTYSWITCH	opt_syscons.h
 SC_NO_SYSMOUSE		opt_syscons.h
 SC_NORM_ATTR		opt_syscons.h
 SC_NORM_REV_ATTR	opt_syscons.h
 SC_PIXEL_MODE		opt_syscons.h
 SC_RENDER_DEBUG		opt_syscons.h
 SC_TWOBUTTON_MOUSE	opt_syscons.h
 
 # options for printf
 PRINTF_BUFR_SIZE	opt_printf.h
 
 # kbd options
 KBD_DISABLE_KEYMAP_LOAD	opt_kbd.h
 KBD_INSTALL_CDEV	opt_kbd.h
 KBD_MAXRETRY		opt_kbd.h
 KBD_MAXWAIT		opt_kbd.h
 KBD_RESETDELAY		opt_kbd.h
 KBDIO_DEBUG		opt_kbd.h
 
 # options for the Atheros driver
 ATH_DEBUG		opt_ath.h
 ATH_TXBUF		opt_ath.h
 ATH_RXBUF		opt_ath.h
 ATH_DIAGAPI		opt_ath.h
 ATH_TX99_DIAG		opt_ath.h
 
 # options for the Marvell 8335 wireless driver
 MALO_DEBUG		opt_malo.h
 MALO_TXBUF		opt_malo.h
 MALO_RXBUF		opt_malo.h
 
 # dcons options 
 DCONS_BUF_SIZE		opt_dcons.h
 DCONS_POLL_HZ		opt_dcons.h
 DCONS_FORCE_CONSOLE	opt_dcons.h
 DCONS_FORCE_GDB		opt_dcons.h
 
 # Static unit counts
 NI4BTRC			opt_i4b.h
 NI4BRBCH		opt_i4b.h
 NI4BTEL			opt_i4b.h
 #XXXBZ#NI4BIPR			opt_i4b.h
 #XXXBZ#NI4BING			opt_i4b.h
 #XXXBZ#NI4BISPPP		opt_i4b.h
 
 # VFS options
 LOOKUP_SHARED		opt_vfs.h
 
 # HWPMC options
 HWPMC_HOOKS
 
 # XBOX options for FreeBSD/i386, but some files are MI
 XBOX			opt_xbox.h
 
 # XFS
 XFS
 
 # Interrupt filtering
 INTR_FILTER
 
 # 802.11 support layer
 IEEE80211_DEBUG		opt_wlan.h
 IEEE80211_DEBUG_REFCNT	opt_wlan.h
 IEEE80211_AMPDU_AGE	opt_wlan.h
 
 #Disable code to dispatch tcp offloading
 TCP_OFFLOAD_DISABLE	opt_inet.h
Index: head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
===================================================================
--- head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c	(revision 178887)
+++ head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c	(revision 178888)
@@ -1,1650 +1,1650 @@
 /*	$FreeBSD$	*/
 
 /*
  * Copyright (C) 1993-2003 by Darren Reed.
  *
  * See the IPFILTER.LICENCE file for details on licencing.
  */
 #if !defined(lint)
 static const char sccsid[] = "@(#)ip_fil.c	2.41 6/5/96 (C) 1993-2000 Darren Reed";
 static const char rcsid[] = "@(#)$Id: ip_fil_freebsd.c,v 2.53.2.50 2007/09/20 12:51:50 darrenr Exp $";
 #endif
 
 #if defined(KERNEL) || defined(_KERNEL)
 # undef KERNEL
 # undef _KERNEL
 # define	KERNEL	1
 # define	_KERNEL	1
 #endif
 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) && \
     !defined(KLD_MODULE) && !defined(IPFILTER_LKM)
 # include "opt_inet6.h"
 #endif
 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 440000) && \
     !defined(KLD_MODULE) && !defined(IPFILTER_LKM)
 # include "opt_random_ip_id.h"
 #endif
 #include <sys/param.h>
 #if defined(__FreeBSD__) && !defined(__FreeBSD_version)
 # if defined(IPFILTER_LKM)
 #  ifndef __FreeBSD_cc_version
 #   include <osreldate.h>
 #  else
 #   if __FreeBSD_cc_version < 430000
 #    include <osreldate.h>
 #   endif
 #  endif
 # endif
 #endif
 #include <sys/errno.h>
 #include <sys/types.h>
 #include <sys/file.h>
 #if __FreeBSD_version >= 220000
 # include <sys/fcntl.h>
 # include <sys/filio.h>
 #else
 # include <sys/ioctl.h>
 #endif
 #include <sys/time.h>
 #include <sys/systm.h>
 #if (__FreeBSD_version >= 300000)
 # include <sys/dirent.h>
 #else
 # include <sys/dir.h>
 #endif
 #if !defined(__hpux)
 # include <sys/mbuf.h>
 #endif
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #if __FreeBSD_version >= 500043
 # include <sys/selinfo.h>
 #else
 # include <sys/select.h>
 #endif
 
 #include <net/if.h>
 #if __FreeBSD_version >= 300000
 # include <net/if_var.h>
 # if __FreeBSD_version >= 500043
 #  include <net/netisr.h>
 # endif
 # if !defined(IPFILTER_LKM)
 #  include "opt_ipfilter.h"
 # endif
 #endif
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
 #if defined(__osf__)
 # include <netinet/tcp_timer.h>
 #endif
 #include <netinet/udp.h>
 #include <netinet/tcpip.h>
 #include <netinet/ip_icmp.h>
 #ifndef _KERNEL
 # include "netinet/ipf.h"
 #endif
 #include "netinet/ip_compat.h"
 #ifdef USE_INET6
 # include <netinet/icmp6.h>
 #endif
 #include "netinet/ip_fil.h"
 #include "netinet/ip_nat.h"
 #include "netinet/ip_frag.h"
 #include "netinet/ip_state.h"
 #include "netinet/ip_proxy.h"
 #include "netinet/ip_auth.h"
 #ifdef	IPFILTER_SYNC
 #include "netinet/ip_sync.h"
 #endif
 #ifdef	IPFILTER_SCAN
 #include "netinet/ip_scan.h"
 #endif
 #include "netinet/ip_pool.h"
 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000)
 # include <sys/malloc.h>
 #endif
 #include <sys/kernel.h>
 #ifdef CSUM_DATA_VALID
 #include <machine/in_cksum.h>
 #endif
 extern	int	ip_optcopy __P((struct ip *, struct ip *));
 
 #if (__FreeBSD_version > 460000)
 extern	int	path_mtu_discovery;
 #endif
 
 # ifdef IPFILTER_M_IPFILTER
 MALLOC_DEFINE(M_IPFILTER, "ipfilter", "IP Filter packet filter data structures");
 # endif
 
 
 #if !defined(__osf__)
 extern	struct	protosw	inetsw[];
 #endif
 
 static	int	(*fr_savep) __P((ip_t *, int, void *, int, struct mbuf **));
 static	int	fr_send_ip __P((fr_info_t *, mb_t *, mb_t **));
 # ifdef USE_MUTEXES
 ipfmutex_t	ipl_mutex, ipf_authmx, ipf_rw, ipf_stinsert;
 ipfmutex_t	ipf_nat_new, ipf_natio, ipf_timeoutlock;
 ipfrwlock_t	ipf_mutex, ipf_global, ipf_ipidfrag, ipf_frcache, ipf_tokens;
 ipfrwlock_t	ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_auth;
 # endif
 int		ipf_locks_done = 0;
 
 #if (__FreeBSD_version >= 300000)
 struct callout_handle fr_slowtimer_ch;
 #endif
 struct	selinfo	ipfselwait[IPL_LOGSIZE];
 
 #if (__FreeBSD_version >= 500011)
 # include <sys/conf.h>
 # if defined(NETBSD_PF)
 #  include <net/pfil.h>
 #  include <netinet/ipprotosw.h>
 /*
  * We provide the fr_checkp name just to minimize changes later.
  */
 int (*fr_checkp) __P((ip_t *ip, int hlen, void *ifp, int out, mb_t **mp));
 # endif /* NETBSD_PF */
 #endif /* __FreeBSD_version >= 500011 */
 
 
 #if (__FreeBSD_version >= 502103)
 static eventhandler_tag ipf_arrivetag, ipf_departtag, ipf_clonetag;
 
 /*
  * Callback that ignores its argument and simply calls frsync(NULL).
  */
 static void
 ipf_ifevent(void *arg)
 {
 	frsync(NULL);
 }
 #endif
 
 
 #if (__FreeBSD_version >= 501108) && defined(_KERNEL)
 
 /*
  * Adapter that hands an mbuf to fr_check().
  *
  * The start of the mbuf data is viewed as an IPv4 header; the header
  * length passed on is ip_hl * 4 and the "out" flag is 1 exactly when
  * dir == PFIL_OUT.  arg is not used.  Returns whatever fr_check() returns.
  */
 static int
 fr_check_wrapper(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
 {
 	struct ip *iph;
 	int hdrlen, outbound;
 
 	iph = mtod(*mp, struct ip *);
 	hdrlen = iph->ip_hl << 2;
 	outbound = (dir == PFIL_OUT);
 
 	return (fr_check(iph, hdrlen, ifp, outbound, mp));
 }
 
 # ifdef USE_INET6
 #  include <netinet/ip6.h>
 
 /*
  * IPv6 counterpart of fr_check_wrapper(): forwards the mbuf to fr_check()
  * with a fixed header length of sizeof(struct ip6_hdr) and an "out" flag
  * that is 1 exactly when dir == PFIL_OUT.  arg is not used.
  */
 static int
 fr_check_wrapper6(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
 {
 	struct ip *hdr = mtod(*mp, struct ip *);
 	int outbound = (dir == PFIL_OUT);
 
 	return (fr_check(hdr, sizeof(struct ip6_hdr), ifp, outbound, mp));
 }
 # endif
 #endif /* __FreeBSD_version >= 501108 */
 #if	defined(IPFILTER_LKM)
 /*
  * Return 1 when s is exactly the string "ipl", otherwise 0.
  */
 int iplidentify(s)
 char *s;
 {
 	return (strcmp(s, "ipl") == 0) ? 1 : 0;
 }
 #endif /* IPFILTER_LKM */
 
 
 /*
  * Enable the filter.
  *
  * Returns EBUSY if the filter is already running (fr_running > 0), EIO if
  * fr_initialise() fails, and 0 on success.  On success the locks are
  * initialised, fr_check is installed as fr_checkp (the previous hook is
  * remembered in fr_savep), fr_running is set to 1 and the slow timer is
  * scheduled.
  */
 int ipfattach()
 {
 #ifdef USE_SPL
 	int s;
 #endif
 
 	SPL_NET(s);
 	if (fr_running > 0) {
 		SPL_X(s);
 		return EBUSY;
 	}
 
 	MUTEX_INIT(&ipf_rw, "ipf rw mutex");
 	MUTEX_INIT(&ipf_timeoutlock, "ipf timeout queue mutex");
 	RWLOCK_INIT(&ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
 	RWLOCK_INIT(&ipf_tokens, "ipf token rwlock");
 	/* Tells ipfdetach() that the four locks above must be destroyed. */
 	ipf_locks_done = 1;
 
 	/*
 	 * NOTE(review): on this failure path the locks stay initialised and
 	 * ipf_locks_done stays 1; they are only torn down in ipfdetach().
 	 */
 	if (fr_initialise() < 0) {
 		SPL_X(s);
 		return EIO;
 	}
 
 
 	/* Install fr_check as the hook, saving whatever was there before. */
 	if (fr_checkp != fr_check) {
 		fr_savep = fr_checkp;
 		fr_checkp = fr_check;
 	}
 
 	bzero((char *)ipfselwait, sizeof(ipfselwait));
 	bzero((char *)frcache, sizeof(frcache));
 	fr_running = 1;
 
 	/* Bit 0 of fr_control_forwarding: turn IP forwarding on at attach. */
 	if (fr_control_forwarding & 1)
 		ipforwarding = 1;
 
 	SPL_X(s);
 	/* Schedule fr_slowtimer; newer versions keep the handle for untimeout(). */
 #if (__FreeBSD_version >= 300000)
 	fr_slowtimer_ch = timeout(fr_slowtimer, NULL,
 				    (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT);
 #else
 	timeout(fr_slowtimer, NULL, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT);
 #endif
 	return 0;
 }
 
 
 /*
  * Disable the filter by removing the hooks from the IP input/output
  * stream.
  *
  * In order: optionally clears ipforwarding, cancels the slow timer,
  * restores the saved fr_checkp hook (only when NETBSD_PF is not defined),
  * calls fr_deinitialise(), sets fr_running to -2, calls frflush() twice
  * (once with FR_INACTIVE and once without) and destroys the locks that
  * ipfattach() initialised when ipf_locks_done is 1.  Always returns 0.
  */
 int ipfdetach()
 {
 #ifdef USE_SPL
 	int s;
 #endif
 	/* Bit 1 of fr_control_forwarding: turn IP forwarding off at detach. */
 	if (fr_control_forwarding & 2)
 		ipforwarding = 0;
 
 	SPL_NET(s);
 
 #if (__FreeBSD_version >= 300000)
 	/* Only cancel a timer that was actually scheduled, then clear the handle. */
 	if (fr_slowtimer_ch.callout != NULL)
 		untimeout(fr_slowtimer, NULL, fr_slowtimer_ch);
 	bzero(&fr_slowtimer_ch, sizeof(fr_slowtimer_ch));
 #else
 	untimeout(fr_slowtimer, NULL);
 #endif /* FreeBSD */
 
 #ifndef NETBSD_PF
 	/* Put back the hook that ipfattach() saved in fr_savep. */
 	if (fr_checkp != NULL)
 		fr_checkp = fr_savep;
 	fr_savep = NULL;
 #endif
 
 	fr_deinitialise();
 
 	fr_running = -2;
 
 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE);
 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE);
 
 	/* Destroy exactly the locks ipfattach() created, and only once. */
 	if (ipf_locks_done == 1) {
 		MUTEX_DESTROY(&ipf_timeoutlock);
 		MUTEX_DESTROY(&ipf_rw);
 		RW_DESTROY(&ipf_ipidfrag);
 		RW_DESTROY(&ipf_tokens);
 		ipf_locks_done = 0;
 	}
 
 	SPL_X(s);
 
 	return 0;
 }
 
 
 /*
  * Filter ioctl interface.
  *
  * Validates the caller and the device minor, then hands the request to
  * fr_ioctlswitch() and returns its result.
  *
  * Returns EPERM for a write-mode request at securelevel >= 3 (kernel
  * builds with BSD >= 199306), ENXIO when the minor is outside
  * 0..IPL_LOGMAX, and EIO when the filter is not running unless the request
  * is one of a small set of commands on the IPL_LOGIPF unit.
  */
 int iplioctl(dev, cmd, data, mode
 # if defined(_KERNEL) && ((BSD >= 199506) || (__FreeBSD_version >= 220000))
 , p)
 #  if (__FreeBSD_version >= 500024)
 struct thread *p;
 #   if (__FreeBSD_version >= 500043)
 #    define	p_uid	td_ucred->cr_ruid
 #   else
 #    define	p_uid	t_proc->p_cred->p_ruid
 #   endif
 #  else
 struct proc *p;
 #   define	p_uid	p_cred->p_ruid
 #  endif /* __FreeBSD_version >= 500024 */
 # else
 )
 # endif
 #if defined(_KERNEL) && (__FreeBSD_version >= 502116)
 struct cdev *dev;
 #else
 dev_t dev;
 #endif
 ioctlcmd_t cmd;
 caddr_t data;
 int mode;
 {
 	int rc, logunit;
 	SPL_INT(s);
 
 #if (BSD >= 199306) && defined(_KERNEL)
 	if ((securelevel >= 3) && (mode & FWRITE))
 		return EPERM;
 #endif
 
 	logunit = GET_MINOR(dev);
 	if ((logunit < 0) || (logunit > IPL_LOGMAX))
 		return ENXIO;
 
 	/*
 	 * While the filter is not running, only these commands on the
 	 * IPL_LOGIPF unit are let through.
 	 */
 	if ((fr_running <= 0) &&
 	    ((logunit != IPL_LOGIPF) ||
 	     (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
 	      cmd != SIOCIPFSET && cmd != SIOCFRENB &&
 	      cmd != SIOCGETFS && cmd != SIOCGETFF)))
 		return EIO;
 
 	SPL_NET(s);
 	rc = fr_ioctlswitch(logunit, data, cmd, mode, p->p_uid, p);
 	SPL_X(s);
 
 	return rc;
 }
 
 
 #if 0
 /*
  * Compiled out by the enclosing "#if 0".
  *
  * Walks the active accounting and filter rule lists (and their IPv6
  * counterparts when USE_INET6 is defined) under the ipf_mutex write lock
  * and replaces every fr_ifa that equals ifp with (void *)-1, then calls
  * fr_natsync(ifp).
  */
 void fr_forgetifp(ifp)
 void *ifp;
 {
 	register frentry_t *f;
 
 	WRITE_ENTER(&ipf_mutex);
 	for (f = ipacct[0][fr_active]; (f != NULL); f = f->fr_next)
 		if (f->fr_ifa == ifp)
 			f->fr_ifa = (void *)-1;
 	for (f = ipacct[1][fr_active]; (f != NULL); f = f->fr_next)
 		if (f->fr_ifa == ifp)
 			f->fr_ifa = (void *)-1;
 	for (f = ipfilter[0][fr_active]; (f != NULL); f = f->fr_next)
 		if (f->fr_ifa == ifp)
 			f->fr_ifa = (void *)-1;
 	for (f = ipfilter[1][fr_active]; (f != NULL); f = f->fr_next)
 		if (f->fr_ifa == ifp)
 			f->fr_ifa = (void *)-1;
 #ifdef USE_INET6
 	for (f = ipacct6[0][fr_active]; (f != NULL); f = f->fr_next)
 		if (f->fr_ifa == ifp)
 			f->fr_ifa = (void *)-1;
 	for (f = ipacct6[1][fr_active]; (f != NULL); f = f->fr_next)
 		if (f->fr_ifa == ifp)
 			f->fr_ifa = (void *)-1;
 	for (f = ipfilter6[0][fr_active]; (f != NULL); f = f->fr_next)
 		if (f->fr_ifa == ifp)
 			f->fr_ifa = (void *)-1;
 	for (f = ipfilter6[1][fr_active]; (f != NULL); f = f->fr_next)
 		if (f->fr_ifa == ifp)
 			f->fr_ifa = (void *)-1;
 #endif
 	RWLOCK_EXIT(&ipf_mutex);
 	fr_natsync(ifp);
 }
 #endif
 
 
 /*
  * routines below for saving IP headers to buffer
  */
 
 /*
  * Open entry point: accepts the open when the device minor is not above
  * IPL_LOGMAX.  Returns 0 on success and ENXIO otherwise.  flags, devtype
  * and p are not used.
  */
 int iplopen(dev, flags
 #if ((BSD >= 199506) || (__FreeBSD_version >= 220000)) && defined(_KERNEL)
 , devtype, p)
 int devtype;
 # if (__FreeBSD_version >= 500024)
 struct thread *p;
 # else
 struct proc *p;
 # endif /* __FreeBSD_version >= 500024 */
 #else
 )
 #endif
 #if defined(_KERNEL) && (__FreeBSD_version >= 502116)
 struct cdev *dev;
 #else
 dev_t dev;
 #endif
 int flags;
 {
 	u_int unit = GET_MINOR(dev);
 
 	return (IPL_LOGMAX < unit) ? ENXIO : 0;
 }
 
 
 /*
  * Close entry point: performs the same minor-number check as iplopen().
  * Returns 0 when the minor is not above IPL_LOGMAX and ENXIO otherwise.
  * flags, devtype and p are not used.
  */
 int iplclose(dev, flags
 #if ((BSD >= 199506) || (__FreeBSD_version >= 220000)) && defined(_KERNEL)
 , devtype, p)
 int devtype;
 # if (__FreeBSD_version >= 500024)
 struct thread *p;
 # else
 struct proc *p;
 # endif /* __FreeBSD_version >= 500024 */
 #else
 )
 #endif
 #if defined(_KERNEL) && (__FreeBSD_version >= 502116)
 struct cdev *dev;
 #else
 dev_t dev;
 #endif
 int flags;
 {
 	u_int unit = GET_MINOR(dev);
 
 	return (IPL_LOGMAX < unit) ? ENXIO : 0;
 }
 
 /*
  * iplread/ipllog
  * both of these must operate with at least splnet() lest they be
  * called during packet processing and cause an inconsistancy to appear in
  * the filter lists.
  */
 #if (BSD >= 199306)
 int iplread(dev, uio, ioflag)
 int ioflag;
 #else
 int iplread(dev, uio)
 #endif
 #if defined(_KERNEL) && (__FreeBSD_version >= 502116)
 struct cdev *dev;
 #else
 dev_t dev;
 #endif
 register struct uio *uio;
 {
 	u_int	xmin = GET_MINOR(dev);
 
 	if (fr_running < 1)
 		return EIO;
 
 	if (xmin < 0)
 		return ENXIO;
 
 # ifdef	IPFILTER_SYNC
 	if (xmin == IPL_LOGSYNC)
 		return ipfsync_read(uio);
 # endif
 
 #ifdef IPFILTER_LOG
 	return ipflog_read(xmin, uio);
 #else
 	return ENXIO;
 #endif
 }
 
 
 /*
  * iplwrite
  * like iplread, this must operate with at least splnet() lest it be
  * called during packet processing and cause an inconsistency to appear in
  * the filter lists.
  *
  * Write entry point for the IP Filter devices.  Returns EIO when the
  * filter is not running (fr_running < 1).  When IPFILTER_SYNC is defined,
  * writes to the IPL_LOGSYNC minor are handed to ipfsync_write(); every
  * other write is refused with ENXIO.
  */
 #if (BSD >= 199306)
 int iplwrite(dev, uio, ioflag)
 int ioflag;
 #else
 int iplwrite(dev, uio)
 #endif
 #if defined(_KERNEL) && (__FreeBSD_version >= 502116)
 struct cdev *dev;
 #else
 dev_t dev;
 #endif
 register struct uio *uio;
 {
 
 	if (fr_running < 1)
 		return EIO;
 
 #ifdef	IPFILTER_SYNC
 	if (GET_MINOR(dev) == IPL_LOGSYNC)
 		return ipfsync_write(uio);
 #endif
 	return ENXIO;
 }
 
 
 /*
  * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
  * requires a large amount of setting up and isn't any more efficient.
  *
  * Builds a TCP RST in a freshly allocated mbuf in answer to the TCP segment
  * described by fin and hands it to fr_send_ip().
  *
  * Returns -1 without sending anything when the offending segment itself
  * carries RST, when fr_checkl4sum() rejects the packet, or when no mbuf
  * (or cluster) can be obtained; otherwise returns whatever fr_send_ip()
  * returns.
  */
 int fr_send_reset(fin)
 fr_info_t *fin;
 {
 	struct tcphdr *tcp, *tcp2;
 	int tlen = 0, hlen;
 	struct mbuf *m;
 #ifdef USE_INET6
 	ip6_t *ip6;
 #endif
 	ip_t *ip;
 
 	tcp = fin->fin_dp;
 	if (tcp->th_flags & TH_RST)
 		return -1;		/* feedback loop */
 
 	if (fr_checkl4sum(fin) == -1)
 		return -1;
 
 	/*
 	 * Sequence space consumed by the offending segment: payload bytes
 	 * (data length minus the TCP header) plus one each for SYN and FIN.
 	 */
 	tlen = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
 			((tcp->th_flags & TH_SYN) ? 1 : 0) +
 			((tcp->th_flags & TH_FIN) ? 1 : 0);
 
 #ifdef USE_INET6
 	hlen = (fin->fin_v == 6) ? sizeof(ip6_t) : sizeof(ip_t);
 #else
 	hlen = sizeof(ip_t);
 #endif
 #ifdef MGETHDR
 	MGETHDR(m, M_DONTWAIT, MT_HEADER);
 #else
 	MGET(m, M_DONTWAIT, MT_HEADER);
 #endif
 	if (m == NULL)
 		return -1;
 	/* Attach a cluster if the two headers do not fit in a plain mbuf. */
 	if (sizeof(*tcp2) + hlen > MLEN) {
 		MCLGET(m, M_DONTWAIT);
 		if ((m->m_flags & M_EXT) == 0) {
 			FREE_MB_T(m);
 			return -1;
 		}
 	}
 
 	m->m_len = sizeof(*tcp2) + hlen;
 #if (BSD >= 199103)
 	/* Leave room in front of the packet for a link-level header. */
 	m->m_data += max_linkhdr;
 	m->m_pkthdr.len = m->m_len;
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 #endif
 	ip = mtod(m, struct ip *);
 	bzero((char *)ip, hlen);
 #ifdef USE_INET6
 	ip6 = (ip6_t *)ip;
 #endif
 	/* The reply travels in the opposite direction: swap the ports. */
 	tcp2 = (struct tcphdr *)((char *)ip + hlen);
 	tcp2->th_sport = tcp->th_dport;
 	tcp2->th_dport = tcp->th_sport;
 
 	/*
 	 * If the segment carried an ACK, the reset takes its sequence number
 	 * from that acknowledgement; otherwise it uses sequence 0 and
 	 * acknowledges everything the segment occupied (seq + tlen).
 	 */
 	if (tcp->th_flags & TH_ACK) {
 		tcp2->th_seq = tcp->th_ack;
 		tcp2->th_flags = TH_RST;
 		tcp2->th_ack = 0;
 	} else {
 		tcp2->th_seq = 0;
 		tcp2->th_ack = ntohl(tcp->th_seq);
 		tcp2->th_ack += tlen;
 		tcp2->th_ack = htonl(tcp2->th_ack);
 		tcp2->th_flags = TH_RST|TH_ACK;
 	}
 	TCP_X2_A(tcp2, 0);
 	TCP_OFF_A(tcp2, sizeof(*tcp2) >> 2);
 	tcp2->th_win = tcp->th_win;
 	tcp2->th_sum = 0;
 	tcp2->th_urp = 0;
 
 #ifdef USE_INET6
 	if (fin->fin_v == 6) {
 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
 		ip6->ip6_plen = htons(sizeof(struct tcphdr));
 		ip6->ip6_nxt = IPPROTO_TCP;
 		ip6->ip6_hlim = 0;
 		ip6->ip6_src = fin->fin_dst6;
 		ip6->ip6_dst = fin->fin_src6;
 		tcp2->th_sum = in6_cksum(m, IPPROTO_TCP,
 					 sizeof(*ip6), sizeof(*tcp2));
 		return fr_send_ip(fin, m, &m);
 	}
 #endif
 	/*
 	 * IPv4: the checksum is taken over the mostly-zero IP header (only
 	 * protocol, TCP length and addresses filled in) plus the TCP header.
 	 * NOTE(review): this appears to use the IP header as a stand-in
 	 * pseudo-header - confirm.  ip_len is then replaced by the full
 	 * datagram length, left unconverted (no htons) here.
 	 */
 	ip->ip_p = IPPROTO_TCP;
 	ip->ip_len = htons(sizeof(struct tcphdr));
 	ip->ip_src.s_addr = fin->fin_daddr;
 	ip->ip_dst.s_addr = fin->fin_saddr;
 	tcp2->th_sum = in_cksum(m, hlen + sizeof(*tcp2));
 	ip->ip_len = hlen + sizeof(*tcp2);
 	return fr_send_ip(fin, m, &m);
 }
 
 
 /*
  * fr_send_ip - finish the IP header of a locally generated reply and send it.
  *
  * fin describes the packet being answered, m holds the reply (its IP header
  * starts at the mbuf data pointer) and mpp points at the caller's mbuf
  * pointer.  The version-specific header fields are filled in, a private
  * fr_info_t is built for the reply and the packet is handed to
  * fr_fastroute(), whose return value is returned.
  *
  * Returns EINVAL if fin->fin_v is not a supported IP version; in that case
  * the reply mbuf is freed here, because none of the callers release it
  * after this function returns.
  */
 static int fr_send_ip(fin, m, mpp)
 fr_info_t *fin;
 mb_t *m, **mpp;
 {
 	fr_info_t fnew;
 	ip_t *ip, *oip;
 	int hlen;
 
 	ip = mtod(m, ip_t *);
 	bzero((char *)&fnew, sizeof(fnew));
 
 	IP_V_A(ip, fin->fin_v);
 	switch (fin->fin_v)
 	{
 	case 4 :
 		fnew.fin_v = 4;
 		oip = fin->fin_ip;
 		IP_HL_A(ip, sizeof(*oip) >> 2);
 		/* TOS and IP id are copied from the packet being answered. */
 		ip->ip_tos = oip->ip_tos;
 		ip->ip_id = fin->fin_ip->ip_id;
 #if (__FreeBSD_version > 460000)
 		ip->ip_off = path_mtu_discovery ? IP_DF : 0;
 #else
 		ip->ip_off = 0;
 #endif
 		ip->ip_ttl = ip_defttl;
 		ip->ip_sum = 0;
 		hlen = sizeof(*oip);
 		break;
 #ifdef USE_INET6
 	case 6 :
 	{
 		ip6_t *ip6 = (ip6_t *)ip;
 
 		ip6->ip6_vfc = 0x60;
 		ip6->ip6_hlim = IPDEFTTL;
 
 		fnew.fin_v = 6;
 		hlen = sizeof(*ip6);
 		break;
 	}
 #endif
 	default :
 		/* Unsupported version: release the reply instead of leaking it. */
 		FREE_MB_T(m);
 		return EINVAL;
 	}
 #ifdef IPSEC
 	m->m_pkthdr.rcvif = NULL;
 #endif
 
 	/* FI_NOCKSUM: fr_checkv4sum() returns early for such packets. */
 	fnew.fin_ifp = fin->fin_ifp;
 	fnew.fin_flx = FI_NOCKSUM;
 	fnew.fin_m = m;
 	fnew.fin_ip = ip;
 	fnew.fin_mp = mpp;
 	fnew.fin_hlen = hlen;
 	fnew.fin_dp = (char *)ip + hlen;
 	(void) fr_makefrip(hlen, ip, &fnew);
 
 	return fr_fastroute(m, mpp, &fnew, NULL);
 }
 
 
 /*
  * fr_send_icmp_err - answer the packet described by fin with an ICMP
  * (or, for IPv6, ICMPv6) error message.
  *
  * type - ICMP (v4) type of the error; translated through icmptoicmp6types[]
  *        for IPv6 packets.  The code is taken from fin->fin_icode.
  * fin  - packet information for the offending packet.
  * dst  - 0: use an address of the receiving interface (fr_ifpaddr) as the
  *        source of the error; non-zero: use the offending packet's
  *        destination address.
  *
  * Returns -1 on invalid type/code, checksum failure, allocation failure,
  * missing interface address or unknown IP version; 0 when the offending
  * packet is itself an ICMP message that must not be answered; otherwise
  * the value returned by fr_send_ip().
  */
 int fr_send_icmp_err(type, fin, dst)
 int type;
 fr_info_t *fin;
 int dst;
 {
 	int err, hlen, xtra, iclen, ohlen, avail, code;
 	struct in_addr dst4;
 	struct icmp *icmp;
 	struct mbuf *m;
 	void *ifp;
 #ifdef USE_INET6
 	ip6_t *ip6;
 	struct in6_addr dst6;
 #endif
 	ip_t *ip, *ip2;
 
 	if ((type < 0) || (type >= ICMP_MAXTYPE))
 		return -1;
 
 	code = fin->fin_icode;
 #ifdef USE_INET6
 	/*
 	 * code is used below as an index into icmptoicmp6unreach[], so it
 	 * must be strictly less than the number of elements.
 	 */
 	if ((code < 0) || (code >= sizeof(icmptoicmp6unreach)/sizeof(int)))
 		return -1;
 #endif
 
 	if (fr_checkl4sum(fin) == -1)
 		return -1;
 #ifdef MGETHDR
 	MGETHDR(m, M_DONTWAIT, MT_HEADER);
 #else
 	MGET(m, M_DONTWAIT, MT_HEADER);
 #endif
 	if (m == NULL)
 		return -1;
 	avail = MHLEN;
 
 	xtra = 0;
 	hlen = 0;
 	ohlen = 0;
 	ifp = fin->fin_ifp;
 	if (fin->fin_v == 4) {
 		/*
 		 * Only ICMP queries may be answered with an error; any other
 		 * ICMP message is silently ignored.
 		 */
 		if ((fin->fin_p == IPPROTO_ICMP) &&
 		    !(fin->fin_flx & FI_SHORT))
 			switch (ntohs(fin->fin_data[0]) >> 8)
 			{
 			case ICMP_ECHO :
 			case ICMP_TSTAMP :
 			case ICMP_IREQ :
 			case ICMP_MASKREQ :
 				break;
 			default :
 				FREE_MB_T(m);
 				return 0;
 			}
 
 		if (dst == 0) {
 			if (fr_ifpaddr(4, FRI_NORMAL, ifp,
 				       &dst4, NULL) == -1) {
 				FREE_MB_T(m);
 				return -1;
 			}
 		} else
 			dst4.s_addr = fin->fin_daddr;
 
 		/* Quote the original header plus at most 8 bytes of payload. */
 		hlen = sizeof(ip_t);
 		ohlen = fin->fin_hlen;
 		if (fin->fin_hlen < fin->fin_plen)
 			xtra = MIN(fin->fin_dlen, 8);
 		else
 			xtra = 0;
 	}
 
 #ifdef USE_INET6
 	else if (fin->fin_v == 6) {
 		hlen = sizeof(ip6_t);
 		ohlen = sizeof(ip6_t);
 		type = icmptoicmp6types[type];
 		if (type == ICMP6_DST_UNREACH)
 			code = icmptoicmp6unreach[code];
 
 		/* Quote as much of the packet as fits; use a cluster if needed. */
 		if (hlen + sizeof(*icmp) + max_linkhdr +
 		    fin->fin_plen > avail) {
 			MCLGET(m, M_DONTWAIT);
 			if ((m->m_flags & M_EXT) == 0) {
 				FREE_MB_T(m);
 				return -1;
 			}
 			avail = MCLBYTES;
 		}
 		xtra = MIN(fin->fin_plen,
 			   avail - hlen - sizeof(*icmp) - max_linkhdr);
 		if (dst == 0) {
 			if (fr_ifpaddr(6, FRI_NORMAL, ifp,
 				       (struct in_addr *)&dst6, NULL) == -1) {
 				FREE_MB_T(m);
 				return -1;
 			}
 		} else
 			dst6 = fin->fin_dst6;
 	}
 #endif
 	else {
 		FREE_MB_T(m);
 		return -1;
 	}
 
 	/* Clamp the quoted data to the space left after all headers. */
 	iclen = hlen + sizeof(*icmp);
 	avail -= (max_linkhdr + iclen);
 	if (avail < 0) {
 		FREE_MB_T(m);
 		return -1;
 	}
 	if (xtra > avail)
 		xtra = avail;
 	iclen += xtra;
 	m->m_data += max_linkhdr;
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 	m->m_pkthdr.len = iclen;
 	m->m_len = iclen;
 	ip = mtod(m, ip_t *);
 	icmp = (struct icmp *)((char *)ip + hlen);
 	ip2 = (ip_t *)&icmp->icmp_ip;
 
 	/*
 	 * NOTE(review): the ICMPv6-translated value in `code' is never
 	 * written to the packet; fin->fin_icode is used as is - confirm
 	 * whether that is intended for IPv6.
 	 */
 	icmp->icmp_type = type;
 	icmp->icmp_code = fin->fin_icode;
 	icmp->icmp_cksum = 0;
 #ifdef icmp_nextmtu
 	if (type == ICMP_UNREACH &&
 	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG && ifp)
 		icmp->icmp_nextmtu = htons(((struct ifnet *)ifp)->if_mtu);
 #endif
 
 	bcopy((char *)fin->fin_ip, (char *)ip2, ohlen);
 
 #ifdef USE_INET6
 	ip6 = (ip6_t *)ip;
 	if (fin->fin_v == 6) {
 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
 		ip6->ip6_plen = htons(iclen - hlen);
 		ip6->ip6_nxt = IPPROTO_ICMPV6;
 		ip6->ip6_hlim = 0;
 		ip6->ip6_src = dst6;
 		ip6->ip6_dst = fin->fin_src6;
 		if (xtra > 0)
 			bcopy((char *)fin->fin_ip + ohlen,
 			      (char *)&icmp->icmp_ip + ohlen, xtra);
 		icmp->icmp_cksum = in6_cksum(m, IPPROTO_ICMPV6,
 					     sizeof(*ip6), iclen - hlen);
 	} else
 #endif
 	{
 		/* The quoted header goes out with length/offset byte-swapped. */
 		ip2->ip_len = htons(ip2->ip_len);
 		ip2->ip_off = htons(ip2->ip_off);
 		ip->ip_p = IPPROTO_ICMP;
 		ip->ip_src.s_addr = dst4.s_addr;
 		ip->ip_dst.s_addr = fin->fin_saddr;
 
 		if (xtra > 0)
 			bcopy((char *)fin->fin_ip + ohlen,
 			      (char *)&icmp->icmp_ip + ohlen, xtra);
 		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
 					     sizeof(*icmp) + 8);
 		ip->ip_len = iclen;
 		ip->ip_p = IPPROTO_ICMP;
 	}
 	err = fr_send_ip(fin, m, &m);
 	return err;
 }
 
 
 /*
  * iplinit - initialisation hook, compiled only when IP Filter is not built
  * as a loadable module (IPFILTER_LKM undefined) and __FreeBSD_version is
  * below 300000.  Attaches the filter via ipfattach(), printing a message if
  * that fails, and then calls ip_init().  The declared return type is int
  * when BSD < 199306 and void otherwise; the body contains no return
  * statement in either case.
  */
 #if !defined(IPFILTER_LKM) && (__FreeBSD_version < 300000)
 # if	(BSD < 199306)
 int iplinit __P((void));
 
 int
 # else
 void iplinit __P((void));
 
 void
 # endif
 iplinit()
 {
 	if (ipfattach() != 0)
 		printf("IP Filter failed to attach\n");
 	ip_init();
 }
 #endif /* __FreeBSD_version < 300000 */
 
 
 /*
  * fr_fastroute - route and transmit a packet directly from the filter.
  *
  * m0 - pointer to mbuf where the IP packet starts
  * mpp - pointer to the mbuf pointer that is the start of the mbuf chain
  * fin - packet information for the packet being sent
  * fdp - optional destination (interface and/or next-hop address); when NULL
  *       the interface recorded in fin is used
  *
  * For IPv4 the function always returns 0 and sets *mpp to NULL; the outcome
  * is only reflected in the fr_frouteok[] counters ([0] success, [1]
  * failure).  For IPv6 (USE_INET6) the packet is handed to ip6_output() and
  * that function's result is returned directly.
  */
 int fr_fastroute(m0, mpp, fin, fdp)
 mb_t *m0, **mpp;
 fr_info_t *fin;
 frdest_t *fdp;
 {
 	register struct ip *ip, *mhip;
 	register struct mbuf *m = *mpp;
 	register struct route *ro;
 	int len, off, error = 0, hlen, code;
 	struct ifnet *ifp, *sifp;
 	struct sockaddr_in *dst;
 	struct route iproute;
 	u_short ip_off;
 	frentry_t *fr;
 
 	ro = NULL;
 
 #ifdef M_WRITABLE
 	/*
 	* HOT FIX/KLUDGE:
 	*
 	* If the mbuf we're about to send is not writable (because of
 	* a cluster reference, for example) we'll need to make a copy
 	* of it since this routine modifies the contents.
 	*
 	* If you have non-crappy network hardware that can transmit data
 	* from the mbuf, rather than making a copy, this is gonna be a
 	* problem.
 	*/
 	if (M_WRITABLE(m) == 0) {
 		m0 = m_dup(m, M_DONTWAIT);
 		if (m0 != 0) {
 			FREE_MB_T(m);
 			m = m0;
 			*mpp = m;
 		} else {
 			error = ENOBUFS;
 			FREE_MB_T(m);
 			goto done;
 		}
 	}
 #endif
 
 #ifdef USE_INET6
 	if (fin->fin_v == 6) {
 		/*
 		 * currently "to <if>" and "to <if>:ip#" are not supported
 		 * for IPv6
 		 */
 #if  (__FreeBSD_version >= 490000)
 		return ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
 #else
 		return ip6_output(m0, NULL, NULL, 0, NULL, NULL);
 #endif
 	}
 #endif
 
 	hlen = fin->fin_hlen;
 	ip = mtod(m0, struct ip *);
 
 	/*
 	 * Route packet.
 	 */
 	ro = &iproute;
 	bzero((caddr_t)ro, sizeof (*ro));
 	dst = (struct sockaddr_in *)&ro->ro_dst;
 	dst->sin_family = AF_INET;
 	dst->sin_addr = ip->ip_dst;
 
 	fr = fin->fin_fr;
 	if (fdp != NULL)
 		ifp = fdp->fd_ifp;
 	else
 		ifp = fin->fin_ifp;
 
 	/* Without an interface, only a "fastroute" rule may proceed. */
 	if ((ifp == NULL) && (!fr || !(fr->fr_flags & FR_FASTROUTE))) {
 		error = -2;
 		goto bad;
 	}
 
 	/* An explicit next-hop address in fdp overrides the packet's own. */
 	if ((fdp != NULL) && (fdp->fd_ip.s_addr != 0))
 		dst->sin_addr = fdp->fd_ip;
 
 	dst->sin_len = sizeof(*dst);
-	rtalloc(ro);
+	in_rtalloc(ro, 0);
 
 	if ((ifp == NULL) && (ro->ro_rt != NULL))
 		ifp = ro->ro_rt->rt_ifp;
 
 	if ((ro->ro_rt == NULL) || (ifp == NULL)) {
 		if (in_localaddr(ip->ip_dst))
 			error = EHOSTUNREACH;
 		else
 			error = ENETUNREACH;
 		goto bad;
 	}
 	/* For routes via a gateway, address the link layer to the gateway. */
 	if (ro->ro_rt->rt_flags & RTF_GATEWAY)
 		dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
 	if (ro->ro_rt)
 		ro->ro_rt->rt_use++;
 
 	/*
 	 * For input packets which are being "fastrouted", they won't
 	 * go back through output filtering and miss their chance to get
 	 * NAT'd and counted.  Duplicated packets aren't considered to be
 	 * part of the normal packet stream, so do not NAT them or pass
 	 * them through stateful checking, etc.
 	 */
 	/*
 	 * NOTE(review): fr may be NULL here (it is tested for NULL just
 	 * below), in which case &fr->fr_dif is formed from a NULL pointer.
 	 */
 	if ((fdp != &fr->fr_dif) && (fin->fin_out == 0)) {
 		/* Temporarily present the packet as outbound on ifp. */
 		sifp = fin->fin_ifp;
 		fin->fin_ifp = ifp;
 		fin->fin_out = 1;
 		(void) fr_acctpkt(fin, NULL);
 		fin->fin_fr = NULL;
 		if (!fr || !(fr->fr_flags & FR_RETMASK)) {
 			u_32_t pass;
 
 			if (fr_checkstate(fin, &pass) != NULL)
 				fr_statederef((ipstate_t **)&fin->fin_state);
 		}
 
 		switch (fr_checknatout(fin, NULL))
 		{
 		case 0 :
 			break;
 		case 1 :
 			fr_natderef((nat_t **)&fin->fin_nat);
 			ip->ip_sum = 0;
 			break;
 		case -1 :
 			error = -1;
 			goto bad;
 			break;
 		}
 
 		fin->fin_ifp = sifp;
 		fin->fin_out = 0;
 	} else
 		ip->ip_sum = 0;
 	/*
 	 * If small enough for interface, can just send directly.
 	 */
 	if (ip->ip_len <= ifp->if_mtu) {
 		ip->ip_len = htons(ip->ip_len);
 		ip->ip_off = htons(ip->ip_off);
 
 		/* Recompute the header checksum only if it was cleared above. */
 		if (!ip->ip_sum)
 			ip->ip_sum = in_cksum(m, hlen);
 		error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst,
 					  ro->ro_rt);
 		goto done;
 	}
 	/*
 	 * Too large for interface; fragment if possible.
 	 * Must be able to put at least 8 bytes per fragment.
 	 */
 	ip_off = ntohs(ip->ip_off);
 	if (ip_off & IP_DF) {
 		error = EMSGSIZE;
 		goto bad;
 	}
 	/* Payload bytes per fragment, rounded down to a multiple of 8. */
 	len = (ifp->if_mtu - hlen) &~ 7;
 	if (len < 8) {
 		error = EMSGSIZE;
 		goto bad;
 	}
 
     {
 	int mhlen, firstlen = len;
 	struct mbuf **mnext = &m->m_act;
 
 	/*
 	 * Loop through length of segment after first fragment,
 	 * make new header and copy data of each part and link onto chain.
 	 */
 	m0 = m;
 	mhlen = sizeof (struct ip);
 	for (off = hlen + len; off < ip->ip_len; off += len) {
 #ifdef MGETHDR
 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
 #else
 		MGET(m, M_DONTWAIT, MT_HEADER);
 #endif
 		if (m == 0) {
 			m = m0;
 			error = ENOBUFS;
 			goto bad;
 		}
 		m->m_data += max_linkhdr;
 		mhip = mtod(m, struct ip *);
 		bcopy((char *)ip, (char *)mhip, sizeof(*ip));
 		if (hlen > sizeof (struct ip)) {
 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
 			IP_HL_A(mhip, mhlen >> 2);
 		}
 		m->m_len = mhlen;
 		mhip->ip_off = ((off - hlen) >> 3) + ip_off;
 		/* The last fragment is shortened; all others carry IP_MF. */
 		if (off + len >= ip->ip_len)
 			len = ip->ip_len - off;
 		else
 			mhip->ip_off |= IP_MF;
 		mhip->ip_len = htons((u_short)(len + mhlen));
 		*mnext = m;
 		m->m_next = m_copy(m0, off, len);
 		if (m->m_next == 0) {
 			error = ENOBUFS;	/* ??? */
 			goto sendorfree;
 		}
 		m->m_pkthdr.len = mhlen + len;
 		m->m_pkthdr.rcvif = NULL;
 		mhip->ip_off = htons((u_short)mhip->ip_off);
 		mhip->ip_sum = 0;
 		mhip->ip_sum = in_cksum(m, mhlen);
 		mnext = &m->m_act;
 	}
 	/*
 	 * Update first fragment by trimming what's been copied out
 	 * and updating header, then send each fragment (in order).
 	 */
 	m_adj(m0, hlen + firstlen - ip->ip_len);
 	ip->ip_len = htons((u_short)(hlen + firstlen));
 	ip->ip_off = htons((u_short)IP_MF);
 	ip->ip_sum = 0;
 	ip->ip_sum = in_cksum(m0, hlen);
 sendorfree:
 	/*
 	 * Walk the m_act chain: transmit each fragment until the first
 	 * error, after which the remaining fragments are freed.
 	 */
 	for (m = m0; m; m = m0) {
 		m0 = m->m_act;
 		m->m_act = 0;
 		if (error == 0)
 			error = (*ifp->if_output)(ifp, m,
 			    (struct sockaddr *)dst, ro->ro_rt);
 		else
 			FREE_MB_T(m);
 	}
     }	
 done:
 	if (!error)
 		fr_frouteok[0]++;
 	else
 		fr_frouteok[1]++;
 
 	/* Drop the route reference taken by the lookup above. */
 	if ((ro != NULL) && (ro->ro_rt != NULL)) {
 		RTFREE(ro->ro_rt);
 	}
 	*mpp = NULL;
 	return 0;
 bad:
 	/*
 	 * A packet that needed fragmenting but could not be: report it with
 	 * an ICMP "need fragmentation" error, borrowing fin's interface and
 	 * code fields for the call and restoring them afterwards.
 	 */
 	if (error == EMSGSIZE) {
 		sifp = fin->fin_ifp;
 		code = fin->fin_icode;
 		fin->fin_icode = ICMP_UNREACH_NEEDFRAG;
 		fin->fin_ifp = ifp;
 		(void) fr_send_icmp_err(ICMP_UNREACH, fin, 1);
 		fin->fin_ifp = sifp;
 		fin->fin_icode = code;
 	}
 	FREE_MB_T(m);
 	goto done;
 }
 
 
 /*
  * fr_verifysrc - reverse-path check on the source address of a packet.
  *
  * Looks up a route back to the packet's source address and returns 1 when
  * that route uses the interface the packet is associated with
  * (fin->fin_ifp), 0 when it uses another interface or when no route exists.
  * The route reference obtained by the lookup is released before returning.
  */
 int fr_verifysrc(fin)
 fr_info_t *fin;
 {
 	struct sockaddr_in *dst;
 	struct route iproute;
 	int match;
 
 	bzero((char *)&iproute, sizeof(iproute));
 	dst = (struct sockaddr_in *)&iproute.ro_dst;
 	dst->sin_len = sizeof(*dst);
 	dst->sin_family = AF_INET;
 	dst->sin_addr = fin->fin_src;
-	rtalloc(&iproute);
+	in_rtalloc(&iproute, 0);
 	if (iproute.ro_rt == NULL)
 		return 0;
 	match = (fin->fin_ifp == iproute.ro_rt->rt_ifp);
 	/* Give back the reference taken by the lookup, as fr_fastroute does. */
 	RTFREE(iproute.ro_rt);
 	return match;
 }
 
 
 /*
  * return the first IP Address associated with an interface
  *
  * v       - IP version wanted (4 or 6)
  * atype   - FRI_BROADCAST or FRI_PEERADDR select the broadcast or
  *           destination address of the matching entry; any other value
  *           selects the entry's own address
  * ifptr   - the interface (struct ifnet *); NULL and (void *)-1 are rejected
  * inp     - where the address is stored (zeroed first)
  * inpmask - passed through to the fill routine together with the netmask
  *
  * For IPv6 the first address that is neither link-local nor loopback is
  * used.  Returns -1 when the interface is invalid, has no addresses, or has
  * no suitable address; otherwise the result of fr_ifpfillv4addr() or
  * fr_ifpfillv6addr().
  */
 int fr_ifpaddr(v, atype, ifptr, inp, inpmask)
 int v, atype;
 void *ifptr;
 struct in_addr *inp, *inpmask;
 {
 #ifdef USE_INET6
 	struct in6_addr *inp6 = NULL;
 #endif
 	struct sockaddr *sock, *mask;
 	struct sockaddr_in *sin;
 	struct ifaddr *ifa;
 	struct ifnet *ifp;
 
 	if ((ifptr == NULL) || (ifptr == (void *)-1))
 		return -1;
 
 	sin = NULL;
 	ifp = ifptr;
 
 	if (v == 4)
 		inp->s_addr = 0;
 #ifdef USE_INET6
 	else if (v == 6)
 		bzero((char *)inp, sizeof(struct in6_addr));
 #endif
 #if  (__FreeBSD_version >= 300000)
 	ifa = TAILQ_FIRST(&ifp->if_addrhead);
 #else
 	ifa = ifp->if_addrlist;
 #endif /* __FreeBSD_version >= 300000 */
 
 	/* An interface with an empty address list has nothing to offer. */
 	sock = (ifa != NULL) ? ifa->ifa_addr : NULL;
 	while (sock != NULL && ifa != NULL) {
 		sin = (struct sockaddr_in *)sock;
 		if ((v == 4) && (sin->sin_family == AF_INET))
 			break;
 #ifdef USE_INET6
 		if ((v == 6) && (sin->sin_family == AF_INET6)) {
 			inp6 = &((struct sockaddr_in6 *)sin)->sin6_addr;
 			if (!IN6_IS_ADDR_LINKLOCAL(inp6) &&
 			    !IN6_IS_ADDR_LOOPBACK(inp6))
 				break;
 		}
 #endif
 #if (__FreeBSD_version >= 300000)
 		ifa = TAILQ_NEXT(ifa, ifa_link);
 #else
 		ifa = ifa->ifa_next;
 #endif /* __FreeBSD_version >= 300000 */
 		if (ifa != NULL)
 			sock = ifa->ifa_addr;
 	}
 
 	/* ifa is NULL when the list was exhausted without a match. */
 	if (ifa == NULL || sin == NULL)
 		return -1;
 
 	mask = ifa->ifa_netmask;
 	if (atype == FRI_BROADCAST)
 		sock = ifa->ifa_broadaddr;
 	else if (atype == FRI_PEERADDR)
 		sock = ifa->ifa_dstaddr;
 
 	if (sock == NULL)
 		return -1;
 
 #ifdef USE_INET6
 	if (v == 6) {
 		return fr_ifpfillv6addr(atype, (struct sockaddr_in6 *)sock,
 					(struct sockaddr_in6 *)mask,
 					inp, inpmask);
 	}
 #endif
 	return fr_ifpfillv4addr(atype, (struct sockaddr_in *)sock,
 				(struct sockaddr_in *)mask, inp, inpmask);
 }
 
 
 /*
  * fr_newisn - produce a new initial sequence number for the connection
  * described by fin.
  *
  * With __FreeBSD_version >= 400000 the value comes straight from
  * arc4random().  Otherwise it is the first bytes of an MD5 hash over the
  * packet's source, destination, fin_dat and ipf_iss_secret, plus a counter
  * that advances by 0x00010000 on every call.
  */
 u_32_t fr_newisn(fin)
 fr_info_t *fin;
 {
 	u_32_t newiss;
 #if  (__FreeBSD_version >= 400000)
 	newiss = arc4random();
 #else
 	/*
 	 * Explicitly unsigned: the counter is meant to wrap around, which a
 	 * signed (implicit int) object cannot do without overflowing.
 	 */
 	static u_32_t iss_seq_off = 0;
 	u_char hash[16];
 	MD5_CTX ctx;
 
 	/*
 	 * Compute the base value of the ISS.  It is a hash
 	 * of (saddr, sport, daddr, dport, secret).
 	 */
 	MD5Init(&ctx);
 
 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
 		  sizeof(fin->fin_fi.fi_src));
 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
 		  sizeof(fin->fin_fi.fi_dst));
 	MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
 
 	MD5Update(&ctx, ipf_iss_secret, sizeof(ipf_iss_secret));
 
 	MD5Final(hash, &ctx);
 
 	memcpy(&newiss, hash, sizeof(newiss));
 
 	/*
 	 * Now increment our "timer", and add it in to
 	 * the computed value.
 	 *
 	 * XXX Use `addin'?
 	 * XXX TCP_ISSINCR too large to use?
 	 */
 	iss_seq_off += 0x00010000;
 	newiss += iss_seq_off;
 #endif
 	return newiss;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    fr_nextipid                                                 */
 /* Returns:     u_short - the IP ID value to use                            */
 /* Parameters:  fin(I) - pointer to packet information                      */
 /*                                                                          */
 /* Returns the next IPv4 ID to use for this packet.  Unless RANDOM_IP_ID is */
 /* defined, IDs come from a private counter that is advanced while holding  */
 /* ipf_rw; with RANDOM_IP_ID they are obtained from ip_randomid().          */
 /* ------------------------------------------------------------------------ */
 u_short fr_nextipid(fin)
 fr_info_t *fin;
 {
 	u_short id;
 #ifndef	RANDOM_IP_ID
 	static u_short ipid = 0;
 
 	MUTEX_ENTER(&ipf_rw);
 	id = ipid;
 	ipid++;
 	MUTEX_EXIT(&ipf_rw);
 #else
 
 	id = ip_randomid();
 #endif
 
 	return id;
 }
 
 
 /*
  * fr_checkv4sum - validate the layer 4 checksum of an IPv4 packet and set
  * FI_BAD in fin->fin_flx when it is wrong.
  *
  * When the mbuf layer offers CSUM_DATA_VALID, a checksum already computed
  * for the mbuf (csum_flags/csum_data) is used if present and the verdict
  * is cached in fin->fin_cksum (1 good, -1 bad).  Otherwise - and always
  * when CSUM_DATA_VALID is not available - the packet is checked with
  * fr_checkl4sum(), but only if IPFILTER_CKSUM is defined.
  */
 INLINE void fr_checkv4sum(fin)
 fr_info_t *fin;
 {
 #ifdef CSUM_DATA_VALID
 	int manual = 0;
 	u_short sum;
 	ip_t *ip;
 	mb_t *m;
 
 	/* Packets flagged FI_NOCKSUM are never verified. */
 	if ((fin->fin_flx & FI_NOCKSUM) != 0)
 		return;
 
 	/* A non-zero fin_cksum means a verdict was already recorded. */
 	if (fin->fin_cksum != 0)
 		return;
 
 	m = fin->fin_m;
 	if (m == NULL) {
 		manual = 1;
 		goto skipauto;
 	}
 	ip = fin->fin_ip;
 
 	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 		/*
 		 * csum_data either already includes the pseudo header or
 		 * has to be combined with it here.
 		 */
 		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 			sum = m->m_pkthdr.csum_data;
 		else
 			sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 					htonl(m->m_pkthdr.csum_data +
 					fin->fin_ip->ip_len + fin->fin_p));
 		/* A correct checksum yields 0xffff, i.e. 0 after the XOR. */
 		sum ^= 0xffff;
 		if (sum != 0) {
 			fin->fin_flx |= FI_BAD;
 			fin->fin_cksum = -1;
 		} else {
 			fin->fin_cksum = 1;
 		}
 	} else
 		manual = 1;
 skipauto:
 # ifdef IPFILTER_CKSUM
 	if (manual != 0)
 		if (fr_checkl4sum(fin) == -1)
 			fin->fin_flx |= FI_BAD;
 # else
 	/* Empty statement so that the skipauto label has something to mark. */
 	;
 # endif
 #else
 # ifdef IPFILTER_CKSUM
 	if (fr_checkl4sum(fin) == -1)
 		fin->fin_flx |= FI_BAD;
 # endif
 #endif
 }
 
 
 #ifdef USE_INET6
 /*
  * fr_checkv6sum - IPv6 counterpart of fr_checkv4sum.  When IPFILTER_CKSUM
  * is defined the packet is checked with fr_checkl4sum() and FI_BAD is set
  * in fin->fin_flx on failure; without IPFILTER_CKSUM this does nothing.
  */
 INLINE void fr_checkv6sum(fin)
 fr_info_t *fin;
 {
 # ifdef IPFILTER_CKSUM
 	int failed = (fr_checkl4sum(fin) == -1);
 
 	if (failed)
 		fin->fin_flx |= FI_BAD;
 # endif
 }
 #endif /* USE_INET6 */
 
 
 /*
  * mbufchainlen - number of data bytes in the mbuf chain starting at m0.
  * If the first mbuf carries a packet header (M_PKTHDR) the length recorded
  * there is returned; otherwise the m_len of every mbuf reachable through
  * m_next is summed.
  */
 size_t mbufchainlen(m0)
 struct mbuf *m0;
 {
 	struct mbuf *cur;
 	size_t total;
 
 	if ((m0->m_flags & M_PKTHDR) != 0)
 		return m0->m_pkthdr.len;
 
 	total = 0;
 	for (cur = m0; cur != NULL; cur = cur->m_next)
 		total += cur->m_len;
 	return total;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    fr_pullup                                                   */
 /* Returns:     NULL == pullup failed, else pointer to protocol header      */
 /* Parameters:  m(I)   - pointer to buffer where data packet starts         */
 /*              fin(I) - pointer to packet information                      */
 /*              len(I) - number of bytes to pullup                          */
 /*                                                                          */
 /* Attempt to move at least len bytes (from the start of the buffer) into a */
 /* single buffer for ease of access.  Operating system native functions are */
 /* used to manage buffers - if necessary.  If the entire packet ends up in  */
 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
 /* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
 /* and ONLY if the pullup succeeds.                                         */
 /*                                                                          */
 /* We assume that 'min' is a pointer to a buffer that is part of the chain  */
 /* of buffers that starts at *fin->fin_mp.                                  */
 /*                                                                          */
 /* On failure *fin->fin_mp and fin->fin_m are both set to NULL.  The first  */
 /* parameter is named 'min' in the definition ('m' above is the local copy).*/
 /* ------------------------------------------------------------------------ */
 void *fr_pullup(min, fin, len)
 mb_t *min;
 fr_info_t *fin;
 int len;
 {
 	int out = fin->fin_out, dpoff, ipoff;
 	mb_t *m = min;
 	char *ip;
 
 	if (m == NULL)
 		return NULL;
 
 	/* Nothing to do if the packet is already flagged as coalesced. */
 	ip = (char *)fin->fin_ip;
 	if ((fin->fin_flx & FI_COALESCE) != 0)
 		return ip;
 
 	/*
 	 * Remember where the headers sit as offsets, since the buffers may
 	 * be replaced below.
 	 */
 	ipoff = fin->fin_ipoff;
 	if (fin->fin_dp != NULL)
 		dpoff = (char *)fin->fin_dp - (char *)ip;
 	else
 		dpoff = 0;
 
 	if (M_LEN(m) < len) {
 #ifdef MHLEN
 		/*
 		 * Assume that M_PKTHDR is set and just work with what is left
 		 * rather than check..
 		 * Should not make any real difference, anyway.
 		 */
 		if (len > MHLEN)
 #else
 		if (len > MLEN)
 #endif
 		{
 			/*
 			 * Too much for m_pullup(): try m_pulldown() where
 			 * available, otherwise give up and free the packet.
 			 */
 #ifdef HAVE_M_PULLDOWN
 			if (m_pulldown(m, 0, len, NULL) == NULL)
 				m = NULL;
 #else
 			FREE_MB_T(*fin->fin_mp);
 			m = NULL;
 #endif
 		} else
 		{
 			m = m_pullup(m, len);
 		}
 		*fin->fin_mp = m;
 		if (m == NULL) {
 			fin->fin_m = NULL;
 			ATOMIC_INCL(frstats[out].fr_pull[1]);
 			return NULL;
 		}
 
 		/*
 		 * Skip leading empty buffers.
 		 * NOTE(review): no NULL check on m->m_next here - assumes a
 		 * non-empty buffer always follows; confirm.
 		 */
 		while (M_LEN(m) == 0) {
 			m = m->m_next;
 		}
 		fin->fin_m = m;
 		ip = MTOD(m, char *) + ipoff;
 	}
 
 	ATOMIC_INCL(frstats[out].fr_pull[0]);
 	fin->fin_ip = (ip_t *)ip;
 	if (fin->fin_dp != NULL)
 		fin->fin_dp = (char *)fin->fin_ip + dpoff;
 
 	if (len == fin->fin_plen)
 		fin->fin_flx |= FI_COALESCE;
 	return ip;
 }
 
 
 /*
  * ipf_inject - put the packet in m back into the IP stack.
  *
  * Inbound packets (fin->fin_out == 0) are passed to netisr_dispatch() on
  * __FreeBSD_version >= 501000, or queued on ipintrq on older systems, where
  * a full queue causes the packet to be dropped and ENOBUFS returned.
  * Outbound packets have ip_len and ip_off converted with ntohs() and are
  * sent through ip_output(), whose result is returned.
  */
 int ipf_inject(fin, m)
 fr_info_t *fin;
 mb_t *m;
 {
 	int error = 0;
 
 	if (fin->fin_out == 0) {
 #if (__FreeBSD_version >= 501000)
 		netisr_dispatch(NETISR_IP, m);
 #else
 		struct ifqueue *ifq;
 
 		ifq = &ipintrq;
 
 # ifdef _IF_QFULL
 		if (_IF_QFULL(ifq))
 # else
 		if (IF_QFULL(ifq))
 # endif
 		{
 # ifdef _IF_DROP
 			_IF_DROP(ifq);
 # else
 			IF_DROP(ifq);
 # endif
 			FREE_MB_T(m);
 			error = ENOBUFS;
 		} else {
 			IF_ENQUEUE(ifq, m);
 		}
 #endif
 	} else {
 		fin->fin_ip->ip_len = ntohs(fin->fin_ip->ip_len);
 		fin->fin_ip->ip_off = ntohs(fin->fin_ip->ip_off);
 #if (__FreeBSD_version >= 470102)
 		error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
 #else
 		error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL);
 #endif
 	}
 
 	return error;
 }
 
 /*
  * ipf_pfil_unhook - remove the IP Filter hooks from the pfil framework.
  * Only does anything when NETBSD_PF is defined; the form of the
  * pfil_remove_hook() call depends on __FreeBSD_version, and the IPv6 hook
  * is removed as well when USE_INET6 is defined.  Always returns 0.
  */
 int ipf_pfil_unhook(void) {
 #if defined(NETBSD_PF) && (__FreeBSD_version >= 500011)
 # if __FreeBSD_version >= 501108
 	struct pfil_head *ph_inet;
 #  ifdef USE_INET6
 	struct pfil_head *ph_inet6;
 #  endif
 # endif
 #endif
 
 #ifdef NETBSD_PF
 # if (__FreeBSD_version >= 500011)
 #  if (__FreeBSD_version >= 501108)
 	/* Newer interface: hooks are attached to a per-family pfil head. */
 	ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
 	if (ph_inet != NULL)
 		pfil_remove_hook((void *)fr_check_wrapper, NULL,
 		    PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet);
 #  else
 	pfil_remove_hook((void *)fr_check, PFIL_IN|PFIL_OUT|PFIL_WAITOK,
 	    &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
 #  endif
 # else
 	pfil_remove_hook((void *)fr_check, PFIL_IN|PFIL_OUT|PFIL_WAITOK);
 # endif
 # ifdef USE_INET6
 #  if (__FreeBSD_version >= 501108)
 	ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
 	if (ph_inet6 != NULL)
 		pfil_remove_hook((void *)fr_check_wrapper6, NULL,
 		    PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6);
 #  else
 	pfil_remove_hook((void *)fr_check, PFIL_IN|PFIL_OUT|PFIL_WAITOK,
 				 &inet6sw[ip6_protox[IPPROTO_IPV6]].pr_pfh);
 #  endif
 # endif
 #endif
 
 	return (0);
 }
 
 /*
  * ipf_pfil_hook - register the IP Filter hooks with the pfil framework.
  * Only does anything when NETBSD_PF is defined.  On
  * __FreeBSD_version >= 501108 it returns ENODEV when no pfil head can be
  * found (neither AF_INET nor, with USE_INET6, AF_INET6); in every other
  * case it returns 0.  The return values of pfil_add_hook() are ignored.
  *
  * Note on the conditionals below: the indentation of the directives does
  * not match their nesting.  The first "#  else" pairs with the
  * ">= 501108" test and the second with the ">= 500011" test.
  */
 int ipf_pfil_hook(void) {
 #if defined(NETBSD_PF) && (__FreeBSD_version >= 500011)
 # if __FreeBSD_version >= 501108
 	struct pfil_head *ph_inet;
 #  ifdef USE_INET6
 	struct pfil_head *ph_inet6;
 #  endif
 # endif
 #endif
 
 # ifdef NETBSD_PF
 #  if __FreeBSD_version >= 500011
 #   if __FreeBSD_version >= 501108
 	ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
 #    ifdef USE_INET6
 	ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
 #    endif
 	/* Fail only if none of the requested pfil heads exists. */
 	if (ph_inet == NULL
 #    ifdef USE_INET6
 	    && ph_inet6 == NULL
 #    endif
 	   )
 		return ENODEV;
 
 	if (ph_inet != NULL)
 		pfil_add_hook((void *)fr_check_wrapper, NULL,
 		    PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet);
 #  else
 	pfil_add_hook((void *)fr_check, PFIL_IN|PFIL_OUT|PFIL_WAITOK,
 			      &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
 #  endif
 #  else
 	pfil_add_hook((void *)fr_check, PFIL_IN|PFIL_OUT|PFIL_WAITOK);
 #  endif
 #  ifdef USE_INET6
 #   if __FreeBSD_version >= 501108
 	if (ph_inet6 != NULL)
 		pfil_add_hook((void *)fr_check_wrapper6, NULL,
 				      PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6);
 #   else
 	pfil_add_hook((void *)fr_check, PFIL_IN|PFIL_OUT|PFIL_WAITOK,
 			      &inet6sw[ip6_protox[IPPROTO_IPV6]].pr_pfh);
 #   endif
 #  endif
 # endif
 	return (0);
 }
 
 /*
  * ipf_event_reg - on __FreeBSD_version >= 502103, register ipf_ifevent as
  * the handler for interface arrival, interface departure and interface
  * clone events, keeping the returned tags in ipf_arrivetag, ipf_departtag
  * and ipf_clonetag.  Does nothing on older versions.
  */
 void
 ipf_event_reg(void)
 {
 #if (__FreeBSD_version >= 502103)
 	ipf_arrivetag = EVENTHANDLER_REGISTER(ifnet_arrival_event,
 	    ipf_ifevent, NULL, EVENTHANDLER_PRI_ANY);
 	ipf_departtag = EVENTHANDLER_REGISTER(ifnet_departure_event,
 	    ipf_ifevent, NULL, EVENTHANDLER_PRI_ANY);
 	ipf_clonetag = EVENTHANDLER_REGISTER(if_clone_event,
 	    ipf_ifevent, NULL, EVENTHANDLER_PRI_ANY);
 #endif
 }
 
 /*
  * ipf_event_dereg - on __FreeBSD_version >= 502103, deregister each of the
  * three interface event handlers (arrival, departure, clone) whose tag is
  * not NULL.  The tags themselves are left unchanged.  Does nothing on
  * older versions.
  */
 void
 ipf_event_dereg(void)
 {
 #if (__FreeBSD_version >= 502103)
 	if (ipf_arrivetag != NULL) {
 		EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ipf_arrivetag);
 	}
 	if (ipf_departtag != NULL) {
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ipf_departtag);
 	}
 	if (ipf_clonetag != NULL) {
 		EVENTHANDLER_DEREGISTER(if_clone_event, ipf_clonetag);
 	}
 #endif
 }
Index: head/sys/contrib/pf/net/pf.c
===================================================================
--- head/sys/contrib/pf/net/pf.c	(revision 178887)
+++ head/sys/contrib/pf/net/pf.c	(revision 178888)
@@ -1,7567 +1,7595 @@
 /*	$OpenBSD: pf.c,v 1.527 2007/02/22 15:23:23 pyr Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
  * Copyright (c) 2002,2003 Henning Brauer
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  *    - Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *    - Redistributions in binary form must reproduce the above
  *      copyright notice, this list of conditions and the following
  *      disclaimer in the documentation and/or other materials provided
  *      with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * Effort sponsored in part by the Defense Advanced Research Projects
  * Agency (DARPA) and Air Force Research Laboratory, Air Force
  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
  *
  */
 
 #ifdef __FreeBSD__
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 #endif
 
 #ifdef __FreeBSD__
 #include "opt_mac.h"
 #include "opt_bpf.h"
 #include "opt_pf.h"
 
 #ifdef DEV_BPF
 #define	NBPFILTER	DEV_BPF
 #else
 #define	NBPFILTER	0
 #endif
 
 #ifdef DEV_PFLOG
 #define	NPFLOG		DEV_PFLOG
 #else
 #define	NPFLOG		0
 #endif
 
 #ifdef DEV_PFSYNC
 #define	NPFSYNC		DEV_PFSYNC
 #else
 #define	NPFSYNC		0
 #endif
 
 #else
 #include "bpfilter.h"
 #include "pflog.h"
 #include "pfsync.h"
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/filio.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/kernel.h>
 #include <sys/time.h>
 #ifdef __FreeBSD__
 #include <sys/sysctl.h>
 #include <sys/endian.h>
 #else
 #include <sys/pool.h>
 #endif
 #include <sys/proc.h>
 #ifdef __FreeBSD__
 #include <sys/kthread.h>
 #include <sys/lock.h>
 #include <sys/sx.h>
 #else
 #include <sys/rwlock.h>
 #endif
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/bpf.h>
 #include <net/route.h>
 #ifndef __FreeBSD__
 #include <net/radix_mpath.h>
 #endif
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/udp.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/in_pcb.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/udp_var.h>
 #include <netinet/icmp_var.h>
 #include <netinet/if_ether.h>
 
 #ifndef __FreeBSD__
 #include <dev/rndvar.h>
 #endif
 #include <net/pfvar.h>
 #include <net/if_pflog.h>
 
 #if NPFSYNC > 0
 #include <net/if_pfsync.h>
 #endif /* NPFSYNC > 0 */
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet/in_pcb.h>
 #include <netinet/icmp6.h>
 #include <netinet6/nd6.h>
 #ifdef __FreeBSD__
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_pcb.h>
 #endif
 #endif /* INET6 */
 
 #ifdef __FreeBSD__
 #include <machine/in_cksum.h>
 #include <sys/limits.h>
 #include <sys/ucred.h>
 #include <security/mac/mac_framework.h>
 
 extern int ip_optcopy(struct ip *, struct ip *);
 extern int debug_pfugidhack;
 #endif
 
 /*
  * Debug printf: call printf with the parenthesised argument list x when
  * pf_status.debug is at least n.
  * NOTE(review): this expands to a bare `if' statement, so an `else'
  * written directly after a use of the macro would bind to that `if'.
  */
 #define DPFPRINTF(n, x)	if (pf_status.debug >= (n)) printf x
 
 /*
  * Global variables
  */
 
 struct pf_altqqueue	 pf_altqs[2];
 struct pf_palist	 pf_pabuf;
 struct pf_altqqueue	*pf_altqs_active;
 struct pf_altqqueue	*pf_altqs_inactive;
 struct pf_status	 pf_status;
 
 u_int32_t		 ticket_altqs_active;
 u_int32_t		 ticket_altqs_inactive;
 int			 altqs_inactive_open;
 u_int32_t		 ticket_pabuf;
 
 struct pf_anchor_stackframe {
 	struct pf_ruleset			*rs;
 	struct pf_rule				*r;
 	struct pf_anchor_node			*parent;
 	struct pf_anchor			*child;
 } pf_anchor_stack[64];
 
 #ifdef __FreeBSD__
 uma_zone_t		 pf_src_tree_pl, pf_rule_pl;
 uma_zone_t		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
 #else
 struct pool		 pf_src_tree_pl, pf_rule_pl;
 struct pool		 pf_state_pl, pf_altq_pl, pf_pooladdr_pl;
 #endif
 
 void			 pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
 
 void			 pf_init_threshold(struct pf_threshold *, u_int32_t,
 			    u_int32_t);
 void			 pf_add_threshold(struct pf_threshold *);
 int			 pf_check_threshold(struct pf_threshold *);
 
 void			 pf_change_ap(struct pf_addr *, u_int16_t *,
 			    u_int16_t *, u_int16_t *, struct pf_addr *,
 			    u_int16_t, u_int8_t, sa_family_t);
 int			 pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
 			    struct tcphdr *, struct pf_state_peer *);
 #ifdef INET6
 void			 pf_change_a6(struct pf_addr *, u_int16_t *,
 			    struct pf_addr *, u_int8_t);
 #endif /* INET6 */
 void			 pf_change_icmp(struct pf_addr *, u_int16_t *,
 			    struct pf_addr *, struct pf_addr *, u_int16_t,
 			    u_int16_t *, u_int16_t *, u_int16_t *,
 			    u_int16_t *, u_int8_t, sa_family_t);
 #ifdef __FreeBSD__
 void			 pf_send_tcp(struct mbuf *,
 			    const struct pf_rule *, sa_family_t,
 #else
 void			 pf_send_tcp(const struct pf_rule *, sa_family_t,
 #endif
 			    const struct pf_addr *, const struct pf_addr *,
 			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
 			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
 			    u_int16_t, struct ether_header *, struct ifnet *);
 void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
 			    sa_family_t, struct pf_rule *);
 struct pf_rule		*pf_match_translation(struct pf_pdesc *, struct mbuf *,
 			    int, int, struct pfi_kif *,
 			    struct pf_addr *, u_int16_t, struct pf_addr *,
 			    u_int16_t, int);
 struct pf_rule		*pf_get_translation(struct pf_pdesc *, struct mbuf *,
 			    int, int, struct pfi_kif *, struct pf_src_node **,
 			    struct pf_addr *, u_int16_t,
 			    struct pf_addr *, u_int16_t,
 			    struct pf_addr *, u_int16_t *);
 int			 pf_test_tcp(struct pf_rule **, struct pf_state **,
 			    int, struct pfi_kif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *, struct pf_rule **,
 #ifdef __FreeBSD__
 			    struct pf_ruleset **, struct ifqueue *,
 			    struct inpcb *);
 #else
 			    struct pf_ruleset **, struct ifqueue *);
 #endif
 int			 pf_test_udp(struct pf_rule **, struct pf_state **,
 			    int, struct pfi_kif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *, struct pf_rule **,
 #ifdef __FreeBSD__
 			    struct pf_ruleset **, struct ifqueue *,
 			    struct inpcb *);
 #else
 			    struct pf_ruleset **, struct ifqueue *);
 #endif
 int			 pf_test_icmp(struct pf_rule **, struct pf_state **,
 			    int, struct pfi_kif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *, struct pf_rule **,
 			    struct pf_ruleset **, struct ifqueue *);
 int			 pf_test_other(struct pf_rule **, struct pf_state **,
 			    int, struct pfi_kif *, struct mbuf *, int, void *,
 			    struct pf_pdesc *, struct pf_rule **,
 			    struct pf_ruleset **, struct ifqueue *);
 int			 pf_test_fragment(struct pf_rule **, int,
 			    struct pfi_kif *, struct mbuf *, void *,
 			    struct pf_pdesc *, struct pf_rule **,
 			    struct pf_ruleset **);
 int			 pf_test_state_tcp(struct pf_state **, int,
 			    struct pfi_kif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *, u_short *);
 int			 pf_test_state_udp(struct pf_state **, int,
 			    struct pfi_kif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *);
 int			 pf_test_state_icmp(struct pf_state **, int,
 			    struct pfi_kif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *, u_short *);
 int			 pf_test_state_other(struct pf_state **, int,
 			    struct pfi_kif *, struct pf_pdesc *);
 int			 pf_match_tag(struct mbuf *, struct pf_rule *,
 			     struct pf_mtag *, int *);
 int			 pf_step_out_of_anchor(int *, struct pf_ruleset **,
 			     int, struct pf_rule **, struct pf_rule **,
 			     int *);
 void			 pf_hash(struct pf_addr *, struct pf_addr *,
 			    struct pf_poolhashkey *, sa_family_t);
 int			 pf_map_addr(u_int8_t, struct pf_rule *,
 			    struct pf_addr *, struct pf_addr *,
 			    struct pf_addr *, struct pf_src_node **);
 int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
 			    struct pf_addr *, struct pf_addr *, u_int16_t,
 			    struct pf_addr *, u_int16_t*, u_int16_t, u_int16_t,
 			    struct pf_src_node **);
 void			 pf_route(struct mbuf **, struct pf_rule *, int,
 			    struct ifnet *, struct pf_state *,
 			    struct pf_pdesc *);
 void			 pf_route6(struct mbuf **, struct pf_rule *, int,
 			    struct ifnet *, struct pf_state *,
 			    struct pf_pdesc *);
 #ifdef __FreeBSD__
 /* XXX: import */
 #else
 int			 pf_socket_lookup(int, struct pf_pdesc *);
 #endif
 u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
 			    sa_family_t);
 u_int16_t		 pf_get_mss(struct mbuf *, int, u_int16_t,
 			    sa_family_t);
 u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t,
 				u_int16_t);
 void			 pf_set_rt_ifp(struct pf_state *,
 			    struct pf_addr *);
 int			 pf_check_proto_cksum(struct mbuf *, int, int,
 			    u_int8_t, sa_family_t);
 int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
 			    struct pf_addr_wrap *);
 struct pf_state		*pf_find_state_recurse(struct pfi_kif *,
 			    struct pf_state_cmp *, u_int8_t);
 int			 pf_src_connlimit(struct pf_state **);
 int			 pf_check_congestion(struct ifqueue *);
 
 #ifdef __FreeBSD__
 int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
 
 extern int pf_end_threads;
 
 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX];
 #else
 extern struct pool pfr_ktable_pl;
 extern struct pool pfr_kentry_pl;
 
 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
 	{ &pf_state_pl, PFSTATE_HIWAT },
 	{ &pf_src_tree_pl, PFSNODE_HIWAT },
 	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT },
 	{ &pfr_ktable_pl, PFR_KTABLE_HIWAT },
 	{ &pfr_kentry_pl, PFR_KENTRY_HIWAT }
 };
 #endif
 
 /*
  * Look up the state matching `key' and store it in *state.  The macro
  * takes no arguments and relies on `direction', `state', `kif' and `key'
  * being in scope where it is used.  Packets with direction PF_IN are
  * searched in the PF_EXT_GWY tree, all others in the PF_LAN_EXT tree.
  *
  * The macro can return from the enclosing function:
  *  - PF_DROP when no state is found or the state's timeout is PFTM_PURGE;
  *  - PF_PASS when direction is PF_OUT, the state's rule is either a
  *    PF_ROUTETO rule with direction PF_OUT or a PF_REPLYTO rule with
  *    direction PF_IN, and the state's rt_kif is set and differs from kif.
  */
 #define STATE_LOOKUP()							\
 	do {								\
 		if (direction == PF_IN)					\
 			*state = pf_find_state_recurse(			\
 			    kif, &key, PF_EXT_GWY);			\
 		else							\
 			*state = pf_find_state_recurse(			\
 			    kif, &key, PF_LAN_EXT);			\
 		if (*state == NULL || (*state)->timeout == PFTM_PURGE)	\
 			return (PF_DROP);				\
 		if (direction == PF_OUT &&				\
 		    (((*state)->rule.ptr->rt == PF_ROUTETO &&		\
 		    (*state)->rule.ptr->direction == PF_OUT) ||		\
 		    ((*state)->rule.ptr->rt == PF_REPLYTO &&		\
 		    (*state)->rule.ptr->direction == PF_IN)) &&		\
 		    (*state)->rt_kif != NULL &&				\
 		    (*state)->rt_kif != kif)				\
 			return (PF_PASS);				\
 	} while (0)
 
 /*
  * Non-zero when the lan and gwy endpoints of state `s' differ: the first
  * address word differs, or (for AF_INET6 states) any of the remaining
  * three address words differs, or the ports differ.
  *
  * The whole expansion is parenthesised so that the macro can be combined
  * with surrounding operators (!, &&, ==, ...) without changing meaning.
  * `s' is evaluated several times; do not pass an expression with side
  * effects.
  */
 #define	STATE_TRANSLATE(s) \
 	((s)->lan.addr.addr32[0] != (s)->gwy.addr.addr32[0] || \
 	((s)->af == AF_INET6 && \
 	((s)->lan.addr.addr32[1] != (s)->gwy.addr.addr32[1] || \
 	(s)->lan.addr.addr32[2] != (s)->gwy.addr.addr32[2] || \
 	(s)->lan.addr.addr32[3] != (s)->gwy.addr.addr32[3])) || \
 	(s)->lan.port != (s)->gwy.port)
 
 /*
  * Select the interface a state is attached to: `k' when rule `r' has
  * PFRULE_IFBOUND set, otherwise pfi_all.  The expansion is parenthesised
  * as a whole so it is safe inside larger expressions.
  */
 #define BOUND_IFACE(r, k) \
 	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all)
 
 /*
  * Increment the `states' counter of the rule that state `s' refers to,
  * and of its anchor and NAT rules when those pointers are non-NULL.
  * The argument is parenthesised at every use so that expressions such as
  * *statep can be passed; it is evaluated more than once.
  */
 #define STATE_INC_COUNTERS(s)				\
 	do {						\
 		(s)->rule.ptr->states++;		\
 		if ((s)->anchor.ptr != NULL)		\
 			(s)->anchor.ptr->states++;	\
 		if ((s)->nat_rule.ptr != NULL)		\
 			(s)->nat_rule.ptr->states++;	\
 	} while (0)
 
 /*
  * Reverse of STATE_INC_COUNTERS: decrement the `states' counter of the
  * NAT rule and anchor rule (when non-NULL) and then of the rule itself.
  * The argument is parenthesised at every use and evaluated more than
  * once.
  */
 #define STATE_DEC_COUNTERS(s)				\
 	do {						\
 		if ((s)->nat_rule.ptr != NULL)		\
 			(s)->nat_rule.ptr->states--;	\
 		if ((s)->anchor.ptr != NULL)		\
 			(s)->anchor.ptr->states--;	\
 		(s)->rule.ptr->states--;		\
 	} while (0)
 
 struct pf_src_tree tree_src_tracking;
 
 struct pf_state_tree_id tree_id;
 struct pf_state_queue state_list;
 
 #ifdef __FreeBSD__
 static int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
 static int pf_state_compare_lan_ext(struct pf_state *, struct pf_state *);
 static int pf_state_compare_ext_gwy(struct pf_state *, struct pf_state *);
 static int pf_state_compare_id(struct pf_state *, struct pf_state *);
 #endif
 
 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
 RB_GENERATE(pf_state_tree_lan_ext, pf_state,
     u.s.entry_lan_ext, pf_state_compare_lan_ext);
 RB_GENERATE(pf_state_tree_ext_gwy, pf_state,
     u.s.entry_ext_gwy, pf_state_compare_ext_gwy);
 RB_GENERATE(pf_state_tree_id, pf_state,
     u.s.entry_id, pf_state_compare_id);
 
 /*
  * Ordering function for the pf_src_tree red-black tree.  Nodes are
  * ordered by rule pointer, then by address family, then by address
  * (one 32-bit word for AF_INET; four words, highest index first, for
  * AF_INET6).  Returns a negative, zero or positive value.
  */
 #ifdef __FreeBSD__
 static int
 #else
 static __inline int
 #endif
 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
 {
 	int	delta;
 
 	if (a->rule.ptr != b->rule.ptr)
 		return (a->rule.ptr > b->rule.ptr ? 1 : -1);
 
 	delta = a->af - b->af;
 	if (delta != 0)
 		return (delta);
 
 	switch (a->af) {
 #ifdef INET
 	case AF_INET:
 		if (a->addr.addr32[0] != b->addr.addr32[0])
 			return (a->addr.addr32[0] > b->addr.addr32[0] ?
 			    1 : -1);
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6: {
 		int	w;
 
 		/* Word 3 is the most significant key, word 0 the least. */
 		for (w = 3; w >= 0; w--) {
 			if (a->addr.addr32[w] != b->addr.addr32[w])
 				return (a->addr.addr32[w] >
 				    b->addr.addr32[w] ? 1 : -1);
 		}
 		break;
 	}
 #endif /* INET6 */
 	}
 	return (0);
 }
 
 /*
  * Ordering function for the pf_state_tree_lan_ext red-black tree.
  * States are ordered by protocol, address family, the lan and ext
  * addresses, and finally the lan and ext ports.  For AF_INET6 the
  * address words are compared from index 3 down to 0, lan before ext at
  * each index.
  */
 #ifdef __FreeBSD__
 static int
 #else
 static __inline int
 #endif
 pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b)
 {
 	int	delta;
 
 	delta = a->proto - b->proto;
 	if (delta != 0)
 		return (delta);
 	delta = a->af - b->af;
 	if (delta != 0)
 		return (delta);
 
 	switch (a->af) {
 #ifdef INET
 	case AF_INET:
 		if (a->lan.addr.addr32[0] != b->lan.addr.addr32[0])
 			return (a->lan.addr.addr32[0] >
 			    b->lan.addr.addr32[0] ? 1 : -1);
 		if (a->ext.addr.addr32[0] != b->ext.addr.addr32[0])
 			return (a->ext.addr.addr32[0] >
 			    b->ext.addr.addr32[0] ? 1 : -1);
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6: {
 		int	w;
 
 		for (w = 3; w >= 0; w--) {
 			if (a->lan.addr.addr32[w] != b->lan.addr.addr32[w])
 				return (a->lan.addr.addr32[w] >
 				    b->lan.addr.addr32[w] ? 1 : -1);
 			if (a->ext.addr.addr32[w] != b->ext.addr.addr32[w])
 				return (a->ext.addr.addr32[w] >
 				    b->ext.addr.addr32[w] ? 1 : -1);
 		}
 		break;
 	}
 #endif /* INET6 */
 	}
 
 	delta = a->lan.port - b->lan.port;
 	if (delta != 0)
 		return (delta);
 	delta = a->ext.port - b->ext.port;
 	if (delta != 0)
 		return (delta);
 
 	return (0);
 }
 
 /*
  * Ordering function for the pf_state_tree_ext_gwy red-black tree.
  * States are ordered by protocol, address family, the ext and gwy
  * addresses, and finally the ext and gwy ports.  For AF_INET6 the
  * address words are compared from index 3 down to 0, ext before gwy at
  * each index.
  */
 #ifdef __FreeBSD__
 static int
 #else
 static __inline int
 #endif
 pf_state_compare_ext_gwy(struct pf_state *a, struct pf_state *b)
 {
 	int	delta;
 
 	delta = a->proto - b->proto;
 	if (delta != 0)
 		return (delta);
 	delta = a->af - b->af;
 	if (delta != 0)
 		return (delta);
 
 	switch (a->af) {
 #ifdef INET
 	case AF_INET:
 		if (a->ext.addr.addr32[0] != b->ext.addr.addr32[0])
 			return (a->ext.addr.addr32[0] >
 			    b->ext.addr.addr32[0] ? 1 : -1);
 		if (a->gwy.addr.addr32[0] != b->gwy.addr.addr32[0])
 			return (a->gwy.addr.addr32[0] >
 			    b->gwy.addr.addr32[0] ? 1 : -1);
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6: {
 		int	w;
 
 		for (w = 3; w >= 0; w--) {
 			if (a->ext.addr.addr32[w] != b->ext.addr.addr32[w])
 				return (a->ext.addr.addr32[w] >
 				    b->ext.addr.addr32[w] ? 1 : -1);
 			if (a->gwy.addr.addr32[w] != b->gwy.addr.addr32[w])
 				return (a->gwy.addr.addr32[w] >
 				    b->gwy.addr.addr32[w] ? 1 : -1);
 		}
 		break;
 	}
 #endif /* INET6 */
 	}
 
 	delta = a->ext.port - b->ext.port;
 	if (delta != 0)
 		return (delta);
 	delta = a->gwy.port - b->gwy.port;
 	if (delta != 0)
 		return (delta);
 
 	return (0);
 }
 
 /*
  * Ordering function for the pf_state_tree_id red-black tree: states are
  * ordered by id, then by creatorid.  Returns -1, 0 or 1.
  */
 #ifdef __FreeBSD__
 static int
 #else
 static __inline int
 #endif
 pf_state_compare_id(struct pf_state *a, struct pf_state *b)
 {
 	if (a->id != b->id)
 		return (a->id > b->id ? 1 : -1);
 	if (a->creatorid != b->creatorid)
 		return (a->creatorid > b->creatorid ? 1 : -1);
 
 	return (0);
 }
 
 #ifdef INET6
 /*
  * Copy an address of family `af' from src to dst: one 32-bit word for
  * AF_INET, all four words for AF_INET6.  Any other family leaves dst
  * untouched.
  */
 void
 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
 {
 	int	w;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		dst->addr32[0] = src->addr32[0];
 		break;
 #endif /* INET */
 	case AF_INET6:
 		for (w = 0; w < 4; w++)
 			dst->addr32[w] = src->addr32[w];
 		break;
 	}
 }
 #endif /* INET6 */
 
 /*
  * Find a state in the global id tree using the id/creatorid carried in
  * `key'.  Counts the search in pf_status and returns the matching state
  * or NULL.
  */
 struct pf_state *
 pf_find_state_byid(struct pf_state_cmp *key)
 {
 	struct pf_state	*match;
 
 	pf_status.fcounters[FCNT_STATE_SEARCH]++;
 	match = RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key);
 	return (match);
 }
 
 /*
  * Find a state matching `key' in the tree selected by `tree'
  * (PF_LAN_EXT or PF_EXT_GWY).  The tree of interface `kif' is searched
  * first, then the tree of pfi_all.  Returns the state or NULL; any other
  * `tree' value panics.
  */
 struct pf_state *
 pf_find_state_recurse(struct pfi_kif *kif, struct pf_state_cmp *key, u_int8_t tree)
 {
 	struct pf_state	*found;
 
 	pf_status.fcounters[FCNT_STATE_SEARCH]++;
 
 	switch (tree) {
 	case PF_LAN_EXT:
 		found = RB_FIND(pf_state_tree_lan_ext, &kif->pfik_lan_ext,
 		    (struct pf_state *)key);
 		if (found == NULL)
 			found = RB_FIND(pf_state_tree_lan_ext,
 			    &pfi_all->pfik_lan_ext, (struct pf_state *)key);
 		return (found);
 	case PF_EXT_GWY:
 		found = RB_FIND(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy,
 		    (struct pf_state *)key);
 		if (found == NULL)
 			found = RB_FIND(pf_state_tree_ext_gwy,
 			    &pfi_all->pfik_ext_gwy, (struct pf_state *)key);
 		return (found);
 	default:
 		panic("pf_find_state_recurse");
 	}
 }
 
 /*
  * Search the selected tree (PF_LAN_EXT or PF_EXT_GWY) of every interface
  * on pfi_statehead for a state matching `key'.
  *
  * When `more' is NULL the first match is returned immediately.
  * Otherwise every interface is visited, *more is incremented once per
  * match, and the last match found (or NULL) is returned.  Any other
  * `tree' value panics.
  */
 struct pf_state *
 pf_find_state_all(struct pf_state_cmp *key, u_int8_t tree, int *more)
 {
 	struct pf_state	*hit, *last = NULL;
 	struct pfi_kif	*kif;
 
 	pf_status.fcounters[FCNT_STATE_SEARCH]++;
 
 	switch (tree) {
 	case PF_LAN_EXT:
 		TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
 			hit = RB_FIND(pf_state_tree_lan_ext,
 			    &kif->pfik_lan_ext, (struct pf_state *)key);
 			if (hit != NULL) {
 				if (more == NULL)
 					return (hit);
 				last = hit;
 				(*more)++;
 			}
 		}
 		return (last);
 	case PF_EXT_GWY:
 		TAILQ_FOREACH(kif, &pfi_statehead, pfik_w_states) {
 			hit = RB_FIND(pf_state_tree_ext_gwy,
 			    &kif->pfik_ext_gwy, (struct pf_state *)key);
 			if (hit != NULL) {
 				if (more == NULL)
 					return (hit);
 				last = hit;
 				(*more)++;
 			}
 		}
 		return (last);
 	default:
 		panic("pf_find_state_all");
 	}
 }
 
 /*
  * Initialise a rate threshold: the limit is stored scaled by
  * PF_THRESHOLD_MULT, the counter starts at zero and the last-update time
  * is set to the current time_second.
  */
 void
 pf_init_threshold(struct pf_threshold *threshold,
     u_int32_t limit, u_int32_t seconds)
 {
 	threshold->count = 0;
 	threshold->last = time_second;
 	threshold->seconds = seconds;
 	threshold->limit = limit * PF_THRESHOLD_MULT;
 }
 
 /*
  * Record one event against a rate threshold.  The existing count is
  * first aged: it is reset to zero when at least `seconds' have passed
  * since the last update, otherwise it is reduced in proportion to the
  * elapsed time.  PF_THRESHOLD_MULT is then added for the new event and
  * the last-update time is refreshed.
  */
 void
 pf_add_threshold(struct pf_threshold *threshold)
 {
 	u_int32_t	now = time_second;
 	u_int32_t	elapsed = now - threshold->last;
 
 	if (elapsed < threshold->seconds)
 		threshold->count -= threshold->count * elapsed /
 		    threshold->seconds;
 	else
 		threshold->count = 0;
 
 	threshold->count += PF_THRESHOLD_MULT;
 	threshold->last = now;
 }
 
 /*
  * Return non-zero when the threshold's current count exceeds its limit.
  */
 int
 pf_check_threshold(struct pf_threshold *threshold)
 {
 	return (threshold->limit < threshold->count);
 }
 
 /*
  * Account for a new connection on the source node of *state and enforce
  * the per-source limits of the state's rule.
  *
  * The source node's connection count and connection-rate threshold are
  * updated and the state's src.tcp_est flag is set.  The connection is
  * "bad" when the rule's max_src_conn is set and exceeded, or when the
  * rule has a max_src_conn_rate limit and the rate threshold is exceeded.
  *
  * Returns 0 when no limit is exceeded.  Otherwise, if the rule has an
  * overload table, the source address is added to it and, if the rule's
  * flush option is set, matching states from the same source are marked
  * for purge; finally *state itself is marked for purge and 1 is returned.
  *
  * The caller must ensure (*state)->src_node is non-NULL; it is
  * dereferenced unconditionally.
  */
 int
 pf_src_connlimit(struct pf_state **state)
 {
 	struct pf_state	*s;
 	int bad = 0;
 
 	(*state)->src_node->conn++;
 	(*state)->src.tcp_est = 1;
 	pf_add_threshold(&(*state)->src_node->conn_rate);
 
 	/* Limit on simultaneous connections from this source. */
 	if ((*state)->rule.ptr->max_src_conn &&
 	    (*state)->rule.ptr->max_src_conn <
 	    (*state)->src_node->conn) {
 		pf_status.lcounters[LCNT_SRCCONN]++;
 		bad++;
 	}
 
 	/* Limit on the connection rate from this source. */
 	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
 	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
 		pf_status.lcounters[LCNT_SRCCONNRATE]++;
 		bad++;
 	}
 
 	if (!bad)
 		return (0);
 
 	if ((*state)->rule.ptr->overload_tbl) {
 		struct pfr_addr p;
 		u_int32_t	killed = 0;
 
 		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
 		/*
 		 * The debug message is built in pieces; the terminating
 		 * newline is printed at the end of this block.
 		 */
 		if (pf_status.debug >= PF_DEBUG_MISC) {
 			printf("pf_src_connlimit: blocking address ");
 			pf_print_host(&(*state)->src_node->addr, 0,
 			    (*state)->af);
 		}
 
 		/* Build a host entry (/32 or /128) for the offender. */
 		bzero(&p, sizeof(p));
 		p.pfra_af = (*state)->af;
 		switch ((*state)->af) {
 #ifdef INET
 		case AF_INET:
 			p.pfra_net = 32;
 			p.pfra_ip4addr = (*state)->src_node->addr.v4;
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			p.pfra_net = 128;
 			p.pfra_ip6addr = (*state)->src_node->addr.v6;
 			break;
 #endif /* INET6 */
 		}
 
 		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
 		    &p, time_second);
 
 		/* kill existing states if that's required. */
 		if ((*state)->rule.ptr->flush) {
 			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
 
 			RB_FOREACH(s, pf_state_tree_id, &tree_id) {
 				/*
 				 * Kill states from this source.  (Only those
 				 * from the same rule if PF_FLUSH_GLOBAL is not
 				 * set)
 				 */
 				if (s->af == (*state)->af &&
 				    (((*state)->direction == PF_OUT &&
 				    PF_AEQ(&(*state)->src_node->addr,
 				    &s->lan.addr, s->af)) ||
 				    ((*state)->direction == PF_IN &&
 				    PF_AEQ(&(*state)->src_node->addr,
 				    &s->ext.addr, s->af))) &&
 				    ((*state)->rule.ptr->flush &
 				    PF_FLUSH_GLOBAL ||
 				    (*state)->rule.ptr == s->rule.ptr)) {
 					/* Only marked here, not unlinked. */
 					s->timeout = PFTM_PURGE;
 					s->src.state = s->dst.state =
 					    TCPS_CLOSED;
 					killed++;
 				}
 			}
 			if (pf_status.debug >= PF_DEBUG_MISC)
 				printf(", %u states killed", killed);
 		}
 		if (pf_status.debug >= PF_DEBUG_MISC)
 			printf("\n");
 	}
 
 	/* kill this state */
 	(*state)->timeout = PFTM_PURGE;
 	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
 	return (1);
 }
 
 /*
  * Find or create the source-tracking node for address `src' of family
  * `af' under `rule'.
  *
  * If *sn is NULL on entry the source tree is searched first.  The lookup
  * key uses `rule' as the owning rule when the rule has
  * PFRULE_RULESRCTRACK set or its pool has PF_POOL_STICKYADDR set, and a
  * NULL rule pointer otherwise.
  *
  * If no node exists, a new one is allocated (subject to
  * rule->max_src_nodes), initialised and inserted into the tree.  If a
  * node already exists, it is checked against rule->max_src_states.
  *
  * Returns 0 on success with *sn pointing at the node.  Returns -1 when
  * the node limit is reached, allocation fails, the tree insert collides,
  * or the existing node already has max_src_states states.  When a newly
  * allocated node cannot be inserted it is returned to the pool and *sn
  * is reset to NULL so the caller is never left with a pointer to freed
  * memory.
  */
 int
 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
     struct pf_addr *src, sa_family_t af)
 {
 	struct pf_src_node	k;
 
 	if (*sn == NULL) {
 		/* Only af, addr and rule.ptr of the key are filled in. */
 		k.af = af;
 		PF_ACPY(&k.addr, src, af);
 		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
 		    rule->rpool.opts & PF_POOL_STICKYADDR)
 			k.rule.ptr = rule;
 		else
 			k.rule.ptr = NULL;
 		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
 		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
 	}
 	if (*sn == NULL) {
 		if (!rule->max_src_nodes ||
 		    rule->src_nodes < rule->max_src_nodes)
 			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT);
 		else
 			pf_status.lcounters[LCNT_SRCNODES]++;
 		if ((*sn) == NULL)
 			return (-1);
 		bzero(*sn, sizeof(struct pf_src_node));
 
 		pf_init_threshold(&(*sn)->conn_rate,
 		    rule->max_src_conn_rate.limit,
 		    rule->max_src_conn_rate.seconds);
 
 		(*sn)->af = af;
 		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
 		    rule->rpool.opts & PF_POOL_STICKYADDR)
 			(*sn)->rule.ptr = rule;
 		else
 			(*sn)->rule.ptr = NULL;
 		PF_ACPY(&(*sn)->addr, src, af);
 		if (RB_INSERT(pf_src_tree,
 		    &tree_src_tracking, *sn) != NULL) {
 			if (pf_status.debug >= PF_DEBUG_MISC) {
 				printf("pf: src_tree insert failed: ");
 				pf_print_host(&(*sn)->addr, 0, af);
 				printf("\n");
 			}
 			pool_put(&pf_src_tree_pl, *sn);
 			/* Do not hand a freed node back to the caller. */
 			*sn = NULL;
 			return (-1);
 		}
 		(*sn)->creation = time_second;
 		(*sn)->ruletype = rule->action;
 		if ((*sn)->rule.ptr != NULL)
 			(*sn)->rule.ptr->src_nodes++;
 		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
 		pf_status.src_nodes++;
 	} else {
 		/* Existing node: enforce the per-source state limit. */
 		if (rule->max_src_states &&
 		    (*sn)->states >= rule->max_src_states) {
 			pf_status.lcounters[LCNT_SRCSTATES]++;
 			return (-1);
 		}
 	}
 	return (0);
 }
 
 /*
  * Insert `state' into the lan_ext and ext_gwy trees of interface `kif'
  * and into the global id tree, then append it to state_list.
  *
  * A state whose id and creatorid are both zero is assigned the next
  * local state id and the local host id before the id-tree insert.
  *
  * Returns 0 on success.  Returns -1 when any of the three tree inserts
  * finds a duplicate key; in that case the inserts already done are
  * undone and, at debug level PF_DEBUG_MISC or above, a diagnostic is
  * printed.  On success the state counters are updated, a state reference
  * is taken on kif and, when pfsync is compiled in, the state is passed to
  * pfsync_insert_state().
  */
 int
 pf_insert_state(struct pfi_kif *kif, struct pf_state *state)
 {
 	/* Thou MUST NOT insert multiple duplicate keys */
 	state->u.s.kif = kif;
 	if (RB_INSERT(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state)) {
 		if (pf_status.debug >= PF_DEBUG_MISC) {
 			printf("pf: state insert failed: tree_lan_ext");
 			printf(" lan: ");
 			pf_print_host(&state->lan.addr, state->lan.port,
 			    state->af);
 			printf(" gwy: ");
 			pf_print_host(&state->gwy.addr, state->gwy.port,
 			    state->af);
 			printf(" ext: ");
 			pf_print_host(&state->ext.addr, state->ext.port,
 			    state->af);
 			if (state->sync_flags & PFSTATE_FROMSYNC)
 				printf(" (from sync)");
 			printf("\n");
 		}
 		return (-1);
 	}
 
 	if (RB_INSERT(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state)) {
 		if (pf_status.debug >= PF_DEBUG_MISC) {
 			printf("pf: state insert failed: tree_ext_gwy");
 			printf(" lan: ");
 			pf_print_host(&state->lan.addr, state->lan.port,
 			    state->af);
 			printf(" gwy: ");
 			pf_print_host(&state->gwy.addr, state->gwy.port,
 			    state->af);
 			printf(" ext: ");
 			pf_print_host(&state->ext.addr, state->ext.port,
 			    state->af);
 			if (state->sync_flags & PFSTATE_FROMSYNC)
 				printf(" (from sync)");
 			printf("\n");
 		}
 		/* Roll back the lan_ext insert done above. */
 		RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
 		return (-1);
 	}
 
 	/* No id yet: assign a locally generated one. */
 	if (state->id == 0 && state->creatorid == 0) {
 		state->id = htobe64(pf_status.stateid++);
 		state->creatorid = pf_status.hostid;
 	}
 	if (RB_INSERT(pf_state_tree_id, &tree_id, state) != NULL) {
 		if (pf_status.debug >= PF_DEBUG_MISC) {
 #ifdef __FreeBSD__
 			printf("pf: state insert failed: "
 			    "id: %016llx creatorid: %08x",
 			    (long long)be64toh(state->id),
 			    ntohl(state->creatorid));
 #else
 			printf("pf: state insert failed: "
 			    "id: %016llx creatorid: %08x",
 			    betoh64(state->id), ntohl(state->creatorid));
 #endif
 			if (state->sync_flags & PFSTATE_FROMSYNC)
 				printf(" (from sync)");
 			printf("\n");
 		}
 		/* Roll back both per-interface inserts. */
 		RB_REMOVE(pf_state_tree_lan_ext, &kif->pfik_lan_ext, state);
 		RB_REMOVE(pf_state_tree_ext_gwy, &kif->pfik_ext_gwy, state);
 		return (-1);
 	}
 	TAILQ_INSERT_TAIL(&state_list, state, u.s.entry_list);
 	pf_status.fcounters[FCNT_STATE_INSERT]++;
 	pf_status.states++;
 	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
 #if NPFSYNC
 	pfsync_insert_state(state);
 #endif
 	return (0);
 }
 
 /*
  * Body of the purge kernel thread.  Loops forever, sleeping for up to
  * one second (1 * hz ticks) per iteration.  Each pass purges a fraction
  * of the state table; expired fragments and source nodes are purged once
  * every PFTM_INTERVAL passes.
  *
  * On FreeBSD each pass runs with pf_consistency_lock held shared and the
  * PF lock held.  When pf_end_threads is set the thread purges
  * everything, increments pf_end_threads, wakes up sleepers on
  * pf_purge_thread and exits.
  *
  * The argument `v' is unused.
  */
 void
 pf_purge_thread(void *v)
 {
 	int nloops = 0, s;
 
 	for (;;) {
 		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
 
 #ifdef __FreeBSD__
 		sx_slock(&pf_consistency_lock);
 		PF_LOCK();
 
 		if (pf_end_threads) {
 			/* Shutdown requested: purge everything and exit. */
 			pf_purge_expired_states(pf_status.states);
 			pf_purge_expired_fragments();
 			pf_purge_expired_src_nodes(0);
 			/*
 			 * presumably tells the thread waiting for shutdown
 			 * that the purge is done -- confirm against the
 			 * code that sets pf_end_threads.
 			 */
 			pf_end_threads++;
 
 			/*
 			 * NOTE(review): the two locks are released in the
 			 * opposite order from the normal path at the
 			 * bottom of the loop.
 			 */
 			sx_sunlock(&pf_consistency_lock);
 			PF_UNLOCK();
 			wakeup(pf_purge_thread);
 			kproc_exit(0);
 		}
 #endif
 		s = splsoftnet();
 
 		/* process a fraction of the state table every second */
 		pf_purge_expired_states(1 + (pf_status.states
 		    / pf_default_rule.timeout[PFTM_INTERVAL]));
 
 		/* purge other expired types every PFTM_INTERVAL seconds */
 		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
 			pf_purge_expired_fragments();
 			pf_purge_expired_src_nodes(0);
 			nloops = 0;
 		}
 
 		splx(s);
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 		sx_sunlock(&pf_consistency_lock);
 #endif
 	}
 }
 
 u_int32_t
 pf_state_expires(const struct pf_state *state)
 {
 	u_int32_t	timeout;
 	u_int32_t	start;
 	u_int32_t	end;
 	u_int32_t	states;
 
 	/* handle all PFTM_* > PFTM_MAX here */
 	if (state->timeout == PFTM_PURGE)
 		return (time_second);
 	if (state->timeout == PFTM_UNTIL_PACKET)
 		return (0);
 #ifdef __FreeBSD__	
 	KASSERT(state->timeout != PFTM_UNLINKED,
 	    ("pf_state_expires: timeout == PFTM_UNLINKED"));
 	KASSERT((state->timeout < PFTM_MAX), 
 	    ("pf_state_expires: timeout > PFTM_MAX"));
 #else
 	KASSERT(state->timeout != PFTM_UNLINKED);
 	KASSERT(state->timeout < PFTM_MAX);
 #endif
 	timeout = state->rule.ptr->timeout[state->timeout];
 	if (!timeout)
 		timeout = pf_default_rule.timeout[state->timeout];
 	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
 	if (start) {
 		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
 		states = state->rule.ptr->states;
 	} else {
 		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
 		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
 		states = pf_status.states;
 	}
 	if (end && states > start && start < end) {
 		if (states < end)
 			return (state->expire + timeout * (end - states) /
 			    (end - start));
 		else
 			return (time_second);
 	}
 	return (state->expire + timeout);
 }
 
 /*
  * Remove every source node that has no states left and whose expiry
  * time has passed.
  *
  * `waslocked' is non-zero when the caller already holds
  * pf_consistency_lock for writing.  Otherwise the write lock is taken
  * the first time a node has to be removed and given back (downgraded on
  * FreeBSD, released elsewhere) before returning.  On FreeBSD, when the
  * upgrade cannot be done in place, both the PF lock and the shared lock
  * are dropped and re-acquired; the successor is therefore re-read after
  * the write lock has been obtained.
  */
 void
 pf_purge_expired_src_nodes(int waslocked)
 {
 	struct pf_src_node	*node, *following;
 	int			 havewrite = waslocked;
 
 	for (node = RB_MIN(pf_src_tree, &tree_src_tracking); node;
 	    node = following) {
 		following = RB_NEXT(pf_src_tree, &tree_src_tracking, node);
 
 		/* Still in use or not yet expired: keep it. */
 		if (node->states > 0 || node->expire > time_second)
 			continue;
 
 		if (!havewrite) {
 #ifdef __FreeBSD__
 			if (!sx_try_upgrade(&pf_consistency_lock)) {
 				PF_UNLOCK();
 				sx_sunlock(&pf_consistency_lock);
 				sx_xlock(&pf_consistency_lock);
 				PF_LOCK();
 			}
 #else
 			rw_enter_write(&pf_consistency_lock);
 #endif
 			following = RB_NEXT(pf_src_tree,
 			    &tree_src_tracking, node);
 			havewrite = 1;
 		}
 
 		if (node->rule.ptr != NULL) {
 			node->rule.ptr->src_nodes--;
 			if (node->rule.ptr->states <= 0 &&
 			    node->rule.ptr->max_src_nodes <= 0)
 				pf_rm_rule(NULL, node->rule.ptr);
 		}
 		RB_REMOVE(pf_src_tree, &tree_src_tracking, node);
 		pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 		pf_status.src_nodes--;
 		pool_put(&pf_src_tree_pl, node);
 	}
 
 	/* Give back the write lock only if it was taken here. */
 	if (havewrite && !waslocked)
 #ifdef __FreeBSD__
 		sx_downgrade(&pf_consistency_lock);
 #else
 		rw_exit_write(&pf_consistency_lock);
 #endif
 }
 
 void
 pf_src_tree_remove_state(struct pf_state *s)
 {
 	u_int32_t timeout;
 
 	if (s->src_node != NULL) {
 		if (s->proto == IPPROTO_TCP) {
 			if (s->src.tcp_est)
 				--s->src_node->conn;
 		}
 		if (--s->src_node->states <= 0) {
 			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
 			if (!timeout)
 				timeout =
 				    pf_default_rule.timeout[PFTM_SRC_NODE];
 			s->src_node->expire = time_second + timeout;
 		}
 	}
 	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
 		if (--s->nat_src_node->states <= 0) {
 			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
 			if (!timeout)
 				timeout =
 				    pf_default_rule.timeout[PFTM_SRC_NODE];
 			s->nat_src_node->expire = time_second + timeout;
 		}
 	}
 	s->src_node = s->nat_src_node = NULL;
 }
 
 /*
  * Unlink state `cur' from the three state trees and from its source
  * nodes, and mark it PFTM_UNLINKED.  The state is not removed from
  * state_list and its memory is not released here.
  *
  * On FreeBSD the PFSTATE_EXPIRING flag makes the function a no-op when
  * it is called again for a state that is already being unlinked.
  *
  * If the source peer is in PF_TCPS_PROXY_DST, a TCP RST|ACK is sent
  * from the ext endpoint to the lan endpoint first.  When pfsync is
  * compiled in, deletion of locally created states is passed to
  * pfsync_delete_state().
  */
 /* callers should be at splsoftnet */
 void
 pf_unlink_state(struct pf_state *cur)
 {
 #ifdef __FreeBSD__
 	if (cur->local_flags & PFSTATE_EXPIRING)
 		return;
 	cur->local_flags |= PFSTATE_EXPIRING;
 #endif
 	if (cur->src.state == PF_TCPS_PROXY_DST) {
 		/* Only the leading arguments differ between the two builds. */
 #ifdef __FreeBSD__
 		pf_send_tcp(NULL, cur->rule.ptr, cur->af,
 #else
 		pf_send_tcp(cur->rule.ptr, cur->af,
 #endif
 		    &cur->ext.addr, &cur->lan.addr,
 		    cur->ext.port, cur->lan.port,
 		    cur->src.seqhi, cur->src.seqlo + 1,
 		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
 	}
 	RB_REMOVE(pf_state_tree_ext_gwy,
 	    &cur->u.s.kif->pfik_ext_gwy, cur);
 	RB_REMOVE(pf_state_tree_lan_ext,
 	    &cur->u.s.kif->pfik_lan_ext, cur);
 	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
 #if NPFSYNC
 	if (cur->creatorid == pf_status.hostid)
 		pfsync_delete_state(cur);
 #endif
 	cur->timeout = PFTM_UNLINKED;
 	pf_src_tree_remove_state(cur);
 }
 
 /*
  * Release an already-unlinked state: drop its rule, interface and tag
  * references, take it off state_list and return it to pf_state_pl.
  *
  * callers should be at splsoftnet and hold the
  * write_lock on pf_consistency_lock
  */
 void
 pf_free_state(struct pf_state *cur)
 {
 #if NPFSYNC
 	/*
 	 * The state is left alone while pfsync references it as the next
 	 * bulk-send entry or as the bulk terminator.
 	 */
 	if (pfsyncif != NULL &&
 	    (pfsyncif->sc_bulk_send_next == cur ||
 	    pfsyncif->sc_bulk_terminator == cur))
 		return;
 #endif
 #ifdef __FreeBSD__
 	KASSERT(cur->timeout == PFTM_UNLINKED,
 	    ("pf_free_state: cur->timeout != PFTM_UNLINKED"));
 #else
 	KASSERT(cur->timeout == PFTM_UNLINKED);
 #endif
 	/*
 	 * Drop the per-rule state counts.  pf_rm_rule() is called for a rule
 	 * once its state count (and, for rule/nat_rule, its src_nodes count)
 	 * is no longer positive.
 	 */
 	if (--cur->rule.ptr->states <= 0 &&
 	    cur->rule.ptr->src_nodes <= 0)
 		pf_rm_rule(NULL, cur->rule.ptr);
 	if (cur->nat_rule.ptr != NULL)
 		if (--cur->nat_rule.ptr->states <= 0 &&
 			cur->nat_rule.ptr->src_nodes <= 0)
 			pf_rm_rule(NULL, cur->nat_rule.ptr);
 	if (cur->anchor.ptr != NULL)
 		if (--cur->anchor.ptr->states <= 0)
 			pf_rm_rule(NULL, cur->anchor.ptr);
 	pf_normalize_tcp_cleanup(cur);
 	pfi_kif_unref(cur->u.s.kif, PFI_KIF_REF_STATE);
 	TAILQ_REMOVE(&state_list, cur, u.s.entry_list);
 	if (cur->tag)
 		pf_tag_unref(cur->tag);
 	pool_put(&pf_state_pl, cur);
 	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
 	pf_status.states--;
 }
 
 /*
  * Examine up to maxcheck entries of state_list, freeing states that are
  * already unlinked and unlinking-then-freeing states whose expiry time has
  * passed.  The scan position is kept in a static cursor, so successive
  * calls continue where the previous one stopped and wrap at the list end.
  *
  * pf_consistency_lock is taken for writing (upgraded on FreeBSD) the first
  * time a state has to be freed, and is downgraded/released before return.
  */
 void
 pf_purge_expired_states(u_int32_t maxcheck)
 {
 	static struct pf_state	*cur = NULL;
 	struct pf_state		*next;
 	int 			 locked = 0;
 
 	while (maxcheck--) {
 		/* wrap to start of list when we hit the end */
 		if (cur == NULL) {
 			cur = TAILQ_FIRST(&state_list);
 			if (cur == NULL)
 				break;	/* list empty */
 		}
 
 		/* get next state, as cur may get deleted */
 		next = TAILQ_NEXT(cur, u.s.entry_list);
 
 		if (cur->timeout == PFTM_UNLINKED) {
 			/* free unlinked state */
 			if (! locked) {
 #ifdef __FreeBSD__
 				 /*
 				  * If the in-place upgrade fails, drop the PF
 				  * lock and the shared lock, take the exclusive
 				  * lock, then retake the PF lock.
 				  */
 				 if (!sx_try_upgrade(&pf_consistency_lock)) {
 					 PF_UNLOCK();
 					 sx_sunlock(&pf_consistency_lock);
 					 sx_xlock(&pf_consistency_lock);
 					 PF_LOCK();
 				 }
 #else
 				rw_enter_write(&pf_consistency_lock);
 #endif
 				locked = 1;
 			}
 			pf_free_state(cur);
 		} else if (pf_state_expires(cur) <= time_second) {
 			/* unlink and free expired state */
 			pf_unlink_state(cur);
 			if (! locked) {
 #ifdef __FreeBSD__
 				 /* Same upgrade sequence as in the branch above. */
 				 if (!sx_try_upgrade(&pf_consistency_lock)) {
 					 PF_UNLOCK();
 					 sx_sunlock(&pf_consistency_lock);
 					 sx_xlock(&pf_consistency_lock);
 					 PF_LOCK();
 				 }
 #else
 				rw_enter_write(&pf_consistency_lock);
 #endif
 				locked = 1;
 			}
 			pf_free_state(cur);
 		}
 		cur = next;
 	}
 
 	/* Give up the write lock only if this call acquired it. */
 	if (locked)
 #ifdef __FreeBSD__
 		sx_downgrade(&pf_consistency_lock);
 #else
 		rw_exit_write(&pf_consistency_lock);
 #endif
 }
 
 /*
  * For a PF_ADDR_TABLE address wrapper, attach the table named in
  * aw->v.tblname within ruleset rs and store it in aw->p.tbl.
  * Returns 1 if the attach fails, 0 otherwise (including for wrappers of
  * any other type, which are left untouched).
  */
 int
 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
 {
 	if (aw->type == PF_ADDR_TABLE) {
 		aw->p.tbl = pfr_attach_table(rs, aw->v.tblname);
 		if (aw->p.tbl == NULL)
 			return (1);
 	}
 	return (0);
 }
 
 /*
  * Detach the table referenced by a PF_ADDR_TABLE wrapper, if any, and
  * clear the wrapper's table pointer.  Other wrapper types are ignored.
  */
 void
 pf_tbladdr_remove(struct pf_addr_wrap *aw)
 {
 	if (aw->type == PF_ADDR_TABLE && aw->p.tbl != NULL) {
 		pfr_detach_table(aw->p.tbl);
 		aw->p.tbl = NULL;
 	}
 }
 
 /*
  * Replace the table pointer of a PF_ADDR_TABLE wrapper with a count:
  * aw->p.tbl is cleared and aw->p.tblcnt is set to the table's pfrkt_cnt,
  * or to -1 when the table is not active.  An inactive table with a root
  * table is represented by that root.
  */
 void
 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
 {
 	struct pfr_ktable *tbl = aw->p.tbl;
 
 	if (aw->type != PF_ADDR_TABLE)
 		return;
 	if (tbl == NULL)
 		return;
 	if ((tbl->pfrkt_flags & PFR_TFLAG_ACTIVE) == 0 &&
 	    tbl->pfrkt_root != NULL)
 		tbl = tbl->pfrkt_root;
 	/* Clear the pointer first, then store the count. */
 	aw->p.tbl = NULL;
 	if (tbl->pfrkt_flags & PFR_TFLAG_ACTIVE)
 		aw->p.tblcnt = tbl->pfrkt_cnt;
 	else
 		aw->p.tblcnt = -1;
 }
 
 /*
  * Print address addr of family af with printf(), followed by port p when p
  * is non-zero.  p is taken in network byte order and converted with ntohs()
  * before printing.  IPv4 is printed as dotted quad with ":port"; IPv6 is
  * printed as hex groups with "[port]".  Other families print nothing.
  */
 void
 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
 {
 	switch (af) {
 #ifdef INET
 	case AF_INET: {
 		u_int32_t a = ntohl(addr->addr32[0]);
 		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
 		    (a>>8)&255, a&255);
 		if (p) {
 			p = ntohs(p);
 			printf(":%u", p);
 		}
 		break;
 	}
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6: {
 		u_int16_t b;
 		u_int8_t i, curstart = 255, curend = 0,
 		    maxstart = 0, maxend = 0;
 		/*
 		 * First pass: track runs of zero 16-bit groups.  curstart is
 		 * 255 while no run is open; maxstart/maxend remember the
 		 * widest run recorded so far.
 		 * NOTE(review): "if (curstart)" is also true for the 255
 		 * sentinel and false for a run starting at index 0, and a run
 		 * reaching the last group is never recorded -- confirm
 		 * whether that is intended.
 		 */
 		for (i = 0; i < 8; i++) {
 			if (!addr->addr16[i]) {
 				if (curstart == 255)
 					curstart = i;
 				else
 					curend = i;
 			} else {
 				if (curstart) {
 					if ((curend - curstart) >
 					    (maxend - maxstart)) {
 						maxstart = curstart;
 						maxend = curend;
 						curstart = 255;
 					}
 				}
 			}
 		}
 		/*
 		 * Second pass: groups inside [maxstart, maxend] are replaced
 		 * by a single ":"; all other groups are printed in hex.
 		 */
 		for (i = 0; i < 8; i++) {
 			if (i >= maxstart && i <= maxend) {
 				if (maxend != 7) {
 					if (i == maxstart)
 						printf(":");
 				} else {
 					if (i == maxend)
 						printf(":");
 				}
 			} else {
 				b = ntohs(addr->addr16[i]);
 				printf("%x", b);
 				if (i < 7)
 					printf(":");
 			}
 		}
 		if (p) {
 			p = ntohs(p);
 			printf("[%u]", p);
 		}
 		break;
 	}
 #endif /* INET6 */
 	}
 }
 
 /*
  * Print a one-line description of state s with printf(): the protocol,
  * the lan, gwy and ext hosts, the sequence-tracking values of both peers
  * and finally the two peer state numbers.  The wscale value of a peer is
  * printed only when both peers have a non-zero wscale field.
  */
 void
 pf_print_state(struct pf_state *s)
 {
 	int	both_wscale;
 
 	if (s->proto == IPPROTO_TCP)
 		printf("TCP ");
 	else if (s->proto == IPPROTO_UDP)
 		printf("UDP ");
 	else if (s->proto == IPPROTO_ICMP)
 		printf("ICMP ");
 	else if (s->proto == IPPROTO_ICMPV6)
 		printf("ICMPV6 ");
 	else
 		printf("%u ", s->proto);
 
 	pf_print_host(&s->lan.addr, s->lan.port, s->af);
 	printf(" ");
 	pf_print_host(&s->gwy.addr, s->gwy.port, s->af);
 	printf(" ");
 	pf_print_host(&s->ext.addr, s->ext.port, s->af);
 
 	both_wscale = (s->src.wscale && s->dst.wscale);
 
 	/* source peer */
 	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
 	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
 	if (both_wscale)
 		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
 	printf("]");
 
 	/* destination peer */
 	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
 	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
 	if (both_wscale)
 		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
 	printf("]");
 
 	printf(" %u:%u", s->src.state, s->dst.state);
 }
 
 /*
  * Print the TCP flag bits set in f as a string of letters (F, S, R, P, A,
  * U, E, W for FIN, SYN, RST, PUSH, ACK, URG, ECE, CWR), preceded by one
  * space.  Nothing is printed when f is zero.
  */
 void
 pf_print_flags(u_int8_t f)
 {
 	if (f != 0) {
 		printf(" ");
 	}
 	if ((f & TH_FIN) != 0) {
 		printf("F");
 	}
 	if ((f & TH_SYN) != 0) {
 		printf("S");
 	}
 	if ((f & TH_RST) != 0) {
 		printf("R");
 	}
 	if ((f & TH_PUSH) != 0) {
 		printf("P");
 	}
 	if ((f & TH_ACK) != 0) {
 		printf("A");
 	}
 	if ((f & TH_URG) != 0) {
 		printf("U");
 	}
 	if ((f & TH_ECE) != 0) {
 		printf("E");
 	}
 	if ((f & TH_CWR) != 0) {
 		printf("W");
 	}
 }
 
 /*
  * For skip criterion i, point skip[i] of every rule from head[i] up to
  * (but not including) cur at cur, leaving head[i] equal to cur.
  * Expects the locals "head" and "cur" of the expanding function.
  */
 #define	PF_SET_SKIP_STEPS(i)					\
 	do {							\
 		while (head[i] != cur) {			\
 			head[i]->skip[i].ptr = cur;		\
 			head[i] = TAILQ_NEXT(head[i], entries);	\
 		}						\
 	} while (0)
 
 /*
  * Compute the skip pointers for every rule in the queue "rules".
  *
  * For each of the PF_SKIP_COUNT criteria, head[i] marks the first rule of
  * the current run of rules that agree on that criterion.  When a rule
  * differs from its predecessor in a criterion, every rule of the finished
  * run gets skip[i] set to the differing rule.  After the walk, remaining
  * runs are closed with cur == NULL, so their skip pointers become NULL.
  */
 void
 pf_calc_skip_steps(struct pf_rulequeue *rules)
 {
 	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
 	int i;
 
 	cur = TAILQ_FIRST(rules);
 	prev = cur;
 	for (i = 0; i < PF_SKIP_COUNT; ++i)
 		head[i] = cur;
 	while (cur != NULL) {
 
 		/* interface (and its negation flag) */
 		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
 			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
 		if (cur->direction != prev->direction)
 			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
 		if (cur->af != prev->af)
 			PF_SET_SKIP_STEPS(PF_SKIP_AF);
 		if (cur->proto != prev->proto)
 			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
 		/* source address (negation flag or address wrapper differs) */
 		if (cur->src.neg != prev->src.neg ||
 		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
 			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
 		/* source port range or operator */
 		if (cur->src.port[0] != prev->src.port[0] ||
 		    cur->src.port[1] != prev->src.port[1] ||
 		    cur->src.port_op != prev->src.port_op)
 			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
 		/* destination address */
 		if (cur->dst.neg != prev->dst.neg ||
 		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
 			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
 		/* destination port range or operator */
 		if (cur->dst.port[0] != prev->dst.port[0] ||
 		    cur->dst.port[1] != prev->dst.port[1] ||
 		    cur->dst.port_op != prev->dst.port_op)
 			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
 
 		prev = cur;
 		cur = TAILQ_NEXT(cur, entries);
 	}
 	/* cur is NULL here: terminate every still-open run. */
 	for (i = 0; i < PF_SKIP_COUNT; ++i)
 		PF_SET_SKIP_STEPS(i);
 }
 
 /*
  * Compare two address wrappers.  Returns 0 when they are of the same type
  * and describe the same address (same addr/mask, same dynamic-interface
  * table, same table or same route label), non-zero otherwise.  Wrappers
  * of type PF_ADDR_NOROUTE or PF_ADDR_URPFFAILED always compare equal to
  * each other.  An unknown type is reported with printf() and treated as
  * different.
  */
 int
 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
 {
 	int	differ;
 
 	if (aw1->type != aw2->type)
 		return (1);
 
 	switch (aw1->type) {
 	case PF_ADDR_ADDRMASK:
 		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
 			differ = 1;
 		else if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
 			differ = 1;
 		else
 			differ = 0;
 		break;
 	case PF_ADDR_DYNIFTL:
 		differ = (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
 		break;
 	case PF_ADDR_NOROUTE:
 	case PF_ADDR_URPFFAILED:
 		differ = 0;
 		break;
 	case PF_ADDR_TABLE:
 		differ = (aw1->p.tbl != aw2->p.tbl);
 		break;
 	case PF_ADDR_RTLABEL:
 		differ = (aw1->v.rtlabel != aw2->v.rtlabel);
 		break;
 	default:
 		printf("invalid address type: %d\n", aw1->type);
 		differ = 1;
 		break;
 	}
 	return (differ);
 }
 
 /*
  * Incrementally adjust a 16-bit checksum after one 16-bit word of the
  * covered data changed from "old" to "new".
  *
  * When udp is non-zero, a checksum of 0 is passed through unchanged, and a
  * computed result of 0 is returned as 0xFFFF instead.
  */
 u_int16_t
 pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
 {
 	u_int32_t	sum;
 
 	if (udp && cksum == 0)
 		return (0x0000);
 
 	sum = cksum + old - new;
 	/* fold the carry/borrow from the upper half back in, keep 16 bits */
 	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;
 
 	if (udp && sum == 0)
 		return (0xFFFF);
 	return (sum);
 }
 
 /*
  * Rewrite an address/port pair in a packet and fix the checksums.
  *
  *   a, p   address and port to overwrite (in place)
  *   ic     IP header checksum; only updated for AF_INET
  *   pc     protocol checksum, updated for the address and port change
  *   an, pn new address and new port
  *   u      passed to pf_cksum_fixup() as its "udp" argument for *pc
  *   af     address family selecting how many address words are fixed up
  */
 void
 pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
     struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
 {
 	struct pf_addr	ao;
 	u_int16_t	po = *p;
 
 	/* Keep the old address in ao before overwriting it. */
 	PF_ACPY(&ao, a, af);
 	PF_ACPY(a, an, af);
 
 	*p = pn;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
 		    ao.addr16[0], an->addr16[0], 0),
 		    ao.addr16[1], an->addr16[1], 0);
 		/* *p already holds pn from the store above; this repeats it. */
 		*p = pn;
 		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
 		    ao.addr16[0], an->addr16[0], u),
 		    ao.addr16[1], an->addr16[1], u),
 		    po, pn, u);
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		/* Eight 16-bit address words, then the port. */
 		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
 		    ao.addr16[0], an->addr16[0], u),
 		    ao.addr16[1], an->addr16[1], u),
 		    ao.addr16[2], an->addr16[2], u),
 		    ao.addr16[3], an->addr16[3], u),
 		    ao.addr16[4], an->addr16[4], u),
 		    ao.addr16[5], an->addr16[5], u),
 		    ao.addr16[6], an->addr16[6], u),
 		    ao.addr16[7], an->addr16[7], u),
 		    po, pn, u);
 		break;
 #endif /* INET6 */
 	}
 }
 
 
 /*
  * Overwrite the 32-bit value at a with an and adjust checksum *c for the
  * change, one 16-bit half at a time.  a is a void * and is accessed with
  * memcpy() so that it carries no alignment requirement.  u is handed to
  * pf_cksum_fixup() as its "udp" argument.
  */
 void
 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
 {
 	u_int32_t	prev;
 	u_int16_t	sum;
 
 	memcpy(&prev, a, sizeof(prev));
 	memcpy(a, &an, sizeof(an));
 
 	/* upper halves first, then lower halves */
 	sum = pf_cksum_fixup(*c, prev >> 16, an >> 16, u);
 	*c = pf_cksum_fixup(sum, prev & 0xffff, an & 0xffff, u);
 }
 
 #ifdef INET6
 /*
  * Overwrite IPv6 address a with an and adjust checksum *c for each of the
  * eight 16-bit words of the address.  u is handed to pf_cksum_fixup() as
  * its "udp" argument.
  */
 void
 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
 {
 	struct pf_addr	ao;
 
 	/* Keep the old address in ao before overwriting it. */
 	PF_ACPY(&ao, a, AF_INET6);
 	PF_ACPY(a, an, AF_INET6);
 
 	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 	    pf_cksum_fixup(pf_cksum_fixup(*c,
 	    ao.addr16[0], an->addr16[0], u),
 	    ao.addr16[1], an->addr16[1], u),
 	    ao.addr16[2], an->addr16[2], u),
 	    ao.addr16[3], an->addr16[3], u),
 	    ao.addr16[4], an->addr16[4], u),
 	    ao.addr16[5], an->addr16[5], u),
 	    ao.addr16[6], an->addr16[6], u),
 	    ao.addr16[7], an->addr16[7], u);
 }
 #endif /* INET6 */
 
 /*
  * Rewrite the addresses (and optionally a port) of an ICMP error message
  * and of the packet quoted inside it, fixing all affected checksums.
  *
  *   ia, ip  inner address and inner port to overwrite; ip may be NULL
  *   oa      outer address to overwrite
  *   na, np  new address and new port
  *   pc      inner protocol checksum; may be NULL
  *   h2c     inner IP header checksum (AF_INET only)
  *   ic      ICMP checksum
  *   hc      outer IP header checksum (AF_INET only)
  *   u       passed to pf_cksum_fixup() as its "udp" argument
  *   af      address family
  */
 void
 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
     struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
     u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
 {
 	struct pf_addr	oia, ooa;
 
 	/* Remember the old inner and outer addresses. */
 	PF_ACPY(&oia, ia, af);
 	PF_ACPY(&ooa, oa, af);
 
 	/* Change inner protocol port, fix inner protocol checksum. */
 	if (ip != NULL) {
 		u_int16_t	oip = *ip;
 		u_int32_t	opc = 0;	/* make the compiler happy */
 
 		if (pc != NULL)
 			opc = *pc;
 		*ip = np;
 		if (pc != NULL)
 			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
 		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
 		/* The inner checksum is itself covered by the ICMP checksum. */
 		if (pc != NULL)
 			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
 	}
 	/* Change inner ip address, fix inner ip and icmp checksums. */
 	PF_ACPY(ia, na, af);
 	switch (af) {
 #ifdef INET
 	case AF_INET: {
 		u_int32_t	 oh2c = *h2c;
 
 		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
 		    oia.addr16[0], ia->addr16[0], 0),
 		    oia.addr16[1], ia->addr16[1], 0);
 		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
 		    oia.addr16[0], ia->addr16[0], 0),
 		    oia.addr16[1], ia->addr16[1], 0);
 		/* Account for the changed inner header checksum as well. */
 		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
 		break;
 	}
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 		    pf_cksum_fixup(pf_cksum_fixup(*ic,
 		    oia.addr16[0], ia->addr16[0], u),
 		    oia.addr16[1], ia->addr16[1], u),
 		    oia.addr16[2], ia->addr16[2], u),
 		    oia.addr16[3], ia->addr16[3], u),
 		    oia.addr16[4], ia->addr16[4], u),
 		    oia.addr16[5], ia->addr16[5], u),
 		    oia.addr16[6], ia->addr16[6], u),
 		    oia.addr16[7], ia->addr16[7], u);
 		break;
 #endif /* INET6 */
 	}
 	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
 	PF_ACPY(oa, na, af);
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
 		    ooa.addr16[0], oa->addr16[0], 0),
 		    ooa.addr16[1], oa->addr16[1], 0);
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 		    pf_cksum_fixup(pf_cksum_fixup(*ic,
 		    ooa.addr16[0], oa->addr16[0], u),
 		    ooa.addr16[1], oa->addr16[1], u),
 		    ooa.addr16[2], oa->addr16[2], u),
 		    ooa.addr16[3], oa->addr16[3], u),
 		    ooa.addr16[4], oa->addr16[4], u),
 		    ooa.addr16[5], oa->addr16[5], u),
 		    ooa.addr16[6], oa->addr16[6], u),
 		    ooa.addr16[7], oa->addr16[7], u);
 		break;
 #endif /* INET6 */
 	}
 }
 
 
 /*
  * Need to modulate the sequence numbers in the TCP SACK option
  * (credits to Krzysztof Pfaff for report and patch)
  *
  * The TCP options following header th (located at offset off in mbuf m) are
  * copied out, dst->seqdiff is subtracted from the start and end of every
  * SACK block, th->th_sum is adjusted for each change, and the options are
  * copied back into the mbuf.  Returns 1 if a SACK option was rewritten and
  * copied back, 0 otherwise.
  */
 int
 pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
     struct tcphdr *th, struct pf_state_peer *dst)
 {
 	/* hlen: option bytes left to parse; thoptlen: total option bytes */
 	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
 #ifdef __FreeBSD__
 	u_int8_t opts[TCP_MAXOLEN], *opt = opts;
 #else
 	u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
 #endif
 	int copyback = 0, i, olen;
 	struct sackblk sack;
 
 /* Smallest SACK option: one SACK block plus the kind and length bytes. */
 #define TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
 	if (hlen < TCPOLEN_SACKLEN ||
 	    !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
 		return 0;
 
 	/* Stop once the remaining bytes cannot hold a SACK option anymore. */
 	while (hlen >= TCPOLEN_SACKLEN) {
 		olen = opt[1];
 		switch (*opt) {
 		case TCPOPT_EOL:	/* FALLTHROUGH */
 		case TCPOPT_NOP:
 			/* single-byte options carry no length byte */
 			opt++;
 			hlen--;
 			break;
 		case TCPOPT_SACK:
 			/* never trust a length that runs past the options */
 			if (olen > hlen)
 				olen = hlen;
 			if (olen >= TCPOLEN_SACKLEN) {
 				for (i = 2; i + TCPOLEN_SACK <= olen;
 				    i += TCPOLEN_SACK) {
 					/* work on a copy of the block */
 					memcpy(&sack, &opt[i], sizeof(sack));
 					pf_change_a(&sack.start, &th->th_sum,
 					    htonl(ntohl(sack.start) -
 					    dst->seqdiff), 0);
 					pf_change_a(&sack.end, &th->th_sum,
 					    htonl(ntohl(sack.end) -
 					    dst->seqdiff), 0);
 					memcpy(&opt[i], &sack, sizeof(sack));
 				}
 				copyback = 1;
 			}
 			/* FALLTHROUGH */
 		default:
 			/* advance by at least 2 so the loop always progresses */
 			if (olen < 2)
 				olen = 2;
 			hlen -= olen;
 			opt += olen;
 		}
 	}
 
 	if (copyback)
 #ifdef __FreeBSD__
 		m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts);
 #else
 		m_copyback(m, off + sizeof(*th), thoptlen, opts);
 #endif
 	return (copyback);
 }
 
 /*
  * Build a TCP segment from scratch and hand it to ip_output() or
  * ip6_output().
  *
  *   replyto       (FreeBSD only) mbuf being answered; used for MAC
  *                 labelling when MAC is configured, otherwise unused
  *   r             rule supplying rtableid (and qid under ALTQ); may be NULL
  *   af            AF_INET or AF_INET6
  *   saddr, daddr  source and destination address of the new segment
  *   sport, dport  ports, stored into the header as given
  *   seq, ack      sequence and acknowledgement numbers (converted by htonl)
  *   flags, win    TCP flags and window (window converted by htons)
  *   mss           when non-zero, a 4-byte MSS option is appended
  *   ttl           IPv4 TTL; 0 selects ip_defttl
  *   tag           when non-zero the packet is marked so that it is not
  *                 filtered again (M_SKIP_FIREWALL / PF_TAG_GENERATED)
  *   rtag          value stored in the packet's pf tag
  *   eh, ifp       when eh is non-NULL (AF_INET only) the segment is sent
  *                 through a hand-built route on ifp with the Ethernet
  *                 addresses of eh swapped
  *
  * On FreeBSD the PF lock is dropped around the output call.
  */
 void
 #ifdef __FreeBSD__
 pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af,
 #else
 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
 #endif
     const struct pf_addr *saddr, const struct pf_addr *daddr,
     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
     u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
     u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
 {
 	struct mbuf	*m;
 	int		 len, tlen;
 #ifdef INET
 	struct ip	*h;
 #endif /* INET */
 #ifdef INET6
 	struct ip6_hdr	*h6;
 #endif /* INET6 */
 	struct tcphdr	*th;
 	char		*opt;
 	struct pf_mtag	*pf_mtag;
 
 #ifdef __FreeBSD__
 	/* Assert that af is one of the families compiled in. */
 	KASSERT(
 #ifdef INET
 	    af == AF_INET
 #else
 	    0
 #endif
 	    ||
 #ifdef INET6
 	    af == AF_INET6
 #else
 	    0
 #endif
 	    , ("Unsupported AF %d", af));
 	len = 0;
 	th = NULL;
 #ifdef INET
 	h = NULL;
 #endif
 #ifdef INET6
 	h6 = NULL;
 #endif
 #endif
 
 	/* maximum segment size tcp option */
 	tlen = sizeof(struct tcphdr);
 	if (mss)
 		tlen += 4;
 
 	/* Total packet length: IP header of the family plus the TCP part. */
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		len = sizeof(struct ip) + tlen;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		len = sizeof(struct ip6_hdr) + tlen;
 		break;
 #endif /* INET6 */
 	}
 
 	/* create outgoing mbuf */
 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
 	if (m == NULL)
 		return;
 #ifdef __FreeBSD__
 #ifdef MAC
 	if (replyto)
 		mac_netinet_firewall_reply(replyto, m);
 	else
 		mac_netinet_firewall_send(m);
 #else
 	(void)replyto;
 #endif
 #endif
 	/* Without a pf tag the packet cannot be marked; give up and free it. */
 	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
 		m_freem(m);
 		return;
 	}
 	if (tag)
 #ifdef __FreeBSD__
 		m->m_flags |= M_SKIP_FIREWALL;
 #else
 		pf_mtag->flags |= PF_TAG_GENERATED;
 #endif
 
 	pf_mtag->tag = rtag;
 
 	/* Propagate the rule's routing table id (and FIB on FreeBSD). */
 	if (r != NULL && r->rtableid >= 0)
+#ifdef __FreeBSD__
+	{
+		M_SETFIB(m, r->rtableid);
+#endif
 		pf_mtag->rtableid = r->rtableid;
+#ifdef __FreeBSD__
+	}
+#endif
 #ifdef ALTQ
 	if (r != NULL && r->qid) {
 		pf_mtag->qid = r->qid;
 		/* add hints for ecn */
 		pf_mtag->af = af;
 		pf_mtag->hdr = mtod(m, struct ip *);
 	}
 #endif /* ALTQ */
 	/* Leave room for a link-layer header, then zero the packet area. */
 	m->m_data += max_linkhdr;
 	m->m_pkthdr.len = m->m_len = len;
 	m->m_pkthdr.rcvif = NULL;
 	bzero(m->m_data, len);
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		h = mtod(m, struct ip *);
 
 		/* IP header fields included in the TCP checksum */
 		h->ip_p = IPPROTO_TCP;
 		h->ip_len = htons(tlen);
 		h->ip_src.s_addr = saddr->v4.s_addr;
 		h->ip_dst.s_addr = daddr->v4.s_addr;
 
 		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		h6 = mtod(m, struct ip6_hdr *);
 
 		/* IP header fields included in the TCP checksum */
 		h6->ip6_nxt = IPPROTO_TCP;
 		h6->ip6_plen = htons(tlen);
 		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
 		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
 
 		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
 		break;
 #endif /* INET6 */
 	}
 
 	/* TCP header */
 	th->th_sport = sport;
 	th->th_dport = dport;
 	th->th_seq = htonl(seq);
 	th->th_ack = htonl(ack);
 	th->th_off = tlen >> 2;
 	th->th_flags = flags;
 	th->th_win = htons(win);
 
 	/* Append the MSS option directly behind the TCP header. */
 	if (mss) {
 		opt = (char *)(th + 1);
 		opt[0] = TCPOPT_MAXSEG;
 		opt[1] = 4;
 		HTONS(mss);
 		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
 	}
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		/* TCP checksum */
 		th->th_sum = in_cksum(m, len);
 
 		/* Finish the IP header */
 		h->ip_v = 4;
 		h->ip_hl = sizeof(*h) >> 2;
 		h->ip_tos = IPTOS_LOWDELAY;
 #ifdef __FreeBSD__
 		/* The FreeBSD branch stores ip_off/ip_len without htons(). */
 		h->ip_off = path_mtu_discovery ? IP_DF : 0;
 		h->ip_len = len;
 #else
 		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
 		h->ip_len = htons(len);
 #endif
 		h->ip_ttl = ttl ? ttl : ip_defttl;
 		h->ip_sum = 0;
 		if (eh == NULL) {
 #ifdef __FreeBSD__
 			PF_UNLOCK();
 			ip_output(m, (void *)NULL, (void *)NULL, 0,
 			    (void *)NULL, (void *)NULL);
 			PF_LOCK();
 #else /* ! __FreeBSD__ */
 			ip_output(m, (void *)NULL, (void *)NULL, 0,
 			    (void *)NULL, (void *)NULL);
 #endif
 		} else {
 			/*
 			 * Build a temporary route on the stack that names ifp
 			 * and carries a complete Ethernet header in ro_dst.
 			 */
 			struct route		 ro;
 			struct rtentry		 rt;
 			struct ether_header	*e = (void *)ro.ro_dst.sa_data;
 
 			if (ifp == NULL) {
 				m_freem(m);
 				return;
 			}
 			rt.rt_ifp = ifp;
 			ro.ro_rt = &rt;
 			ro.ro_dst.sa_len = sizeof(ro.ro_dst);
 			ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT;
 			/* Swap source and destination MAC of the original frame. */
 			bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN);
 			bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN);
 			e->ether_type = eh->ether_type;
 #ifdef __FreeBSD__
 			PF_UNLOCK();
 			/* XXX_IMPORT: later */
 			ip_output(m, (void *)NULL, &ro, 0,
 			    (void *)NULL, (void *)NULL);
 			PF_LOCK();
 #else /* ! __FreeBSD__ */
 			ip_output(m, (void *)NULL, &ro, IP_ROUTETOETHER,
 			    (void *)NULL, (void *)NULL);
 #endif
 		}
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		/* TCP checksum */
 		th->th_sum = in6_cksum(m, IPPROTO_TCP,
 		    sizeof(struct ip6_hdr), tlen);
 
 		h6->ip6_vfc |= IPV6_VERSION;
 		h6->ip6_hlim = IPV6_DEFHLIM;
 
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
 		PF_LOCK();
 #else
 		ip6_output(m, NULL, NULL, 0, NULL, NULL);
 #endif
 		break;
 #endif /* INET6 */
 	}
 }
 
 /*
  * Send an ICMP (AF_INET) or ICMPv6 (AF_INET6) error of the given type and
  * code in response to packet m.
  *
  * m itself is left untouched: a copy m0 is made, tagged so that it is not
  * filtered again, given the routing table id (and ALTQ queue id) of rule r,
  * and handed to icmp_error() / icmp6_error().  On FreeBSD the PF lock is
  * dropped around that call.  If the copy or its pf tag cannot be obtained,
  * nothing is sent and the copy is released.
  */
 void
 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
     struct pf_rule *r)
 {
 	struct pf_mtag	*pf_mtag;
 	struct mbuf	*m0;
 #ifdef __FreeBSD__
 	struct ip *ip;
 #endif
 
 #ifdef __FreeBSD__
 	m0 = m_copypacket(m, M_DONTWAIT);
 #else
 	m0 = m_copy(m, 0, M_COPYALL);
 #endif
 	/* The copy may fail on either platform; nothing to send then. */
 	if (m0 == NULL)
 		return;
 	if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
 		/* m0 is our private copy: free it instead of leaking it. */
 		m_freem(m0);
 		return;
 	}
 #ifdef __FreeBSD__
 	/* XXX: revisit */
 	m0->m_flags |= M_SKIP_FIREWALL;
 #else
 	pf_mtag->flags |= PF_TAG_GENERATED;
 #endif
 
 	/* Propagate the rule's routing table id (and FIB on FreeBSD). */
 	if (r->rtableid >= 0)
+#ifdef __FreeBSD__
+	{
+		M_SETFIB(m0, r->rtableid);
+#endif
 		pf_mtag->rtableid = r->rtableid;
+#ifdef __FreeBSD__
+	}
+#endif
 
 #ifdef ALTQ
 	if (r->qid) {
 		pf_mtag->qid = r->qid;
 		/* add hints for ecn */
 		pf_mtag->af = af;
 		pf_mtag->hdr = mtod(m0, struct ip *);
 	}
 #endif /* ALTQ */
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 #ifdef __FreeBSD__
 		/* icmp_error() expects host byte ordering */
 		ip = mtod(m0, struct ip *);
 		NTOHS(ip->ip_len);
 		NTOHS(ip->ip_off);
 		PF_UNLOCK();
 		icmp_error(m0, type, code, 0, 0);
 		PF_LOCK();
 #else
 		icmp_error(m0, type, code, 0, 0);
 #endif
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		icmp6_error(m0, type, code, 0);
 #ifdef __FreeBSD__
 		PF_LOCK();
 #endif
 		break;
 #endif /* INET6 */
 	default:
 		/* No handler took ownership of the copy; release it. */
 		m_freem(m0);
 		break;
 	}
 }
 
 /*
  * Compare addresses a and b under mask m for address family af.
  * With n == 0 the result is 1 when the masked addresses are equal and 0
  * otherwise; with n != 0 the result is inverted.  For an address family
  * that is not handled the addresses count as not equal.
  */
 int
 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
     struct pf_addr *b, sa_family_t af)
 {
 	int	equal = 0;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		equal = ((a->addr32[0] & m->addr32[0]) ==
 		    (b->addr32[0] & m->addr32[0]));
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		equal = (((a->addr32[0] & m->addr32[0]) ==
 		     (b->addr32[0] & m->addr32[0])) &&
 		    ((a->addr32[1] & m->addr32[1]) ==
 		     (b->addr32[1] & m->addr32[1])) &&
 		    ((a->addr32[2] & m->addr32[2]) ==
 		     (b->addr32[2] & m->addr32[2])) &&
 		    ((a->addr32[3] & m->addr32[3]) ==
 		     (b->addr32[3] & m->addr32[3])));
 		break;
 #endif /* INET6 */
 	}
 
 	/* n selects whether equality or inequality counts as a match. */
 	if (n)
 		return (equal ? 0 : 1);
 	return (equal ? 1 : 0);
 }
 
 /*
  * Evaluate comparison operator op on value p against a1 (and a2 for the
  * range operators).  Returns non-zero when the comparison holds, 0 when it
  * does not or when op is not one of the known operators.
  *
  *   PF_OP_IRG  a1 <  p <  a2     PF_OP_XRG  p < a1 or p > a2
  *   PF_OP_RRG  a1 <= p <= a2     PF_OP_EQ/NE/LT/LE/GT/GE  p against a1
  */
 int
 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
 {
 	int	hit = 0;
 
 	switch (op) {
 	case PF_OP_IRG:
 		hit = ((p > a1) && (p < a2));
 		break;
 	case PF_OP_XRG:
 		hit = ((p < a1) || (p > a2));
 		break;
 	case PF_OP_RRG:
 		hit = ((p >= a1) && (p <= a2));
 		break;
 	case PF_OP_EQ:
 		hit = (p == a1);
 		break;
 	case PF_OP_NE:
 		hit = (p != a1);
 		break;
 	case PF_OP_LT:
 		hit = (p < a1);
 		break;
 	case PF_OP_LE:
 		hit = (p <= a1);
 		break;
 	case PF_OP_GT:
 		hit = (p > a1);
 		break;
 	case PF_OP_GE:
 		hit = (p >= a1);
 		break;
 	}
 	return (hit);
 }
 
 /*
  * Port flavour of pf_match(): a1, a2 and p are converted with ntohs()
  * before being compared with operator op.
  */
 int
 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
 {
 	u_int16_t	lo = ntohs(a1);
 	u_int16_t	hi = ntohs(a2);
 	u_int16_t	port = ntohs(p);
 
 	return (pf_match(op, lo, hi, port));
 }
 
 /*
  * User-id flavour of pf_match().  When u is UID_MAX only the PF_OP_EQ and
  * PF_OP_NE operators are evaluated; every other operator yields 0.
  */
 int
 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
 {
 	int	eq_or_ne = (op == PF_OP_EQ || op == PF_OP_NE);
 
 	if (!eq_or_ne && u == UID_MAX)
 		return (0);
 	return (pf_match(op, a1, a2, u));
 }
 
 /*
  * Group-id flavour of pf_match().  When g is GID_MAX only the PF_OP_EQ and
  * PF_OP_NE operators are evaluated; every other operator yields 0.
  */
 int
 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
 {
 	int	eq_or_ne = (op == PF_OP_EQ || op == PF_OP_NE);
 
 	if (!eq_or_ne && g == GID_MAX)
 		return (0);
 	return (pf_match(op, a1, a2, g));
 }
 
 #ifndef __FreeBSD__
 /*
  * Return the pf tag data attached to mbuf m, or NULL if m carries no
  * PACKET_TAG_PF tag.  The pf data is stored directly behind the m_tag.
  */
 struct pf_mtag *
 pf_find_mtag(struct mbuf *m)
 {
 	struct m_tag	*mtag = m_tag_find(m, PACKET_TAG_PF, NULL);
 
 	if (mtag != NULL)
 		return ((struct pf_mtag *)(mtag + 1));
 	return (NULL);
 }
 
 /*
  * Like pf_find_mtag(), but a missing tag is allocated (M_NOWAIT), zeroed
  * and prepended to m.  Returns NULL only if that allocation fails.
  */
 struct pf_mtag *
 pf_get_mtag(struct mbuf *m)
 {
 	struct m_tag	*mtag = m_tag_find(m, PACKET_TAG_PF, NULL);
 
 	if (mtag != NULL)
 		return ((struct pf_mtag *)(mtag + 1));
 
 	mtag = m_tag_get(PACKET_TAG_PF, sizeof(struct pf_mtag), M_NOWAIT);
 	if (mtag == NULL)
 		return (NULL);
 	bzero(mtag + 1, sizeof(struct pf_mtag));
 	m_tag_prepend(m, mtag);
 
 	return ((struct pf_mtag *)(mtag + 1));
 }
 #endif
 
 /*
  * Check the packet's tag against rule r's match_tag.  When *tag is -1 it
  * is first loaded from pf_mtag->tag.  Returns non-zero when the tag equals
  * r->match_tag, or, if r->match_tag_not is set, when it differs.
  */
 int
 pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag,
     int *tag)
 {
 	if (*tag == -1)
 		*tag = pf_mtag->tag;
 
 	if (r->match_tag_not)
 		return (r->match_tag != *tag);
 	return (r->match_tag == *tag);
 }
 
 /*
  * Store tag and/or routing table id in the pf tag of mbuf m.
  *
  * A tag <= 0 and an rtableid < 0 each mean "do not set".  If pf_mtag is
  * NULL it is looked up (or created) with pf_get_mtag().
  * Returns 0 on success or when there is nothing to set, 1 if the pf tag
  * could not be obtained.
  */
 int
 pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, int rtableid)
 {
 	/* Nothing to record: avoid allocating a tag needlessly. */
 	if (tag <= 0 && rtableid < 0)
 		return (0);
 
 	if (pf_mtag == NULL)
 		if ((pf_mtag = pf_get_mtag(m)) == NULL)
 			return (1);
 	if (tag > 0)
 		pf_mtag->tag = tag;
 	/* On FreeBSD the mbuf's FIB is set together with the table id. */
 	if (rtableid >= 0)
+#ifdef __FreeBSD__
+	{
+		M_SETFIB(m, rtableid);
+#endif
 		pf_mtag->rtableid = rtableid;
+#ifdef __FreeBSD__
+	}
+#endif
 
 	return (0);
 }
 
 /*
  * Descend into the anchor referenced by rule *r.
  *
  * A frame recording the current ruleset and rule is pushed onto
  * pf_anchor_stack, *rs is switched to the anchor's ruleset (or, for a
  * wildcard anchor, to the ruleset of its first child) and *r is set to the
  * first active rule of rule set n there.  For a wildcard anchor without
  * children *r becomes NULL.  If the stack is full the anchor is skipped and
  * *r simply advances to the next rule.
  *
  *   depth  current stack depth, incremented on success
  *   a      if non-NULL and depth is 0, receives the anchor rule
  *   match  if non-NULL, reset to 0
  */
 static void
 pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
     struct pf_rule **r, struct pf_rule **a,  int *match)
 {
 	struct pf_anchor_stackframe	*f;
 
 	(*r)->anchor->match = 0;
 	if (match)
 		*match = 0;
 	if (*depth >= sizeof(pf_anchor_stack) /
 	    sizeof(pf_anchor_stack[0])) {
 		printf("pf_step_into_anchor: stack overflow\n");
 		*r = TAILQ_NEXT(*r, entries);
 		return;
 	} else if (*depth == 0 && a != NULL)
 		*a = *r;
 	/* Push a frame remembering where to resume after the anchor. */
 	f = pf_anchor_stack + (*depth)++;
 	f->rs = *rs;
 	f->r = *r;
 	if ((*r)->anchor_wildcard) {
 		/* Wildcard: iterate over the anchor's children, first one now. */
 		f->parent = &(*r)->anchor->children;
 		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
 		    NULL) {
 			*r = NULL;
 			return;
 		}
 		*rs = &f->child->ruleset;
 	} else {
 		f->parent = NULL;
 		f->child = NULL;
 		*rs = &(*r)->anchor->ruleset;
 	}
 	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
 }
 
 /*
  * Leave the current anchor once its rules are exhausted.
  *
  * For a wildcard anchor the next child ruleset is entered if there is one;
  * otherwise the top frame is popped from pf_anchor_stack, *rs is restored
  * and *r is set to the rule following the anchor rule.  This repeats while
  * *r is NULL and frames remain.
  *
  *   depth  current stack depth, decremented for every popped frame
  *   a      if non-NULL, cleared when depth returns to 0
  *   match  optional match indicator of the caller; may be NULL
  *
  * Returns the "quick" flag of the anchor rule whose frame was popped last
  * when that anchor (or *match) recorded a match, 0 otherwise.
  */
 int
 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
     struct pf_rule **r, struct pf_rule **a, int *match)
 {
 	struct pf_anchor_stackframe	*f;
 	int quick = 0;
 
 	do {
 		if (*depth <= 0)
 			break;
 		f = pf_anchor_stack + *depth - 1;
 		if (f->parent != NULL && f->child != NULL) {
 			if (f->child->match ||
 			    (match != NULL && *match)) {
 				f->r->anchor->match = 1;
 				/*
 				 * match may be NULL even though this branch
 				 * was entered through f->child->match.
 				 */
 				if (match != NULL)
 					*match = 0;
 			}
 			/* Advance to the next child of the wildcard anchor. */
 			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
 			if (f->child != NULL) {
 				*rs = &f->child->ruleset;
 				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
 				if (*r == NULL)
 					continue;
 				else
 					break;
 			}
 		}
 		/* No (more) children: pop the frame and resume in the parent. */
 		(*depth)--;
 		if (*depth == 0 && a != NULL)
 			*a = NULL;
 		*rs = f->rs;
 		if (f->r->anchor->match || (match != NULL && *match))
 			quick = f->r->quick;
 		*r = TAILQ_NEXT(f->r, entries);
 	} while (*r == NULL);
 
 	return (quick);
 }
 
 #ifdef INET6
 /*
  * Combine a pool address with a source address under a mask: every bit
  * set in rmask is taken from raddr, every other bit from saddr.  The
  * result is written to naddr (one 32-bit word for AF_INET, four for
  * AF_INET6).
  */
 void
 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
     struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
 {
 	int	w;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
 		    (~rmask->addr32[0] & saddr->addr32[0]);
 		break;
 #endif /* INET */
 	case AF_INET6:
 		for (w = 0; w < 4; w++)
 			naddr->addr32[w] =
 			    (raddr->addr32[w] & rmask->addr32[w]) |
 			    (~rmask->addr32[w] & saddr->addr32[w]);
 		break;
 	}
 }
 
 /*
  * Increment address addr by one.  The address words are kept in network
  * byte order; for AF_INET6 a word holding 0xffffffff wraps to 0 and the
  * carry moves on to the next more significant word.
  */
 void
 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
 {
 	int	w;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
 		break;
 #endif /* INET */
 	case AF_INET6:
 		/* Walk from the least significant word while it overflows. */
 		for (w = 3; w > 0; w--) {
 			if (addr->addr32[w] != 0xffffffff)
 				break;
 			addr->addr32[w] = 0;
 		}
 		addr->addr32[w] = htonl(ntohl(addr->addr32[w]) + 1);
 		break;
 	}
 }
 #endif /* INET6 */
 
 /*
  * Mix the three words a, b and c in place using nine rounds of
  * subtract / subtract / xor-with-shift.  All three arguments are both read
  * and written and are evaluated several times, so they must be plain
  * lvalues without side effects.
  */
 #define mix(a,b,c) \
 	do {					\
 		a -= b; a -= c; a ^= (c >> 13);	\
 		b -= c; b -= a; b ^= (a << 8);	\
 		c -= a; c -= b; c ^= (b >> 13);	\
 		a -= b; a -= c; a ^= (c >> 12);	\
 		b -= c; b -= a; b ^= (a << 16);	\
 		c -= a; c -= b; c ^= (b >> 5);	\
 		a -= b; a -= c; a ^= (c >> 3);	\
 		b -= c; b -= a; b ^= (a << 10);	\
 		c -= a; c -= b; c ^= (b >> 15);	\
 	} while (0)
 
 /*
  * hash function based on bridge_hash in if_bridge.c
  *
  * Hashes address inaddr with the pool key "key" and stores the result in
  * hash: one 32-bit word for AF_INET, four words for AF_INET6.  Nothing is
  * written for any other address family.
  */
 void
 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
     struct pf_poolhashkey *key, sa_family_t af)
 {
 	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		a += inaddr->addr32[0];
 		b += key->key32[1];
 		mix(a, b, c);
 		hash->addr32[0] = c + key->key32[2];
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		/*
 		 * Four mixing rounds, one per output word.  a, b and c carry
 		 * over from round to round, and each round feeds in two
 		 * address words (plus one more key word from round two on).
 		 */
 		a += inaddr->addr32[0];
 		b += inaddr->addr32[2];
 		mix(a, b, c);
 		hash->addr32[0] = c;
 		a += inaddr->addr32[1];
 		b += inaddr->addr32[3];
 		c += key->key32[1];
 		mix(a, b, c);
 		hash->addr32[1] = c;
 		a += inaddr->addr32[2];
 		b += inaddr->addr32[1];
 		c += key->key32[2];
 		mix(a, b, c);
 		hash->addr32[2] = c;
 		a += inaddr->addr32[3];
 		b += inaddr->addr32[0];
 		c += key->key32[3];
 		mix(a, b, c);
 		hash->addr32[3] = c;
 		break;
 #endif /* INET6 */
 	}
 }
 
 /*
  * Choose an address from rule r's address pool for source address saddr
  * and store it in naddr.
  *
  * When the pool has PF_POOL_STICKYADDR set and *sn is NULL, the source
  * tracking tree is searched first; a node with a non-zero raddr supplies
  * the answer directly.  Otherwise the address is derived according to the
  * pool type (none, bitmask, random, source hash or round-robin).
  *
  * init_addr may be NULL.  When it is non-NULL and zero, the random and
  * round-robin pool types store the first address they select in it.
  *
  * Returns 0 when naddr was filled in, 1 when no address could be selected.
  */
 int
 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
     struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
 {
 	unsigned char		 hash[16];
 	struct pf_pool		*rpool = &r->rpool;
 	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
 	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
 	struct pf_pooladdr	*acur = rpool->cur;
 	struct pf_src_node	 k;
 
 	/* sticky-address: reuse a mapping already recorded for this source */
 	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
 	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
 		k.af = af;
 		PF_ACPY(&k.addr, saddr, af);
 		if (r->rule_flag & PFRULE_RULESRCTRACK ||
 		    r->rpool.opts & PF_POOL_STICKYADDR)
 			k.rule.ptr = r;
 		else
 			k.rule.ptr = NULL;
 		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
 		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
 		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
 			PF_ACPY(naddr, &(*sn)->raddr, af);
 			if (pf_status.debug >= PF_DEBUG_MISC) {
 				printf("pf_map_addr: src tracking maps ");
 				pf_print_host(&k.addr, 0, af);
 				printf(" to ");
 				pf_print_host(naddr, 0, af);
 				printf("\n");
 			}
 			return (0);
 		}
 	}
 
 	/* work out raddr/rmask for the current pool entry, by address type */
 	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
 		return (1);
 	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
 		switch (af) {
 #ifdef INET
 		case AF_INET:
 			/* no address of this family, and not round-robin */
 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
 			    (rpool->opts & PF_POOL_TYPEMASK) !=
 			    PF_POOL_ROUNDROBIN)
 				return (1);
 			 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
 			 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
 			    (rpool->opts & PF_POOL_TYPEMASK) !=
 			    PF_POOL_ROUNDROBIN)
 				return (1);
 			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
 			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
 			break;
 #endif /* INET6 */
 		}
 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
 		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
 			return (1); /* unsupported */
 	} else {
 		raddr = &rpool->cur->addr.v.a.addr;
 		rmask = &rpool->cur->addr.v.a.mask;
 	}
 
 	switch (rpool->opts & PF_POOL_TYPEMASK) {
 	case PF_POOL_NONE:
 		PF_ACPY(naddr, raddr, af);
 		break;
 	case PF_POOL_BITMASK:
 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
 		break;
 	case PF_POOL_RANDOM:
 		/*
 		 * First selection (init_addr still zero): seed the pool
 		 * counter with random data.  Later selections just step
 		 * the counter.
 		 */
 		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
 			switch (af) {
 #ifdef INET
 			case AF_INET:
 				rpool->counter.addr32[0] = htonl(arc4random());
 				break;
 #endif /* INET */
 #ifdef INET6
 			case AF_INET6:
 				/*
 				 * Randomize words 3 down to 0, stopping at
 				 * the first word whose mask is all ones.
 				 */
 				if (rmask->addr32[3] != 0xffffffff)
 					rpool->counter.addr32[3] =
 					    htonl(arc4random());
 				else
 					break;
 				if (rmask->addr32[2] != 0xffffffff)
 					rpool->counter.addr32[2] =
 					    htonl(arc4random());
 				else
 					break;
 				if (rmask->addr32[1] != 0xffffffff)
 					rpool->counter.addr32[1] =
 					    htonl(arc4random());
 				else
 					break;
 				if (rmask->addr32[0] != 0xffffffff)
 					rpool->counter.addr32[0] =
 					    htonl(arc4random());
 				break;
 #endif /* INET6 */
 			}
 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
 			PF_ACPY(init_addr, naddr, af);
 
 		} else {
 			PF_AINC(&rpool->counter, af);
 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
 		}
 		break;
 	case PF_POOL_SRCHASH:
 		/* hash the source address with the pool key */
 		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
 		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
 		break;
 	case PF_POOL_ROUNDROBIN:
 		/*
 		 * Stay on the current pool entry when pfr_pool_get()
 		 * returns 0 for it or pf_match_addr() reports a match for
 		 * the counter; otherwise move on to the next entry.
 		 */
 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
 			    &rpool->tblidx, &rpool->counter,
 			    &raddr, &rmask, af))
 				goto get_addr;
 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
 			    &rpool->tblidx, &rpool->counter,
 			    &raddr, &rmask, af))
 				goto get_addr;
 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
 			goto get_addr;
 
 	try_next:
 		/* advance to the next pool entry, wrapping to the list head */
 		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
 			rpool->cur = TAILQ_FIRST(&rpool->list);
 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
 			rpool->tblidx = -1;
 			if (pfr_pool_get(rpool->cur->addr.p.tbl,
 			    &rpool->tblidx, &rpool->counter,
 			    &raddr, &rmask, af)) {
 				/* table contains no address of type 'af' */
 				/* give up once we are back at the entry we started on */
 				if (rpool->cur != acur)
 					goto try_next;
 				return (1);
 			}
 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
 			rpool->tblidx = -1;
 			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
 			    &rpool->tblidx, &rpool->counter,
 			    &raddr, &rmask, af)) {
 				/* table contains no address of type 'af' */
 				if (rpool->cur != acur)
 					goto try_next;
 				return (1);
 			}
 		} else {
 			raddr = &rpool->cur->addr.v.a.addr;
 			rmask = &rpool->cur->addr.v.a.mask;
 			PF_ACPY(&rpool->counter, raddr, af);
 		}
 
 	get_addr:
 		/* hand out the counter value, then step it for the next call */
 		PF_ACPY(naddr, &rpool->counter, af);
 		if (init_addr != NULL && PF_AZERO(init_addr, af))
 			PF_ACPY(init_addr, naddr, af);
 		PF_AINC(&rpool->counter, af);
 		break;
 	}
 	/* remember the selected address in the source node, if there is one */
 	if (*sn != NULL)
 		PF_ACPY(&(*sn)->raddr, naddr, af);
 
 	if (pf_status.debug >= PF_DEBUG_MISC &&
 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
 		printf("pf_map_addr: selected address ");
 		pf_print_host(naddr, 0, af);
 		printf("\n");
 	}
 
 	return (0);
 }
 
 /*
  * Pick a translation address (via pf_map_addr()) and a gateway-side port
  * for which no state with the same external endpoint exists yet.
  *
  * low/high give the proxy port range in host byte order; low == high == 0
  * means "keep *nport".  For ICMP the range is forced to 1-65535.  On
  * success naddr and, where a port was chosen, *nport (network byte order)
  * are filled in.
  *
  * Returns 0 on success, 1 when no address/port combination is available.
  */
 int
 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
     struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
     struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
     struct pf_src_node **sn)
 {
 	struct pf_state_cmp	key;
 	struct pf_addr		init_addr;
 	u_int16_t		cut;
 
 	bzero(&init_addr, sizeof(init_addr));
 	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
 		return (1);
 
 	if (proto == IPPROTO_ICMP) {
 		low = 1;
 		high = 65535;
 	}
 
 	do {
 		key.af = af;
 		key.proto = proto;
 		PF_ACPY(&key.ext.addr, daddr, key.af);
 		PF_ACPY(&key.gwy.addr, naddr, key.af);
 		key.ext.port = dport;
 
 		/*
 		 * port search; start random, step;
 		 * similar 2 portloop in in_pcbbind
 		 */
 		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
 		    proto == IPPROTO_ICMP)) {
 			/* protocol without ports: only the address matters */
 			key.gwy.port = dport;
 			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
 				return (0);
 		} else if (low == 0 && high == 0) {
 			/* no range configured: keep the caller's port */
 			key.gwy.port = *nport;
 			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL)
 				return (0);
 		} else if (low == high) {
 			key.gwy.port = htons(low);
 			if (pf_find_state_all(&key, PF_EXT_GWY, NULL) == NULL) {
 				*nport = htons(low);
 				return (0);
 			}
 		} else {
 			/*
 			 * tmp is wider than a port number on purpose: with a
 			 * 16-bit counter, ++tmp past 65535 (high == 65535)
 			 * or --tmp past 0 (low == 0) would wrap around and
 			 * the loop conditions below could never fail.
 			 */
 			u_int32_t tmp;
 
 			if (low > high) {
 				tmp = low;
 				low = high;
 				high = (u_int16_t)tmp;
 			}
 			/* low < high */
 			cut = htonl(arc4random()) % (1 + high - low) + low;
 			/* low <= cut <= high */
 			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++(tmp)) {
 				key.gwy.port = htons((u_int16_t)tmp);
 				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
 				    NULL) {
 					*nport = htons((u_int16_t)tmp);
 					return (0);
 				}
 			}
 			/* cut == 0 makes tmp wrap; the 0xffff bound stops it */
 			tmp = cut;
 			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --(tmp)) {
 				key.gwy.port = htons((u_int16_t)tmp);
 				if (pf_find_state_all(&key, PF_EXT_GWY, NULL) ==
 				    NULL) {
 					*nport = htons((u_int16_t)tmp);
 					return (0);
 				}
 			}
 		}
 
 		/*
 		 * Every port is taken for this address.  Pools that hand out
 		 * a different address per call get another try; the loop ends
 		 * once pf_map_addr() comes back around to the first address.
 		 */
 		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
 		case PF_POOL_RANDOM:
 		case PF_POOL_ROUNDROBIN:
 			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
 				return (1);
 			break;
 		case PF_POOL_NONE:
 		case PF_POOL_SRCHASH:
 		case PF_POOL_BITMASK:
 		default:
 			return (1);
 		}
 	} while (! PF_AEQ(&init_addr, naddr, af) );
 
 	return (1);					/* none available */
 }
 
 /*
  * Walk the active rules of main ruleset rs_num (stepping into and out of
  * anchors) and stop at the first non-anchor rule that matches the packet
  * described by pd, the interface, direction, addresses and ports.
  *
  * The tag and rtableid of the matching rules encountered are applied to
  * the packet with pf_tag_packet().
  *
  * Returns the matching rule, or NULL when nothing matched, tagging failed,
  * or the match is a PF_NONAT/PF_NORDR/PF_NOBINAT rule.
  */
 struct pf_rule *
 pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
     int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
     struct pf_addr *daddr, u_int16_t dport, int rs_num)
 {
 	struct pf_rule		*r, *rm = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	int			 tag = -1;
 	int			 rtableid = -1;
 	int			 asd = 0;
 
 	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
 	while (r && rm == NULL) {
 		struct pf_rule_addr	*src = NULL, *dst = NULL;
 		struct pf_addr_wrap	*xdst = NULL;
 
 		/*
 		 * Incoming binat: the packet source is compared against the
 		 * rule's dst, and the packet destination against the current
 		 * pool address (xdst) instead of a rule address.
 		 */
 		if (r->action == PF_BINAT && direction == PF_IN) {
 			src = &r->dst;
 			if (r->rpool.cur != NULL)
 				xdst = &r->rpool.cur->addr;
 		} else {
 			src = &r->src;
 			dst = &r->dst;
 		}
 
 		r->evaluations++;
 		/* on a mismatch, follow the rule's skip pointer for that field */
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != direction)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != pd->af)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != pd->proto)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
 		    src->neg, kif))
 			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
 			    PF_SKIP_DST_ADDR].ptr;
 		else if (src->port_op && !pf_match_port(src->port_op,
 		    src->port[0], src->port[1], sport))
 			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
 			    PF_SKIP_DST_PORT].ptr;
 		else if (dst != NULL &&
 		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
 		    0, NULL))
 			r = TAILQ_NEXT(r, entries);
 		else if (dst != NULL && dst->port_op &&
 		    !pf_match_port(dst->port_op, dst->port[0],
 		    dst->port[1], dport))
 			r = r->skip[PF_SKIP_DST_PORT].ptr;
 		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
 			r = TAILQ_NEXT(r, entries);
 		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
 		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
 		    off, pd->hdr.tcp), r->os_fingerprint)))
 			r = TAILQ_NEXT(r, entries);
 		else {
 			/* every criterion matched */
 			if (r->tag)
 				tag = r->tag;
 			if (r->rtableid >= 0)
 				rtableid = r->rtableid;
 			if (r->anchor == NULL) {
 				rm = r;
 			} else
 				pf_step_into_anchor(&asd, &ruleset, rs_num,
 				    &r, NULL, NULL);
 		}
 		/* end of a rule list: step back out of the anchor, if any */
 		if (r == NULL)
 			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
 			    NULL, NULL);
 	}
 	if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid))
 		return (NULL);
 	/* a matching "no nat/rdr/binat" rule means: do not translate */
 	if (rm != NULL && (rm->action == PF_NONAT ||
 	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
 		return (NULL);
 	return (rm);
 }
 
 /*
  * Find the translation rule for a packet and compute the translated
  * address and port.
  *
  * Outgoing packets are checked against the BINAT ruleset and then the NAT
  * ruleset; incoming packets against the RDR ruleset and then the BINAT
  * ruleset.  For a matching rule, naddr (and *nport for NAT and RDR) is
  * filled in according to the rule's action.
  *
  * Returns the matching rule, or NULL when no rule matched or no
  * translation address/port could be obtained.
  */
 struct pf_rule *
 pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
     struct pfi_kif *kif, struct pf_src_node **sn,
     struct pf_addr *saddr, u_int16_t sport,
     struct pf_addr *daddr, u_int16_t dport,
     struct pf_addr *naddr, u_int16_t *nport)
 {
 	struct pf_rule	*r = NULL;
 
 	if (direction == PF_OUT) {
 		r = pf_match_translation(pd, m, off, direction, kif, saddr,
 		    sport, daddr, dport, PF_RULESET_BINAT);
 		if (r == NULL)
 			r = pf_match_translation(pd, m, off, direction, kif,
 			    saddr, sport, daddr, dport, PF_RULESET_NAT);
 	} else {
 		r = pf_match_translation(pd, m, off, direction, kif, saddr,
 		    sport, daddr, dport, PF_RULESET_RDR);
 		if (r == NULL)
 			r = pf_match_translation(pd, m, off, direction, kif,
 			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
 	}
 
 	if (r != NULL) {
 		switch (r->action) {
 		case PF_NONAT:
 		case PF_NOBINAT:
 		case PF_NORDR:
 			return (NULL);
 		case PF_NAT:
 			/* source translation: pick an address and a free port */
 			if (pf_get_sport(pd->af, pd->proto, r, saddr,
 			    daddr, dport, naddr, nport, r->rpool.proxy_port[0],
 			    r->rpool.proxy_port[1], sn)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: NAT proxy port allocation "
 				    "(%u-%u) failed\n",
 				    r->rpool.proxy_port[0],
 				    r->rpool.proxy_port[1]));
 				return (NULL);
 			}
 			break;
 		case PF_BINAT:
 			switch (direction) {
 			case PF_OUT:
 				/* outgoing: combine saddr with the pool address/mask */
 				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
 					switch (pd->af) {
 #ifdef INET
 					case AF_INET:
 						if (r->rpool.cur->addr.p.dyn->
 						    pfid_acnt4 < 1)
 							return (NULL);
 						PF_POOLMASK(naddr,
 						    &r->rpool.cur->addr.p.dyn->
 						    pfid_addr4,
 						    &r->rpool.cur->addr.p.dyn->
 						    pfid_mask4,
 						    saddr, AF_INET);
 						break;
 #endif /* INET */
 #ifdef INET6
 					case AF_INET6:
 						if (r->rpool.cur->addr.p.dyn->
 						    pfid_acnt6 < 1)
 							return (NULL);
 						PF_POOLMASK(naddr,
 						    &r->rpool.cur->addr.p.dyn->
 						    pfid_addr6,
 						    &r->rpool.cur->addr.p.dyn->
 						    pfid_mask6,
 						    saddr, AF_INET6);
 						break;
 #endif /* INET6 */
 					}
 				} else
 					PF_POOLMASK(naddr,
 					    &r->rpool.cur->addr.v.a.addr,
 					    &r->rpool.cur->addr.v.a.mask,
 					    saddr, pd->af);
 				break;
 			case PF_IN:
 				/* incoming: combine daddr with the rule's src address/mask */
 				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
 					switch (pd->af) {
 #ifdef INET
 					case AF_INET:
 						if (r->src.addr.p.dyn->
 						    pfid_acnt4 < 1)
 							return (NULL);
 						PF_POOLMASK(naddr,
 						    &r->src.addr.p.dyn->
 						    pfid_addr4,
 						    &r->src.addr.p.dyn->
 						    pfid_mask4,
 						    daddr, AF_INET);
 						break;
 #endif /* INET */
 #ifdef INET6
 					case AF_INET6:
 						if (r->src.addr.p.dyn->
 						    pfid_acnt6 < 1)
 							return (NULL);
 						PF_POOLMASK(naddr,
 						    &r->src.addr.p.dyn->
 						    pfid_addr6,
 						    &r->src.addr.p.dyn->
 						    pfid_mask6,
 						    daddr, AF_INET6);
 						break;
 #endif /* INET6 */
 					}
 				} else
 					PF_POOLMASK(naddr,
 					    &r->src.addr.v.a.addr,
 					    &r->src.addr.v.a.mask, daddr,
 					    pd->af);
 				break;
 			}
 			break;
 		case PF_RDR: {
 			if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
 				return (NULL);
 			if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
 			    PF_POOL_BITMASK)
 				PF_POOLMASK(naddr, naddr,
 				    &r->rpool.cur->addr.v.a.mask, daddr,
 				    pd->af);
 
 			/*
 			 * With a proxy port range, keep the offset of dport
 			 * from the rule's first dst port, modulo the size of
 			 * the range; with a single proxy port, use it as is.
 			 */
 			if (r->rpool.proxy_port[1]) {
 				u_int32_t	tmp_nport;
 
 				tmp_nport = ((ntohs(dport) -
 				    ntohs(r->dst.port[0])) %
 				    (r->rpool.proxy_port[1] -
 				    r->rpool.proxy_port[0] + 1)) +
 				    r->rpool.proxy_port[0];
 
 				/* wrap around if necessary */
 				if (tmp_nport > 65535)
 					tmp_nport -= 65535;
 				*nport = htons((u_int16_t)tmp_nport);
 			} else if (r->rpool.proxy_port[0])
 				*nport = htons(r->rpool.proxy_port[0]);
 			break;
 		}
 		default:
 			return (NULL);
 		}
 	}
 
 	return (r);
 }
 
 /*
  * Find the local socket that belongs to the TCP or UDP packet described
  * by pd and record its credentials in pd->lookup (uid and gid; the pid as
  * well in the non-FreeBSD variant).
  *
  * pd->lookup is first reset to UID_MAX/GID_MAX/NO_PID.  On FreeBSD a
  * non-NULL inp_arg short-circuits the lookup and its socket is used.
  *
  * Returns 1 when credentials were recorded, -1 otherwise (NULL pd,
  * unsupported protocol or address family, missing header, or no matching
  * socket).
  */
 int
 #ifdef __FreeBSD__
 pf_socket_lookup(int direction, struct pf_pdesc *pd, struct inpcb *inp_arg)
 #else
 pf_socket_lookup(int direction, struct pf_pdesc *pd)
 #endif
 {
 	struct pf_addr		*saddr, *daddr;
 	u_int16_t		 sport, dport;
 #ifdef __FreeBSD__
 	struct inpcbinfo	*pi;
 #else
 	struct inpcbtable	*tb;
 #endif
 	struct inpcb		*inp;
 
 	if (pd == NULL)
 		return (-1);
 	pd->lookup.uid = UID_MAX;
 	pd->lookup.gid = GID_MAX;
 	pd->lookup.pid = NO_PID;		/* XXX: revisit */
 #ifdef __FreeBSD__
 	/* the caller already knows the pcb: use its socket directly */
 	if (inp_arg != NULL) {
 		INP_LOCK_ASSERT(inp_arg);
 		if (inp_arg->inp_socket) {
 			pd->lookup.uid = inp_arg->inp_socket->so_cred->cr_uid;
 			pd->lookup.gid =
 			    inp_arg->inp_socket->so_cred->cr_groups[0];
 			return (1);
 		} else
 			return (-1);
 	}
 #endif
 	/* fetch the ports and select the pcb table for the protocol */
 	switch (pd->proto) {
 	case IPPROTO_TCP:
 		if (pd->hdr.tcp == NULL)
 			return (-1);
 		sport = pd->hdr.tcp->th_sport;
 		dport = pd->hdr.tcp->th_dport;
 #ifdef __FreeBSD__
 		pi = &tcbinfo;
 #else
 		tb = &tcbtable;
 #endif
 		break;
 	case IPPROTO_UDP:
 		if (pd->hdr.udp == NULL)
 			return (-1);
 		sport = pd->hdr.udp->uh_sport;
 		dport = pd->hdr.udp->uh_dport;
 #ifdef __FreeBSD__
 		pi = &udbinfo;
 #else
 		tb = &udbtable;
 #endif
 		break;
 	default:
 		return (-1);
 	}
 	/* for anything but PF_IN, swap the endpoints before the lookup */
 	if (direction == PF_IN) {
 		saddr = pd->src;
 		daddr = pd->dst;
 	} else {
 		u_int16_t	p;
 
 		p = sport;
 		sport = dport;
 		dport = p;
 		saddr = pd->dst;
 		daddr = pd->src;
 	}
 	switch (pd->af) {
 #ifdef INET
 	case AF_INET:
 #ifdef __FreeBSD__
 		INP_INFO_RLOCK(pi);	/* XXX LOR */
 		/* exact lookup first, then retry with INPLOOKUP_WILDCARD */
 		inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4,
 			dport, 0, NULL);
 		if (inp == NULL) {
 			inp = in_pcblookup_hash(pi, saddr->v4, sport,
 			   daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
 			if(inp == NULL) {
 				INP_INFO_RUNLOCK(pi);
 				return (-1);
 			}
 		}
 #else
 		inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport);
 		if (inp == NULL) {
 			inp = in_pcblookup_listen(tb, daddr->v4, dport, 0);
 			if (inp == NULL)
 				return (-1);
 		}
 #endif
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 #ifdef __FreeBSD__
 		INP_INFO_RLOCK(pi);
 		inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
 			&daddr->v6, dport, 0, NULL);
 		if (inp == NULL) {
 			inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
 			&daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
 			if (inp == NULL) {
 				INP_INFO_RUNLOCK(pi);
 				return (-1);
 			}
 		}
 #else
 		inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6,
 		    dport);
 		if (inp == NULL) {
 			inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0);
 			if (inp == NULL)
 				return (-1);
 		}
 #endif
 		break;
 #endif /* INET6 */
 
 	default:
 		return (-1);
 	}
 #ifdef __FreeBSD__
 	/* take the pcb lock before the pcbinfo lock is released */
 	INP_RLOCK(inp);
 	INP_INFO_RUNLOCK(pi);
 	if ((inp->inp_socket == NULL) || (inp->inp_socket->so_cred == NULL)) {
 		INP_RUNLOCK(inp);
 		return (-1);
 	}
 	pd->lookup.uid = inp->inp_socket->so_cred->cr_uid;
 	pd->lookup.gid = inp->inp_socket->so_cred->cr_groups[0];
 	INP_RUNLOCK(inp);
 #else
 	pd->lookup.uid = inp->inp_socket->so_euid;
 	pd->lookup.gid = inp->inp_socket->so_egid;
 	pd->lookup.pid = inp->inp_socket->so_cpid;
 #endif
 	return (1);
 }
 
 /*
  * Scan the TCP options of the header at offset off in mbuf m for a window
  * scale option.  th_off is the header length in 32-bit words.
  *
  * Returns the shift count of the last window scale option seen, limited to
  * TCP_MAX_WINSHIFT and or'ed with PF_WSCALE_FLAG, or 0 when there are no
  * options, the header cannot be pulled, or no such option is present.
  */
 u_int8_t
 pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
 {
 	int		 left = th_off << 2;	/* left <= sizeof(buf) */
 	u_int8_t	 buf[60];
 	u_int8_t	*cur, olen;
 	u_int8_t	 shift = 0;
 
 	if (left <= sizeof(struct tcphdr) ||
 	    !pf_pull_hdr(m, off, buf, left, NULL, NULL, af))
 		return (0);
 
 	/* skip the fixed header, then walk the option bytes */
 	left -= sizeof(struct tcphdr);
 	for (cur = buf + sizeof(struct tcphdr); left >= 3; ) {
 		/* single-byte options carry no length field */
 		if (*cur == TCPOPT_EOL || *cur == TCPOPT_NOP) {
 			cur++;
 			left--;
 			continue;
 		}
 		if (*cur == TCPOPT_WINDOW) {
 			shift = cur[2];
 			if (shift > TCP_MAX_WINSHIFT)
 				shift = TCP_MAX_WINSHIFT;
 			shift |= PF_WSCALE_FLAG;
 		}
 		/* step over the option; a bogus length still advances by 2 */
 		olen = (cur[1] < 2) ? 2 : cur[1];
 		left -= olen;
 		cur += olen;
 	}
 	return (shift);
 }
 
 /*
  * Scan the TCP options of the header at offset off in mbuf m for a maximum
  * segment size option.  th_off is the header length in 32-bit words.
  *
  * Returns 0 when the header carries no options or cannot be pulled;
  * otherwise the value of the last MSS option seen in host byte order, or
  * tcp_mssdflt when the options contain none.
  */
 u_int16_t
 pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
 {
 	int		 left = th_off << 2;	/* left <= sizeof(buf) */
 	u_int8_t	 buf[60];
 	u_int8_t	*cur, olen;
 	u_int16_t	 mss = tcp_mssdflt;
 
 	if (left <= sizeof(struct tcphdr) ||
 	    !pf_pull_hdr(m, off, buf, left, NULL, NULL, af))
 		return (0);
 
 	/* skip the fixed header, then walk the option bytes */
 	left -= sizeof(struct tcphdr);
 	for (cur = buf + sizeof(struct tcphdr); left >= TCPOLEN_MAXSEG; ) {
 		/* single-byte options carry no length field */
 		if (*cur == TCPOPT_EOL || *cur == TCPOPT_NOP) {
 			cur++;
 			left--;
 			continue;
 		}
 		if (*cur == TCPOPT_MAXSEG) {
 			/* the value may be unaligned: copy, then byte-swap */
 			bcopy((caddr_t)(cur + 2), (caddr_t)&mss, 2);
 			NTOHS(mss);
 		}
 		/* step over the option; a bogus length still advances by 2 */
 		olen = (cur[1] < 2) ? 2 : cur[1];
 		left -= olen;
 		cur += olen;
 	}
 	return (mss);
 }
 
 /*
  * Compute the MSS to use towards addr.
  *
  * A route to addr is looked up; when it has an interface, the MSS is the
  * interface MTU minus the IP (or IPv6) and TCP header sizes, but not less
  * than tcp_mssdflt.  Without such a route tcp_mssdflt is used.  The result
  * is then limited to offer and raised to at least 64.
  */
 u_int16_t
 pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
 {
 #ifdef INET
 	struct sockaddr_in	*dst;
 	struct route		 ro;
 #endif /* INET */
 #ifdef INET6
 	struct sockaddr_in6	*dst6;
 	struct route_in6	 ro6;
 #endif /* INET6 */
 	struct rtentry		*rt = NULL;
 	int			 hlen = 0;	/* make the compiler happy */
 	u_int16_t		 mss = tcp_mssdflt;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		hlen = sizeof(struct ip);
 		/* build a zeroed route request for the IPv4 destination */
 		bzero(&ro, sizeof(ro));
 		dst = (struct sockaddr_in *)&ro.ro_dst;
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = addr->v4;
 #ifdef __FreeBSD__
 #ifdef RTF_PRCLONING
 		rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING));
 #else /* !RTF_PRCLONING */
-		rtalloc_ign(&ro, RTF_CLONING);
+		in_rtalloc_ign(&ro, RTF_CLONING, 0);
 #endif
 #else /* ! __FreeBSD__ */
 		rtalloc_noclone(&ro, NO_CLONING);
 #endif
 		rt = ro.ro_rt;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		hlen = sizeof(struct ip6_hdr);
 		/* same for IPv6, using the route_in6 variant */
 		bzero(&ro6, sizeof(ro6));
 		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
 		dst6->sin6_family = AF_INET6;
 		dst6->sin6_len = sizeof(*dst6);
 		dst6->sin6_addr = addr->v6;
 #ifdef __FreeBSD__
 #ifdef RTF_PRCLONING
 		rtalloc_ign((struct route *)&ro6,
 		    (RTF_CLONING | RTF_PRCLONING));
 #else /* !RTF_PRCLONING */
 		rtalloc_ign((struct route *)&ro6, RTF_CLONING);
 #endif
 #else /* ! __FreeBSD__ */
 		rtalloc_noclone((struct route *)&ro6, NO_CLONING);
 #endif
 		rt = ro6.ro_rt;
 		break;
 #endif /* INET6 */
 	}
 
 	/* NOTE(review): the route is only released when it has an interface */
 	if (rt && rt->rt_ifp) {
 		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
 		mss = max(tcp_mssdflt, mss);
 		RTFREE(rt);
 	}
 	mss = min(mss, offer);
 	mss = max(mss, 64);		/* sanity - at least max opt space */
 	return (mss);
 }
 
 /*
  * Set the routing interface of state s from its rule's address pool.
  *
  * s->rt_kif is always reset to NULL first.  When the rule has a route
  * option other than PF_FASTROUTE and the state's address family is
  * compiled in, pf_map_addr() selects s->rt_addr for saddr and s->rt_kif
  * is taken from the pool entry that is current afterwards.
  */
 void
 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr)
 {
 	struct pf_rule *rule = s->rule.ptr;
 
 	s->rt_kif = NULL;
 	if (rule->rt && rule->rt != PF_FASTROUTE) {
 		switch (s->af) {
 #ifdef INET
 		case AF_INET:
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 #endif /* INET6 */
 			/* both families are handled the same way */
 			pf_map_addr(s->af, rule, saddr, &s->rt_addr, NULL,
 			    &s->nat_src_node);
 			s->rt_kif = rule->rpool.cur->kif;
 			break;
 		}
 	}
 }
 
 /*
  * Evaluate a TCP packet against the translation and filter rulesets.
  *
  * The packet is first checked for NAT/BINAT (outgoing) or RDR/BINAT
  * (incoming) translation, then matched against the active filter rules.
  * The last matching rule, or the first matching "quick" rule, is stored
  * in *rm together with its anchor rule (*am) and ruleset (*rsm).  Blocked
  * packets may be answered with a TCP RST or an ICMP error, depending on
  * the rule flags.  When the rule keeps state, a translation rule matched,
  * or PFDESC_TCP_NORM is set in pd->flags, a new state is created and
  * returned in *sm.
  *
  * Returns PF_PASS, PF_DROP or PF_SYNPROXY_DROP.
  */
 int
 pf_test_tcp(struct pf_rule **rm, struct pf_state **sm, int direction,
     struct pfi_kif *kif, struct mbuf *m, int off, void *h,
 #ifdef __FreeBSD__
     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
     struct ifqueue *ifq, struct inpcb *inp)
 #else
     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
     struct ifqueue *ifq)
 #endif
 {
 	struct pf_rule		*nr = NULL;
 	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
 	struct tcphdr		*th = pd->hdr.tcp;
 	u_int16_t		 bport, nport = 0;
 	sa_family_t		 af = pd->af;
 	struct pf_rule		*r, *a = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	struct pf_src_node	*nsn = NULL;
 	u_short			 reason;
 	int			 rewrite = 0;
 	int			 tag = -1, rtableid = -1;
 	u_int16_t		 mss = tcp_mssdflt;
 	int			 asd = 0;
 	int			 match = 0;
 
 	if (pf_check_congestion(ifq)) {
 		REASON_SET(&reason, PFRES_CONGEST);
 		return (PF_DROP);
 	}
 
 #ifdef __FreeBSD__
 	/*
 	 * Do the socket lookup up front when the pcb is known; with
 	 * debug_pfugidhack set, do it with the pf lock dropped.
 	 */
 	if (inp != NULL)
 		pd->lookup.done = pf_socket_lookup(direction, pd, inp);
 	else if (debug_pfugidhack) {
 		PF_UNLOCK();
 		DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
 		pd->lookup.done = pf_socket_lookup(direction, pd, inp);
 		PF_LOCK();
 	}
 #endif
 
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
 
 	/*
 	 * Translation.  bport keeps the original port and pd->baddr the
 	 * original address so that the rewrite can be undone later.  A
 	 * translation rule with natpass set skips filter rule evaluation.
 	 */
 	if (direction == PF_OUT) {
 		bport = nport = th->th_sport;
 		/* check outgoing packet for BINAT/NAT */
 		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
 		    saddr, th->th_sport, daddr, th->th_dport,
 		    &pd->naddr, &nport)) != NULL) {
 			PF_ACPY(&pd->baddr, saddr, af);
 			pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
 			    &th->th_sum, &pd->naddr, nport, 0, af);
 			rewrite++;
 			if (nr->natpass)
 				r = NULL;
 			pd->nat_rule = nr;
 		}
 	} else {
 		bport = nport = th->th_dport;
 		/* check incoming packet for BINAT/RDR */
 		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
 		    saddr, th->th_sport, daddr, th->th_dport,
 		    &pd->naddr, &nport)) != NULL) {
 			PF_ACPY(&pd->baddr, daddr, af);
 			pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
 			    &th->th_sum, &pd->naddr, nport, 0, af);
 			rewrite++;
 			if (nr->natpass)
 				r = NULL;
 			pd->nat_rule = nr;
 		}
 	}
 
 	/*
 	 * Filter rule evaluation.  On a mismatch the rule's skip pointer
 	 * for that criterion, or simply the next rule, is followed.
 	 */
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != direction)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != af)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != IPPROTO_TCP)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
 		    r->src.neg, kif))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		else if (r->src.port_op && !pf_match_port(r->src.port_op,
 		    r->src.port[0], r->src.port[1], th->th_sport))
 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
 		    r->dst.neg, NULL))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
 		    r->dst.port[0], r->dst.port[1], th->th_dport))
 			r = r->skip[PF_SKIP_DST_PORT].ptr;
 		else if (r->tos && !(r->tos == pd->tos))
 			r = TAILQ_NEXT(r, entries);
 		else if (r->rule_flag & PFRULE_FRAGMENT)
 			r = TAILQ_NEXT(r, entries);
 		else if ((r->flagset & th->th_flags) != r->flags)
 			r = TAILQ_NEXT(r, entries);
 		/* uid/gid rules: do the socket lookup once, on first need */
 		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
 #ifdef __FreeBSD__
 		    pf_socket_lookup(direction, pd, inp), 1)) &&
 #else
 		    pf_socket_lookup(direction, pd), 1)) &&
 #endif
 		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
 		    pd->lookup.uid))
 			r = TAILQ_NEXT(r, entries);
 		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
 #ifdef __FreeBSD__
 		    pf_socket_lookup(direction, pd, inp), 1)) &&
 #else
 		    pf_socket_lookup(direction, pd), 1)) &&
 #endif
 		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
 		    pd->lookup.gid))
 			r = TAILQ_NEXT(r, entries);
 		else if (r->prob && r->prob <= arc4random())
 			r = TAILQ_NEXT(r, entries);
 		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
 			r = TAILQ_NEXT(r, entries);
 		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
 		    pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))
 			r = TAILQ_NEXT(r, entries);
 		else {
 			if (r->tag)
 				tag = r->tag;
 			if (r->rtableid >= 0)
 				rtableid = r->rtableid;
 			if (r->anchor == NULL) {
 				/* record the match; "quick" ends evaluation */
 				match = 1;
 				*rm = r;
 				*am = a;
 				*rsm = ruleset;
 				if ((*rm)->quick)
 					break;
 				r = TAILQ_NEXT(r, entries);
 			} else
 				pf_step_into_anchor(&asd, &ruleset,
 				    PF_RULESET_FILTER, &r, &a, &match);
 		}
 		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
 		    PF_RULESET_FILTER, &r, &a, &match))
 			break;
 	}
 	/* continue with whatever *rm, *am and *rsm hold at this point */
 	r = *rm;
 	a = *am;
 	ruleset = *rsm;
 
 	REASON_SET(&reason, PFRES_MATCH);
 
 	if (r->log || (nr != NULL && nr->natpass && nr->log)) {
 		if (rewrite)
 #ifdef __FreeBSD__
 			m_copyback(m, off, sizeof(*th), (caddr_t)th);
 #else
 			m_copyback(m, off, sizeof(*th), th);
 #endif
 		PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
 		    a, ruleset, pd);
 	}
 
 	/* drop rule with a return-rst / return-icmp / return flag */
 	if ((r->action == PF_DROP) &&
 	    ((r->rule_flag & PFRULE_RETURNRST) ||
 	    (r->rule_flag & PFRULE_RETURNICMP) ||
 	    (r->rule_flag & PFRULE_RETURN))) {
 		/* undo NAT changes, if they have taken place */
 		if (nr != NULL) {
 			if (direction == PF_OUT) {
 				pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
 				    &th->th_sum, &pd->baddr, bport, 0, af);
 				rewrite++;
 			} else {
 				pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
 				    &th->th_sum, &pd->baddr, bport, 0, af);
 				rewrite++;
 			}
 		}
 		if (((r->rule_flag & PFRULE_RETURNRST) ||
 		    (r->rule_flag & PFRULE_RETURN)) &&
 		    !(th->th_flags & TH_RST)) {
 			/* ack covers the payload, plus one each for SYN and FIN */
 			u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
 
 			if (th->th_flags & TH_SYN)
 				ack++;
 			if (th->th_flags & TH_FIN)
 				ack++;
 #ifdef __FreeBSD__
 			pf_send_tcp(m, r, af, pd->dst,
 #else
 			pf_send_tcp(r, af, pd->dst,
 #endif
 			    pd->src, th->th_dport, th->th_sport,
 			    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
 			    r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
 		} else if ((af == AF_INET) && r->return_icmp)
 			pf_send_icmp(m, r->return_icmp >> 8,
 			    r->return_icmp & 255, af, r);
 		else if ((af == AF_INET6) && r->return_icmp6)
 			pf_send_icmp(m, r->return_icmp6 >> 8,
 			    r->return_icmp6 & 255, af, r);
 	}
 
 	if (r->action == PF_DROP)
 		return (PF_DROP);
 
 	if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
 		REASON_SET(&reason, PFRES_MEMORY);
 		return (PF_DROP);
 	}
 
 	if (r->keep_state || nr != NULL ||
 	    (pd->flags & PFDESC_TCP_NORM)) {
 		/* create new state */
 		u_int16_t	 len;
 		struct pf_state	*s = NULL;
 		struct pf_src_node *sn = NULL;
 
 		/* bytes following the TCP header */
 		len = pd->tot_len - off - (th->th_off << 2);
 
 		/* check maximums */
 		if (r->max_states && (r->states >= r->max_states)) {
 			pf_status.lcounters[LCNT_STATES]++;
 			REASON_SET(&reason, PFRES_MAXSTATES);
 			goto cleanup;
 		}
 		/* src node for filter rule */
 		if ((r->rule_flag & PFRULE_SRCTRACK ||
 		    r->rpool.opts & PF_POOL_STICKYADDR) &&
 		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
 			REASON_SET(&reason, PFRES_SRCLIMIT);
 			goto cleanup;
 		}
 		/* src node for translation rule */
 		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
 		    ((direction == PF_OUT &&
 		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
 		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
 			REASON_SET(&reason, PFRES_SRCLIMIT);
 			goto cleanup;
 		}
 		s = pool_get(&pf_state_pl, PR_NOWAIT);
 		if (s == NULL) {
 			REASON_SET(&reason, PFRES_MEMORY);
 cleanup:
 			/* release source nodes that have no states and no expiry set */
 			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
 				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
 				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 				pf_status.src_nodes--;
 				pool_put(&pf_src_tree_pl, sn);
 			}
 			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
 			    nsn->expire == 0) {
 				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
 				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 				pf_status.src_nodes--;
 				pool_put(&pf_src_tree_pl, nsn);
 			}
 			return (PF_DROP);
 		}
 		bzero(s, sizeof(*s));
 		s->rule.ptr = r;
 		s->nat_rule.ptr = nr;
 		s->anchor.ptr = a;
 		STATE_INC_COUNTERS(s);
 		s->allow_opts = r->allow_opts;
 		s->log = r->log & PF_LOG_ALL;
 		if (nr != NULL)
 			s->log |= nr->log & PF_LOG_ALL;
 		s->proto = IPPROTO_TCP;
 		s->direction = direction;
 		s->af = af;
 		/*
 		 * Fill in the lan/gwy/ext endpoints.  With a translation
 		 * rule, the pre-translation address and port (pd->baddr,
 		 * bport) go into lan for outgoing and into gwy for incoming
 		 * packets; without one, lan and gwy are identical.
 		 */
 		if (direction == PF_OUT) {
 			PF_ACPY(&s->gwy.addr, saddr, af);
 			s->gwy.port = th->th_sport;		/* sport */
 			PF_ACPY(&s->ext.addr, daddr, af);
 			s->ext.port = th->th_dport;
 			if (nr != NULL) {
 				PF_ACPY(&s->lan.addr, &pd->baddr, af);
 				s->lan.port = bport;
 			} else {
 				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
 				s->lan.port = s->gwy.port;
 			}
 		} else {
 			PF_ACPY(&s->lan.addr, daddr, af);
 			s->lan.port = th->th_dport;
 			PF_ACPY(&s->ext.addr, saddr, af);
 			s->ext.port = th->th_sport;
 			if (nr != NULL) {
 				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
 				s->gwy.port = bport;
 			} else {
 				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
 				s->gwy.port = s->lan.port;
 			}
 		}
 
 		/* initialize sequence number tracking for the sender */
 		s->src.seqlo = ntohl(th->th_seq);
 		s->src.seqhi = s->src.seqlo + len + 1;
 		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
 		    r->keep_state == PF_STATE_MODULATE) {
 			/* Generate sequence number modulator */
 #ifdef __FreeBSD__
 			while ((s->src.seqdiff =
 			    pf_new_isn(s) - s->src.seqlo) == 0)
 				;	
 #else
 			while ((s->src.seqdiff =
 			    tcp_rndiss_next() - s->src.seqlo) == 0)
 				;
 #endif
 			pf_change_a(&th->th_seq, &th->th_sum,
 			    htonl(s->src.seqlo + s->src.seqdiff), 0);
 			rewrite = 1;
 		} else
 			s->src.seqdiff = 0;
 		if (th->th_flags & TH_SYN) {
 			s->src.seqhi++;
 			s->src.wscale = pf_get_wscale(m, off, th->th_off, af);
 		}
 		s->src.max_win = MAX(ntohs(th->th_win), 1);
 		if (s->src.wscale & PF_WSCALE_MASK) {
 			/* Remove scale factor from initial window */
 			int win = s->src.max_win;
 			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
 			s->src.max_win = (win - 1) >>
 			    (s->src.wscale & PF_WSCALE_MASK);
 		}
 		if (th->th_flags & TH_FIN)
 			s->src.seqhi++;
 		s->dst.seqhi = 1;
 		s->dst.max_win = 1;
 		s->src.state = TCPS_SYN_SENT;
 		s->dst.state = TCPS_CLOSED;
 		s->creation = time_second;
 		s->expire = time_second;
 		s->timeout = PFTM_TCP_FIRST_PACKET;
 		pf_set_rt_ifp(s, saddr);
 		if (sn != NULL) {
 			s->src_node = sn;
 			s->src_node->states++;
 		}
 		if (nsn != NULL) {
 			PF_ACPY(&nsn->raddr, &pd->naddr, af);
 			s->nat_src_node = nsn;
 			s->nat_src_node->states++;
 		}
 		/* each failure below tears the half-built state down again */
 		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
 		    off, pd, th, &s->src, &s->dst)) {
 			REASON_SET(&reason, PFRES_MEMORY);
 			pf_src_tree_remove_state(s);
 			STATE_DEC_COUNTERS(s);
 			pool_put(&pf_state_pl, s);
 			return (PF_DROP);
 		}
 		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
 		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
 		    &s->src, &s->dst, &rewrite)) {
 			/* This really shouldn't happen!!! */
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("pf_normalize_tcp_stateful failed on first pkt"));
 			pf_normalize_tcp_cleanup(s);
 			pf_src_tree_remove_state(s);
 			STATE_DEC_COUNTERS(s);
 			pool_put(&pf_state_pl, s);
 			return (PF_DROP);
 		}
 		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
 			pf_normalize_tcp_cleanup(s);
 			REASON_SET(&reason, PFRES_STATEINS);
 			pf_src_tree_remove_state(s);
 			STATE_DEC_COUNTERS(s);
 			pool_put(&pf_state_pl, s);
 			return (PF_DROP);
 		} else
 			*sm = s;
 		if (tag > 0) {
 			pf_tag_ref(tag);
 			s->tag = tag;
 		}
 		/*
 		 * synproxy: answer the initial SYN with a SYN|ACK of our
 		 * own and drop the packet instead of passing it on.
 		 */
 		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
 		    r->keep_state == PF_STATE_SYNPROXY) {
 			s->src.state = PF_TCPS_PROXY_SRC;
 			/* undo the translation before replying */
 			if (nr != NULL) {
 				if (direction == PF_OUT) {
 					pf_change_ap(saddr, &th->th_sport,
 					    pd->ip_sum, &th->th_sum, &pd->baddr,
 					    bport, 0, af);
 				} else {
 					pf_change_ap(daddr, &th->th_dport,
 					    pd->ip_sum, &th->th_sum, &pd->baddr,
 					    bport, 0, af);
 				}
 			}
 			s->src.seqhi = htonl(arc4random());
 			/* Find mss option */
 			mss = pf_get_mss(m, off, th->th_off, af);
 			mss = pf_calc_mss(saddr, af, mss);
 			mss = pf_calc_mss(daddr, af, mss);
 			s->src.mss = mss;
 #ifdef __FreeBSD__
 			pf_send_tcp(NULL, r, af, daddr, saddr, th->th_dport,
 #else
 			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
 #endif
 			    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
 			    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
 			REASON_SET(&reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		}
 	}
 
 	/* copy back packet headers if we performed NAT operations */
 	if (rewrite)
 		m_copyback(m, off, sizeof(*th), (caddr_t)th);
 
 	return (PF_PASS);
 }
 
 /*
  * Run a UDP packet through address translation and the active filter
  * ruleset, and create a state entry when the final rule keeps state or a
  * translation rule applied.
  *
  * On return *rm, *am and *rsm hold the last matching rule, its anchor rule
  * and its ruleset.  They are left as passed in when no rule matches, and
  * *rm is dereferenced unconditionally after the rule loop, so it must
  * point at a valid rule on entry.  *sm receives the new state once it has
  * been inserted successfully.
  *
  * Returns PF_PASS, or PF_DROP when pf_check_congestion() fires, the final
  * rule's action is PF_DROP, tagging fails, or state creation fails.
  */
 int
 pf_test_udp(struct pf_rule **rm, struct pf_state **sm, int direction,
     struct pfi_kif *kif, struct mbuf *m, int off, void *h,
 #ifdef __FreeBSD__
     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
     struct ifqueue *ifq, struct inpcb *inp)
 #else
     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
     struct ifqueue *ifq)
 #endif
 {
 	struct pf_rule		*nr = NULL;
 	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
 	struct udphdr		*uh = pd->hdr.udp;
 	u_int16_t		 bport, nport = 0;
 	sa_family_t		 af = pd->af;
 	struct pf_rule		*r, *a = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	struct pf_src_node	*nsn = NULL;
 	u_short			 reason;
 	int			 rewrite = 0;
 	int			 tag = -1, rtableid = -1;
 	int			 asd = 0;
 	int			 match = 0;
 
 	/* Drop early when pf_check_congestion() reports congestion on ifq. */
 	if (pf_check_congestion(ifq)) {
 		REASON_SET(&reason, PFRES_CONGEST);
 		return (PF_DROP);
 	}
 
 #ifdef __FreeBSD__
 	/*
 	 * Do the socket lookup up front when the caller supplied an inpcb.
 	 * Without one, the lookup is only done here if debug_pfugidhack is
 	 * set, and then with the pf lock released around the call.
 	 */
 	if (inp != NULL)
 		pd->lookup.done = pf_socket_lookup(direction, pd, inp);
 	else if (debug_pfugidhack) {
 		PF_UNLOCK();
 		DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n"));
 		pd->lookup.done = pf_socket_lookup(direction, pd, inp);
 		PF_LOCK();
 	}
 #endif
 
 	/* Rule evaluation starts at the head of the active main filter rules. */
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
 
 	/*
 	 * Translation.  bport remembers the original port so it can be
 	 * restored below or recorded in the state; pd->baddr does the same
 	 * for the original address.
 	 */
 	if (direction == PF_OUT) {
 		bport = nport = uh->uh_sport;
 		/* check outgoing packet for BINAT/NAT */
 		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
 		    saddr, uh->uh_sport, daddr, uh->uh_dport,
 		    &pd->naddr, &nport)) != NULL) {
 			PF_ACPY(&pd->baddr, saddr, af);
 			pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
 			    &uh->uh_sum, &pd->naddr, nport, 1, af);
 			rewrite++;
 			/* natpass: clearing r skips the filter rule loop. */
 			if (nr->natpass)
 				r = NULL;
 			pd->nat_rule = nr;
 		}
 	} else {
 		bport = nport = uh->uh_dport;
 		/* check incoming packet for BINAT/RDR */
 		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
 		    saddr, uh->uh_sport, daddr, uh->uh_dport, &pd->naddr,
 		    &nport)) != NULL) {
 			PF_ACPY(&pd->baddr, daddr, af);
 			pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
 			    &uh->uh_sum, &pd->naddr, nport, 1, af);
 			rewrite++;
 			/* natpass: clearing r skips the filter rule loop. */
 			if (nr->natpass)
 				r = NULL;
 			pd->nat_rule = nr;
 		}
 	}
 
 	/*
 	 * Filter rule loop.  A failed test either follows the rule's skip[]
 	 * pointer for that criterion or advances to the next list entry.
 	 * Addresses and ports are compared after translation.
 	 */
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != direction)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != af)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != IPPROTO_UDP)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
 		    r->src.neg, kif))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		else if (r->src.port_op && !pf_match_port(r->src.port_op,
 		    r->src.port[0], r->src.port[1], uh->uh_sport))
 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
 		    r->dst.neg, NULL))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
 		    r->dst.port[0], r->dst.port[1], uh->uh_dport))
 			r = r->skip[PF_SKIP_DST_PORT].ptr;
 		else if (r->tos && !(r->tos == pd->tos))
 			r = TAILQ_NEXT(r, entries);
 		/* Rules flagged PFRULE_FRAGMENT never match in this function. */
 		else if (r->rule_flag & PFRULE_FRAGMENT)
 			r = TAILQ_NEXT(r, entries);
 		/*
 		 * uid/gid tests: the socket lookup runs lazily, only when
 		 * pd->lookup.done is still zero.  The ", 1" comma expression
 		 * makes that sub-condition true regardless of the lookup
 		 * result, so the pf_match_uid()/pf_match_gid() call always
 		 * follows.
 		 */
 		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
 #ifdef __FreeBSD__
 		    pf_socket_lookup(direction, pd, inp), 1)) &&
 #else
 		    pf_socket_lookup(direction, pd), 1)) &&
 #endif
 		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
 		    pd->lookup.uid))
 			r = TAILQ_NEXT(r, entries);
 		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
 #ifdef __FreeBSD__
 		    pf_socket_lookup(direction, pd, inp), 1)) &&
 #else
 		    pf_socket_lookup(direction, pd), 1)) &&
 #endif
 		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
 		    pd->lookup.gid))
 			r = TAILQ_NEXT(r, entries);
 		/* Probabilistic rule: skipped when the random draw is >= prob. */
 		else if (r->prob && r->prob <= arc4random())
 			r = TAILQ_NEXT(r, entries);
 		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
 			r = TAILQ_NEXT(r, entries);
 		/* Rules carrying an OS fingerprint never match UDP here. */
 		else if (r->os_fingerprint != PF_OSFP_ANY)
 			r = TAILQ_NEXT(r, entries);
 		else {
 			/*
 			 * Every criterion passed.  tag and rtableid are taken
 			 * from each rule that gets this far (anchors
 			 * included), not only from the final match.
 			 */
 			if (r->tag)
 				tag = r->tag;
 			if (r->rtableid >= 0)
 				rtableid = r->rtableid;
 			if (r->anchor == NULL) {
 				/* Last match wins unless the rule is "quick". */
 				match = 1;
 				*rm = r;
 				*am = a;
 				*rsm = ruleset;
 				if ((*rm)->quick)
 					break;
 				r = TAILQ_NEXT(r, entries);
 			} else
 				pf_step_into_anchor(&asd, &ruleset,
 				    PF_RULESET_FILTER, &r, &a, &match);
 		}
 		/*
 		 * End of the current rule list: step out of the anchor, and
 		 * stop evaluating when pf_step_out_of_anchor() returns nonzero.
 		 */
 		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
 		    PF_RULESET_FILTER, &r, &a, &match))
 			break;
 	}
 	/* Continue with whatever rule/anchor/ruleset the outputs now hold. */
 	r = *rm;
 	a = *am;
 	ruleset = *rsm;
 
 	REASON_SET(&reason, PFRES_MATCH);
 
 	/*
 	 * Log on behalf of the filter rule, or of the translation rule when
 	 * it is natpass and has logging set.  A rewritten header is copied
 	 * into the mbuf first, presumably so the logged packet shows the
 	 * translated values.
 	 */
 	if (r->log || (nr != NULL && nr->natpass && nr->log)) {
 		if (rewrite)
 #ifdef __FreeBSD__
 			m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
 #else
 			m_copyback(m, off, sizeof(*uh), uh);
 #endif
 		PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
 		    a, ruleset, pd);
 	}
 
 	/* Drop rule that asks for an ICMP error to be returned. */
 	if ((r->action == PF_DROP) &&
 	    ((r->rule_flag & PFRULE_RETURNICMP) ||
 	    (r->rule_flag & PFRULE_RETURN))) {
 		/* undo NAT changes, if they have taken place */
 		if (nr != NULL) {
 			if (direction == PF_OUT) {
 				pf_change_ap(saddr, &uh->uh_sport, pd->ip_sum,
 				    &uh->uh_sum, &pd->baddr, bport, 1, af);
 				rewrite++;
 			} else {
 				pf_change_ap(daddr, &uh->uh_dport, pd->ip_sum,
 				    &uh->uh_sum, &pd->baddr, bport, 1, af);
 				rewrite++;
 			}
 		}
 		/*
 		 * return_icmp/return_icmp6 pack two values: the high byte and
 		 * the low byte are passed separately (presumably ICMP type
 		 * and code -- confirm against pf_send_icmp()).
 		 */
 		if ((af == AF_INET) && r->return_icmp)
 			pf_send_icmp(m, r->return_icmp >> 8,
 			    r->return_icmp & 255, af, r);
 		else if ((af == AF_INET6) && r->return_icmp6)
 			pf_send_icmp(m, r->return_icmp6 >> 8,
 			    r->return_icmp6 & 255, af, r);
 	}
 
 	if (r->action == PF_DROP)
 		return (PF_DROP);
 
 	if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
 		REASON_SET(&reason, PFRES_MEMORY);
 		return (PF_DROP);
 	}
 
 	/* A translated packet always gets a state, even without keep_state. */
 	if (r->keep_state || nr != NULL) {
 		/* create new state */
 		struct pf_state	*s = NULL;
 		struct pf_src_node *sn = NULL;
 
 		/*
 		 * Each failure below jumps to the cleanup label, which lives
 		 * inside the allocation-failure branch so that all early
 		 * exits share the same source-node teardown.
 		 */
 		/* check maximums */
 		if (r->max_states && (r->states >= r->max_states)) {
 			pf_status.lcounters[LCNT_STATES]++;
 			REASON_SET(&reason, PFRES_MAXSTATES);
 			goto cleanup;
 		}
 		/* src node for filter rule */
 		if ((r->rule_flag & PFRULE_SRCTRACK ||
 		    r->rpool.opts & PF_POOL_STICKYADDR) &&
 		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
 			REASON_SET(&reason, PFRES_SRCLIMIT);
 			goto cleanup;
 		}
 		/*
 		 * NOTE(review): for PF_OUT, when the first
 		 * pf_insert_src_node() call succeeds (returns 0) the ||
 		 * falls through and the second call, keyed on saddr, runs
 		 * as well -- confirm this is intended.
 		 */
 		/* src node for translation rule */
 		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
 		    ((direction == PF_OUT &&
 		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
 		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
 			REASON_SET(&reason, PFRES_SRCLIMIT);
 			goto cleanup;
 		}
 		s = pool_get(&pf_state_pl, PR_NOWAIT);
 		if (s == NULL) {
 			REASON_SET(&reason, PFRES_MEMORY);
 cleanup:
 			/*
 			 * Release source nodes that no state references and
 			 * whose expire field is still zero; nsn is skipped
 			 * when it is the same node as sn.
 			 */
 			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
 				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
 				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 				pf_status.src_nodes--;
 				pool_put(&pf_src_tree_pl, sn);
 			}
 			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
 			    nsn->expire == 0) {
 				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
 				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 				pf_status.src_nodes--;
 				pool_put(&pf_src_tree_pl, nsn);
 			}
 			return (PF_DROP);
 		}
 		bzero(s, sizeof(*s));
 		s->rule.ptr = r;
 		s->nat_rule.ptr = nr;
 		s->anchor.ptr = a;
 		STATE_INC_COUNTERS(s);
 		s->allow_opts = r->allow_opts;
 		s->log = r->log & PF_LOG_ALL;
 		if (nr != NULL)
 			s->log |= nr->log & PF_LOG_ALL;
 		s->proto = IPPROTO_UDP;
 		s->direction = direction;
 		s->af = af;
 		/*
 		 * Fill in the three endpoints.  Outbound: gwy is the current
 		 * (possibly translated) source, ext the destination, and lan
 		 * the pre-translation source (baddr/bport), or a copy of gwy
 		 * when no translation took place.  Inbound: lan is the
 		 * current (possibly translated) destination, ext the source,
 		 * and gwy the pre-translation destination or a copy of lan.
 		 */
 		if (direction == PF_OUT) {
 			PF_ACPY(&s->gwy.addr, saddr, af);
 			s->gwy.port = uh->uh_sport;
 			PF_ACPY(&s->ext.addr, daddr, af);
 			s->ext.port = uh->uh_dport;
 			if (nr != NULL) {
 				PF_ACPY(&s->lan.addr, &pd->baddr, af);
 				s->lan.port = bport;
 			} else {
 				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
 				s->lan.port = s->gwy.port;
 			}
 		} else {
 			PF_ACPY(&s->lan.addr, daddr, af);
 			s->lan.port = uh->uh_dport;
 			PF_ACPY(&s->ext.addr, saddr, af);
 			s->ext.port = uh->uh_sport;
 			if (nr != NULL) {
 				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
 				s->gwy.port = bport;
 			} else {
 				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
 				s->gwy.port = s->lan.port;
 			}
 		}
 		s->src.state = PFUDPS_SINGLE;
 		s->dst.state = PFUDPS_NO_TRAFFIC;
 		s->creation = time_second;
 		s->expire = time_second;
 		s->timeout = PFTM_UDP_FIRST_PACKET;
 		pf_set_rt_ifp(s, saddr);
 		/* Attach the source nodes and count this state against them. */
 		if (sn != NULL) {
 			s->src_node = sn;
 			s->src_node->states++;
 		}
 		if (nsn != NULL) {
 			PF_ACPY(&nsn->raddr, &pd->naddr, af);
 			s->nat_src_node = nsn;
 			s->nat_src_node->states++;
 		}
 		/* Insertion failed: undo the accounting and free the state. */
 		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
 			REASON_SET(&reason, PFRES_STATEINS);
 			pf_src_tree_remove_state(s);
 			STATE_DEC_COUNTERS(s);
 			pool_put(&pf_state_pl, s);
 			return (PF_DROP);
 		} else
 			*sm = s;
 		/* Reference the tag and record it in the state. */
 		if (tag > 0) {
 			pf_tag_ref(tag);
 			s->tag = tag;
 		}
 	}
 
 	/* copy back packet headers if we performed NAT operations */
 	if (rewrite)
 		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
 
 	return (PF_PASS);
 }
 
 /*
  * Run an ICMP or ICMPv6 packet (selected by pd->proto) through address
  * translation and the active filter ruleset, and create a state entry
  * when the final rule keeps state or a translation rule applied --
  * except for the message types flagged in state_icmp below, which never
  * create state here.
  *
  * On return *rm, *am and *rsm hold the last matching rule, its anchor rule
  * and its ruleset.  They are left as passed in when no rule matches, and
  * *rm is dereferenced unconditionally after the rule loop, so it must
  * point at a valid rule on entry.  *sm receives the new state once it has
  * been inserted successfully.
  *
  * Returns PF_PASS, or PF_DROP when pf_check_congestion() fires, the final
  * rule's action is anything other than PF_PASS, tagging fails, or state
  * creation fails.
  */
 int
 pf_test_icmp(struct pf_rule **rm, struct pf_state **sm, int direction,
     struct pfi_kif *kif, struct mbuf *m, int off, void *h,
     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
     struct ifqueue *ifq)
 {
 	struct pf_rule		*nr = NULL;
 	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
 	struct pf_rule		*r, *a = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	struct pf_src_node	*nsn = NULL;
 	u_short			 reason;
 	u_int16_t		 icmpid = 0, bport, nport = 0;
 	sa_family_t		 af = pd->af;
 	u_int8_t		 icmptype = 0;	/* make the compiler happy */
 	u_int8_t		 icmpcode = 0;	/* make the compiler happy */
 	int			 state_icmp = 0;
 	int			 tag = -1, rtableid = -1;
 #ifdef INET6
 	/* Only the IPv6 paths defer the header copy-back, hence the guard. */
 	int			 rewrite = 0;
 #endif /* INET6 */
 	int			 asd = 0;
 	int			 match = 0;
 
 	/* Drop early when pf_check_congestion() reports congestion on ifq. */
 	if (pf_check_congestion(ifq)) {
 		REASON_SET(&reason, PFRES_CONGEST);
 		return (PF_DROP);
 	}
 
 	/*
 	 * Pull type, code and id out of the protocol-specific header, and
 	 * set state_icmp for the message types listed in each case.
 	 */
 	switch (pd->proto) {
 #ifdef INET
 	case IPPROTO_ICMP:
 		icmptype = pd->hdr.icmp->icmp_type;
 		icmpcode = pd->hdr.icmp->icmp_code;
 		icmpid = pd->hdr.icmp->icmp_id;
 
 		if (icmptype == ICMP_UNREACH ||
 		    icmptype == ICMP_SOURCEQUENCH ||
 		    icmptype == ICMP_REDIRECT ||
 		    icmptype == ICMP_TIMXCEED ||
 		    icmptype == ICMP_PARAMPROB)
 			state_icmp++;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case IPPROTO_ICMPV6:
 		icmptype = pd->hdr.icmp6->icmp6_type;
 		icmpcode = pd->hdr.icmp6->icmp6_code;
 		icmpid = pd->hdr.icmp6->icmp6_id;
 
 		if (icmptype == ICMP6_DST_UNREACH ||
 		    icmptype == ICMP6_PACKET_TOO_BIG ||
 		    icmptype == ICMP6_TIME_EXCEEDED ||
 		    icmptype == ICMP6_PARAM_PROB)
 			state_icmp++;
 		break;
 #endif /* INET6 */
 	}
 
 	/* Rule evaluation starts at the head of the active main filter rules. */
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
 
 	/*
 	 * Translation.  The ICMP id is handed to pf_get_translation() in
 	 * both port positions; bport keeps the original id and pd->baddr
 	 * the original address.
 	 */
 	if (direction == PF_OUT) {
 		bport = nport = icmpid;
 		/* check outgoing packet for BINAT/NAT */
 		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
 		    saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) !=
 		    NULL) {
 			PF_ACPY(&pd->baddr, saddr, af);
 			switch (af) {
 #ifdef INET
 			case AF_INET:
 				/*
 				 * IPv4: rewrite the source address, patch the
 				 * ICMP checksum for the id change, store the
 				 * new id and write the header back to the
 				 * mbuf right away.
 				 */
 				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
 				    pd->naddr.v4.s_addr, 0);
 				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
 				    pd->hdr.icmp->icmp_cksum, icmpid, nport, 0);
 				pd->hdr.icmp->icmp_id = nport;
 				m_copyback(m, off, ICMP_MINLEN,
 				    (caddr_t)pd->hdr.icmp);
 				break;
 #endif /* INET */
 #ifdef INET6
 			case AF_INET6:
 				/*
 				 * IPv6: pf_change_a6() is given the ICMPv6
 				 * checksum; the header is copied back later
 				 * (rewrite).  The id is not changed here.
 				 */
 				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
 				    &pd->naddr, 0);
 				rewrite++;
 				break;
 #endif /* INET6 */
 			}
 			/* natpass: clearing r skips the filter rule loop. */
 			if (nr->natpass)
 				r = NULL;
 			pd->nat_rule = nr;
 		}
 	} else {
 		bport = nport = icmpid;
 		/* check incoming packet for BINAT/RDR */
 		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
 		    saddr, icmpid, daddr, icmpid, &pd->naddr, &nport)) !=
 		    NULL) {
 			PF_ACPY(&pd->baddr, daddr, af);
 			switch (af) {
 #ifdef INET
 			case AF_INET:
 				/* IPv4 inbound: only the destination address changes. */
 				pf_change_a(&daddr->v4.s_addr,
 				    pd->ip_sum, pd->naddr.v4.s_addr, 0);
 				break;
 #endif /* INET */
 #ifdef INET6
 			case AF_INET6:
 				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
 				    &pd->naddr, 0);
 				rewrite++;
 				break;
 #endif /* INET6 */
 			}
 			/* natpass: clearing r skips the filter rule loop. */
 			if (nr->natpass)
 				r = NULL;
 			pd->nat_rule = nr;
 		}
 	}
 
 	/*
 	 * Filter rule loop.  A failed test either follows the rule's skip[]
 	 * pointer for that criterion or advances to the next list entry.
 	 */
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != direction)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != af)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != pd->proto)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
 		    r->src.neg, kif))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
 		    r->dst.neg, NULL))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		/*
 		 * Rule type/code are compared against the packet value plus
 		 * one; a zero rule field means no constraint.
 		 */
 		else if (r->type && r->type != icmptype + 1)
 			r = TAILQ_NEXT(r, entries);
 		else if (r->code && r->code != icmpcode + 1)
 			r = TAILQ_NEXT(r, entries);
 		else if (r->tos && !(r->tos == pd->tos))
 			r = TAILQ_NEXT(r, entries);
 		/* Rules flagged PFRULE_FRAGMENT never match in this function. */
 		else if (r->rule_flag & PFRULE_FRAGMENT)
 			r = TAILQ_NEXT(r, entries);
 		/* Probabilistic rule: skipped when the random draw is >= prob. */
 		else if (r->prob && r->prob <= arc4random())
 			r = TAILQ_NEXT(r, entries);
 		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
 			r = TAILQ_NEXT(r, entries);
 		/* Rules carrying an OS fingerprint never match ICMP here. */
 		else if (r->os_fingerprint != PF_OSFP_ANY)
 			r = TAILQ_NEXT(r, entries);
 		else {
 			/*
 			 * Every criterion passed.  tag and rtableid are taken
 			 * from each rule that gets this far (anchors
 			 * included), not only from the final match.
 			 */
 			if (r->tag)
 				tag = r->tag;
 			if (r->rtableid >= 0)
 				rtableid = r->rtableid;
 			if (r->anchor == NULL) {
 				/* Last match wins unless the rule is "quick". */
 				match = 1;
 				*rm = r;
 				*am = a;
 				*rsm = ruleset;
 				if ((*rm)->quick)
 					break;
 				r = TAILQ_NEXT(r, entries);
 			} else
 				pf_step_into_anchor(&asd, &ruleset,
 				    PF_RULESET_FILTER, &r, &a, &match);
 		}
 		/*
 		 * End of the current rule list: step out of the anchor, and
 		 * stop evaluating when pf_step_out_of_anchor() returns nonzero.
 		 */
 		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
 		    PF_RULESET_FILTER, &r, &a, &match))
 			break;
 	}
 	/* Continue with whatever rule/anchor/ruleset the outputs now hold. */
 	r = *rm;
 	a = *am;
 	ruleset = *rsm;
 
 	REASON_SET(&reason, PFRES_MATCH);
 
 	/*
 	 * Log on behalf of the filter rule, or of the translation rule when
 	 * it is natpass and has logging set.  A pending IPv6 header rewrite
 	 * is copied into the mbuf before logging.
 	 */
 	if (r->log || (nr != NULL && nr->natpass && nr->log)) {
 #ifdef INET6
 		if (rewrite)
 			m_copyback(m, off, sizeof(struct icmp6_hdr),
 			    (caddr_t)pd->hdr.icmp6);
 #endif /* INET6 */
 		PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
 		    a, ruleset, pd);
 	}
 
 	/* Anything other than an explicit pass is a drop. */
 	if (r->action != PF_PASS)
 		return (PF_DROP);
 
 	if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
 		REASON_SET(&reason, PFRES_MEMORY);
 		return (PF_DROP);
 	}
 
 	/* Message types flagged in state_icmp never create a state here. */
 	if (!state_icmp && (r->keep_state || nr != NULL)) {
 		/* create new state */
 		struct pf_state	*s = NULL;
 		struct pf_src_node *sn = NULL;
 
 		/*
 		 * Each failure below jumps to the cleanup label, which lives
 		 * inside the allocation-failure branch so that all early
 		 * exits share the same source-node teardown.
 		 */
 		/* check maximums */
 		if (r->max_states && (r->states >= r->max_states)) {
 			pf_status.lcounters[LCNT_STATES]++;
 			REASON_SET(&reason, PFRES_MAXSTATES);
 			goto cleanup;
 		}
 		/* src node for filter rule */
 		if ((r->rule_flag & PFRULE_SRCTRACK ||
 		    r->rpool.opts & PF_POOL_STICKYADDR) &&
 		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
 			REASON_SET(&reason, PFRES_SRCLIMIT);
 			goto cleanup;
 		}
 		/*
 		 * NOTE(review): for PF_OUT, when the first
 		 * pf_insert_src_node() call succeeds (returns 0) the ||
 		 * falls through and the second call, keyed on saddr, runs
 		 * as well -- confirm this is intended.
 		 */
 		/* src node for translation rule */
 		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
 		    ((direction == PF_OUT &&
 		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
 		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
 			REASON_SET(&reason, PFRES_SRCLIMIT);
 			goto cleanup;
 		}
 		s = pool_get(&pf_state_pl, PR_NOWAIT);
 		if (s == NULL) {
 			REASON_SET(&reason, PFRES_MEMORY);
 cleanup:
 			/*
 			 * Release source nodes that no state references and
 			 * whose expire field is still zero; nsn is skipped
 			 * when it is the same node as sn.
 			 */
 			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
 				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
 				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 				pf_status.src_nodes--;
 				pool_put(&pf_src_tree_pl, sn);
 			}
 			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
 			    nsn->expire == 0) {
 				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
 				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 				pf_status.src_nodes--;
 				pool_put(&pf_src_tree_pl, nsn);
 			}
 			return (PF_DROP);
 		}
 		bzero(s, sizeof(*s));
 		s->rule.ptr = r;
 		s->nat_rule.ptr = nr;
 		s->anchor.ptr = a;
 		STATE_INC_COUNTERS(s);
 		s->allow_opts = r->allow_opts;
 		s->log = r->log & PF_LOG_ALL;
 		if (nr != NULL)
 			s->log |= nr->log & PF_LOG_ALL;
 		s->proto = pd->proto;
 		s->direction = direction;
 		s->af = af;
 		/*
 		 * Fill in the three endpoints.  The (possibly translated)
 		 * ICMP id is stored where a port would go and ext.port is
 		 * always 0.  The pre-translation address/id (baddr/bport)
 		 * go into lan for outbound and gwy for inbound packets;
 		 * without translation that endpoint is a copy of the other.
 		 */
 		if (direction == PF_OUT) {
 			PF_ACPY(&s->gwy.addr, saddr, af);
 			s->gwy.port = nport;
 			PF_ACPY(&s->ext.addr, daddr, af);
 			s->ext.port = 0;
 			if (nr != NULL) {
 				PF_ACPY(&s->lan.addr, &pd->baddr, af);
 				s->lan.port = bport;
 			} else {
 				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
 				s->lan.port = s->gwy.port;
 			}
 		} else {
 			PF_ACPY(&s->lan.addr, daddr, af);
 			s->lan.port = nport;
 			PF_ACPY(&s->ext.addr, saddr, af);
 			s->ext.port = 0; 
 			if (nr != NULL) {
 				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
 				s->gwy.port = bport;
 			} else {
 				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
 				s->gwy.port = s->lan.port;
 			}
 		}
 		/* src.state/dst.state are not set here; bzero() left them 0. */
 		s->creation = time_second;
 		s->expire = time_second;
 		s->timeout = PFTM_ICMP_FIRST_PACKET;
 		pf_set_rt_ifp(s, saddr);
 		/* Attach the source nodes and count this state against them. */
 		if (sn != NULL) {
 			s->src_node = sn;
 			s->src_node->states++;
 		}
 		if (nsn != NULL) {
 			PF_ACPY(&nsn->raddr, &pd->naddr, af);
 			s->nat_src_node = nsn;
 			s->nat_src_node->states++;
 		}
 		/* Insertion failed: undo the accounting and free the state. */
 		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
 			REASON_SET(&reason, PFRES_STATEINS);
 			pf_src_tree_remove_state(s);
 			STATE_DEC_COUNTERS(s);
 			pool_put(&pf_state_pl, s);
 			return (PF_DROP);
 		} else
 			*sm = s;
 		/* Reference the tag and record it in the state. */
 		if (tag > 0) {
 			pf_tag_ref(tag);
 			s->tag = tag;
 		}
 	}
 
 #ifdef INET6
 	/* copy back packet headers if we performed IPv6 NAT operations */
 	if (rewrite)
 		m_copyback(m, off, sizeof(struct icmp6_hdr),
 		    (caddr_t)pd->hdr.icmp6);
 #endif /* INET6 */
 
 	return (PF_PASS);
 }
 
 /*
  * Run a packet through address translation and the active filter ruleset
  * using only its addresses and pd->proto -- no port or ICMP fields are
  * examined -- and create a state entry when the final rule keeps state or
  * a translation rule applied.
  *
  * On return *rm, *am and *rsm hold the last matching rule, its anchor rule
  * and its ruleset.  They are left as passed in when no rule matches, and
  * *rm is dereferenced unconditionally after the rule loop, so it must
  * point at a valid rule on entry.  *sm receives the new state once it has
  * been inserted successfully.
  *
  * Returns PF_PASS, or PF_DROP when pf_check_congestion() fires, the final
  * rule's action is anything other than PF_PASS, tagging fails, or state
  * creation fails.
  */
 int
 pf_test_other(struct pf_rule **rm, struct pf_state **sm, int direction,
     struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
     struct pf_rule **am, struct pf_ruleset **rsm, struct ifqueue *ifq)
 {
 	struct pf_rule		*nr = NULL;
 	struct pf_rule		*r, *a = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	struct pf_src_node	*nsn = NULL;
 	struct pf_addr		*saddr = pd->src, *daddr = pd->dst;
 	sa_family_t		 af = pd->af;
 	u_short			 reason;
 	int			 tag = -1, rtableid = -1;
 	int			 asd = 0;
 	int			 match = 0;
 
 	/* Drop early when pf_check_congestion() reports congestion on ifq. */
 	if (pf_check_congestion(ifq)) {
 		REASON_SET(&reason, PFRES_CONGEST);
 		return (PF_DROP);
 	}
 
 	/* Rule evaluation starts at the head of the active main filter rules. */
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
 
 	/*
 	 * Translation is address-only: ports are passed as 0 and no
 	 * translated-port pointer is supplied.  pd->baddr keeps the original
 	 * address.  IPv4 goes through pf_change_a() with the IP checksum;
 	 * IPv6 is a plain address copy.
 	 */
 	if (direction == PF_OUT) {
 		/* check outgoing packet for BINAT/NAT */
 		if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn,
 		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
 			PF_ACPY(&pd->baddr, saddr, af);
 			switch (af) {
 #ifdef INET
 			case AF_INET:
 				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
 				    pd->naddr.v4.s_addr, 0);
 				break;
 #endif /* INET */
 #ifdef INET6
 			case AF_INET6:
 				PF_ACPY(saddr, &pd->naddr, af);
 				break;
 #endif /* INET6 */
 			}
 			/* natpass: clearing r skips the filter rule loop. */
 			if (nr->natpass)
 				r = NULL;
 			pd->nat_rule = nr;
 		}
 	} else {
 		/* check incoming packet for BINAT/RDR */
 		if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn,
 		    saddr, 0, daddr, 0, &pd->naddr, NULL)) != NULL) {
 			PF_ACPY(&pd->baddr, daddr, af);
 			switch (af) {
 #ifdef INET
 			case AF_INET:
 				pf_change_a(&daddr->v4.s_addr,
 				    pd->ip_sum, pd->naddr.v4.s_addr, 0);
 				break;
 #endif /* INET */
 #ifdef INET6
 			case AF_INET6:
 				PF_ACPY(daddr, &pd->naddr, af);
 				break;
 #endif /* INET6 */
 			}
 			/* natpass: clearing r skips the filter rule loop. */
 			if (nr->natpass)
 				r = NULL;
 			pd->nat_rule = nr;
 		}
 	}
 
 	/*
 	 * Filter rule loop.  A failed test either follows the rule's skip[]
 	 * pointer for that criterion or advances to the next list entry.
 	 */
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != direction)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != af)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != pd->proto)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
 		    r->src.neg, kif))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
 		    r->dst.neg, NULL))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		else if (r->tos && !(r->tos == pd->tos))
 			r = TAILQ_NEXT(r, entries);
 		/* Rules flagged PFRULE_FRAGMENT never match in this function. */
 		else if (r->rule_flag & PFRULE_FRAGMENT)
 			r = TAILQ_NEXT(r, entries);
 		/* Probabilistic rule: skipped when the random draw is >= prob. */
 		else if (r->prob && r->prob <= arc4random())
 			r = TAILQ_NEXT(r, entries);
 		else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag))
 			r = TAILQ_NEXT(r, entries);
 		/* Rules carrying an OS fingerprint never match here. */
 		else if (r->os_fingerprint != PF_OSFP_ANY)
 			r = TAILQ_NEXT(r, entries);
 		else {
 			/*
 			 * Every criterion passed.  tag and rtableid are taken
 			 * from each rule that gets this far (anchors
 			 * included), not only from the final match.
 			 */
 			if (r->tag)
 				tag = r->tag;
 			if (r->rtableid >= 0)
 				rtableid = r->rtableid;
 			if (r->anchor == NULL) {
 				/* Last match wins unless the rule is "quick". */
 				match = 1;
 				*rm = r;
 				*am = a;
 				*rsm = ruleset;
 				if ((*rm)->quick)
 					break;
 				r = TAILQ_NEXT(r, entries);
 			} else
 				pf_step_into_anchor(&asd, &ruleset,
 				    PF_RULESET_FILTER, &r, &a, &match);
 		}
 		/*
 		 * End of the current rule list: step out of the anchor, and
 		 * stop evaluating when pf_step_out_of_anchor() returns nonzero.
 		 */
 		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
 		    PF_RULESET_FILTER, &r, &a, &match))
 			break;
 	}
 	/* Continue with whatever rule/anchor/ruleset the outputs now hold. */
 	r = *rm;
 	a = *am;
 	ruleset = *rsm;
 
 	REASON_SET(&reason, PFRES_MATCH);
 
 	/*
 	 * Log on behalf of the filter rule, or of the translation rule when
 	 * it is natpass and has logging set.
 	 */
 	if (r->log || (nr != NULL && nr->natpass && nr->log))
 		PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? r : nr,
 		    a, ruleset, pd);
 
 	/* Drop rule that asks for an ICMP error to be returned. */
 	if ((r->action == PF_DROP) &&
 	    ((r->rule_flag & PFRULE_RETURNICMP) ||
 	    (r->rule_flag & PFRULE_RETURN))) {
 		/*
 		 * NOTE: this local "a" is a packet address and shadows the
 		 * outer anchor-rule pointer "a" for the rest of this block.
 		 */
 		struct pf_addr *a = NULL;
 
 		/* Pick the address that translation rewrote, if any. */
 		if (nr != NULL) {
 			if (direction == PF_OUT)
 				a = saddr;
 			else
 				a = daddr;
 		}
 		/* Restore the original address (pd->baddr) before replying. */
 		if (a != NULL) {
 			switch (af) {
 #ifdef INET
 			case AF_INET:
 				pf_change_a(&a->v4.s_addr, pd->ip_sum,
 				    pd->baddr.v4.s_addr, 0);
 				break;
 #endif /* INET */
 #ifdef INET6
 			case AF_INET6:
 				PF_ACPY(a, &pd->baddr, af);
 				break;
 #endif /* INET6 */
 			}
 		}
 		/*
 		 * return_icmp/return_icmp6 pack two values: the high byte and
 		 * the low byte are passed separately (presumably ICMP type
 		 * and code -- confirm against pf_send_icmp()).
 		 */
 		if ((af == AF_INET) && r->return_icmp)
 			pf_send_icmp(m, r->return_icmp >> 8,
 			    r->return_icmp & 255, af, r);
 		else if ((af == AF_INET6) && r->return_icmp6)
 			pf_send_icmp(m, r->return_icmp6 >> 8,
 			    r->return_icmp6 & 255, af, r);
 	}
 
 	/* Anything other than an explicit pass is a drop. */
 	if (r->action != PF_PASS)
 		return (PF_DROP);
 
 	if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) {
 		REASON_SET(&reason, PFRES_MEMORY);
 		return (PF_DROP);
 	}
 
 	/* A translated packet always gets a state, even without keep_state. */
 	if (r->keep_state || nr != NULL) {
 		/* create new state */
 		struct pf_state	*s = NULL;
 		struct pf_src_node *sn = NULL;
 
 		/*
 		 * Each failure below jumps to the cleanup label, which lives
 		 * inside the allocation-failure branch so that all early
 		 * exits share the same source-node teardown.
 		 */
 		/* check maximums */
 		if (r->max_states && (r->states >= r->max_states)) {
 			pf_status.lcounters[LCNT_STATES]++;
 			REASON_SET(&reason, PFRES_MAXSTATES);
 			goto cleanup;
 		}
 		/* src node for filter rule */
 		if ((r->rule_flag & PFRULE_SRCTRACK ||
 		    r->rpool.opts & PF_POOL_STICKYADDR) &&
 		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
 			REASON_SET(&reason, PFRES_SRCLIMIT);
 			goto cleanup;
 		}
 		/*
 		 * NOTE(review): for PF_OUT, when the first
 		 * pf_insert_src_node() call succeeds (returns 0) the ||
 		 * falls through and the second call, keyed on saddr, runs
 		 * as well -- confirm this is intended.
 		 */
 		/* src node for translation rule */
 		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
 		    ((direction == PF_OUT &&
 		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
 		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
 			REASON_SET(&reason, PFRES_SRCLIMIT);
 			goto cleanup;
 		}
 		s = pool_get(&pf_state_pl, PR_NOWAIT);
 		if (s == NULL) {
 			REASON_SET(&reason, PFRES_MEMORY);
 cleanup:
 			/*
 			 * Release source nodes that no state references and
 			 * whose expire field is still zero; nsn is skipped
 			 * when it is the same node as sn.
 			 */
 			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
 				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
 				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 				pf_status.src_nodes--;
 				pool_put(&pf_src_tree_pl, sn);
 			}
 			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
 			    nsn->expire == 0) {
 				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
 				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
 				pf_status.src_nodes--;
 				pool_put(&pf_src_tree_pl, nsn);
 			}
 			return (PF_DROP);
 		}
 		bzero(s, sizeof(*s));
 		s->rule.ptr = r;
 		s->nat_rule.ptr = nr;
 		s->anchor.ptr = a;
 		STATE_INC_COUNTERS(s);
 		s->allow_opts = r->allow_opts;
 		s->log = r->log & PF_LOG_ALL;
 		if (nr != NULL)
 			s->log |= nr->log & PF_LOG_ALL;
 		s->proto = pd->proto;
 		s->direction = direction;
 		s->af = af;
 		/*
 		 * Fill in the three endpoint addresses; ports are not set
 		 * and stay at the zero left by bzero().  The pre-translation
 		 * address (baddr) goes into lan for outbound and gwy for
 		 * inbound packets; without translation that endpoint is a
 		 * copy of the other.
 		 */
 		if (direction == PF_OUT) {
 			PF_ACPY(&s->gwy.addr, saddr, af);
 			PF_ACPY(&s->ext.addr, daddr, af);
 			if (nr != NULL)
 				PF_ACPY(&s->lan.addr, &pd->baddr, af);
 			else
 				PF_ACPY(&s->lan.addr, &s->gwy.addr, af);
 		} else {
 			PF_ACPY(&s->lan.addr, daddr, af);
 			PF_ACPY(&s->ext.addr, saddr, af);
 			if (nr != NULL)
 				PF_ACPY(&s->gwy.addr, &pd->baddr, af);
 			else
 				PF_ACPY(&s->gwy.addr, &s->lan.addr, af);
 		}
 		s->src.state = PFOTHERS_SINGLE;
 		s->dst.state = PFOTHERS_NO_TRAFFIC;
 		s->creation = time_second;
 		s->expire = time_second;
 		s->timeout = PFTM_OTHER_FIRST_PACKET;
 		pf_set_rt_ifp(s, saddr);
 		/* Attach the source nodes and count this state against them. */
 		if (sn != NULL) {
 			s->src_node = sn;
 			s->src_node->states++;
 		}
 		if (nsn != NULL) {
 			PF_ACPY(&nsn->raddr, &pd->naddr, af);
 			s->nat_src_node = nsn;
 			s->nat_src_node->states++;
 		}
 		/* Insertion failed: undo the accounting and free the state. */
 		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
 			REASON_SET(&reason, PFRES_STATEINS);
 			pf_src_tree_remove_state(s);
 			STATE_DEC_COUNTERS(s);
 			pool_put(&pf_state_pl, s);
 			return (PF_DROP);
 		} else
 			*sm = s;
 		/* Reference the tag and record it in the state. */
 		if (tag > 0) {
 			pf_tag_ref(tag);
 			s->tag = tag;
 		}
 	}
 
 	return (PF_PASS);
 }
 
 /*
  * Evaluate the active filter ruleset for a fragment, where only the IP
  * level fields of the packet are consulted.
  *
  * Rules that constrain ports (UDP/TCP), TCP flags, ICMP type/code or an
  * OS fingerprint are passed over.  No translation is attempted and no
  * state is created.
  *
  * On return *rm, *am and *rsm hold the last matching rule, its anchor rule
  * and its ruleset; they keep their incoming values when nothing matches,
  * and *rm is dereferenced unconditionally, so it must be valid on entry.
  *
  * Returns PF_PASS when the final rule passes and tagging succeeds,
  * PF_DROP otherwise.
  */
 int
 pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
     struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
     struct pf_ruleset **rsm)
 {
 	struct pf_rule		*cur, *anchor_rule = NULL;
 	struct pf_ruleset	*cur_rs = NULL;
 	sa_family_t		 af = pd->af;
 	u_short			 reason;
 	int			 tag = -1;
 	int			 depth = 0;
 	int			 matched = 0;
 
 	cur = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
 	while (cur != NULL) {
 		cur->evaluations++;
 
 		if (pfi_kif_match(cur->kif, kif) == cur->ifnot) {
 			cur = cur->skip[PF_SKIP_IFP].ptr;
 		} else if (cur->direction && cur->direction != direction) {
 			cur = cur->skip[PF_SKIP_DIR].ptr;
 		} else if (cur->af && cur->af != af) {
 			cur = cur->skip[PF_SKIP_AF].ptr;
 		} else if (cur->proto && cur->proto != pd->proto) {
 			cur = cur->skip[PF_SKIP_PROTO].ptr;
 		} else if (PF_MISMATCHAW(&cur->src.addr, pd->src, af,
 		    cur->src.neg, kif)) {
 			cur = cur->skip[PF_SKIP_SRC_ADDR].ptr;
 		} else if (PF_MISMATCHAW(&cur->dst.addr, pd->dst, af,
 		    cur->dst.neg, NULL)) {
 			cur = cur->skip[PF_SKIP_DST_ADDR].ptr;
 		} else if (
 		    /* type-of-service mismatch */
 		    (cur->tos && cur->tos != pd->tos) ||
 		    /* fingerprinted rules cannot match a fragment */
 		    cur->os_fingerprint != PF_OSFP_ANY ||
 		    /* transport-level criteria are not checked on fragments */
 		    (pd->proto == IPPROTO_UDP &&
 		    (cur->src.port_op || cur->dst.port_op)) ||
 		    (pd->proto == IPPROTO_TCP &&
 		    (cur->src.port_op || cur->dst.port_op || cur->flagset)) ||
 		    ((pd->proto == IPPROTO_ICMP ||
 		    pd->proto == IPPROTO_ICMPV6) &&
 		    (cur->type || cur->code)) ||
 		    /* probabilistic rule lost the draw */
 		    (cur->prob && cur->prob <= arc4random()) ||
 		    /* packet tag does not satisfy the rule */
 		    (cur->match_tag &&
 		    !pf_match_tag(m, cur, pd->pf_mtag, &tag))) {
 			/* Short-circuit keeps the original test order. */
 			cur = TAILQ_NEXT(cur, entries);
 		} else if (cur->anchor != NULL) {
 			pf_step_into_anchor(&depth, &cur_rs,
 			    PF_RULESET_FILTER, &cur, &anchor_rule, &matched);
 		} else {
 			/* Plain rule matched; last match wins unless quick. */
 			matched = 1;
 			*rm = cur;
 			*am = anchor_rule;
 			*rsm = cur_rs;
 			if ((*rm)->quick)
 				break;
 			cur = TAILQ_NEXT(cur, entries);
 		}
 
 		/* Ran off the end of a rule list: climb out of the anchor. */
 		if (cur == NULL && pf_step_out_of_anchor(&depth, &cur_rs,
 		    PF_RULESET_FILTER, &cur, &anchor_rule, &matched))
 			break;
 	}
 
 	/* Act on whatever the output pointers hold now. */
 	cur = *rm;
 	anchor_rule = *am;
 	cur_rs = *rsm;
 
 	REASON_SET(&reason, PFRES_MATCH);
 
 	if (cur->log) {
 		PFLOG_PACKET(kif, h, m, af, direction, reason, cur,
 		    anchor_rule, cur_rs, pd);
 	}
 
 	if (cur->action == PF_PASS) {
 		if (!pf_tag_packet(m, pd->pf_mtag, tag, -1))
 			return (PF_PASS);
 		REASON_SET(&reason, PFRES_MEMORY);
 	}
 
 	return (PF_DROP);
 }
 
 int
 pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
     struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
     u_short *reason)
 {
 	struct pf_state_cmp	 key;
 	struct tcphdr		*th = pd->hdr.tcp;
 	u_int16_t		 win = ntohs(th->th_win);
 	u_int32_t		 ack, end, seq, orig_seq;
 	u_int8_t		 sws, dws;
 	int			 ackskew;
 	int			 copyback = 0;
 	struct pf_state_peer	*src, *dst;
 
 	key.af = pd->af;
 	key.proto = IPPROTO_TCP;
 	if (direction == PF_IN)	{
 		PF_ACPY(&key.ext.addr, pd->src, key.af);
 		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
 		key.ext.port = th->th_sport;
 		key.gwy.port = th->th_dport;
 	} else {
 		PF_ACPY(&key.lan.addr, pd->src, key.af);
 		PF_ACPY(&key.ext.addr, pd->dst, key.af);
 		key.lan.port = th->th_sport;
 		key.ext.port = th->th_dport;
 	}
 
 	STATE_LOOKUP();
 
 	if (direction == (*state)->direction) {
 		src = &(*state)->src;
 		dst = &(*state)->dst;
 	} else {
 		src = &(*state)->dst;
 		dst = &(*state)->src;
 	}
 
 	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
 		if (direction != (*state)->direction) {
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		}
 		if (th->th_flags & TH_SYN) {
 			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
 				REASON_SET(reason, PFRES_SYNPROXY);
 				return (PF_DROP);
 			}
 #ifdef __FreeBSD__
 			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
 #else
 			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
 #endif
 			    pd->src, th->th_dport, th->th_sport,
 			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
 			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1,
 			    0, NULL, NULL);
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		} else if (!(th->th_flags & TH_ACK) ||
 		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
 		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_DROP);
 		} else if ((*state)->src_node != NULL &&
 		    pf_src_connlimit(state)) {
 			REASON_SET(reason, PFRES_SRCLIMIT);
 			return (PF_DROP);
 		} else
 			(*state)->src.state = PF_TCPS_PROXY_DST;
 	}
 	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
 		struct pf_state_host *src, *dst;
 
 		if (direction == PF_OUT) {
 			src = &(*state)->gwy;
 			dst = &(*state)->ext;
 		} else {
 			src = &(*state)->ext;
 			dst = &(*state)->lan;
 		}
 		if (direction == (*state)->direction) {
 			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
 			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
 			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
 				REASON_SET(reason, PFRES_SYNPROXY);
 				return (PF_DROP);
 			}
 			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
 			if ((*state)->dst.seqhi == 1)
 				(*state)->dst.seqhi = htonl(arc4random());
 #ifdef __FreeBSD__
 			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
 			    &src->addr,
 #else
 			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
 #endif
 			    &dst->addr, src->port, dst->port,
 			    (*state)->dst.seqhi, 0, TH_SYN, 0,
 			    (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL);
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
 		    (TH_SYN|TH_ACK)) ||
 		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_DROP);
 		} else {
 			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
 			(*state)->dst.seqlo = ntohl(th->th_seq);
 #ifdef __FreeBSD__
 			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
 #else
 			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
 #endif
 			    pd->src, th->th_dport, th->th_sport,
 			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
 			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
 			    (*state)->tag, NULL, NULL);
 #ifdef __FreeBSD__
 			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
 			    &src->addr,
 #else
 			pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr,
 #endif
 			    &dst->addr, src->port, dst->port,
 			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
 			    TH_ACK, (*state)->dst.max_win, 0, 0, 1,
 			    0, NULL, NULL);
 			(*state)->src.seqdiff = (*state)->dst.seqhi -
 			    (*state)->src.seqlo;
 			(*state)->dst.seqdiff = (*state)->src.seqhi -
 			    (*state)->dst.seqlo;
 			(*state)->src.seqhi = (*state)->src.seqlo +
 			    (*state)->dst.max_win;
 			(*state)->dst.seqhi = (*state)->dst.seqlo +
 			    (*state)->src.max_win;
 			(*state)->src.wscale = (*state)->dst.wscale = 0;
 			(*state)->src.state = (*state)->dst.state =
 			    TCPS_ESTABLISHED;
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		}
 	}
 
 	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
 		sws = src->wscale & PF_WSCALE_MASK;
 		dws = dst->wscale & PF_WSCALE_MASK;
 	} else
 		sws = dws = 0;
 
 	/*
 	 * Sequence tracking algorithm from Guido van Rooij's paper:
 	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
 	 *	tcp_filtering.ps
 	 */
 
 	orig_seq = seq = ntohl(th->th_seq);
 	if (src->seqlo == 0) {
 		/* First packet from this end. Set its state */
 
 		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
 		    src->scrub == NULL) {
 			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
 				REASON_SET(reason, PFRES_MEMORY);
 				return (PF_DROP);
 			}
 		}
 
 		/* Deferred generation of sequence number modulator */
 		if (dst->seqdiff && !src->seqdiff) {
 #ifdef __FreeBSD__
 			while ((src->seqdiff = pf_new_isn(*state) - seq) == 0)
 				;
 #else
 			while ((src->seqdiff = tcp_rndiss_next() - seq) == 0)
 				;
 #endif
 			ack = ntohl(th->th_ack) - dst->seqdiff;
 			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
 			    src->seqdiff), 0);
 			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
 			copyback = 1;
 		} else {
 			ack = ntohl(th->th_ack);
 		}
 
 		end = seq + pd->p_len;
 		if (th->th_flags & TH_SYN) {
 			end++;
 			if (dst->wscale & PF_WSCALE_FLAG) {
 				src->wscale = pf_get_wscale(m, off, th->th_off,
 				    pd->af);
 				if (src->wscale & PF_WSCALE_FLAG) {
 					/* Remove scale factor from initial
 					 * window */
 					sws = src->wscale & PF_WSCALE_MASK;
 					win = ((u_int32_t)win + (1 << sws) - 1)
 					    >> sws;
 					dws = dst->wscale & PF_WSCALE_MASK;
 				} else {
 					/* fixup other window */
 					dst->max_win <<= dst->wscale &
 					    PF_WSCALE_MASK;
 					/* in case of a retrans SYN|ACK */
 					dst->wscale = 0;
 				}
 			}
 		}
 		if (th->th_flags & TH_FIN)
 			end++;
 
 		src->seqlo = seq;
 		if (src->state < TCPS_SYN_SENT)
 			src->state = TCPS_SYN_SENT;
 
 		/*
 		 * May need to slide the window (seqhi may have been set by
 		 * the crappy stack check or if we picked up the connection
 		 * after establishment)
 		 */
 		if (src->seqhi == 1 ||
 		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
 			src->seqhi = end + MAX(1, dst->max_win << dws);
 		if (win > src->max_win)
 			src->max_win = win;
 
 	} else {
 		ack = ntohl(th->th_ack) - dst->seqdiff;
 		if (src->seqdiff) {
 			/* Modulate sequence numbers */
 			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
 			    src->seqdiff), 0);
 			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
 			copyback = 1;
 		}
 		end = seq + pd->p_len;
 		if (th->th_flags & TH_SYN)
 			end++;
 		if (th->th_flags & TH_FIN)
 			end++;
 	}
 
 	if ((th->th_flags & TH_ACK) == 0) {
 		/* Let it pass through the ack skew check */
 		ack = dst->seqlo;
 	} else if ((ack == 0 &&
 	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
 	    /* broken tcp stacks do not set ack */
 	    (dst->state < TCPS_SYN_SENT)) {
 		/*
 		 * Many stacks (ours included) will set the ACK number in an
 		 * FIN|ACK if the SYN times out -- no sequence to ACK.
 		 */
 		ack = dst->seqlo;
 	}
 
 	if (seq == end) {
 		/* Ease sequencing restrictions on no data packets */
 		seq = src->seqlo;
 		end = seq;
 	}
 
 	ackskew = dst->seqlo - ack;
 
 
 	/*
 	 * Need to demodulate the sequence numbers in any TCP SACK options
 	 * (Selective ACK). We could optionally validate the SACK values
 	 * against the current ACK window, either forwards or backwards, but
 	 * I'm not confident that SACK has been implemented properly
 	 * everywhere. It wouldn't surprise me if several stacks accidentally
 	 * SACK too far backwards of previously ACKed data. There really aren't
 	 * any security implications of bad SACKing unless the target stack
 	 * doesn't validate the option length correctly. Someone trying to
 	 * spoof into a TCP connection won't bother blindly sending SACK
 	 * options anyway.
 	 */
 	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
 		if (pf_modulate_sack(m, off, pd, th, dst))
 			copyback = 1;
 	}
 
 
 #define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
 	if (SEQ_GEQ(src->seqhi, end) &&
 	    /* Last octet inside other's window space */
 	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
 	    /* Retrans: not more than one window back */
 	    (ackskew >= -MAXACKWINDOW) &&
 	    /* Acking not more than one reassembled fragment backwards */
 	    (ackskew <= (MAXACKWINDOW << sws)) &&
 	    /* Acking not more than one window forward */
 	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
 	    (orig_seq == src->seqlo + 1) || (pd->flags & PFDESC_IP_REAS) == 0)) {
 	    /* Require an exact/+1 sequence match on resets when possible */
 
 		if (dst->scrub || src->scrub) {
 			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
 			    *state, src, dst, &copyback))
 				return (PF_DROP);
 		}
 
 		/* update max window */
 		if (src->max_win < win)
 			src->max_win = win;
 		/* synchronize sequencing */
 		if (SEQ_GT(end, src->seqlo))
 			src->seqlo = end;
 		/* slide the window of what the other end can send */
 		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
 			dst->seqhi = ack + MAX((win << sws), 1);
 
 
 		/* update states */
 		if (th->th_flags & TH_SYN)
 			if (src->state < TCPS_SYN_SENT)
 				src->state = TCPS_SYN_SENT;
 		if (th->th_flags & TH_FIN)
 			if (src->state < TCPS_CLOSING)
 				src->state = TCPS_CLOSING;
 		if (th->th_flags & TH_ACK) {
 			if (dst->state == TCPS_SYN_SENT) {
 				dst->state = TCPS_ESTABLISHED;
 				if (src->state == TCPS_ESTABLISHED &&
 				    (*state)->src_node != NULL &&
 				    pf_src_connlimit(state)) {
 					REASON_SET(reason, PFRES_SRCLIMIT);
 					return (PF_DROP);
 				}
 			} else if (dst->state == TCPS_CLOSING)
 				dst->state = TCPS_FIN_WAIT_2;
 		}
 		if (th->th_flags & TH_RST)
 			src->state = dst->state = TCPS_TIME_WAIT;
 
 		/* update expire time */
 		(*state)->expire = time_second;
 		if (src->state >= TCPS_FIN_WAIT_2 &&
 		    dst->state >= TCPS_FIN_WAIT_2)
 			(*state)->timeout = PFTM_TCP_CLOSED;
 		else if (src->state >= TCPS_CLOSING &&
 		    dst->state >= TCPS_CLOSING)
 			(*state)->timeout = PFTM_TCP_FIN_WAIT;
 		else if (src->state < TCPS_ESTABLISHED ||
 		    dst->state < TCPS_ESTABLISHED)
 			(*state)->timeout = PFTM_TCP_OPENING;
 		else if (src->state >= TCPS_CLOSING ||
 		    dst->state >= TCPS_CLOSING)
 			(*state)->timeout = PFTM_TCP_CLOSING;
 		else
 			(*state)->timeout = PFTM_TCP_ESTABLISHED;
 
 		/* Fall through to PASS packet */
 
 	} else if ((dst->state < TCPS_SYN_SENT ||
 		dst->state >= TCPS_FIN_WAIT_2 ||
 		src->state >= TCPS_FIN_WAIT_2) &&
 	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
 	    /* Within a window forward of the originating packet */
 	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
 	    /* Within a window backward of the originating packet */
 
 		/*
 		 * This currently handles three situations:
 		 *  1) Stupid stacks will shotgun SYNs before their peer
 		 *     replies.
 		 *  2) When PF catches an already established stream (the
 		 *     firewall rebooted, the state table was flushed, routes
 		 *     changed...)
 		 *  3) Packets get funky immediately after the connection
 		 *     closes (this should catch Solaris spurious ACK|FINs
 		 *     that web servers like to spew after a close)
 		 *
 		 * This must be a little more careful than the above code
 		 * since packet floods will also be caught here. We don't
 		 * update the TTL here to mitigate the damage of a packet
 		 * flood and so the same code can handle awkward establishment
 		 * and a loosened connection close.
 		 * In the establishment case, a correct peer response will
 		 * validate the connection, go through the normal state code
 		 * and keep updating the state TTL.
 		 */
 
 		if (pf_status.debug >= PF_DEBUG_MISC) {
 			printf("pf: loose state match: ");
 			pf_print_state(*state);
 			pf_print_flags(th->th_flags);
 			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
 			    "pkts=%llu:%llu\n", seq, orig_seq, ack, pd->p_len,
 #ifdef __FreeBSD__
 			    ackskew, (unsigned long long)(*state)->packets[0],
 			    (unsigned long long)(*state)->packets[1]);
 #else
 			    ackskew, (*state)->packets[0],
 			    (*state)->packets[1]);
 #endif
 		}
 
 		if (dst->scrub || src->scrub) {
 			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
 			    *state, src, dst, &copyback))
 				return (PF_DROP);
 		}
 
 		/* update max window */
 		if (src->max_win < win)
 			src->max_win = win;
 		/* synchronize sequencing */
 		if (SEQ_GT(end, src->seqlo))
 			src->seqlo = end;
 		/* slide the window of what the other end can send */
 		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
 			dst->seqhi = ack + MAX((win << sws), 1);
 
 		/*
 		 * Cannot set dst->seqhi here since this could be a shotgunned
 		 * SYN and not an already established connection.
 		 */
 
 		if (th->th_flags & TH_FIN)
 			if (src->state < TCPS_CLOSING)
 				src->state = TCPS_CLOSING;
 		if (th->th_flags & TH_RST)
 			src->state = dst->state = TCPS_TIME_WAIT;
 
 		/* Fall through to PASS packet */
 
 	} else {
 		if ((*state)->dst.state == TCPS_SYN_SENT &&
 		    (*state)->src.state == TCPS_SYN_SENT) {
 			/* Send RST for state mismatches during handshake */
 			if (!(th->th_flags & TH_RST))
 #ifdef __FreeBSD__
 				pf_send_tcp(m, (*state)->rule.ptr, pd->af,
 #else
 				pf_send_tcp((*state)->rule.ptr, pd->af,
 #endif
 				    pd->dst, pd->src, th->th_dport,
 				    th->th_sport, ntohl(th->th_ack), 0,
 				    TH_RST, 0, 0,
 				    (*state)->rule.ptr->return_ttl, 1, 0,
 				    pd->eh, kif->pfik_ifp);
 			src->seqlo = 0;
 			src->seqhi = 1;
 			src->max_win = 1;
 		} else if (pf_status.debug >= PF_DEBUG_MISC) {
 			printf("pf: BAD state: ");
 			pf_print_state(*state);
 			pf_print_flags(th->th_flags);
 			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
 			    "pkts=%llu:%llu dir=%s,%s\n",
 			    seq, orig_seq, ack, pd->p_len, ackskew,
 #ifdef __FreeBSD__
 			    (unsigned long long)(*state)->packets[0],
 			    (unsigned long long)(*state)->packets[1],
 #else
 			    (*state)->packets[0], (*state)->packets[1],
 #endif
 			    direction == PF_IN ? "in" : "out",
 			    direction == (*state)->direction ? "fwd" : "rev");
 			printf("pf: State failure on: %c %c %c %c | %c %c\n",
 			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
 			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
 			    ' ': '2',
 			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
 			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
 			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
 			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
 		}
 		REASON_SET(reason, PFRES_BADSTATE);
 		return (PF_DROP);
 	}
 
 	/* Any packets which have gotten here are to be passed */
 
 	/* translate source/destination address, if necessary */
 	if (STATE_TRANSLATE(*state)) {
 		if (direction == PF_OUT)
 			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
 			    &th->th_sum, &(*state)->gwy.addr,
 			    (*state)->gwy.port, 0, pd->af);
 		else
 			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
 			    &th->th_sum, &(*state)->lan.addr,
 			    (*state)->lan.port, 0, pd->af);
 		m_copyback(m, off, sizeof(*th), (caddr_t)th);
 	} else if (copyback) {
 		/* Copyback sequence modulation or stateful scrub changes */
 		m_copyback(m, off, sizeof(*th), (caddr_t)th);
 	}
 
 	return (PF_PASS);
 }
 
 /*
  * Match a UDP packet against the state table and refresh the state.
  *
  * The lookup key is filled from the packet's addresses and UDP ports:
  * inbound packets fill the ext/gwy pair, outbound ones the lan/ext pair.
  * On a match both peers' UDP pseudo-states are advanced, the expiry
  * bookkeeping is refreshed and, if the state is translated, the
  * address/port of the packet is rewritten and the UDP header is copied
  * back into the mbuf at "off".
  *
  * Returns PF_PASS when this function runs to completion.
  * NOTE(review): STATE_LOOKUP() is a macro defined elsewhere; it
  * presumably returns from this function itself when no usable state is
  * found -- confirm against its definition.
  */
 int
 pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
     struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
 {
 	struct udphdr		*udp = pd->hdr.udp;
 	struct pf_state_peer	*sender, *receiver;
 	struct pf_state		*st;
 	struct pf_state_cmp	 key;
 	int			 forward;
 
 	/* build the lookup key */
 	key.proto = IPPROTO_UDP;
 	key.af = pd->af;
 	if (direction != PF_IN) {
 		key.lan.port = udp->uh_sport;
 		key.ext.port = udp->uh_dport;
 		PF_ACPY(&key.lan.addr, pd->src, key.af);
 		PF_ACPY(&key.ext.addr, pd->dst, key.af);
 	} else {
 		key.ext.port = udp->uh_sport;
 		key.gwy.port = udp->uh_dport;
 		PF_ACPY(&key.ext.addr, pd->src, key.af);
 		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
 	}
 
 	STATE_LOOKUP();
 
 	/*
 	 * "sender" is the peer this packet comes from: the state's src when
 	 * the packet flows in the direction recorded in the state, its dst
 	 * otherwise.
 	 */
 	st = *state;
 	forward = (direction == st->direction);
 	sender = forward ? &st->src : &st->dst;
 	receiver = forward ? &st->dst : &st->src;
 
 	/* advance the per-peer pseudo-states */
 	if (sender->state < PFUDPS_SINGLE)
 		sender->state = PFUDPS_SINGLE;
 	if (receiver->state == PFUDPS_SINGLE)
 		receiver->state = PFUDPS_MULTIPLE;
 
 	/* refresh expiry; the timeout class depends on both peers */
 	st->expire = time_second;
 	st->timeout = (sender->state == PFUDPS_MULTIPLE &&
 	    receiver->state == PFUDPS_MULTIPLE) ?
 	    PFTM_UDP_MULTIPLE : PFTM_UDP_SINGLE;
 
 	/* rewrite address and port when the state is translated */
 	if (STATE_TRANSLATE(st)) {
 		if (direction != PF_OUT) {
 			pf_change_ap(pd->dst, &udp->uh_dport, pd->ip_sum,
 			    &udp->uh_sum, &st->lan.addr,
 			    st->lan.port, 1, pd->af);
 		} else {
 			pf_change_ap(pd->src, &udp->uh_sport, pd->ip_sum,
 			    &udp->uh_sum, &st->gwy.addr,
 			    st->gwy.port, 1, pd->af);
 		}
 		m_copyback(m, off, sizeof(*udp), (caddr_t)udp);
 	}
 
 	return (PF_PASS);
 }
 
 /*
  * Match an ICMP or ICMPv6 packet against the state table.
  *
  * Two kinds of message are handled:
  *
  *  - Types not listed as errors below are looked up as a state of their
  *    own, keyed on the packet addresses with the ICMP id taking the
  *    place of a port.
  *  - Error types (unreachable, source quench, redirect, time exceeded,
  *    parameter problem and their ICMPv6 counterparts) are matched via
  *    the packet they quote: the embedded IP/IPv6 header and the start of
  *    the embedded TCP/UDP/ICMP header are pulled out of the mbuf and used
  *    to build the lookup key.
  *
  * If the matching state is translated, the affected headers are rewritten
  * through pf_change_a()/pf_change_a6()/pf_change_icmp() and copied back
  * into the mbuf.
  *
  * state     - out: receives the matching state (set by STATE_LOOKUP()).
  * direction - PF_IN or PF_OUT.
  * kif       - interface the packet was seen on (consumed by
  *             STATE_LOOKUP()).
  * m, off    - mbuf chain and offset of the ICMP header within it.
  * h         - not referenced in this function.
  * pd        - packet descriptor of the outer packet.
  * reason    - out: drop reason, set on the PF_DROP paths below.
  *
  * Returns PF_PASS on a match; PF_DROP when an embedded header cannot be
  * pulled, when the quoted packet is a non-first fragment, or when a
  * quoted TCP sequence number falls outside the tracked window.
  * NOTE(review): STATE_LOOKUP() is a macro defined elsewhere; it
  * presumably returns from this function itself when no usable state is
  * found -- confirm against its definition.
  */
 int
 pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
     struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
 {
 	struct pf_addr	*saddr = pd->src, *daddr = pd->dst;
 	u_int16_t	 icmpid = 0;		/* make the compiler happy */
 	u_int16_t	*icmpsum = NULL;	/* make the compiler happy */
 	u_int8_t	 icmptype = 0;		/* make the compiler happy */
 	int		 state_icmp = 0;
 	struct pf_state_cmp key;
 
 	/*
 	 * Fetch type, id and checksum location from the outer header and
 	 * flag error-class messages (state_icmp != 0).
 	 */
 	switch (pd->proto) {
 #ifdef INET
 	case IPPROTO_ICMP:
 		icmptype = pd->hdr.icmp->icmp_type;
 		icmpid = pd->hdr.icmp->icmp_id;
 		icmpsum = &pd->hdr.icmp->icmp_cksum;
 
 		if (icmptype == ICMP_UNREACH ||
 		    icmptype == ICMP_SOURCEQUENCH ||
 		    icmptype == ICMP_REDIRECT ||
 		    icmptype == ICMP_TIMXCEED ||
 		    icmptype == ICMP_PARAMPROB)
 			state_icmp++;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case IPPROTO_ICMPV6:
 		icmptype = pd->hdr.icmp6->icmp6_type;
 		icmpid = pd->hdr.icmp6->icmp6_id;
 		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
 
 		if (icmptype == ICMP6_DST_UNREACH ||
 		    icmptype == ICMP6_PACKET_TOO_BIG ||
 		    icmptype == ICMP6_TIME_EXCEEDED ||
 		    icmptype == ICMP6_PARAM_PROB)
 			state_icmp++;
 		break;
 #endif /* INET6 */
 	}
 
 	if (!state_icmp) {
 
 		/*
 		 * ICMP query/reply message not related to a TCP/UDP packet.
 		 * Search for an ICMP state.
 		 */
 		key.af = pd->af;
 		key.proto = pd->proto;
 		/* the ICMP id is stored in the gwy/lan port slot of the key */
 		if (direction == PF_IN)	{
 			PF_ACPY(&key.ext.addr, pd->src, key.af);
 			PF_ACPY(&key.gwy.addr, pd->dst, key.af);
 			key.ext.port = 0;
 			key.gwy.port = icmpid;
 		} else {
 			PF_ACPY(&key.lan.addr, pd->src, key.af);
 			PF_ACPY(&key.ext.addr, pd->dst, key.af);
 			key.lan.port = icmpid;
 			key.ext.port = 0;
 		}
 
 		STATE_LOOKUP();
 
 		(*state)->expire = time_second;
 		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
 
 		/* translate source/destination address, if necessary */
 		if (STATE_TRANSLATE(*state)) {
 			if (direction == PF_OUT) {
 				/* outbound: source becomes the gwy address/id */
 				switch (pd->af) {
 #ifdef INET
 				case AF_INET:
 					pf_change_a(&saddr->v4.s_addr,
 					    pd->ip_sum,
 					    (*state)->gwy.addr.v4.s_addr, 0);
 					pd->hdr.icmp->icmp_cksum =
 					    pf_cksum_fixup(
 					    pd->hdr.icmp->icmp_cksum, icmpid,
 					    (*state)->gwy.port, 0);
 					pd->hdr.icmp->icmp_id =
 					    (*state)->gwy.port;
 					m_copyback(m, off, ICMP_MINLEN,
 					    (caddr_t)pd->hdr.icmp);
 					break;
 #endif /* INET */
 #ifdef INET6
 				case AF_INET6:
 					pf_change_a6(saddr,
 					    &pd->hdr.icmp6->icmp6_cksum,
 					    &(*state)->gwy.addr, 0);
 					m_copyback(m, off,
 					    sizeof(struct icmp6_hdr),
 					    (caddr_t)pd->hdr.icmp6);
 					break;
 #endif /* INET6 */
 				}
 			} else {
 				/* inbound: destination becomes the lan address/id */
 				switch (pd->af) {
 #ifdef INET
 				case AF_INET:
 					pf_change_a(&daddr->v4.s_addr,
 					    pd->ip_sum,
 					    (*state)->lan.addr.v4.s_addr, 0);
 					pd->hdr.icmp->icmp_cksum =
 					    pf_cksum_fixup(
 					    pd->hdr.icmp->icmp_cksum, icmpid,
 					    (*state)->lan.port, 0);
 					pd->hdr.icmp->icmp_id =
 					    (*state)->lan.port;
 					m_copyback(m, off, ICMP_MINLEN,
 					    (caddr_t)pd->hdr.icmp);
 					break;
 #endif /* INET */
 #ifdef INET6
 				case AF_INET6:
 					pf_change_a6(daddr,
 					    &pd->hdr.icmp6->icmp6_cksum,
 					    &(*state)->lan.addr, 0);
 					m_copyback(m, off,
 					    sizeof(struct icmp6_hdr),
 					    (caddr_t)pd->hdr.icmp6);
 					break;
 #endif /* INET6 */
 				}
 			}
 		}
 
 		return (PF_PASS);
 
 	} else {
 		/*
 		 * ICMP error message in response to a TCP/UDP packet.
 		 * Extract the inner TCP/UDP header and search for that state.
 		 */
 
 		struct pf_pdesc	pd2;
 #ifdef INET
 		struct ip	h2;
 #endif /* INET */
 #ifdef INET6
 		struct ip6_hdr	h2_6;
 		int		terminal = 0;
 #endif /* INET6 */
 		int		ipoff2 = 0;	/* make the compiler happy */
 		int		off2 = 0;	/* make the compiler happy */
 
 		/*
 		 * pd2 describes the quoted packet; only af, proto, src, dst
 		 * and ip_sum are filled in here.
 		 */
 		pd2.af = pd->af;
 		switch (pd->af) {
 #ifdef INET
 		case AF_INET:
 			/* offset of h2 in mbuf chain */
 			ipoff2 = off + ICMP_MINLEN;
 
 			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
 			    NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(ip)\n"));
 				return (PF_DROP);
 			}
 			/*
 			 * ICMP error messages don't refer to non-first
 			 * fragments
 			 */
 			if (h2.ip_off & htons(IP_OFFMASK)) {
 				REASON_SET(reason, PFRES_FRAG);
 				return (PF_DROP);
 			}
 
 			/* offset of protocol header that follows h2 */
 			off2 = ipoff2 + (h2.ip_hl << 2);
 
 			pd2.proto = h2.ip_p;
 			pd2.src = (struct pf_addr *)&h2.ip_src;
 			pd2.dst = (struct pf_addr *)&h2.ip_dst;
 			pd2.ip_sum = &h2.ip_sum;
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			ipoff2 = off + sizeof(struct icmp6_hdr);
 
 			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
 			    NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(ip6)\n"));
 				return (PF_DROP);
 			}
 			pd2.proto = h2_6.ip6_nxt;
 			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
 			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
 			pd2.ip_sum = NULL;
 			off2 = ipoff2 + sizeof(h2_6);
 			/*
 			 * Walk the quoted packet's extension headers until a
 			 * header that is not one of the types below is found;
 			 * off2 and pd2.proto then describe that header.
 			 */
 			do {
 				switch (pd2.proto) {
 				case IPPROTO_FRAGMENT:
 					/*
 					 * ICMPv6 error messages for
 					 * non-first fragments
 					 */
 					REASON_SET(reason, PFRES_FRAG);
 					return (PF_DROP);
 				case IPPROTO_AH:
 				case IPPROTO_HOPOPTS:
 				case IPPROTO_ROUTING:
 				case IPPROTO_DSTOPTS: {
 					/* get next header and header length */
 					struct ip6_ext opt6;
 
 					if (!pf_pull_hdr(m, off2, &opt6,
 					    sizeof(opt6), NULL, reason,
 					    pd2.af)) {
 						DPFPRINTF(PF_DEBUG_MISC,
 						    ("pf: ICMPv6 short opt\n"));
 						return (PF_DROP);
 					}
 					/*
 					 * AH length is scaled by 4 with a bias
 					 * of 2, the others by 8 with a bias
 					 * of 1.
 					 */
 					if (pd2.proto == IPPROTO_AH)
 						off2 += (opt6.ip6e_len + 2) * 4;
 					else
 						off2 += (opt6.ip6e_len + 1) * 8;
 					pd2.proto = opt6.ip6e_nxt;
 					/* goto the next header */
 					break;
 				}
 				default:
 					terminal++;
 					break;
 				}
 			} while (!terminal);
 			break;
 #endif /* INET6 */
 #ifdef __FreeBSD__
 		default:
 			panic("AF not supported: %d", pd->af);
 #endif
 		}
 
 		/*
 		 * Look up the state of the quoted packet.  In every case
 		 * below the key is built with the quoted destination where
 		 * the query lookup above used the outer source, and vice
 		 * versa.
 		 */
 		switch (pd2.proto) {
 		case IPPROTO_TCP: {
 			struct tcphdr		 th;
 			u_int32_t		 seq;
 			struct pf_state_peer	*src, *dst;
 			u_int8_t		 dws;
 			int			 copyback = 0;
 
 			/*
 			 * Only the first 8 bytes of the TCP header can be
 			 * expected. Don't access any TCP header fields after
 			 * th_seq, an ackskew test is not possible.
 			 */
 			if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
 			    pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(tcp)\n"));
 				return (PF_DROP);
 			}
 
 			key.af = pd2.af;
 			key.proto = IPPROTO_TCP;
 			if (direction == PF_IN)	{
 				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
 				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
 				key.ext.port = th.th_dport;
 				key.gwy.port = th.th_sport;
 			} else {
 				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
 				PF_ACPY(&key.ext.addr, pd2.src, key.af);
 				key.lan.port = th.th_dport;
 				key.ext.port = th.th_sport;
 			}
 
 			STATE_LOOKUP();
 
 			/*
 			 * Peers are picked the other way round than in the
 			 * plain TCP/UDP state tests: when the error flows in
 			 * the state's direction, "src" is the state's dst
 			 * peer.
 			 */
 			if (direction == (*state)->direction) {
 				src = &(*state)->dst;
 				dst = &(*state)->src;
 			} else {
 				src = &(*state)->src;
 				dst = &(*state)->dst;
 			}
 
 			/* window scaling only counts if both peers have it set */
 			if (src->wscale && dst->wscale)
 				dws = dst->wscale & PF_WSCALE_MASK;
 			else
 				dws = 0;
 
 			/* Demodulate sequence number */
 			seq = ntohl(th.th_seq) - src->seqdiff;
 			if (src->seqdiff) {
 				pf_change_a(&th.th_seq, icmpsum,
 				    htonl(seq), 0);
 				copyback = 1;
 			}
 
 			/*
 			 * Drop unless the quoted sequence number lies between
 			 * src->seqlo minus one scaled dst window and
 			 * src->seqhi.
 			 */
 			if (!SEQ_GEQ(src->seqhi, seq) ||
 			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) {
 				if (pf_status.debug >= PF_DEBUG_MISC) {
 					printf("pf: BAD ICMP %d:%d ",
 					    icmptype, pd->hdr.icmp->icmp_code);
 					pf_print_host(pd->src, 0, pd->af);
 					printf(" -> ");
 					pf_print_host(pd->dst, 0, pd->af);
 					printf(" state: ");
 					pf_print_state(*state);
 					printf(" seq=%u\n", seq);
 				}
 				REASON_SET(reason, PFRES_BADSTATE);
 				return (PF_DROP);
 			}
 
 			if (STATE_TRANSLATE(*state)) {
 				if (direction == PF_IN) {
 					pf_change_icmp(pd2.src, &th.th_sport,
 					    daddr, &(*state)->lan.addr,
 					    (*state)->lan.port, NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, pd2.af);
 				} else {
 					pf_change_icmp(pd2.dst, &th.th_dport,
 					    saddr, &(*state)->gwy.addr,
 					    (*state)->gwy.port, NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, pd2.af);
 				}
 				copyback = 1;
 			}
 
 			/*
 			 * Write back the outer ICMP header, the quoted IP
 			 * header and the 8 quoted TCP header bytes.
 			 */
 			if (copyback) {
 				switch (pd2.af) {
 #ifdef INET
 				case AF_INET:
 					m_copyback(m, off, ICMP_MINLEN,
 					    (caddr_t)pd->hdr.icmp);
 					m_copyback(m, ipoff2, sizeof(h2),
 					    (caddr_t)&h2);
 					break;
 #endif /* INET */
 #ifdef INET6
 				case AF_INET6:
 					m_copyback(m, off,
 					    sizeof(struct icmp6_hdr),
 					    (caddr_t)pd->hdr.icmp6);
 					m_copyback(m, ipoff2, sizeof(h2_6),
 					    (caddr_t)&h2_6);
 					break;
 #endif /* INET6 */
 				}
 				m_copyback(m, off2, 8, (caddr_t)&th);
 			}
 
 			return (PF_PASS);
 			break;
 		}
 		case IPPROTO_UDP: {
 			struct udphdr		uh;
 
 			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
 			    NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(udp)\n"));
 				return (PF_DROP);
 			}
 
 			key.af = pd2.af;
 			key.proto = IPPROTO_UDP;
 			if (direction == PF_IN)	{
 				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
 				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
 				key.ext.port = uh.uh_dport;
 				key.gwy.port = uh.uh_sport;
 			} else {
 				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
 				PF_ACPY(&key.ext.addr, pd2.src, key.af);
 				key.lan.port = uh.uh_dport;
 				key.ext.port = uh.uh_sport;
 			}
 
 			STATE_LOOKUP();
 
 			if (STATE_TRANSLATE(*state)) {
 				if (direction == PF_IN) {
 					pf_change_icmp(pd2.src, &uh.uh_sport,
 					    daddr, &(*state)->lan.addr,
 					    (*state)->lan.port, &uh.uh_sum,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 1, pd2.af);
 				} else {
 					pf_change_icmp(pd2.dst, &uh.uh_dport,
 					    saddr, &(*state)->gwy.addr,
 					    (*state)->gwy.port, &uh.uh_sum,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 1, pd2.af);
 				}
 				switch (pd2.af) {
 #ifdef INET
 				case AF_INET:
 					m_copyback(m, off, ICMP_MINLEN,
 					    (caddr_t)pd->hdr.icmp);
 					m_copyback(m, ipoff2, sizeof(h2),
 					    (caddr_t)&h2);
 					break;
 #endif /* INET */
 #ifdef INET6
 				case AF_INET6:
 					m_copyback(m, off,
 					    sizeof(struct icmp6_hdr),
 					    (caddr_t)pd->hdr.icmp6);
 					m_copyback(m, ipoff2, sizeof(h2_6),
 					    (caddr_t)&h2_6);
 					break;
 #endif /* INET6 */
 				}
 				m_copyback(m, off2, sizeof(uh),
 				    (caddr_t)&uh);
 			}
 
 			return (PF_PASS);
 			break;
 		}
 #ifdef INET
 		case IPPROTO_ICMP: {
 			struct icmp		iih;
 
 			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
 			    NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(icmp)\n"));
 				return (PF_DROP);
 			}
 
 			key.af = pd2.af;
 			key.proto = IPPROTO_ICMP;
 			/* the quoted ICMP id stands in for the port */
 			if (direction == PF_IN)	{
 				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
 				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
 				key.ext.port = 0;
 				key.gwy.port = iih.icmp_id;
 			} else {
 				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
 				PF_ACPY(&key.ext.addr, pd2.src, key.af);
 				key.lan.port = iih.icmp_id;
 				key.ext.port = 0;
 			}
 
 			STATE_LOOKUP();
 
 			if (STATE_TRANSLATE(*state)) {
 				if (direction == PF_IN) {
 					pf_change_icmp(pd2.src, &iih.icmp_id,
 					    daddr, &(*state)->lan.addr,
 					    (*state)->lan.port, NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, AF_INET);
 				} else {
 					pf_change_icmp(pd2.dst, &iih.icmp_id,
 					    saddr, &(*state)->gwy.addr,
 					    (*state)->gwy.port, NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, AF_INET);
 				}
 				m_copyback(m, off, ICMP_MINLEN,
 				    (caddr_t)pd->hdr.icmp);
 				m_copyback(m, ipoff2, sizeof(h2),
 				    (caddr_t)&h2);
 				m_copyback(m, off2, ICMP_MINLEN,
 				    (caddr_t)&iih);
 			}
 
 			return (PF_PASS);
 			break;
 		}
 #endif /* INET */
 #ifdef INET6
 		case IPPROTO_ICMPV6: {
 			struct icmp6_hdr	iih;
 
 			if (!pf_pull_hdr(m, off2, &iih,
 			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(icmp6)\n"));
 				return (PF_DROP);
 			}
 
 			key.af = pd2.af;
 			key.proto = IPPROTO_ICMPV6;
 			/* the quoted ICMPv6 id stands in for the port */
 			if (direction == PF_IN)	{
 				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
 				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
 				key.ext.port = 0;
 				key.gwy.port = iih.icmp6_id;
 			} else {
 				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
 				PF_ACPY(&key.ext.addr, pd2.src, key.af);
 				key.lan.port = iih.icmp6_id;
 				key.ext.port = 0;
 			}
 
 			STATE_LOOKUP();
 
 			if (STATE_TRANSLATE(*state)) {
 				if (direction == PF_IN) {
 					pf_change_icmp(pd2.src, &iih.icmp6_id,
 					    daddr, &(*state)->lan.addr,
 					    (*state)->lan.port, NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, AF_INET6);
 				} else {
 					pf_change_icmp(pd2.dst, &iih.icmp6_id,
 					    saddr, &(*state)->gwy.addr,
 					    (*state)->gwy.port, NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, AF_INET6);
 				}
 				m_copyback(m, off, sizeof(struct icmp6_hdr),
 				    (caddr_t)pd->hdr.icmp6);
 				m_copyback(m, ipoff2, sizeof(h2_6),
 				    (caddr_t)&h2_6);
 				m_copyback(m, off2, sizeof(struct icmp6_hdr),
 				    (caddr_t)&iih);
 			}
 
 			return (PF_PASS);
 			break;
 		}
 #endif /* INET6 */
 		default: {
 			/*
 			 * Any other quoted protocol: look up by addresses
 			 * only, with both ports zero.
 			 */
 			key.af = pd2.af;
 			key.proto = pd2.proto;
 			if (direction == PF_IN)	{
 				PF_ACPY(&key.ext.addr, pd2.dst, key.af);
 				PF_ACPY(&key.gwy.addr, pd2.src, key.af);
 				key.ext.port = 0;
 				key.gwy.port = 0;
 			} else {
 				PF_ACPY(&key.lan.addr, pd2.dst, key.af);
 				PF_ACPY(&key.ext.addr, pd2.src, key.af);
 				key.lan.port = 0;
 				key.ext.port = 0;
 			}
 
 			STATE_LOOKUP();
 
 			if (STATE_TRANSLATE(*state)) {
 				if (direction == PF_IN) {
 					pf_change_icmp(pd2.src, NULL,
 					    daddr, &(*state)->lan.addr,
 					    0, NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, pd2.af);
 				} else {
 					pf_change_icmp(pd2.dst, NULL,
 					    saddr, &(*state)->gwy.addr,
 					    0, NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, pd2.af);
 				}
 				switch (pd2.af) {
 #ifdef INET
 				case AF_INET:
 					m_copyback(m, off, ICMP_MINLEN,
 					    (caddr_t)pd->hdr.icmp);
 					m_copyback(m, ipoff2, sizeof(h2),
 					    (caddr_t)&h2);
 					break;
 #endif /* INET */
 #ifdef INET6
 				case AF_INET6:
 					m_copyback(m, off,
 					    sizeof(struct icmp6_hdr),
 					    (caddr_t)pd->hdr.icmp6);
 					m_copyback(m, ipoff2, sizeof(h2_6),
 					    (caddr_t)&h2_6);
 					break;
 #endif /* INET6 */
 				}
 			}
 
 			return (PF_PASS);
 			break;
 		}
 		}
 	}
 }
 
 /*
  * Match a packet of a protocol without ports (anything not handled by
  * the TCP/UDP/ICMP state tests) against the state table.
  *
  * The lookup key carries only the addresses; every port field that is
  * filled in is set to zero.  On a match the per-peer pseudo-states and
  * the expiry bookkeeping are refreshed and, if the state is translated,
  * the packet's source (outbound) or destination (inbound) address is
  * replaced with the state's gwy or lan address respectively.
  *
  * Returns PF_PASS when this function runs to completion.
  * NOTE(review): STATE_LOOKUP() is a macro defined elsewhere; it
  * presumably returns from this function itself when no usable state is
  * found -- confirm against its definition.
  */
 int
 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
     struct pf_pdesc *pd)
 {
 	struct pf_state_peer	*sender, *receiver;
 	struct pf_state		*st;
 	struct pf_state_cmp	 key;
 	int			 forward;
 
 	/* build the lookup key; there are no ports for these protocols */
 	key.proto = pd->proto;
 	key.af = pd->af;
 	key.ext.port = 0;
 	if (direction != PF_IN) {
 		key.lan.port = 0;
 		PF_ACPY(&key.lan.addr, pd->src, key.af);
 		PF_ACPY(&key.ext.addr, pd->dst, key.af);
 	} else {
 		key.gwy.port = 0;
 		PF_ACPY(&key.ext.addr, pd->src, key.af);
 		PF_ACPY(&key.gwy.addr, pd->dst, key.af);
 	}
 
 	STATE_LOOKUP();
 
 	/*
 	 * "sender" is the peer this packet comes from: the state's src when
 	 * the packet flows in the direction recorded in the state, its dst
 	 * otherwise.
 	 */
 	st = *state;
 	forward = (direction == st->direction);
 	sender = forward ? &st->src : &st->dst;
 	receiver = forward ? &st->dst : &st->src;
 
 	/* advance the per-peer pseudo-states */
 	if (sender->state < PFOTHERS_SINGLE)
 		sender->state = PFOTHERS_SINGLE;
 	if (receiver->state == PFOTHERS_SINGLE)
 		receiver->state = PFOTHERS_MULTIPLE;
 
 	/* refresh expiry; the timeout class depends on both peers */
 	st->expire = time_second;
 	st->timeout = (sender->state == PFOTHERS_MULTIPLE &&
 	    receiver->state == PFOTHERS_MULTIPLE) ?
 	    PFTM_OTHER_MULTIPLE : PFTM_OTHER_SINGLE;
 
 	/* rewrite the relevant address when the state is translated */
 	if (STATE_TRANSLATE(st)) {
 		struct pf_addr	*pkt_addr, *new_addr;
 
 		if (direction == PF_OUT) {
 			pkt_addr = pd->src;
 			new_addr = &st->gwy.addr;
 		} else {
 			pkt_addr = pd->dst;
 			new_addr = &st->lan.addr;
 		}
 
 		switch (pd->af) {
 #ifdef INET
 		case AF_INET:
 			/* IPv4: update the address together with ip_sum */
 			pf_change_a(&pkt_addr->v4.s_addr, pd->ip_sum,
 			    new_addr->v4.s_addr, 0);
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			/* IPv6: plain address copy, no checksum argument */
 			PF_ACPY(pkt_addr, new_addr, pd->af);
 			break;
 #endif /* INET6 */
 		}
 	}
 
 	return (PF_PASS);
 }
 
 /*
  * Copy "len" bytes located at offset "off" of mbuf chain "m" into the
  * caller-supplied buffer "p" and return "p".
  *
  * "off" is measured from the start of the mbuf chain; the IP or IPv6
  * header is read from the start of the first mbuf's data.
  *
  * Before copying, the request is validated per address family:
  *  - AF_INET, non-zero fragment offset: nothing is copied and NULL is
  *    returned.  The action is PF_PASS if the fragment offset in bytes is
  *    at least "len", otherwise PF_DROP with reason PFRES_FRAG.
  *  - AF_INET / AF_INET6: if off + len exceeds either the mbuf packet
  *    length or the length announced in the IP header, NULL is returned
  *    with action PF_DROP and reason PFRES_SHORT.
  * Other address families are copied without any check.
  *
  * "actionp" and "reasonp" are handed to ACTION_SET()/REASON_SET();
  * callers in this file pass NULL for "actionp".
  */
 void *
 pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
     u_short *actionp, u_short *reasonp, sa_family_t af)
 {
 	int	truncated = 0;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET: {
 		struct ip	*ip4 = mtod(m, struct ip *);
 		u_int16_t	 frag_bytes;
 
 		/* fragment offset field, converted to a byte count */
 		frag_bytes = (ntohs(ip4->ip_off) & IP_OFFMASK) << 3;
 		if (frag_bytes != 0) {
 			if (frag_bytes < len) {
 				ACTION_SET(actionp, PF_DROP);
 				REASON_SET(reasonp, PFRES_FRAG);
 			} else {
 				ACTION_SET(actionp, PF_PASS);
 			}
 			return (NULL);
 		}
 		truncated = (m->m_pkthdr.len < off + len ||
 		    ntohs(ip4->ip_len) < off + len);
 		break;
 	}
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6: {
 		struct ip6_hdr	*ip6 = mtod(m, struct ip6_hdr *);
 
 		/* ip6_plen does not count the fixed IPv6 header itself */
 		truncated = (m->m_pkthdr.len < off + len ||
 		    (ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr)) <
 		    (unsigned)(off + len));
 		break;
 	}
 #endif /* INET6 */
 	}
 
 	if (truncated) {
 		ACTION_SET(actionp, PF_DROP);
 		REASON_SET(reasonp, PFRES_SHORT);
 		return (NULL);
 	}
 
 	m_copydata(m, off, len, p);
 	return (p);
 }
 
 /*
  * Check whether a route to addr exists and, if kif is given, whether that
  * route leaves through kif's interface (uRPF check).
  *
  *  addr, af	address to look up and its family (AF_INET, or AF_INET6
  *		when built with INET6)
  *  kif	input interface, or NULL for a plain "is there a route" check
  *
  * Returns 1 if the check passes and 0 otherwise; an unsupported address
  * family yields 0.  Interfaces of type IFT_ENC pass without a lookup.
  */
 int
 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
 {
 	struct sockaddr_in	*dst;
 	int			 ret = 1;
 	int			 check_mpath;
 #ifndef __FreeBSD__
 	extern int		 ipmultipath;
 #endif
 #ifdef INET6
 #ifndef __FreeBSD__
 	extern int		 ip6_multipath;
 #endif
 	struct sockaddr_in6	*dst6;
 	struct route_in6	 ro;
 #else
 	struct route		 ro;
 #endif
 	struct radix_node	*rn;
 	struct rtentry		*rt;
 	struct ifnet		*ifp;
 
 	/* check_mpath is only ever raised in the non-__FreeBSD__ build. */
 	check_mpath = 0;
 	bzero(&ro, sizeof(ro));
 	/* Fill in the lookup key (ro.ro_dst) for the requested family. */
 	switch (af) {
 	case AF_INET:
 		dst = satosin(&ro.ro_dst);
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = addr->v4;
 #ifndef __FreeBSD__	/* MULTIPATH_ROUTING */
 		if (ipmultipath)
 			check_mpath = 1;
 #endif
 		break;
 #ifdef INET6
 	case AF_INET6:
 		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
 		dst6->sin6_family = AF_INET6;
 		dst6->sin6_len = sizeof(*dst6);
 		dst6->sin6_addr = addr->v6;
 #ifndef __FreeBSD__	/* MULTIPATH_ROUTING */
 		if (ip6_multipath)
 			check_mpath = 1;
 #endif
 		break;
 #endif /* INET6 */
 	default:
 		return (0);
 	}
 
 	/* Skip checks for ipsec interfaces */
 	/*
 	 * NOTE(review): kif->pfik_ifp is dereferenced here without a NULL
 	 * test, although it is tested against NULL after the lookup below.
 	 * ret is still 1 and ro.ro_rt still NULL when this jump is taken.
 	 */
 	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
 		goto out;
 
 #ifdef __FreeBSD__
-	rtalloc_ign((struct route *)&ro, RTF_CLONING);
+/* XXX MRT not always INET */ /* stick with table 0 though */
+	if (af == AF_INET)
+		in_rtalloc_ign((struct route *)&ro, RTF_CLONING, 0);
+	else
+		rtalloc_ign((struct route *)&ro, RTF_CLONING);
 #else /* ! __FreeBSD__ */
 	rtalloc_noclone((struct route *)&ro, NO_CLONING);
 #endif
 
 	if (ro.ro_rt != NULL) {
 		/* No interface given, this is a no-route check */
 		if (kif == NULL)
 			goto out;
 
 		if (kif->pfik_ifp == NULL) {
 			ret = 0;
 			goto out;
 		}
 
 		/* Perform uRPF check if passed input interface */
 		ret = 0;
 		rn = (struct radix_node *)ro.ro_rt;
 		/*
 		 * Walk the candidate routes until one uses kif's interface.
 		 * On __FreeBSD__ rn is cleared after the first entry and
 		 * check_mpath stays 0, so exactly one route is examined.
 		 */
 		do {
 			rt = (struct rtentry *)rn;
 #ifndef __FreeBSD__ /* CARPDEV */
 			if (rt->rt_ifp->if_type == IFT_CARP)
 				ifp = rt->rt_ifp->if_carpdev;
 			else
 #endif
 				ifp = rt->rt_ifp;
 
 			if (kif->pfik_ifp == ifp)
 				ret = 1;
 #ifdef __FreeBSD__ /* MULTIPATH_ROUTING */
 			rn = NULL;
 #else
 			rn = rn_mpath_next(rn);
 #endif
 		} while (check_mpath == 1 && rn != NULL && ret == 0);
 	} else
 		ret = 0;
 out:
 	/* Release the route obtained by the lookup, if any. */
 	if (ro.ro_rt != NULL)
 		RTFREE(ro.ro_rt);
 	return (ret);
 }
 
 /*
  * Look up the route for addr (family af) and compare its route label with
  * aw->v.rtlabel.
  *
  * Returns 1 when the labels match and 0 otherwise, including for an
  * unsupported address family or a missing route.  In the __FreeBSD__ build
  * the comparison is not compiled in (XXX_IMPORT), so the route is looked
  * up and released but the result is always 0.
  */
 int
 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
 {
 	struct sockaddr_in	*dst;
 #ifdef INET6
 	struct sockaddr_in6	*dst6;
 	struct route_in6	 ro;
 #else
 	struct route		 ro;
 #endif
 	int			 ret = 0;
 
 	bzero(&ro, sizeof(ro));
 	/* Fill in the lookup key (ro.ro_dst) for the requested family. */
 	switch (af) {
 	case AF_INET:
 		dst = satosin(&ro.ro_dst);
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = addr->v4;
 		break;
 #ifdef INET6
 	case AF_INET6:
 		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
 		dst6->sin6_family = AF_INET6;
 		dst6->sin6_len = sizeof(*dst6);
 		dst6->sin6_addr = addr->v6;
 		break;
 #endif /* INET6 */
 	default:
 		return (0);
 	}
 
 #ifdef __FreeBSD__
 # ifdef RTF_PRCLONING
 	rtalloc_ign((struct route *)&ro, (RTF_CLONING|RTF_PRCLONING));
 # else /* !RTF_PRCLONING */
-	rtalloc_ign((struct route *)&ro, RTF_CLONING);
+	if (af == AF_INET)
+		in_rtalloc_ign((struct route *)&ro, RTF_CLONING, 0);
+	else
+		rtalloc_ign((struct route *)&ro, RTF_CLONING);
 # endif
 #else /* ! __FreeBSD__ */
 	rtalloc_noclone((struct route *)&ro, NO_CLONING);
 #endif
 
 	if (ro.ro_rt != NULL) {
 #ifdef __FreeBSD__
 		/* XXX_IMPORT: later */
 #else
 		if (ro.ro_rt->rt_labelid == aw->v.rtlabel)
 			ret = 1;
 #endif
 		/* Release the route obtained by the lookup. */
 		RTFREE(ro.ro_rt);
 	}
 
 	return (ret);
 }
 
 #ifdef INET
 
 /*
  * Send the IPv4 packet *m out as directed by the route option r->rt of
  * rule r.
  *
  *  m	packet; on the paths that reach "done", *m is set to NULL
  *		unless r->rt is PF_DUPTO (a duplicate is sent in that case)
  *  r	rule carrying the route option and the address pool
  *  dir	PF_IN or PF_OUT
  *  oifp	interface handed in by the caller; if the chosen output
  *		interface differs, the packet is run through pf_test() again
  *		as PF_OUT on the new interface
  *  s	state or NULL; when set, s->rt_addr and s->rt_kif select the
  *		next hop and interface instead of the rule's pool
  *  pd	packet descriptor; pd->pf_mtag->routed counts the passes
  *		through this function
  *
  * Panics on invalid parameters.  Returns without touching the packet when
  * the duplicate cannot be allocated (PF_DUPTO) or when the rule direction
  * does not call for routing in this direction.  Packets larger than the
  * interface MTU are fragmented, or answered with an ICMP "need fragment"
  * error when IP_DF is set (dropped silently for PF_DUPTO).
  */
 void
 pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
     struct pf_state *s, struct pf_pdesc *pd)
 {
 	struct mbuf		*m0, *m1;
 	struct route		 iproute;
 	struct route		*ro = NULL;
 	struct sockaddr_in	*dst;
 	struct ip		*ip;
 	struct ifnet		*ifp = NULL;
 	struct pf_addr		 naddr;
 	struct pf_src_node	*sn = NULL;
 	int			 error = 0;
 #ifdef __FreeBSD__
 	int sw_csum;
 #endif
 #ifdef IPSEC
 	struct m_tag		*mtag;
 #endif /* IPSEC */
 
 	if (m == NULL || *m == NULL || r == NULL ||
 	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
 		panic("pf_route: invalid parameters");
 
 	/* Loop guard: drop a packet that keeps coming back through here. */
 	if (pd->pf_mtag->routed++ > 3) {
 		m0 = *m;
 		*m = NULL;
 		goto bad;
 	}
 
 	if (r->rt == PF_DUPTO) {
 		/* Work on a copy; the original stays with the caller. */
 #ifdef __FreeBSD__
 		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
 #else
 		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
 #endif
 			return;
 	} else {
 		/*
 		 * PF_REPLYTO acts only on packets whose direction differs
 		 * from the rule's; every other option acts only on packets
 		 * in the rule's direction.  Leave all others alone.
 		 */
 		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
 			return;
 		m0 = *m;
 	}
 
 	if (m0->m_len < sizeof(struct ip)) {
 		DPFPRINTF(PF_DEBUG_URGENT,
 		    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
 		goto bad;
 	}
 
 	ip = mtod(m0, struct ip *);
 
 	/* Default next hop is the packet's own destination address. */
 	ro = &iproute;
 	bzero((caddr_t)ro, sizeof(*ro));
 	dst = satosin(&ro->ro_dst);
 	dst->sin_family = AF_INET;
 	dst->sin_len = sizeof(*dst);
 	dst->sin_addr = ip->ip_dst;
 
 	if (r->rt == PF_FASTROUTE) {
-		rtalloc(ro);
+		in_rtalloc(ro, 0);
 		if (ro->ro_rt == 0) {
 			ipstat.ips_noroute++;
 			goto bad;
 		}
 
 		ifp = ro->ro_rt->rt_ifp;
 		ro->ro_rt->rt_use++;
 
 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
 			dst = satosin(ro->ro_rt->rt_gateway);
 	} else {
 		if (TAILQ_EMPTY(&r->rpool.list)) {
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
 			goto bad;
 		}
 		/*
 		 * Without a state, pick the next hop from the rule's pool;
 		 * with a state, reuse what the state recorded.  An all-zero
 		 * address leaves the destination set above untouched.
 		 */
 		if (s == NULL) {
 			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
 			    &naddr, NULL, &sn);
 			if (!PF_AZERO(&naddr, AF_INET))
 				dst->sin_addr.s_addr = naddr.v4.s_addr;
 			ifp = r->rpool.cur->kif ?
 			    r->rpool.cur->kif->pfik_ifp : NULL;
 		} else {
 			if (!PF_AZERO(&s->rt_addr, AF_INET))
 				dst->sin_addr.s_addr =
 				    s->rt_addr.v4.s_addr;
 			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
 		}
 	}
 	if (ifp == NULL)
 		goto bad;
 
 	/*
 	 * The packet leaves through a different interface than the one it
 	 * was handed in with: filter it as outbound on the new interface.
 	 * pf_test() may consume the packet (m0 == NULL) or replace the mbuf,
 	 * so the IP header pointer is reloaded afterwards.  On __FreeBSD__
 	 * the pf lock is released around the call.
 	 */
 	if (oifp != ifp) {
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 		if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
 			PF_LOCK();
 			goto bad;
 		} else if (m0 == NULL) {
 			PF_LOCK();
 			goto done;
 		}
 		PF_LOCK();
 #else
 		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
 			goto bad;
 		else if (m0 == NULL)
 			goto done;
 #endif
 		if (m0->m_len < sizeof(struct ip)) {
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
 			goto bad;
 		}
 		ip = mtod(m0, struct ip *);
 	}
 
 #ifdef __FreeBSD__
 	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
 	/* sw_csum: checksums requested that ifp cannot compute itself. */
 	m0->m_pkthdr.csum_flags |= CSUM_IP;
 	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
 	if (sw_csum & CSUM_DELAY_DATA) {
 		/*
 		 * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
 		 */
 		NTOHS(ip->ip_len);
 		NTOHS(ip->ip_off);	 /* XXX: needed? */
 		in_delayed_cksum(m0);
 		HTONS(ip->ip_len);
 		HTONS(ip->ip_off);
 		sw_csum &= ~CSUM_DELAY_DATA;
 	}
 	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
 
 	/* Fits the MTU (or ifp fragments in hardware and DF is clear). */
 	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
 	    (ifp->if_hwassist & CSUM_FRAGMENT &&
 		((ip->ip_off & htons(IP_DF)) == 0))) {
 		/*
 		 * ip->ip_len = htons(ip->ip_len);
 		 * ip->ip_off = htons(ip->ip_off);
 		 */
 		ip->ip_sum = 0;
 		if (sw_csum & CSUM_DELAY_IP) {
 			/* From KAME */
 			if (ip->ip_v == IPVERSION &&
 			    (ip->ip_hl << 2) == sizeof(*ip)) {
 				ip->ip_sum = in_cksum_hdr(ip);
 			} else {
 				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
 			}
 		}
 		PF_UNLOCK();
 		error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro->ro_rt);
 		PF_LOCK();
 		goto done;
 	}
 
 #else
 	/* Copied from ip_output. */
 #ifdef IPSEC
 	/*
 	 * If deferred crypto processing is needed, check that the
 	 * interface supports it.
 	 */
 	if ((mtag = m_tag_find(m0, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
 	    != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
 		/* Notify IPsec to do its own crypto. */
 		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
 		goto bad;
 	}
 #endif /* IPSEC */
 
 	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
 	if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT) {
 		if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
 		    ifp->if_bridge != NULL) {
 			in_delayed_cksum(m0);
 			m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clear */
 		}
 	} else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) {
 		if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
 		    ifp->if_bridge != NULL) {
 			in_delayed_cksum(m0);
 			m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clear */
 		}
 	}
 
 	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
 		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
 		    ifp->if_bridge == NULL) {
 			m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
 			ipstat.ips_outhwcsum++;
 		} else {
 			ip->ip_sum = 0;
 			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
 		}
 		/* Update relevant hardware checksum stats for TCP/UDP */
 		if (m0->m_pkthdr.csum_flags & M_TCPV4_CSUM_OUT)
 			tcpstat.tcps_outhwcsum++;
 		else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT)
 			udpstat.udps_outhwcsum++;
 		error = (*ifp->if_output)(ifp, m0, sintosa(dst), NULL);
 		goto done;
 	}
 #endif
 	/*
 	 * Too large for interface; fragment if possible.
 	 * Must be able to put at least 8 bytes per fragment.
 	 */
 	if (ip->ip_off & htons(IP_DF)) {
 		ipstat.ips_cantfrag++;
 		if (r->rt != PF_DUPTO) {
 #ifdef __FreeBSD__
 			/* icmp_error() expects host byte ordering */
 			NTOHS(ip->ip_len);
 			NTOHS(ip->ip_off);
 			PF_UNLOCK();
 			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
 			    ifp->if_mtu);
 			PF_LOCK();
 #else
 			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
 			    ifp->if_mtu);
 #endif
 			goto done;
 		} else
 			goto bad;
 	}
 
 	/* m1 remembers the head of the chain across the ip_fragment() call. */
 	m1 = m0;
 #ifdef __FreeBSD__
 	/*
 	 * XXX: is cheaper + less error prone than own function
 	 */
 	NTOHS(ip->ip_len);
 	NTOHS(ip->ip_off);
 	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
 #else
 	error = ip_fragment(m0, ifp, ifp->if_mtu);
 #endif
 	if (error) {
 #ifndef __FreeBSD__	/* ip_fragment does not do m_freem() on FreeBSD */
 		m0 = NULL;
 #endif
 		goto bad;
 	}
 
 	/*
 	 * Send the fragments one by one (linked through m_nextpkt); after
 	 * the first output error the remaining fragments are freed instead.
 	 */
 	for (m0 = m1; m0; m0 = m1) {
 		m1 = m0->m_nextpkt;
 		m0->m_nextpkt = 0;
 #ifdef __FreeBSD__
 		if (error == 0) {
 			PF_UNLOCK();
 			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
 			    NULL);
 			PF_LOCK();
 		} else
 #else
 		if (error == 0)
 			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
 			    NULL);
 		else
 #endif
 			m_freem(m0);
 	}
 
 	if (error == 0)
 		ipstat.ips_fragmented++;
 
 done:
 	/* The packet has been consumed here unless only a copy was sent. */
 	if (r->rt != PF_DUPTO)
 		*m = NULL;
 	if (ro == &iproute && ro->ro_rt)
 		RTFREE(ro->ro_rt);
 	return;
 
 bad:
 	m_freem(m0);
 	goto done;
 }
 #endif /* INET */
 
 #ifdef INET6
 /*
  * Send the IPv6 packet *m out as directed by the route option r->rt of
  * rule r.  IPv6 counterpart of pf_route().
  *
  *  m	packet; on the paths that reach "done", *m is set to NULL
  *		unless r->rt is PF_DUPTO (a duplicate is sent in that case)
  *  r	rule carrying the route option and the address pool
  *  dir	PF_IN or PF_OUT
  *  oifp	interface handed in by the caller; if the chosen output
  *		interface differs, the packet is run through pf_test6()
  *		again as PF_OUT on the new interface
  *  s	state or NULL; when set, s->rt_addr and s->rt_kif select the
  *		next hop and interface instead of the rule's pool
  *  pd	packet descriptor; pd->pf_mtag->routed counts the passes
  *		through this function
  *
  * Panics on invalid parameters.  PF_FASTROUTE hands the packet to
  * ip6_output() and returns directly.  A packet larger than the interface
  * MTU is not fragmented: an ICMP6 "packet too big" error is sent, or the
  * packet is freed for PF_DUPTO.
  */
 void
 pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
     struct pf_state *s, struct pf_pdesc *pd)
 {
 	struct mbuf		*m0;
 	struct route_in6	 ip6route;
 	struct route_in6	*ro;
 	struct sockaddr_in6	*dst;
 	struct ip6_hdr		*ip6;
 	struct ifnet		*ifp = NULL;
 	struct pf_addr		 naddr;
 	struct pf_src_node	*sn = NULL;
 	int			 error = 0;
 
 	if (m == NULL || *m == NULL || r == NULL ||
 	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
 		panic("pf_route6: invalid parameters");
 
 	/* Loop guard: drop a packet that keeps coming back through here. */
 	if (pd->pf_mtag->routed++ > 3) {
 		m0 = *m;
 		*m = NULL;
 		goto bad;
 	}
 
 	if (r->rt == PF_DUPTO) {
 		/* Work on a copy; the original stays with the caller. */
 #ifdef __FreeBSD__
 		if ((m0 = m_dup(*m, M_DONTWAIT)) == NULL)
 #else
 		if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
 #endif
 			return;
 	} else {
 		/*
 		 * PF_REPLYTO acts only on packets whose direction differs
 		 * from the rule's; every other option acts only on packets
 		 * in the rule's direction.  Leave all others alone.
 		 */
 		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
 			return;
 		m0 = *m;
 	}
 
 	if (m0->m_len < sizeof(struct ip6_hdr)) {
 		DPFPRINTF(PF_DEBUG_URGENT,
 		    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
 		goto bad;
 	}
 	ip6 = mtod(m0, struct ip6_hdr *);
 
 	/* Default next hop is the packet's own destination address. */
 	ro = &ip6route;
 	bzero((caddr_t)ro, sizeof(*ro));
 	dst = (struct sockaddr_in6 *)&ro->ro_dst;
 	dst->sin6_family = AF_INET6;
 	dst->sin6_len = sizeof(*dst);
 	dst->sin6_addr = ip6->ip6_dst;
 
 	/* Cheat. XXX why only in the v6 case??? */
 	/*
 	 * NOTE(review): this path returns without going through "done", so
 	 * *m is not cleared here even though m0 may be *m itself.  The
 	 * non-__FreeBSD__ branch also uses "mtag", which is not among the
 	 * locals declared above.
 	 */
 	if (r->rt == PF_FASTROUTE) {
 #ifdef __FreeBSD__
 		m0->m_flags |= M_SKIP_FIREWALL;
 		PF_UNLOCK();
 		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
 		PF_LOCK();
 #else
 		mtag = m_tag_get(PACKET_TAG_PF_GENERATED, 0, M_NOWAIT);
 		if (mtag == NULL)
 			goto bad;
 		m_tag_prepend(m0, mtag);
 		pd->pf_mtag->flags |= PF_TAG_GENERATED;
 		ip6_output(m0, NULL, NULL, 0, NULL, NULL);
 #endif
 		return;
 	}
 
 	if (TAILQ_EMPTY(&r->rpool.list)) {
 		DPFPRINTF(PF_DEBUG_URGENT,
 		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
 		goto bad;
 	}
 	/*
 	 * Without a state, pick the next hop from the rule's pool; with a
 	 * state, reuse what the state recorded.  An all-zero address leaves
 	 * the destination set above untouched.
 	 */
 	if (s == NULL) {
 		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
 		    &naddr, NULL, &sn);
 		if (!PF_AZERO(&naddr, AF_INET6))
 			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
 			    &naddr, AF_INET6);
 		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
 	} else {
 		if (!PF_AZERO(&s->rt_addr, AF_INET6))
 			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
 			    &s->rt_addr, AF_INET6);
 		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
 	}
 	if (ifp == NULL)
 		goto bad;
 
 	/*
 	 * The packet leaves through a different interface than the one it
 	 * was handed in with: filter it as outbound on the new interface.
 	 * pf_test6() may consume the packet (m0 == NULL) or replace the
 	 * mbuf, so the header pointer is reloaded afterwards.  On
 	 * __FreeBSD__ the pf lock is released around the call.
 	 */
 	if (oifp != ifp) {
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 		if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
 			PF_LOCK();
 			goto bad;
 		} else if (m0 == NULL) {
 			PF_LOCK();
 			goto done;
 		}
 		PF_LOCK();
 #else
 		if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
 			goto bad;
 		else if (m0 == NULL)
 			goto done;
 #endif
 		if (m0->m_len < sizeof(struct ip6_hdr)) {
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
 			goto bad;
 		}
 		ip6 = mtod(m0, struct ip6_hdr *);
 	}
 
 	/*
 	 * If the packet is too large for the outgoing interface,
 	 * send back an icmp6 error.
 	 */
 	/* Scoped destination: store the interface index in the address. */
 	if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
 		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
 	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		error = nd6_output(ifp, ifp, m0, dst, NULL);
 #ifdef __FreeBSD__
 		PF_LOCK();
 #endif
 	} else {
 		in6_ifstat_inc(ifp, ifs6_in_toobig);
 #ifdef __FreeBSD__
 		if (r->rt != PF_DUPTO) {
 			PF_UNLOCK();
 			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
 			PF_LOCK();
 		 } else
 #else
 		if (r->rt != PF_DUPTO)
 			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
 		else
 #endif
 			goto bad;
 	}
 
 done:
 	/* The packet has been consumed here unless only a copy was sent. */
 	if (r->rt != PF_DUPTO)
 		*m = NULL;
 	return;
 
 bad:
 	m_freem(m0);
 	goto done;
 }
 #endif /* INET6 */
 
 
 #ifdef __FreeBSD__
 /*
  * Check the protocol (tcp/udp/icmp/icmp6) checksum of a received packet.
  *   m   is the packet
  *   off is the offset where the protocol header starts
  *   len is the total length of protocol header plus payload
  *   p   is the protocol number, af the address family
  * Returns 0 when the checksum is valid, otherwise (including unsupported
  * protocols or families and inconsistent lengths) returns 1.
  *
  * FreeBSD supports cksum offloads for the following drivers.
  *  em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
  *   ti(4), txp(4), xl(4)
  *
  * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
  *  the network driver computed the cksum including the pseudo header;
  *  csum_data only needs to be verified
  * CSUM_DATA_VALID :
  *  the network driver computed a partial cksum in csum_data; the pseudo
  *  header still has to be added (i.e. lack of H/W support for the
  *  pseudo header, for instance hme(4), sk(4) and possibly gem(4))
  *
  * After validating the cksum of the packet, set both flags CSUM_DATA_VALID
  * and CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in the
  * upper TCP/UDP layer.
  * Also, set csum_data to 0xffff to force cksum validation.
  */
 int
 pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
 {
 	u_int16_t sum = 0;
 	int hw_assist = 0;
 	struct ip *ip;
 
 	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
 		return (1);
 	if (m->m_pkthdr.len < off + len)
 		return (1);
 
 	/* First try to use a checksum already computed by the driver. */
 	switch (p) {
 	case IPPROTO_TCP:
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
 				sum = m->m_pkthdr.csum_data;
 			} else {
 				ip = mtod(m, struct ip *);	
 				sum = in_pseudo(ip->ip_src.s_addr,
 					ip->ip_dst.s_addr, htonl((u_short)len + 
 					m->m_pkthdr.csum_data + IPPROTO_TCP));
 			}
 			/* A correct checksum yields 0xffff here, i.e. sum 0. */
 			sum ^= 0xffff;
 			++hw_assist;
 		}
 		break;
 	case IPPROTO_UDP:
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
 				sum = m->m_pkthdr.csum_data;
 			} else {
 				ip = mtod(m, struct ip *);	
 				sum = in_pseudo(ip->ip_src.s_addr,
 					ip->ip_dst.s_addr, htonl((u_short)len +
 					m->m_pkthdr.csum_data + IPPROTO_UDP));
 			}
 			/* A correct checksum yields 0xffff here, i.e. sum 0. */
 			sum ^= 0xffff;
 			++hw_assist;
                 }
 		break;
 	case IPPROTO_ICMP:
 #ifdef INET6
 	case IPPROTO_ICMPV6:
 #endif /* INET6 */
 		break;
 	default:
 		return (1);
 	}
 
 	/* No usable driver result: compute the checksum in software. */
 	if (!hw_assist) {
 		switch (af) {
 		case AF_INET:
 			if (p == IPPROTO_ICMP) {
 				if (m->m_len < off)
 					return (1);
 				/*
 				 * Temporarily advance the first mbuf past the
 				 * IP header so in_cksum() starts at the ICMP
 				 * header, then restore it.
 				 */
 				m->m_data += off;
 				m->m_len -= off;
 				sum = in_cksum(m, len);
 				m->m_data -= off;
 				m->m_len += off;
 			} else {
 				if (m->m_len < sizeof(struct ip))
 					return (1);
 				sum = in4_cksum(m, p, off, len);
 			}
 			break;
 #ifdef INET6
 		case AF_INET6:
 			if (m->m_len < sizeof(struct ip6_hdr))
 				return (1);
 			sum = in6_cksum(m, p, off, len);
 			break;
 #endif /* INET6 */
 		default:
 			return (1);
 		}
 	}
 	/* Non-zero sum: bad checksum, bump the per-protocol error counter. */
 	if (sum) {
 		switch (p) {
 		case IPPROTO_TCP:
 			tcpstat.tcps_rcvbadsum++;
 			break;
 		case IPPROTO_UDP:
 			udpstat.udps_badsum++;
 			break;
 		case IPPROTO_ICMP:
 			icmpstat.icps_checksum++;
 			break;
 #ifdef INET6
 		case IPPROTO_ICMPV6:
 			icmp6stat.icp6s_checksum++;
 			break;
 #endif /* INET6 */
 		}
 		return (1);
 	} else {
 		/* Mark TCP/UDP as verified so upper layers do not redo it. */
 		if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
 			m->m_pkthdr.csum_flags |=
 			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 			m->m_pkthdr.csum_data = 0xffff;
 		}
 	}
 	return (0);
 }
 #else /* !__FreeBSD__ */
 /*
  * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
  *   off is the offset where the protocol header starts
  *   len is the total length of protocol header plus payload
  * returns 0 when the checksum is valid, otherwise returns 1.
  *
  * For TCP and UDP the verdict is cached in m->m_pkthdr.csum_flags
  * (M_*_CSUM_IN_OK / M_*_CSUM_IN_BAD) and a cached verdict is returned
  * without recomputation.  Unsupported protocols or address families and
  * inconsistent lengths also return 1.
  */
 int
 pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
     sa_family_t af)
 {
 	u_int16_t flag_ok, flag_bad;
 	u_int16_t sum;
 
 	/* Select the mbuf flags that record the verdict for this protocol. */
 	switch (p) {
 	case IPPROTO_TCP:
 		flag_ok = M_TCP_CSUM_IN_OK;
 		flag_bad = M_TCP_CSUM_IN_BAD;
 		break;
 	case IPPROTO_UDP:
 		flag_ok = M_UDP_CSUM_IN_OK;
 		flag_bad = M_UDP_CSUM_IN_BAD;
 		break;
 	case IPPROTO_ICMP:
 #ifdef INET6
 	case IPPROTO_ICMPV6:
 #endif /* INET6 */
 		/* No flags for ICMP: the checksum is always recomputed. */
 		flag_ok = flag_bad = 0;
 		break;
 	default:
 		return (1);
 	}
 	/* Reuse a verdict already recorded on the mbuf. */
 	if (m->m_pkthdr.csum_flags & flag_ok)
 		return (0);
 	if (m->m_pkthdr.csum_flags & flag_bad)
 		return (1);
 	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
 		return (1);
 	if (m->m_pkthdr.len < off + len)
 		return (1);
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		if (p == IPPROTO_ICMP) {
 			if (m->m_len < off)
 				return (1);
 			/*
 			 * Temporarily advance the first mbuf past the IP
 			 * header so in_cksum() starts at the ICMP header,
 			 * then restore it.
 			 */
 			m->m_data += off;
 			m->m_len -= off;
 			sum = in_cksum(m, len);
 			m->m_data -= off;
 			m->m_len += off;
 		} else {
 			if (m->m_len < sizeof(struct ip))
 				return (1);
 			sum = in4_cksum(m, p, off, len);
 		}
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		if (m->m_len < sizeof(struct ip6_hdr))
 			return (1);
 		sum = in6_cksum(m, p, off, len);
 		break;
 #endif /* INET6 */
 	default:
 		return (1);
 	}
 	/* Non-zero sum: record the failure and bump the error counter. */
 	if (sum) {
 		m->m_pkthdr.csum_flags |= flag_bad;
 		switch (p) {
 		case IPPROTO_TCP:
 			tcpstat.tcps_rcvbadsum++;
 			break;
 		case IPPROTO_UDP:
 			udpstat.udps_badsum++;
 			break;
 		case IPPROTO_ICMP:
 			icmpstat.icps_checksum++;
 			break;
 #ifdef INET6
 		case IPPROTO_ICMPV6:
 			icmp6stat.icp6s_checksum++;
 			break;
 #endif /* INET6 */
 		}
 		return (1);
 	}
 	m->m_pkthdr.csum_flags |= flag_ok;
 	return (0);
 }
 #endif /* __FreeBSD__ */
 
 #ifdef INET
 /*
  * Filter one IPv4 packet.
  *
  *  dir	PF_IN or PF_OUT
  *  ifp	interface the packet is seen on
  *  m0	the packet; *m0 may be replaced by normalization and is set
  *		to NULL when the packet is consumed here (synproxy) or by
  *		pf_route()
  *  eh	ethernet header pointer, stored in the packet descriptor
  *  inp	(__FreeBSD__ only) passed through to the TCP and UDP rule
  *		evaluation
  *
  * Returns the resulting action.  PF_PASS is returned right away when pf
  * is not running, the packet is marked to skip the firewall, or the
  * interface has PFI_IFLAG_SKIP set; PF_DROP when no pf mbuf tag or no
  * kif is available.  PF_SYNPROXY_DROP is turned into PF_PASS after the
  * packet has been freed.  On __FreeBSD__ the pf lock is taken on entry
  * and released on every return path.
  */
 int
 #ifdef __FreeBSD__
 pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
     struct ether_header *eh, struct inpcb *inp)
 #else
 pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
     struct ether_header *eh)
 #endif
 {
 	struct pfi_kif		*kif;
 	u_short			 action, reason = 0, log = 0;
 	struct mbuf		*m = *m0;
 	struct ip		*h = NULL;	/* make the compiler happy */
 	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
 	struct pf_state		*s = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	struct pf_pdesc		 pd;
 	int			 off, dirndx, pqid = 0;
 
 #ifdef __FreeBSD__
 	PF_LOCK();
 #endif
 	if (!pf_status.running)
 #ifdef __FreeBSD__
 	{
 		PF_UNLOCK();
 #endif
 		return (PF_PASS);
 #ifdef __FreeBSD__
 	}
 #endif
 
 	memset(&pd, 0, sizeof(pd));
 	if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		DPFPRINTF(PF_DEBUG_URGENT,
 		    ("pf_test: pf_get_mtag returned NULL\n"));
 		return (PF_DROP);
 	}
 #ifdef __FreeBSD__
 	if (m->m_flags & M_SKIP_FIREWALL) {
 		PF_UNLOCK();
 		return (PF_PASS);
 	}
 #else
 	if (pd.pf_mtag->flags & PF_TAG_GENERATED)
 		return (PF_PASS);
 #endif
 
 #ifdef __FreeBSD__
 	/* XXX_IMPORT: later */
 #else
 	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
 		ifp = ifp->if_carpdev;
 #endif
 
 	kif = (struct pfi_kif *)ifp->if_pf_kif;
 	if (kif == NULL) {
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		DPFPRINTF(PF_DEBUG_URGENT,
 		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
 		return (PF_DROP);
 	}
 	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		return (PF_PASS);
 	}
 
 #ifdef __FreeBSD__
 	M_ASSERTPKTHDR(m);
 #else
 #ifdef DIAGNOSTIC
 	if ((m->m_flags & M_PKTHDR) == 0)
 		panic("non-M_PKTHDR is passed to pf_test");
 #endif /* DIAGNOSTIC */
 #endif /* __FreeBSD__ */
 
 	/* From here on every exit goes through "done" for accounting. */
 	if (m->m_pkthdr.len < (int)sizeof(*h)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_SHORT);
 		log = 1;
 		goto done;
 	}
 
 	/* We do IP header normalization and packet reassembly here */
 	if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
 		action = PF_DROP;
 		goto done;
 	}
 	/* Normalization may have replaced the mbuf: reload m and h. */
 	m = *m0;
 	h = mtod(m, struct ip *);
 
 	/* off: IP header length in bytes, i.e. start of the payload. */
 	off = h->ip_hl << 2;
 	if (off < (int)sizeof(*h)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_SHORT);
 		log = 1;
 		goto done;
 	}
 
 	/* Fill in the packet descriptor from the IP header. */
 	pd.src = (struct pf_addr *)&h->ip_src;
 	pd.dst = (struct pf_addr *)&h->ip_dst;
 	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET);
 	pd.ip_sum = &h->ip_sum;
 	pd.proto = h->ip_p;
 	pd.af = AF_INET;
 	pd.tos = h->ip_tos;
 	pd.tot_len = ntohs(h->ip_len);
 	pd.eh = eh;
 
 	/* handle fragments that didn't get reassembled by normalization */
 	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
 		action = pf_test_fragment(&r, dir, kif, m, h,
 		    &pd, &a, &ruleset);
 		goto done;
 	}
 
 	/*
 	 * Per-protocol handling: look for an existing state first; when a
 	 * state passes the packet, its rule, anchor and log setting are
 	 * used.  Rule evaluation runs only if no state was found.
 	 */
 	switch (h->ip_p) {
 
 	case IPPROTO_TCP: {
 		struct tcphdr	th;
 
 		pd.hdr.tcp = &th;
 		if (!pf_pull_hdr(m, off, &th, sizeof(th),
 		    &action, &reason, AF_INET)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		if (dir == PF_IN && pf_check_proto_cksum(m, off,
 		    ntohs(h->ip_len) - off, IPPROTO_TCP, AF_INET)) {
 			REASON_SET(&reason, PFRES_PROTCKSUM);
 			action = PF_DROP;
 			goto done;
 		}
 		pd.p_len = pd.tot_len - off - (th.th_off << 2);
 		/* ACK without payload: remember it for the queue choice. */
 		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
 			pqid = 1;
 		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
 		if (action == PF_DROP)
 			goto done;
 		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
 		    &reason);
 		if (action == PF_PASS) {
 #if NPFSYNC
 			pfsync_update_state(s);
 #endif /* NPFSYNC */
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 #ifdef __FreeBSD__
 			action = pf_test_tcp(&r, &s, dir, kif,
 			    m, off, h, &pd, &a, &ruleset, NULL, inp);
 #else
 			action = pf_test_tcp(&r, &s, dir, kif,
 			    m, off, h, &pd, &a, &ruleset, &ipintrq);
 #endif
 		break;
 	}
 
 	case IPPROTO_UDP: {
 		struct udphdr	uh;
 
 		pd.hdr.udp = &uh;
 		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
 		    &action, &reason, AF_INET)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		/* A zero uh_sum skips the checksum verification. */
 		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(m,
 		    off, ntohs(h->ip_len) - off, IPPROTO_UDP, AF_INET)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_PROTCKSUM);
 			goto done;
 		}
 		/* Reject port 0 and a UDP length that disagrees with the mbuf. */
 		if (uh.uh_dport == 0 ||
 		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
 		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_SHORT);
 			goto done;
 		}
 		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
 		if (action == PF_PASS) {
 #if NPFSYNC
 			pfsync_update_state(s);
 #endif /* NPFSYNC */
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 #ifdef __FreeBSD__
 			action = pf_test_udp(&r, &s, dir, kif,
 			    m, off, h, &pd, &a, &ruleset, NULL, inp);
 #else
 			action = pf_test_udp(&r, &s, dir, kif,
 			    m, off, h, &pd, &a, &ruleset, &ipintrq);
 #endif
 		break;
 	}
 
 	case IPPROTO_ICMP: {
 		struct icmp	ih;
 
 		pd.hdr.icmp = &ih;
 		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
 		    &action, &reason, AF_INET)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		if (dir == PF_IN && pf_check_proto_cksum(m, off,
 		    ntohs(h->ip_len) - off, IPPROTO_ICMP, AF_INET)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_PROTCKSUM);
 			goto done;
 		}
 		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
 		    &reason);
 		if (action == PF_PASS) {
 #if NPFSYNC
 			pfsync_update_state(s);
 #endif /* NPFSYNC */
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 #ifdef __FreeBSD__
 			action = pf_test_icmp(&r, &s, dir, kif,
 			    m, off, h, &pd, &a, &ruleset, NULL);
 #else
 			action = pf_test_icmp(&r, &s, dir, kif,
 			    m, off, h, &pd, &a, &ruleset, &ipintrq);
 #endif
 		break;
 	}
 
 	default:
 		action = pf_test_state_other(&s, dir, kif, &pd);
 		if (action == PF_PASS) {
 #if NPFSYNC
 			pfsync_update_state(s);
 #endif /* NPFSYNC */
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 #ifdef __FreeBSD__
 			action = pf_test_other(&r, &s, dir, kif, m, off, h,
 			    &pd, &a, &ruleset, NULL);
 #else
 			action = pf_test_other(&r, &s, dir, kif, m, off, h,
 			    &pd, &a, &ruleset, &ipintrq);
 #endif
 		break;
 	}
 
 done:
 	/*
 	 * A header longer than 5 words carries IP options; such packets
 	 * pass only if the state or the rule allows options.  h is only
 	 * looked at when action is PF_PASS, which is never the case on the
 	 * early jumps taken before h was loaded.
 	 */
 	if (action == PF_PASS && h->ip_hl > 5 &&
 	    !((s && s->allow_opts) || r->allow_opts)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_IPOPTIONS);
 		log = 1;
 		DPFPRINTF(PF_DEBUG_MISC,
 		    ("pf: dropping packet with ip options\n"));
 	}
 
 	if ((s && s->tag) || r->rtableid)
 		pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid);
 
 #ifdef ALTQ
 	if (action == PF_PASS && r->qid) {
 		/* Empty ACKs and low-delay TOS use the rule's pqid queue. */
 		if (pqid || (pd.tos & IPTOS_LOWDELAY))
 			pd.pf_mtag->qid = r->pqid;
 		else
 			pd.pf_mtag->qid = r->qid;
 		/* add hints for ecn */
 		pd.pf_mtag->af = AF_INET;
 		pd.pf_mtag->hdr = h;
 	}
 #endif /* ALTQ */
 
 	/*
 	 * connections redirected to loopback should not match sockets
 	 * bound specifically to loopback due to security implications,
 	 * see tcp_input() and in_pcblookup_listen().
 	 */
 	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
 	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
 	    (s->nat_rule.ptr->action == PF_RDR ||
 	    s->nat_rule.ptr->action == PF_BINAT) &&
 	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
 		pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
 
 	if (log) {
 		struct pf_rule *lr;
 
 		/* Log against the NAT rule if it asks for PF_LOG_ALL. */
 		if (s != NULL && s->nat_rule.ptr != NULL &&
 		    s->nat_rule.ptr->log & PF_LOG_ALL)
 			lr = s->nat_rule.ptr;
 		else
 			lr = r;
 		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset,
 		    &pd);
 	}
 
 	/*
 	 * Per-interface counters, indexed by direction and pass/block.
 	 * NOTE(review): the leading index 0 is presumably the IPv4 slot -
 	 * confirm against the pfi_kif definition.
 	 */
 	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
 	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
 
 	/* Rule, anchor, NAT rule, source node and state counters. */
 	if (action == PF_PASS || r->action == PF_DROP) {
 		dirndx = (dir == PF_OUT);
 		r->packets[dirndx]++;
 		r->bytes[dirndx] += pd.tot_len;
 		if (a != NULL) {
 			a->packets[dirndx]++;
 			a->bytes[dirndx] += pd.tot_len;
 		}
 		if (s != NULL) {
 			if (s->nat_rule.ptr != NULL) {
 				s->nat_rule.ptr->packets[dirndx]++;
 				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
 			}
 			if (s->src_node != NULL) {
 				s->src_node->packets[dirndx]++;
 				s->src_node->bytes[dirndx] += pd.tot_len;
 			}
 			if (s->nat_src_node != NULL) {
 				s->nat_src_node->packets[dirndx]++;
 				s->nat_src_node->bytes[dirndx] += pd.tot_len;
 			}
 			/* State counters are relative to the state's direction. */
 			dirndx = (dir == s->direction) ? 0 : 1;
 			s->packets[dirndx]++;
 			s->bytes[dirndx] += pd.tot_len;
 		}
 		/* tr: rule whose address tables get their statistics updated. */
 		tr = r;
 		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
 		if (nr != NULL) {
 			struct pf_addr *x;
 			/*
 			 * XXX: we need to make sure that the addresses
 			 * passed to pfr_update_stats() are the same than
 			 * the addresses used during matching (pfr_match)
 			 */
 			if (r == &pf_default_rule) {
 				tr = nr;
 				x = (s == NULL || s->direction == dir) ?
 				    &pd.baddr : &pd.naddr;
 			} else
 				x = (s == NULL || s->direction == dir) ?
 				    &pd.naddr : &pd.baddr;
 			if (x == &pd.baddr || s == NULL) {
 				/* we need to change the address */
 				if (dir == PF_OUT)
 					pd.src = x;
 				else
 					pd.dst = x;
 			}
 		}
 		if (tr->src.addr.type == PF_ADDR_TABLE)
 			pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
 			    s->direction == dir) ? pd.src : pd.dst, pd.af,
 			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
 			    tr->src.neg);
 		if (tr->dst.addr.type == PF_ADDR_TABLE)
 			pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
 			    s->direction == dir) ? pd.dst : pd.src, pd.af,
 			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
 			    tr->dst.neg);
 	}
 
 
 	/* Synproxy consumed the packet: free it and report PF_PASS. */
 	if (action == PF_SYNPROXY_DROP) {
 		m_freem(*m0);
 		*m0 = NULL;
 		action = PF_PASS;
 	} else if (r->rt)
 		/* pf_route can free the mbuf causing *m0 to become NULL */
 		pf_route(m0, r, dir, ifp, s, &pd);
 
 #ifdef __FreeBSD__
 	PF_UNLOCK();
 #endif
 
 	return (action);
 }
 #endif /* INET */
 
 #ifdef INET6
 int
 #ifdef __FreeBSD__
 pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
     struct ether_header *eh, struct inpcb *inp)
 #else
 pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
     struct ether_header *eh)
 #endif
 {
 	struct pfi_kif		*kif;
 	u_short			 action, reason = 0, log = 0;
 	struct mbuf		*m = *m0, *n = NULL;
 	struct ip6_hdr		*h;
 	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
 	struct pf_state		*s = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	struct pf_pdesc		 pd;
 	int			 off, terminal = 0, dirndx, rh_cnt = 0;
 
 #ifdef __FreeBSD__
 	PF_LOCK();
 #endif
 
 	if (!pf_status.running)
 #ifdef __FreeBSD__
 	{
 		PF_UNLOCK();
 #endif
 		return (PF_PASS);
 #ifdef __FreeBSD__
 	}
 #endif
 
 	memset(&pd, 0, sizeof(pd));
 	if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) {
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		DPFPRINTF(PF_DEBUG_URGENT,
 		    ("pf_test6: pf_get_mtag returned NULL\n"));
 		return (PF_DROP);
 	}
 	if (pd.pf_mtag->flags & PF_TAG_GENERATED)
 		return (PF_PASS);
 
 #ifdef __FreeBSD__
 	/* XXX_IMPORT: later */
 #else
 	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
 		ifp = ifp->if_carpdev;
 #endif
 
 	kif = (struct pfi_kif *)ifp->if_pf_kif;
 	if (kif == NULL) {
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		DPFPRINTF(PF_DEBUG_URGENT,
 		    ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
 		return (PF_DROP);
 	}
 	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		return (PF_PASS);
 	}
 
 #ifdef __FreeBSD__
 	M_ASSERTPKTHDR(m);
 #else
 #ifdef DIAGNOSTIC
 	if ((m->m_flags & M_PKTHDR) == 0)
 		panic("non-M_PKTHDR is passed to pf_test6");
 #endif /* DIAGNOSTIC */
 #endif
 
 #ifdef __FreeBSD__
 	h = NULL;	/* make the compiler happy */
 #endif
 
 	if (m->m_pkthdr.len < (int)sizeof(*h)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_SHORT);
 		log = 1;
 		goto done;
 	}
 
 	/* We do IP header normalization and packet reassembly here */
 	if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
 		action = PF_DROP;
 		goto done;
 	}
 	m = *m0;
 	h = mtod(m, struct ip6_hdr *);
 
 #if 1
 	/*
 	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
 	 * will do something bad, so drop the packet for now.
 	 */
 	if (htons(h->ip6_plen) == 0) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_NORM);	/*XXX*/
 		goto done;
 	}
 #endif
 
 	pd.src = (struct pf_addr *)&h->ip6_src;
 	pd.dst = (struct pf_addr *)&h->ip6_dst;
 	PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6);
 	pd.ip_sum = NULL;
 	pd.af = AF_INET6;
 	pd.tos = 0;
 	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
 	pd.eh = eh;
 
 	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
 	pd.proto = h->ip6_nxt;
 	do {
 		switch (pd.proto) {
 		case IPPROTO_FRAGMENT:
 			action = pf_test_fragment(&r, dir, kif, m, h,
 			    &pd, &a, &ruleset);
 			if (action == PF_DROP)
 				REASON_SET(&reason, PFRES_FRAG);
 			goto done;
 		case IPPROTO_ROUTING: {
 			struct ip6_rthdr rthdr;
 
 			if (rh_cnt++) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: IPv6 more than one rthdr\n"));
 				action = PF_DROP;
 				REASON_SET(&reason, PFRES_IPOPTIONS);
 				log = 1;
 				goto done;
 			}
 			if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
 			    &reason, pd.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: IPv6 short rthdr\n"));
 				action = PF_DROP;
 				REASON_SET(&reason, PFRES_SHORT);
 				log = 1;
 				goto done;
 			}
 			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: IPv6 rthdr0\n"));
 				action = PF_DROP;
 				REASON_SET(&reason, PFRES_IPOPTIONS);
 				log = 1;
 				goto done;
 			}
 			/* fallthrough */
 		}
 		case IPPROTO_AH:
 		case IPPROTO_HOPOPTS:
 		case IPPROTO_DSTOPTS: {
 			/* get next header and header length */
 			struct ip6_ext	opt6;
 
 			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
 			    NULL, &reason, pd.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: IPv6 short opt\n"));
 				action = PF_DROP;
 				log = 1;
 				goto done;
 			}
 			if (pd.proto == IPPROTO_AH)
 				off += (opt6.ip6e_len + 2) * 4;
 			else
 				off += (opt6.ip6e_len + 1) * 8;
 			pd.proto = opt6.ip6e_nxt;
 			/* goto the next header */
 			break;
 		}
 		default:
 			terminal++;
 			break;
 		}
 	} while (!terminal);
 
 	/* if there's no routing header, use unmodified mbuf for checksumming */
 	if (!n)
 		n = m;
 
 	switch (pd.proto) {
 
 	case IPPROTO_TCP: {
 		struct tcphdr	th;
 
 		pd.hdr.tcp = &th;
 		if (!pf_pull_hdr(m, off, &th, sizeof(th),
 		    &action, &reason, AF_INET6)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		if (dir == PF_IN && pf_check_proto_cksum(n, off,
 		    ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
 		    IPPROTO_TCP, AF_INET6)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_PROTCKSUM);
 			goto done;
 		}
 		pd.p_len = pd.tot_len - off - (th.th_off << 2);
 		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
 		if (action == PF_DROP)
 			goto done;
 		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
 		    &reason);
 		if (action == PF_PASS) {
 #if NPFSYNC
 			pfsync_update_state(s);
 #endif /* NPFSYNC */
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 #ifdef __FreeBSD__
 			action = pf_test_tcp(&r, &s, dir, kif,
 			    m, off, h, &pd, &a, &ruleset, NULL, inp);
 #else
 			action = pf_test_tcp(&r, &s, dir, kif,
 			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
 #endif
 		break;
 	}
 
 	case IPPROTO_UDP: {
 		struct udphdr	uh;
 
 		pd.hdr.udp = &uh;
 		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
 		    &action, &reason, AF_INET6)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		if (dir == PF_IN && uh.uh_sum && pf_check_proto_cksum(n,
 		    off, ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
 		    IPPROTO_UDP, AF_INET6)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_PROTCKSUM);
 			goto done;
 		}
 		if (uh.uh_dport == 0 ||
 		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
 		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_SHORT);
 			goto done;
 		}
 		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
 		if (action == PF_PASS) {
 #if NPFSYNC
 			pfsync_update_state(s);
 #endif /* NPFSYNC */
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 #ifdef __FreeBSD__
 			action = pf_test_udp(&r, &s, dir, kif,
 			    m, off, h, &pd, &a, &ruleset, NULL, inp);
 #else
 			action = pf_test_udp(&r, &s, dir, kif,
 			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
 #endif
 		break;
 	}
 
 	case IPPROTO_ICMPV6: {
 		struct icmp6_hdr	ih;
 
 		pd.hdr.icmp6 = &ih;
 		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
 		    &action, &reason, AF_INET6)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		if (dir == PF_IN && pf_check_proto_cksum(n, off,
 		    ntohs(h->ip6_plen) - (off - sizeof(struct ip6_hdr)),
 		    IPPROTO_ICMPV6, AF_INET6)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_PROTCKSUM);
 			goto done;
 		}
 		action = pf_test_state_icmp(&s, dir, kif,
 		    m, off, h, &pd, &reason);
 		if (action == PF_PASS) {
 #if NPFSYNC
 			pfsync_update_state(s);
 #endif /* NPFSYNC */
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 #ifdef __FreeBSD__
 			action = pf_test_icmp(&r, &s, dir, kif,
 			    m, off, h, &pd, &a, &ruleset, NULL);
 #else
 			action = pf_test_icmp(&r, &s, dir, kif,
 			    m, off, h, &pd, &a, &ruleset, &ip6intrq);
 #endif
 		break;
 	}
 
 	default:
 		action = pf_test_state_other(&s, dir, kif, &pd);
 		if (action == PF_PASS) {
 #if NPFSYNC
 			pfsync_update_state(s);
 #endif /* NPFSYNC */
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 #ifdef __FreeBSD__
 			action = pf_test_other(&r, &s, dir, kif, m, off, h,
 			    &pd, &a, &ruleset, NULL);
 #else
 			action = pf_test_other(&r, &s, dir, kif, m, off, h,
 			    &pd, &a, &ruleset, &ip6intrq);
 #endif
 		break;
 	}
 
 done:
 	/* handle dangerous IPv6 extension headers. */
 	if (action == PF_PASS && rh_cnt &&
 	    !((s && s->allow_opts) || r->allow_opts)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_IPOPTIONS);
 		log = 1;
 		DPFPRINTF(PF_DEBUG_MISC,
 		    ("pf: dropping packet with dangerous v6 headers\n"));
 	}
 
 	if ((s && s->tag) || r->rtableid)
 		pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid);
 
 #ifdef ALTQ
 	if (action == PF_PASS && r->qid) {
 		if (pd.tos & IPTOS_LOWDELAY)
 			pd.pf_mtag->qid = r->pqid;
 		else
 			pd.pf_mtag->qid = r->qid;
 		/* add hints for ecn */
 		pd.pf_mtag->af = AF_INET6;
 		pd.pf_mtag->hdr = h;
 	}
 #endif /* ALTQ */
 
 	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
 	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
 	    (s->nat_rule.ptr->action == PF_RDR ||
 	    s->nat_rule.ptr->action == PF_BINAT) &&
 	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
 		pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST;
 
 	if (log) {
 		struct pf_rule *lr;
 
 		if (s != NULL && s->nat_rule.ptr != NULL &&
 		    s->nat_rule.ptr->log & PF_LOG_ALL)
 			lr = s->nat_rule.ptr;
 		else
 			lr = r;
 		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset,
 		    &pd);
 	}
 
 	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
 	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
 
 	if (action == PF_PASS || r->action == PF_DROP) {
 		dirndx = (dir == PF_OUT);
 		r->packets[dirndx]++;
 		r->bytes[dirndx] += pd.tot_len;
 		if (a != NULL) {
 			a->packets[dirndx]++;
 			a->bytes[dirndx] += pd.tot_len;
 		}
 		if (s != NULL) {
 			if (s->nat_rule.ptr != NULL) {
 				s->nat_rule.ptr->packets[dirndx]++;
 				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
 			}
 			if (s->src_node != NULL) {
 				s->src_node->packets[dirndx]++;
 				s->src_node->bytes[dirndx] += pd.tot_len;
 			}
 			if (s->nat_src_node != NULL) {
 				s->nat_src_node->packets[dirndx]++;
 				s->nat_src_node->bytes[dirndx] += pd.tot_len;
 			}
 			dirndx = (dir == s->direction) ? 0 : 1;
 			s->packets[dirndx]++;
 			s->bytes[dirndx] += pd.tot_len;
 		}
 		tr = r;
 		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
 		if (nr != NULL) {
 			struct pf_addr *x;
 			/*
 			 * XXX: we need to make sure that the addresses
 			 * passed to pfr_update_stats() are the same than
 			 * the addresses used during matching (pfr_match)
 			 */
 			if (r == &pf_default_rule) {
 				tr = nr;
 				x = (s == NULL || s->direction == dir) ?
 				    &pd.baddr : &pd.naddr;
 			} else {
 				x = (s == NULL || s->direction == dir) ?
 				    &pd.naddr : &pd.baddr;
 			}
 			if (x == &pd.baddr || s == NULL) {
 				if (dir == PF_OUT)
 					pd.src = x;
 				else
 					pd.dst = x;
 			}
 		}
 		if (tr->src.addr.type == PF_ADDR_TABLE)
 			pfr_update_stats(tr->src.addr.p.tbl, (s == NULL ||
 			    s->direction == dir) ? pd.src : pd.dst, pd.af,
 			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
 			    tr->src.neg);
 		if (tr->dst.addr.type == PF_ADDR_TABLE)
 			pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL ||
 			    s->direction == dir) ? pd.dst : pd.src, pd.af,
 			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
 			    tr->dst.neg);
 	}
 
 
 	if (action == PF_SYNPROXY_DROP) {
 		m_freem(*m0);
 		*m0 = NULL;
 		action = PF_PASS;
 	} else if (r->rt)
 		/* pf_route6 can free the mbuf causing *m0 to become NULL */
 		pf_route6(m0, r, dir, ifp, s, &pd);
 
 #ifdef __FreeBSD__
 	PF_UNLOCK();
 #endif
 	return (action);
 }
 #endif /* INET6 */
 
 /*
  * Tell the caller whether the given interface queue is congested.
  * Returns 1 when ifq->ifq_congestion is set and 0 otherwise.  The
  * FreeBSD build does not consult the queue and always returns 0.
  */
 int
 pf_check_congestion(struct ifqueue *ifq)
 {
 #ifndef __FreeBSD__
 	return (ifq->ifq_congestion ? 1 : 0);
 #else
 	/* XXX_IMPORT: later */
 	return (0);
 #endif
 }
Index: head/sys/contrib/pf/net/pf_ioctl.c
===================================================================
--- head/sys/contrib/pf/net/pf_ioctl.c	(revision 178887)
+++ head/sys/contrib/pf/net/pf_ioctl.c	(revision 178888)
@@ -1,3894 +1,3894 @@
 /*	$OpenBSD: pf_ioctl.c,v 1.175 2007/02/26 22:47:43 deraadt Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
  * Copyright (c) 2002,2003 Henning Brauer
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  *    - Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *    - Redistributions in binary form must reproduce the above
  *      copyright notice, this list of conditions and the following
  *      disclaimer in the documentation and/or other materials provided
  *      with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * Effort sponsored in part by the Defense Advanced Research Projects
  * Agency (DARPA) and Air Force Research Laboratory, Air Force
  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
  *
  */
 
 #ifdef __FreeBSD__
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 #endif
 
 #ifdef __FreeBSD__
 #include "opt_bpf.h"
 #include "opt_pf.h"
 
 #ifdef DEV_BPF
 #define	NBPFILTER	DEV_BPF
 #else
 #define	NBPFILTER	0
 #endif
 
 #ifdef DEV_PFLOG
 #define	NPFLOG		DEV_PFLOG
 #else
 #define	NPFLOG		0
 #endif
 
 #ifdef DEV_PFSYNC
 #define	NPFSYNC		DEV_PFSYNC
 #else
 #define	NPFSYNC		0
 #endif
 
 #else
 #include "bpfilter.h"
 #include "pflog.h"
 #include "pfsync.h"
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/filio.h>
 #include <sys/fcntl.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/kernel.h>
 #include <sys/time.h>
 #include <sys/malloc.h>
 #ifdef __FreeBSD__
 #include <sys/module.h>
 #include <sys/conf.h>
 #include <sys/proc.h>
 #include <sys/sysctl.h>
 #else
 #include <sys/timeout.h>
 #include <sys/pool.h>
 #endif
 #include <sys/proc.h>
 #include <sys/malloc.h>
 #include <sys/kthread.h>
 #ifndef __FreeBSD__
 #include <sys/rwlock.h>
 #include <uvm/uvm_extern.h>
 #endif
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 
 #ifdef __FreeBSD__
 #include <sys/md5.h>
 #else
 #include <dev/rndvar.h>
 #include <crypto/md5.h>
 #endif
 #include <net/pfvar.h>
 
 #if NPFSYNC > 0
 #include <net/if_pfsync.h>
 #endif /* NPFSYNC > 0 */
 
 #include <net/if_pflog.h>
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet/in_pcb.h>
 #endif /* INET6 */
 
 #ifdef ALTQ
 #include <altq/altq.h>
 #endif
 
 #ifdef __FreeBSD__
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <net/pfil.h>
 #endif /* __FreeBSD__ */
 
 #ifdef __FreeBSD__
 void			 init_zone_var(void);
 void			 cleanup_pf_zone(void);
 int			 pfattach(void);
 #else
 void			 pfattach(int);
 void			 pf_thread_create(void *);
 int			 pfopen(dev_t, int, int, struct proc *);
 int			 pfclose(dev_t, int, int, struct proc *);
 #endif
 struct pf_pool		*pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t,
 			    u_int8_t, u_int8_t, u_int8_t);
 
 void			 pf_mv_pool(struct pf_palist *, struct pf_palist *);
 void			 pf_empty_pool(struct pf_palist *);
 #ifdef __FreeBSD__
 int			 pfioctl(struct cdev *, u_long, caddr_t, int, struct thread *);
 #else
 int			 pfioctl(struct cdev *, u_long, caddr_t, int, struct proc *);
 #endif
 #ifdef ALTQ
 int			 pf_begin_altq(u_int32_t *);
 int			 pf_rollback_altq(u_int32_t);
 int			 pf_commit_altq(u_int32_t);
 int			 pf_enable_altq(struct pf_altq *);
 int			 pf_disable_altq(struct pf_altq *);
 #endif /* ALTQ */
 int			 pf_begin_rules(u_int32_t *, int, const char *);
 int			 pf_rollback_rules(u_int32_t, int, char *);
 int			 pf_setup_pfsync_matching(struct pf_ruleset *);
 void			 pf_hash_rule(MD5_CTX *, struct pf_rule *);
 void			 pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *);
 int			 pf_commit_rules(u_int32_t, int, char *);
 
 struct pf_rule		 pf_default_rule;
 #ifdef __FreeBSD__
 struct sx		 pf_consistency_lock;
 SX_SYSINIT(pf_consistency_lock, &pf_consistency_lock, "pf_statetbl_lock");
 #else
 struct rwlock		 pf_consistency_lock = RWLOCK_INITIALIZER;
 #endif
 #ifdef ALTQ
 static int		 pf_altq_running;
 #endif
 
 #define	TAGID_MAX	 50000
 TAILQ_HEAD(pf_tags, pf_tagname)	pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags),
 				pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
 
 #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
 #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
 #endif
 u_int16_t		 tagname2tag(struct pf_tags *, char *);
 void			 tag2tagname(struct pf_tags *, u_int16_t, char *);
 void			 tag_unref(struct pf_tags *, u_int16_t);
 int			 pf_rtlabel_add(struct pf_addr_wrap *);
 void			 pf_rtlabel_remove(struct pf_addr_wrap *);
 void			 pf_rtlabel_copyout(struct pf_addr_wrap *);
 
 #define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x
 
 
 #ifdef __FreeBSD__
 static struct cdev	*pf_dev;
 
 /*
  * XXX - These are new and need to be checked when moving to a new version
  */
 static void		 pf_clear_states(void);
 static int		 pf_clear_tables(void);
 static void		 pf_clear_srcnodes(void);
 /*
  * XXX - These are new and need to be checked when moving to a new version
  */
  
 /*
  * Wrapper functions for pfil(9) hooks
  */
 static int pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp,
 		int dir, struct inpcb *inp);
 static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp,
 		int dir, struct inpcb *inp);
 #ifdef INET6
 static int pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp,
 		int dir, struct inpcb *inp);
 static int pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp,
 		int dir, struct inpcb *inp);
 #endif
 
 static int 		 hook_pf(void);
 static int 		 dehook_pf(void);
 static int 		 shutdown_pf(void);
 static int 		 pf_load(void);
 static int 		 pf_unload(void);
 
 /*
  * Character-device switch for the pf control device: the only entry
  * point provided is the ioctl handler, pfioctl().
  */
 static struct cdevsw pf_cdevsw = {
 	.d_ioctl =	pfioctl,
 	.d_name =	PF_NAME,
 	.d_version =	D_VERSION,
 };
 
 /* Reset to 0 by pfattach(); presumably tracks pfil(9) hook state. */
 static volatile int pf_pfil_hooked = 0;
 /* NOTE(review): presumably a shutdown flag for pf's threads -- confirm. */
 int pf_end_threads = 0;
 /* Set up by init_pf_mutex() and torn down by destroy_pf_mutex(). */
 struct mtx pf_task_mtx;
 /* Packet logging callback; NULL until something installs one. */
 pflog_packet_t *pflog_packet_ptr = NULL;
 
 /* Exported read/write as the sysctl debug.pfugidhack. */
 int debug_pfugidhack = 0;
 SYSCTL_INT(_debug, OID_AUTO, pfugidhack, CTLFLAG_RW, &debug_pfugidhack, 0,
     "Enable/disable pf user/group rules mpsafe hack");
 
 /*
  * Initialise pf_task_mtx as a default (MTX_DEF) mutex named
  * "pf task mtx".  destroy_pf_mutex() is the matching teardown.
  */
 void
 init_pf_mutex(void)
 {
 	mtx_init(&pf_task_mtx, "pf task mtx", NULL, MTX_DEF);
 }
 
 /*
  * Destroy pf_task_mtx, the mutex set up by init_pf_mutex().
  */
 void
 destroy_pf_mutex(void)
 {
 	mtx_destroy(&pf_task_mtx);
 }
 
 void
 init_zone_var(void)
 {
 	pf_src_tree_pl = pf_rule_pl = NULL;
 	pf_state_pl = pf_altq_pl = pf_pooladdr_pl = NULL;
 	pf_frent_pl = pf_frag_pl = pf_cache_pl = pf_cent_pl = NULL;
 	pf_state_scrub_pl = NULL;
 	pfr_ktable_pl = pfr_kentry_pl = NULL;
 }
 
 /*
  * Hand every pf UMA zone to UMA_DESTROY.  The list matches the zones
  * created in pfattach(), which also calls this routine on its zone
  * creation and pf_osfp_initialize() failure paths.
  * NOTE(review): UMA_DESTROY is defined elsewhere; presumably it skips
  * handles that are still NULL -- confirm.
  */
 void
 cleanup_pf_zone(void)
 {
 	UMA_DESTROY(pf_src_tree_pl);
 	UMA_DESTROY(pf_rule_pl);
 	UMA_DESTROY(pf_state_pl);
 	UMA_DESTROY(pf_altq_pl);
 	UMA_DESTROY(pf_pooladdr_pl);
 	UMA_DESTROY(pf_frent_pl);
 	UMA_DESTROY(pf_frag_pl);
 	UMA_DESTROY(pf_cache_pl);
 	UMA_DESTROY(pf_cent_pl);
 	UMA_DESTROY(pfr_ktable_pl);
 	UMA_DESTROY(pfr_kentry_pl2);
 	UMA_DESTROY(pfr_kentry_pl);
 	UMA_DESTROY(pf_state_scrub_pl);
 	UMA_DESTROY(pfi_addr_pl);
 }
 
 /*
  * FreeBSD attach routine.  Creates the pf UMA zones, initialises the
  * table, interface and OS-fingerprint code, fills in the pool limits,
  * list heads, the default rule and its timeouts, resets pf_status and
  * finally starts the "pfpurge" kernel process.
  *
  * Returns 0 on success.  Returns a non-zero value when zone creation or
  * pf_osfp_initialize() fails (the zones are released through
  * cleanup_pf_zone() on both paths), and ENXIO when kproc_create() fails.
  */
 int
 pfattach(void)
 {
 	u_int32_t *my_timeout = pf_default_rule.timeout;
 	int error = 1;
 
 	/*
 	 * error only becomes 0 when the whole block runs to its end.
 	 * NOTE(review): presumably UMA_CREATE breaks out of the loop when
 	 * a zone cannot be created -- the macro is defined elsewhere.
 	 */
 	do {
 		UMA_CREATE(pf_src_tree_pl,struct pf_src_node, "pfsrctrpl");
 		UMA_CREATE(pf_rule_pl,	  struct pf_rule, "pfrulepl");
 		UMA_CREATE(pf_state_pl,	  struct pf_state, "pfstatepl");
 		UMA_CREATE(pf_altq_pl,	  struct pf_altq, "pfaltqpl");
 		UMA_CREATE(pf_pooladdr_pl, struct pf_pooladdr, "pfpooladdrpl");
 		UMA_CREATE(pfr_ktable_pl,  struct pfr_ktable, "pfrktable");
 		UMA_CREATE(pfr_kentry_pl,  struct pfr_kentry, "pfrkentry");
 		UMA_CREATE(pfr_kentry_pl2,  struct pfr_kentry, "pfrkentry2");
 		UMA_CREATE(pf_frent_pl,	  struct pf_frent, "pffrent");
 		UMA_CREATE(pf_frag_pl,	  struct pf_fragment, "pffrag");
 		UMA_CREATE(pf_cache_pl,	  struct pf_fragment, "pffrcache");
 		UMA_CREATE(pf_cent_pl,	  struct pf_frcache, "pffrcent");
 		UMA_CREATE(pf_state_scrub_pl, struct pf_state_scrub, 
 		    "pfstatescrub");
 		UMA_CREATE(pfi_addr_pl, struct pfi_dynaddr, "pfiaddrpl");
 		error = 0;
 	} while(0);
 	if (error) {
 		cleanup_pf_zone();
 		return (error);
 	}
 	pfr_initialize();
 	pfi_initialize();
 	if ( (error = pf_osfp_initialize()) ) {
 		cleanup_pf_zone();
 		pf_osfp_cleanup();
 		return (error);
 	}
 
 	/* Bind each limit slot to its zone and default high-water mark. */
 	pf_pool_limits[PF_LIMIT_STATES].pp = pf_state_pl;
 	pf_pool_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT;
 	pf_pool_limits[PF_LIMIT_SRC_NODES].pp = pf_src_tree_pl;
 	pf_pool_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT;
 	pf_pool_limits[PF_LIMIT_FRAGS].pp = pf_frent_pl;
 	pf_pool_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT;
 	pf_pool_limits[PF_LIMIT_TABLES].pp = pfr_ktable_pl;
 	pf_pool_limits[PF_LIMIT_TABLES].limit = PFR_KTABLE_HIWAT;
 	pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].pp = pfr_kentry_pl;
 	pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT;
 	/* Only the state zone has its maximum applied here. */
 	uma_zone_set_max(pf_pool_limits[PF_LIMIT_STATES].pp,
 		pf_pool_limits[PF_LIMIT_STATES].limit);
 
 	RB_INIT(&tree_src_tracking);
 	RB_INIT(&pf_anchors);
 	pf_init_ruleset(&pf_main_ruleset);
 	TAILQ_INIT(&pf_altqs[0]);
 	TAILQ_INIT(&pf_altqs[1]);
 	TAILQ_INIT(&pf_pabuf);
 	pf_altqs_active = &pf_altqs[0];
 	pf_altqs_inactive = &pf_altqs[1];
 	TAILQ_INIT(&state_list);
 
 	/* default rule should never be garbage collected */
 	pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next;
 	pf_default_rule.action = PF_PASS;
 	pf_default_rule.nr = -1;
 	pf_default_rule.rtableid = -1;
 
 	/* initialize default timeouts */
 	my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
 	my_timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
 	my_timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL;
 	my_timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL;
 	my_timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL;
 	my_timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL;
 	my_timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL;
 	my_timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL;
 	my_timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL;
 	my_timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL;
 	my_timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL;
 	my_timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL;
 	my_timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL;
 	my_timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL;
 	my_timeout[PFTM_FRAG] = PFTM_FRAG_VAL;
 	my_timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL;
 	my_timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL;
 	my_timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL;
 	my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START;
 	my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END;
 
 	pf_normalize_init();
 	bzero(&pf_status, sizeof(pf_status));
 	pf_status.debug = PF_DEBUG_URGENT;
 
 	pf_pfil_hooked = 0;
 
 	/* XXX do our best to avoid a conflict */
 	pf_status.hostid = arc4random();
 
 	/*
 	 * NOTE(review): this failure path returns without calling
 	 * cleanup_pf_zone(); whether the caller releases the zones is
 	 * not visible from here.
 	 */
 	if (kproc_create(pf_purge_thread, NULL, NULL, 0, 0, "pfpurge"))
 		return (ENXIO);
 
 	return (error);
 }
 #else /* !__FreeBSD__ */
 /*
  * Non-FreeBSD attach routine.  Initialises the pf memory pools, the
  * table, interface and OS-fingerprint code, the state pool hard limit,
  * list heads, the default rule and its timeouts, resets pf_status and
  * schedules pf_thread_create() to start the purge process later.
  * The num argument is not used.
  */
 void
 pfattach(int num)
 {
 	u_int32_t *timeout = pf_default_rule.timeout;
 
 	pool_init(&pf_rule_pl, sizeof(struct pf_rule), 0, 0, 0, "pfrulepl",
 	    &pool_allocator_nointr);
 	pool_init(&pf_src_tree_pl, sizeof(struct pf_src_node), 0, 0, 0,
 	    "pfsrctrpl", NULL);
 	pool_init(&pf_state_pl, sizeof(struct pf_state), 0, 0, 0, "pfstatepl",
 	    NULL);
 	pool_init(&pf_altq_pl, sizeof(struct pf_altq), 0, 0, 0, "pfaltqpl",
 	    &pool_allocator_nointr);
 	pool_init(&pf_pooladdr_pl, sizeof(struct pf_pooladdr), 0, 0, 0,
 	    "pfpooladdrpl", &pool_allocator_nointr);
 	pfr_initialize();
 	pfi_initialize();
 	pf_osfp_initialize();
 
 	pool_sethardlimit(pf_pool_limits[PF_LIMIT_STATES].pp,
 	    pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0);
 
 	/* Use the smaller table-entry limit with 100MB of memory or less. */
 	if (ctob(physmem) <= 100*1024*1024)
 		pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit =
 		    PFR_KENTRY_HIWAT_SMALL;
 
 	RB_INIT(&tree_src_tracking);
 	RB_INIT(&pf_anchors);
 	pf_init_ruleset(&pf_main_ruleset);
 	TAILQ_INIT(&pf_altqs[0]);
 	TAILQ_INIT(&pf_altqs[1]);
 	TAILQ_INIT(&pf_pabuf);
 	pf_altqs_active = &pf_altqs[0];
 	pf_altqs_inactive = &pf_altqs[1];
 	TAILQ_INIT(&state_list);
 
 	/* default rule should never be garbage collected */
 	pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next;
 	pf_default_rule.action = PF_PASS;
 	pf_default_rule.nr = -1;
 	pf_default_rule.rtableid = -1;
 
 	/* initialize default timeouts */
 	timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
 	timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
 	timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL;
 	timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL;
 	timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL;
 	timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL;
 	timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL;
 	timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL;
 	timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL;
 	timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL;
 	timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL;
 	timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL;
 	timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL;
 	timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL;
 	timeout[PFTM_FRAG] = PFTM_FRAG_VAL;
 	timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL;
 	timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL;
 	timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL;
 	timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START;
 	timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END;
 
 	pf_normalize_init();
 	bzero(&pf_status, sizeof(pf_status));
 	pf_status.debug = PF_DEBUG_URGENT;
 
 	/* XXX do our best to avoid a conflict */
 	pf_status.hostid = arc4random();
 
 	/* require process context to purge states, so perform in a thread */
 	kproc_create_deferred(pf_thread_create, NULL);
 }
 
 /*
  * Deferred callback registered by pfattach() (non-FreeBSD build):
  * creates the "pfpurge" process running pf_purge_thread and panics
  * when that fails.  The argument v is not used.
  */
 void
 pf_thread_create(void *v)
 {
 	if (kproc_create(pf_purge_thread, NULL, NULL, "pfpurge"))
 		panic("pfpurge thread");
 }
 
 /*
  * Open entry point of the pf device (non-FreeBSD build).  A minor
  * number of 1 or above is refused with ENXIO; otherwise the open
  * succeeds with 0.  flags, fmt and p are not examined.
  */
 int
 pfopen(struct cdev *dev, int flags, int fmt, struct proc *p)
 {
 	return (minor(dev) >= 1 ? ENXIO : 0);
 }
 
 /*
  * Close entry point of the pf device (non-FreeBSD build).  A minor
  * number of 1 or above is refused with ENXIO; otherwise the close
  * succeeds with 0.  flags, fmt and p are not examined.
  */
 int
 pfclose(struct cdev *dev, int flags, int fmt, struct proc *p)
 {
 	return (minor(dev) >= 1 ? ENXIO : 0);
 }
 #endif /* __FreeBSD__ */
 
 /*
  * Look up the address pool of one rule.
  *
  * anchor and rule_action select the ruleset and the rule list inside
  * it; active chooses between the active and the inactive list.  When
  * check_ticket is set, ticket must equal the ticket of the chosen list.
  * With r_last set the last rule of the list is used, otherwise the
  * first rule whose nr equals rule_number.
  *
  * Returns a pointer to that rule's rpool, or NULL when the ruleset,
  * the list, the ticket or the rule does not match.
  */
 struct pf_pool *
 pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action,
     u_int32_t rule_number, u_int8_t r_last, u_int8_t active,
     u_int8_t check_ticket)
 {
 	struct pf_ruleset	*rs;
 	struct pf_rulequeue	*queue;
 	struct pf_rule		*r;
 	u_int32_t		 cur_ticket;
 	int			 idx;
 
 	if ((rs = pf_find_ruleset(anchor)) == NULL)
 		return (NULL);
 	idx = pf_get_ruleset_number(rule_action);
 	if (idx >= PF_RULESET_MAX)
 		return (NULL);
 
 	/* Pick the list to search together with its current ticket. */
 	if (active) {
 		queue = rs->rules[idx].active.ptr;
 		cur_ticket = rs->rules[idx].active.ticket;
 	} else {
 		queue = rs->rules[idx].inactive.ptr;
 		cur_ticket = rs->rules[idx].inactive.ticket;
 	}
 	if (check_ticket && ticket != cur_ticket)
 		return (NULL);
 
 	if (r_last) {
 		r = TAILQ_LAST(queue, pf_rulequeue);
 	} else {
 		/* Walk from the head until the rule number matches. */
 		for (r = TAILQ_FIRST(queue); r != NULL;
 		    r = TAILQ_NEXT(r, entries))
 			if (r->nr == rule_number)
 				break;
 	}
 
 	return (r == NULL ? NULL : &r->rpool);
 }
 
 /*
  * Move every pool address from poola to the tail of poolb, keeping
  * the original order.  poola is empty afterwards.
  */
 void
 pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb)
 {
 	struct pf_pooladdr	*pa;
 
 	for (pa = TAILQ_FIRST(poola); pa != NULL; pa = TAILQ_FIRST(poola)) {
 		TAILQ_REMOVE(poola, pa, entries);
 		TAILQ_INSERT_TAIL(poolb, pa, entries);
 	}
 }
 
 /*
  * Drain an address pool.  For each entry, from the head of the list:
  * drop its dynamic-address and table references, release its interface
  * reference, unlink it and return it to pf_pooladdr_pl.
  */
 void
 pf_empty_pool(struct pf_palist *poola)
 {
 	struct pf_pooladdr	*pa;
 
 	for (pa = TAILQ_FIRST(poola); pa != NULL; pa = TAILQ_FIRST(poola)) {
 		pfi_dynaddr_remove(&pa->addr);
 		pf_tbladdr_remove(&pa->addr);
 		pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE);
 		TAILQ_REMOVE(poola, pa, entries);
 		pool_put(&pf_pooladdr_pl, pa);
 	}
 }
 
 /*
  * Unlink a rule from rulequeue (when one is given) and free it once
  * nothing refers to it any more.
  *
  * With a non-NULL rulequeue the rule is removed from that queue and
  * marked as detached (tqe_prev = NULL, nr = -1).  The rule is then
  * freed only when it has no states, no source nodes and is detached;
  * otherwise the function returns and leaves the rule allocated.
  * With a NULL rulequeue nothing is unlinked; only the free step is
  * attempted.
  */
 void
 pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule)
 {
 	if (rulequeue != NULL) {
 		if (rule->states <= 0) {
 			/*
 			 * XXX - we need to remove the table *before* detaching
 			 * the rule to make sure the table code does not delete
 			 * the anchor under our feet.
 			 */
 			pf_tbladdr_remove(&rule->src.addr);
 			pf_tbladdr_remove(&rule->dst.addr);
 			if (rule->overload_tbl)
 				pfr_detach_table(rule->overload_tbl);
 		}
 		TAILQ_REMOVE(rulequeue, rule, entries);
 		rule->entries.tqe_prev = NULL;
 		rule->nr = -1;
 	}
 
 	/* Still referenced or still linked: keep the rule for now. */
 	if (rule->states > 0 || rule->src_nodes > 0 ||
 	    rule->entries.tqe_prev != NULL)
 		return;
 	pf_tag_unref(rule->tag);
 	pf_tag_unref(rule->match_tag);
 #ifdef ALTQ
 	/* pqid may alias qid; drop the shared id only once. */
 	if (rule->pqid != rule->qid)
 		pf_qid_unref(rule->pqid);
 	pf_qid_unref(rule->qid);
 #endif
 	pf_rtlabel_remove(&rule->src.addr);
 	pf_rtlabel_remove(&rule->dst.addr);
 	pfi_dynaddr_remove(&rule->src.addr);
 	pfi_dynaddr_remove(&rule->dst.addr);
 	/* With a queue, the tables were already released above. */
 	if (rulequeue == NULL) {
 		pf_tbladdr_remove(&rule->src.addr);
 		pf_tbladdr_remove(&rule->dst.addr);
 		if (rule->overload_tbl)
 			pfr_detach_table(rule->overload_tbl);
 	}
 	pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE);
 	pf_anchor_remove(rule);
 	pf_empty_pool(&rule->rpool.list);
 	pool_put(&pf_rule_pl, rule);
 }
 
 u_int16_t
 tagname2tag(struct pf_tags *head, char *tagname)
 {
 	struct pf_tagname	*tag, *p = NULL;
 	u_int16_t		 new_tagid = 1;
 
 	TAILQ_FOREACH(tag, head, entries)
 		if (strcmp(tagname, tag->name) == 0) {
 			tag->ref++;
 			return (tag->tag);
 		}
 
 	/*
 	 * to avoid fragmentation, we do a linear search from the beginning
 	 * and take the first free slot we find. if there is none or the list
 	 * is empty, append a new entry at the end.
 	 */
 
 	/* new entry */
 	if (!TAILQ_EMPTY(head))
 		for (p = TAILQ_FIRST(head); p != NULL &&
 		    p->tag == new_tagid; p = TAILQ_NEXT(p, entries))
 			new_tagid = p->tag + 1;
 
 	if (new_tagid > TAGID_MAX)
 		return (0);
 
 	/* allocate and fill new struct pf_tagname */
 	tag = (struct pf_tagname *)malloc(sizeof(struct pf_tagname),
 	    M_TEMP, M_NOWAIT);
 	if (tag == NULL)
 		return (0);
 	bzero(tag, sizeof(struct pf_tagname));
 	strlcpy(tag->name, tagname, sizeof(tag->name));
 	tag->tag = new_tagid;
 	tag->ref++;
 
 	if (p != NULL)	/* insert new entry before p */
 		TAILQ_INSERT_BEFORE(p, tag, entries);
 	else	/* either list empty or no free slot in between */
 		TAILQ_INSERT_TAIL(head, tag, entries);
 
 	return (tag->tag);
 }
 
 /*
  * Copy the name that belongs to tagid into p, writing at most
  * PF_TAG_NAME_SIZE bytes.  p is left untouched when the id is not in
  * the list.
  */
 void
 tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p)
 {
 	struct pf_tagname	*t;
 
 	for (t = TAILQ_FIRST(head); t != NULL; t = TAILQ_NEXT(t, entries)) {
 		if (t->tag != tagid)
 			continue;
 		strlcpy(p, t->name, PF_TAG_NAME_SIZE);
 		break;
 	}
 }
 
 /*
  * Drop one reference on the entry with the given id.  The entry is
  * unlinked and freed when its count reaches zero.  Id 0 and ids that
  * are not in the list are ignored.
  */
 void
 tag_unref(struct pf_tags *head, u_int16_t tag)
 {
 	struct pf_tagname	*t;
 
 	if (tag == 0)
 		return;
 
 	TAILQ_FOREACH(t, head, entries) {
 		if (t->tag != tag)
 			continue;
 		if (--t->ref == 0) {
 			TAILQ_REMOVE(head, t, entries);
 			free(t, M_TEMP);
 		}
 		/* Leave at once: t may have just been freed. */
 		break;
 	}
 }
 
 /*
  * Look up or create tagname in the global pf_tags list; see
  * tagname2tag() for the return value.
  */
 u_int16_t
 pf_tagname2tag(char *tagname)
 {
 	return (tagname2tag(&pf_tags, tagname));
 }
 
 /*
  * Copy the name of tagid from the global pf_tags list into p; see
  * tag2tagname().
  */
 void
 pf_tag2tagname(u_int16_t tagid, char *p)
 {
 	tag2tagname(&pf_tags, tagid, p);
 }
 
 /*
  * Take an additional reference on the pf_tags entry whose id is tag.
  * Nothing happens when no entry has that id.
  */
 void
 pf_tag_ref(u_int16_t tag)
 {
 	struct pf_tagname *entry;
 
 	TAILQ_FOREACH(entry, &pf_tags, entries) {
 		if (entry->tag == tag) {
 			entry->ref++;
 			return;
 		}
 	}
 }
 
 /*
  * Drop one reference on tag in the global pf_tags list; see
  * tag_unref().
  */
 void
 pf_tag_unref(u_int16_t tag)
 {
 	tag_unref(&pf_tags, tag);
 }
 
 /*
  * Resolve the route label name of an address wrapper to its id.
  * Only wrappers of type PF_ADDR_RTLABEL are touched.  Returns -1 when
  * rtlabel_name2id() yields 0, and 0 in every other case.  The FreeBSD
  * build does nothing and returns 0.
  */
 int
 pf_rtlabel_add(struct pf_addr_wrap *a)
 {
 #ifndef __FreeBSD__
 	if (a->type != PF_ADDR_RTLABEL)
 		return (0);
 	a->v.rtlabel = rtlabel_name2id(a->v.rtlabelname);
 	return (a->v.rtlabel == 0 ? -1 : 0);
 #else
 	/* XXX_IMPORT: later */
 	return (0);
 #endif
 }
 
 /*
  * Release the route label referenced by an address wrapper of type
  * PF_ADDR_RTLABEL.  The FreeBSD build does nothing.
  */
 void
 pf_rtlabel_remove(struct pf_addr_wrap *a)
 {
 #ifdef __FreeBSD__
 	/* XXX_IMPORT: later */
 #else
 	if (a->type == PF_ADDR_RTLABEL)
 		rtlabel_unref(a->v.rtlabel);
 #endif
 }
 
 /*
  * Fill in the route label name of an address wrapper from its label
  * id.  Only wrappers of type PF_ADDR_RTLABEL with a non-zero id are
  * touched.  The name becomes "?" when rtlabel_id2name() finds nothing;
  * the FreeBSD build always writes "?".
  */
 void
 pf_rtlabel_copyout(struct pf_addr_wrap *a)
 {
 #ifndef __FreeBSD__
 	const char	*label;
 #endif
 
 	if (a->type != PF_ADDR_RTLABEL || !a->v.rtlabel)
 		return;
 #ifdef __FreeBSD__
 	/* XXX_IMPORT: later */
 	strlcpy(a->v.rtlabelname, "?", sizeof(a->v.rtlabelname));
 #else
 	label = rtlabel_id2name(a->v.rtlabel);
 	if (label == NULL)
 		label = "?";
 	strlcpy(a->v.rtlabelname, label, sizeof(a->v.rtlabelname));
 #endif
 }
 
 #ifdef ALTQ
 /*
  * Look up or create qname in the global pf_qids list and return its
  * id widened to 32 bits; see tagname2tag() for the return value.
  */
 u_int32_t
 pf_qname2qid(char *qname)
 {
 	return ((u_int32_t)tagname2tag(&pf_qids, qname));
 }
 
 /*
  * Copy the queue name of qid from the global pf_qids list into p.
  * The id is narrowed to 16 bits for the lookup; see tag2tagname().
  */
 void
 pf_qid2qname(u_int32_t qid, char *p)
 {
 	tag2tagname(&pf_qids, (u_int16_t)qid, p);
 }
 
 /*
  * Drop one reference on qid in the global pf_qids list.  The id is
  * narrowed to 16 bits for the lookup; see tag_unref().
  */
 void
 pf_qid_unref(u_int32_t qid)
 {
 	tag_unref(&pf_qids, (u_int16_t)qid);
 }
 
 /*
  * Open a new ALTQ transaction.  The inactive altq list is emptied,
  * then a fresh ticket is stored in *ticket and the inactive list is
  * marked open.
  *
  * Returns 0 on success.  When altq_remove() reported an error, that
  * error is returned and no ticket is handed out; the list has been
  * emptied regardless.
  */
 int
 pf_begin_altq(u_int32_t *ticket)
 {
 	struct pf_altq	*altq;
 	int		 error = 0;
 
 	/* Purge the old altq list */
 	while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
 		TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
 		/*
 		 * Entries with an empty qname get altq_remove(); on
 		 * FreeBSD only if PFALTQ_FLAG_IF_REMOVED is clear.  All
 		 * other entries drop their queue id reference instead.
 		 */
 #ifdef __FreeBSD__
 		if (altq->qname[0] == 0 &&
 		    (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
 #else
 		if (altq->qname[0] == 0) {
 #endif
 			/* detach and destroy the discipline */
 			/* error keeps only the latest altq_remove() result */
 			error = altq_remove(altq);
 		} else
 			pf_qid_unref(altq->qid);
 		pool_put(&pf_altq_pl, altq);
 	}
 	if (error)
 		return (error);
 	*ticket = ++ticket_altqs_inactive;
 	altqs_inactive_open = 1;
 	return (0);
 }
 
 /*
  * Abort the ALTQ transaction identified by `ticket'.
  *
  * Does nothing (and returns 0) when no transaction is open or the
  * ticket does not match the current one.  Otherwise the inactive altq
  * list is purged exactly as in pf_begin_altq() and the transaction is
  * closed.
  *
  * Returns 0, or the first error reported by altq_remove() while
  * purging.
  */
 int
 pf_rollback_altq(u_int32_t ticket)
 {
 	struct pf_altq	*altq;
 	int		 err, error = 0;
 
 	if (!altqs_inactive_open || ticket != ticket_altqs_inactive)
 		return (0);
 	/* Purge the old altq list */
 	while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
 		TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
 #ifdef __FreeBSD__
 		if (altq->qname[0] == 0 &&
 		    (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
 #else
 		if (altq->qname[0] == 0) {
 #endif
 			/* detach and destroy the discipline */
 			err = altq_remove(altq);
 			/* keep the first failure; later successes must not hide it */
 			if (err != 0 && error == 0)
 				error = err;
 		} else
 			pf_qid_unref(altq->qid);
 		pool_put(&pf_altq_pl, altq);
 	}
 	altqs_inactive_open = 0;
 	return (error);
 }
 
 /*
  * Commit the ALTQ transaction identified by `ticket'.
  *
  * Fails with EBUSY when no transaction is open or the ticket is stale.
  * Otherwise the active and inactive altq lists are swapped, the
  * disciplines on the new active list are attached (and enabled when
  * pf_altq_running is set), and the previous active list -- now the
  * inactive one -- is torn down and its entries returned to pf_altq_pl.
  *
  * An attach/enable failure returns immediately with that error.
  * NOTE(review): on that path the lists stay swapped and the
  * transaction stays marked open; confirm callers cope with this.
  *
  * Errors during the teardown do not stop it; the first one seen is
  * returned after the whole old list has been purged.
  */
 int
 pf_commit_altq(u_int32_t ticket)
 {
 	struct pf_altqqueue	*old_altqs;
 	struct pf_altq		*altq;
 	int			 s, err, error = 0;
 
 	if (!altqs_inactive_open || ticket != ticket_altqs_inactive)
 		return (EBUSY);
 
 	/* swap altqs, keep the old. */
 	s = splsoftnet();
 	old_altqs = pf_altqs_active;
 	pf_altqs_active = pf_altqs_inactive;
 	pf_altqs_inactive = old_altqs;
 	ticket_altqs_active = ticket_altqs_inactive;
 
 	/* Attach new disciplines */
 	TAILQ_FOREACH(altq, pf_altqs_active, entries) {
 #ifdef __FreeBSD__
 		if (altq->qname[0] == 0 &&
 		    (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
 #else
 		if (altq->qname[0] == 0) {
 #endif
 			/* attach the discipline */
 			error = altq_pfattach(altq);
 			if (error == 0 && pf_altq_running)
 				error = pf_enable_altq(altq);
 			if (error != 0) {
 				splx(s);
 				return (error);
 			}
 		}
 	}
 
 	/* Purge the old altq list */
 	while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
 		TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
 #ifdef __FreeBSD__
 		if (altq->qname[0] == 0 &&
 		    (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
 #else
 		if (altq->qname[0] == 0) {
 #endif
 			/*
 			 * detach and destroy the discipline; remember only
 			 * the first error so that a later entry cannot
 			 * overwrite a failure recorded for an earlier one
 			 */
 			if (pf_altq_running) {
 				err = pf_disable_altq(altq);
 				if (err != 0 && error == 0)
 					error = err;
 			}
 			err = altq_pfdetach(altq);
 			if (err != 0 && error == 0)
 				error = err;
 			err = altq_remove(altq);
 			if (err != 0 && error == 0)
 				error = err;
 		} else
 			pf_qid_unref(altq->qid);
 		pool_put(&pf_altq_pl, altq);
 	}
 	splx(s);
 
 	altqs_inactive_open = 0;
 	return (error);
 }
 
 int
 pf_enable_altq(struct pf_altq *altq)
 {
 	struct ifnet		*ifp;
 	struct tb_profile	 tb;
 	int			 s, error = 0;
 
 	if ((ifp = ifunit(altq->ifname)) == NULL)
 		return (EINVAL);
 
 	if (ifp->if_snd.altq_type != ALTQT_NONE)
 		error = altq_enable(&ifp->if_snd);
 
 	/* set tokenbucket regulator */
 	if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
 		tb.rate = altq->ifbandwidth;
 		tb.depth = altq->tbrsize;
 		s = splnet();
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		error = tbr_set(&ifp->if_snd, &tb);
 #ifdef __FreeBSD__
 		PF_LOCK();
 #endif
 		splx(s);
 	}
 
 	return (error);
 }
 
 /*
  * Disable queueing on the interface named by altq->ifname.
  *
  * Returns EINVAL when ifunit() cannot find the interface.  If the
  * interface's current discipline is not the one recorded in `altq',
  * the discipline was already replaced and there is nothing to do.
  * Otherwise the queue is disabled through altq_disable() and, if that
  * worked, the token bucket regulator is cleared by calling tbr_set()
  * with a zero rate; on FreeBSD the PF lock is dropped around that
  * call.
  *
  * Returns 0 on success or the error from altq_disable()/tbr_set().
  */
 int
 pf_disable_altq(struct pf_altq *altq)
 {
 	struct ifnet		*ifp;
 	struct tb_profile	 tb;
 	int			 s, error;
 
 	if ((ifp = ifunit(altq->ifname)) == NULL)
 		return (EINVAL);
 
 	/*
 	 * when the discipline is no longer referenced, it was overridden
 	 * by a new one.  if so, just return.
 	 */
 	if (altq->altq_disc != ifp->if_snd.altq_disc)
 		return (0);
 
 	error = altq_disable(&ifp->if_snd);
 
 	if (error == 0) {
 		/*
 		 * clear tokenbucket regulator; depth is set as well so
 		 * tbr_set() is never handed an indeterminate field
 		 */
 		tb.rate = 0;
 		tb.depth = 0;
 		s = splnet();
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		error = tbr_set(&ifp->if_snd, &tb);
 #ifdef __FreeBSD__
 		PF_LOCK();
 #endif
 		splx(s);
 	}
 
 	return (error);
 }
 
 #ifdef __FreeBSD__
 /*
  * Rebuild the ALTQ configuration in response to an interface event.
  *
  * ifp is the interface the event is about; a non-zero `remove' makes
  * every altq entry that resolves to ifp get flagged
  * PFALTQ_FLAG_IF_REMOVED instead of being re-added.
  *
  * Any open userland altq transaction is rolled back, a new one is
  * started, the active list is copied entry by entry into the inactive
  * list, and the result is committed -- or rolled back if anything
  * failed along the way.  Errors are not reported to the caller.
  */
 void
 pf_altq_ifnet_event(struct ifnet *ifp, int remove)
 {
 	struct ifnet		*ifp1;
 	struct pf_altq		*a1, *a2, *a3;
 	u_int32_t		 ticket;
 	int			 error = 0;
 
 	/* Interrupt userland queue modifications */
 	if (altqs_inactive_open)
 		pf_rollback_altq(ticket_altqs_inactive);
 
 	/* Start new altq ruleset */
 	if (pf_begin_altq(&ticket))
 		return;
 
 	/* Copy the current active set */
 	TAILQ_FOREACH(a1, pf_altqs_active, entries) {
 		a2 = pool_get(&pf_altq_pl, PR_NOWAIT);
 		if (a2 == NULL) {
 			error = ENOMEM;
 			break;
 		}
 		bcopy(a1, a2, sizeof(struct pf_altq));
 
 		/* entries with a queue name: re-resolve the queue ID */
 		if (a2->qname[0] != 0) {
 			if ((a2->qid = pf_qname2qid(a2->qname)) == 0) {
 				error = EBUSY;
 				pool_put(&pf_altq_pl, a2);
 				break;
 			}
 			/*
 			 * take the discipline from the nameless entry for
 			 * the same interface that was already copied into
 			 * the inactive list, if there is one
 			 */
 			a2->altq_disc = NULL;
 			TAILQ_FOREACH(a3, pf_altqs_inactive, entries) {
 				if (strncmp(a3->ifname, a2->ifname,
 				    IFNAMSIZ) == 0 && a3->qname[0] == 0) {
 					a2->altq_disc = a3->altq_disc;
 					break;
 				}
 			}
 		}
 		/* Deactivate the interface in question */
 		a2->local_flags &= ~PFALTQ_FLAG_IF_REMOVED;
 		if ((ifp1 = ifunit(a2->ifname)) == NULL ||
 		    (remove && ifp1 == ifp)) {
 			a2->local_flags |= PFALTQ_FLAG_IF_REMOVED;
 		} else {
 			/* altq_add() is called with the PF lock dropped */
 			PF_UNLOCK();
 			error = altq_add(a2);
 			PF_LOCK();
 
 			/*
 			 * the inactive ticket moved while the lock was
 			 * released: this transaction is no longer current
 			 */
 			if (ticket != ticket_altqs_inactive)
 				error = EBUSY;
 
 			if (error) {
 				pool_put(&pf_altq_pl, a2);
 				break;
 			}
 		}
 
 		TAILQ_INSERT_TAIL(pf_altqs_inactive, a2, entries);
 	}
 
 	if (error != 0)
 		pf_rollback_altq(ticket);
 	else
 		pf_commit_altq(ticket);
 }
 #endif
 #endif /* ALTQ */
 
 /*
  * Open a rule transaction on one ruleset of an anchor.
  *
  * rs_num selects the ruleset and must lie in [0, PF_RULESET_MAX);
  * anchor is looked up, or created, with pf_find_or_create_ruleset().
  * Rules left on the inactive list are removed, the inactive ticket is
  * advanced and handed back through *ticket, and the inactive list is
  * marked open.
  *
  * Returns 0 on success, EINVAL for a bad rs_num or when no ruleset
  * could be obtained.
  */
 int
 pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor)
 {
 	struct pf_ruleset	*ruleset;
 	struct pf_rule		*r;
 
 	if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
 		return (EINVAL);
 	if ((ruleset = pf_find_or_create_ruleset(anchor)) == NULL)
 		return (EINVAL);
 
 	/* empty the inactive list, keeping its rule count in step */
 	for (;;) {
 		r = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr);
 		if (r == NULL)
 			break;
 		pf_rm_rule(ruleset->rules[rs_num].inactive.ptr, r);
 		ruleset->rules[rs_num].inactive.rcount--;
 	}
 
 	ruleset->rules[rs_num].inactive.ticket++;
 	*ticket = ruleset->rules[rs_num].inactive.ticket;
 	ruleset->rules[rs_num].inactive.open = 1;
 	return (0);
 }
 
 /*
  * Abort a rule transaction on one ruleset of an anchor.
  *
  * Returns EINVAL for an rs_num outside [0, PF_RULESET_MAX).  When the
  * anchor has no ruleset, the inactive list is not open, or the ticket
  * does not match, nothing is changed and 0 is returned.  Otherwise the
  * inactive list is emptied and marked closed; 0 is returned.
  */
 int
 pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor)
 {
 	struct pf_ruleset	*ruleset;
 	struct pf_rule		*r;
 
 	if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
 		return (EINVAL);
 
 	ruleset = pf_find_ruleset(anchor);
 	if (ruleset == NULL)
 		return (0);
 	if (!ruleset->rules[rs_num].inactive.open)
 		return (0);
 	if (ruleset->rules[rs_num].inactive.ticket != ticket)
 		return (0);
 
 	/* empty the inactive list, keeping its rule count in step */
 	for (;;) {
 		r = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr);
 		if (r == NULL)
 			break;
 		pf_rm_rule(ruleset->rules[rs_num].inactive.ptr, r);
 		ruleset->rules[rs_num].inactive.rcount--;
 	}
 
 	ruleset->rules[rs_num].inactive.open = 0;
 	return (0);
 }
 
 /*
  * Helpers for feeding struct members into an MD5 digest.  All of them
  * expand to MD5Update() calls on a variable named `ctx', which must be
  * in scope at the point of use.
  */
 
 /* Hash the raw bytes of member elm of *st (sizeof the member). */
 #define PF_MD5_UPD(st, elm)						\
 		MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm))
 
 /* Hash string member elm of *st up to, not including, its NUL. */
 #define PF_MD5_UPD_STR(st, elm)						\
 		MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm))
 
 /*
  * Hash 32-bit member elm of *st in network byte order; stor is a
  * caller-supplied scratch variable that receives the converted value.
  */
 #define PF_MD5_UPD_HTONL(st, elm, stor) do {				\
 		(stor) = htonl((st)->elm);				\
 		MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\
 } while (0)
 
 /*
  * Hash 16-bit member elm of *st in network byte order; stor is a
  * caller-supplied scratch variable that receives the converted value.
  */
 #define PF_MD5_UPD_HTONS(st, elm, stor) do {				\
 		(stor) = htons((st)->elm);				\
 		MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\
 } while (0)
 
 /*
  * Mix one rule address (source or destination side) into the digest.
  *
  * The address type is hashed first, followed by the data that belongs
  * to that type, and finally both ports, the negation flag and the port
  * operator.  Address types not listed contribute only their type.
  */
 void
 pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
 {
 	PF_MD5_UPD(pfr, addr.type);
 
 	switch (pfr->addr.type) {
 	case PF_ADDR_DYNIFTL:
 		PF_MD5_UPD(pfr, addr.v.ifname);
 		PF_MD5_UPD(pfr, addr.iflags);
 		break;
 	case PF_ADDR_TABLE:
 		PF_MD5_UPD(pfr, addr.v.tblname);
 		break;
 	case PF_ADDR_ADDRMASK:
 		/* XXX ignore af? */
 		PF_MD5_UPD(pfr, addr.v.a.addr.addr32);
 		PF_MD5_UPD(pfr, addr.v.a.mask.addr32);
 		break;
 	case PF_ADDR_RTLABEL:
 		PF_MD5_UPD(pfr, addr.v.rtlabelname);
 		break;
 	default:
 		/* no type-specific payload */
 		break;
 	}
 
 	PF_MD5_UPD(pfr, port[0]);
 	PF_MD5_UPD(pfr, port[1]);
 	PF_MD5_UPD(pfr, neg);
 	PF_MD5_UPD(pfr, port_op);
 }
 
 /*
  * Mix the fields of one rule into the digest.
  *
  * The order of the updates below defines the resulting checksum and
  * must not be changed.  Multi-byte integers go in network byte order
  * through the scratch variables; single members are hashed as raw
  * bytes.
  */
 void
 pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule)
 {
 	u_int16_t net16;	/* scratch for PF_MD5_UPD_HTONS */
 	u_int32_t net32;	/* scratch for PF_MD5_UPD_HTONL */
 
 	/* addresses */
 	pf_hash_rule_addr(ctx, &rule->src);
 	pf_hash_rule_addr(ctx, &rule->dst);
 
 	/* strings */
 	PF_MD5_UPD_STR(rule, label);
 	PF_MD5_UPD_STR(rule, ifname);
 	PF_MD5_UPD_STR(rule, match_tagname);
 
 	/* byte-order normalised integers, with the uid/gid operators */
 	PF_MD5_UPD_HTONS(rule, match_tag, net16); /* dup? */
 	PF_MD5_UPD_HTONL(rule, os_fingerprint, net32);
 	PF_MD5_UPD_HTONL(rule, prob, net32);
 	PF_MD5_UPD_HTONL(rule, uid.uid[0], net32);
 	PF_MD5_UPD_HTONL(rule, uid.uid[1], net32);
 	PF_MD5_UPD(rule, uid.op);
 	PF_MD5_UPD_HTONL(rule, gid.gid[0], net32);
 	PF_MD5_UPD_HTONL(rule, gid.gid[1], net32);
 	PF_MD5_UPD(rule, gid.op);
 	PF_MD5_UPD_HTONL(rule, rule_flag, net32);
 
 	/* remaining members, raw */
 	PF_MD5_UPD(rule, action);
 	PF_MD5_UPD(rule, direction);
 	PF_MD5_UPD(rule, af);
 	PF_MD5_UPD(rule, quick);
 	PF_MD5_UPD(rule, ifnot);
 	PF_MD5_UPD(rule, match_tag_not);
 	PF_MD5_UPD(rule, natpass);
 	PF_MD5_UPD(rule, keep_state);
 	PF_MD5_UPD(rule, proto);
 	PF_MD5_UPD(rule, type);
 	PF_MD5_UPD(rule, code);
 	PF_MD5_UPD(rule, flags);
 	PF_MD5_UPD(rule, flagset);
 	PF_MD5_UPD(rule, allow_opts);
 	PF_MD5_UPD(rule, rt);
 	PF_MD5_UPD(rule, tos);
 }
 
 /*
  * Commit a rule transaction: make the inactive rule list of ruleset
  * rs_num in `anchor' the active one.
  *
  * Returns EINVAL for an rs_num outside [0, PF_RULESET_MAX), EBUSY when
  * the anchor has no ruleset, the inactive list is not open, or the
  * ticket does not match, and the error from
  * pf_setup_pfsync_matching() if that fails for the main ruleset.
  * Returns 0 once the swap and cleanup are done.
  */
 int
 pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
 {
 	struct pf_ruleset	*rs;
 	struct pf_rule		*rule, **old_array;
 	struct pf_rulequeue	*old_rules;
 	int			 s, error;
 	u_int32_t		 old_rcount;
 
 	if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
 		return (EINVAL);
 	rs = pf_find_ruleset(anchor);
 	if (rs == NULL || !rs->rules[rs_num].inactive.open ||
 	    ticket != rs->rules[rs_num].inactive.ticket)
 		return (EBUSY);
 
 	/* Calculate checksum for the main ruleset */
 	if (rs == &pf_main_ruleset) {
 		error = pf_setup_pfsync_matching(rs);
 		if (error != 0)
 			return (error);
 	}
 
 	/* Swap rules, keep the old. */
 	s = splsoftnet();
 	old_rules = rs->rules[rs_num].active.ptr;
 	old_rcount = rs->rules[rs_num].active.rcount;
 	old_array = rs->rules[rs_num].active.ptr_array;
 
 	rs->rules[rs_num].active.ptr =
 	    rs->rules[rs_num].inactive.ptr;
 	rs->rules[rs_num].active.ptr_array =
 	    rs->rules[rs_num].inactive.ptr_array;
 	rs->rules[rs_num].active.rcount =
 	    rs->rules[rs_num].inactive.rcount;
 	rs->rules[rs_num].inactive.ptr = old_rules;
 	rs->rules[rs_num].inactive.ptr_array = old_array;
 	rs->rules[rs_num].inactive.rcount = old_rcount;
 
 	/* the active side takes over the ticket of the committed list */
 	rs->rules[rs_num].active.ticket =
 	    rs->rules[rs_num].inactive.ticket;
 	pf_calc_skip_steps(rs->rules[rs_num].active.ptr);
 
 
 	/* Purge the old rule list. */
 	while ((rule = TAILQ_FIRST(old_rules)) != NULL)
 		pf_rm_rule(old_rules, rule);
 	/* the old pointer array now sits on the inactive side; free it */
 	if (rs->rules[rs_num].inactive.ptr_array)
 		free(rs->rules[rs_num].inactive.ptr_array, M_TEMP);
 	rs->rules[rs_num].inactive.ptr_array = NULL;
 	rs->rules[rs_num].inactive.rcount = 0;
 	rs->rules[rs_num].inactive.open = 0;
 	pf_remove_if_empty_ruleset(rs);
 	splx(s);
 	return (0);
 }
 
 /*
  * Prepare the inactive rule lists of a ruleset for pfsync matching.
  *
  * For every ruleset except PF_RULESET_SCRUB, the inactive pointer
  * array is rebuilt so that slot rule->nr refers to the rule, and each
  * rule is fed into a single MD5 digest.  The finished digest is copied
  * into pf_status.pf_chksum.
  *
  * Returns 0 on success, or ENOMEM when a pointer array cannot be
  * allocated; in that case pf_status.pf_chksum is left untouched.
  */
 int
 pf_setup_pfsync_matching(struct pf_ruleset *rs)
 {
 	u_int8_t		 digest[PF_MD5_DIGEST_LENGTH];
 	struct pf_rule		*rule;
 	MD5_CTX			 ctx;
 	int			 i;
 
 	MD5Init(&ctx);
 	for (i = 0; i < PF_RULESET_MAX; i++) {
 		/* XXX PF_RULESET_SCRUB as well? */
 		if (i == PF_RULESET_SCRUB)
 			continue;
 
 		/* release the array currently attached to the inactive list */
 		if (rs->rules[i].inactive.ptr_array != NULL)
 			free(rs->rules[i].inactive.ptr_array, M_TEMP);
 		rs->rules[i].inactive.ptr_array = NULL;
 
 		/* one slot per inactive rule, no array for an empty list */
 		if (rs->rules[i].inactive.rcount != 0) {
 			rs->rules[i].inactive.ptr_array =
 			    malloc(sizeof(caddr_t) *
 			    rs->rules[i].inactive.rcount, M_TEMP, M_NOWAIT);
 			if (rs->rules[i].inactive.ptr_array == NULL)
 				return (ENOMEM);
 		}
 
 		TAILQ_FOREACH(rule, rs->rules[i].inactive.ptr, entries) {
 			pf_hash_rule(&ctx, rule);
 			rs->rules[i].inactive.ptr_array[rule->nr] = rule;
 		}
 	}
 
 	MD5Final(digest, &ctx);
 	memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum));
 	return (0);
 }
 
 int
 #ifdef __FreeBSD__
 pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
 #else
 pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 #endif
 {
 	struct pf_pooladdr	*pa = NULL;
 	struct pf_pool		*pool = NULL;
 #ifndef __FreeBSD__
 	int			 s;
 #endif
 	int			 error = 0;
 
 	/* XXX keep in sync with switch() below */
 #ifdef __FreeBSD__
 	if (securelevel_gt(td->td_ucred, 2))
 #else
 	if (securelevel > 1)
 #endif
 		switch (cmd) {
 		case DIOCGETRULES:
 		case DIOCGETRULE:
 		case DIOCGETADDRS:
 		case DIOCGETADDR:
 		case DIOCGETSTATE:
 		case DIOCSETSTATUSIF:
 		case DIOCGETSTATUS:
 		case DIOCCLRSTATUS:
 		case DIOCNATLOOK:
 		case DIOCSETDEBUG:
 		case DIOCGETSTATES:
 		case DIOCGETTIMEOUT:
 		case DIOCCLRRULECTRS:
 		case DIOCGETLIMIT:
 		case DIOCGETALTQS:
 		case DIOCGETALTQ:
 		case DIOCGETQSTATS:
 		case DIOCGETRULESETS:
 		case DIOCGETRULESET:
 		case DIOCRGETTABLES:
 		case DIOCRGETTSTATS:
 		case DIOCRCLRTSTATS:
 		case DIOCRCLRADDRS:
 		case DIOCRADDADDRS:
 		case DIOCRDELADDRS:
 		case DIOCRSETADDRS:
 		case DIOCRGETADDRS:
 		case DIOCRGETASTATS:
 		case DIOCRCLRASTATS:
 		case DIOCRTSTADDRS:
 		case DIOCOSFPGET:
 		case DIOCGETSRCNODES:
 		case DIOCCLRSRCNODES:
 		case DIOCIGETIFACES:
 #ifdef __FreeBSD__
 		case DIOCGIFSPEED:
 #endif
 		case DIOCSETIFFLAG:
 		case DIOCCLRIFFLAG:
 			break;
 		case DIOCRCLRTABLES:
 		case DIOCRADDTABLES:
 		case DIOCRDELTABLES:
 		case DIOCRSETTFLAGS:
 			if (((struct pfioc_table *)addr)->pfrio_flags &
 			    PFR_FLAG_DUMMY)
 				break; /* dummy operation ok */
 			return (EPERM);
 		default:
 			return (EPERM);
 		}
 
 	if (!(flags & FWRITE))
 		switch (cmd) {
 		case DIOCGETRULES:
 		case DIOCGETADDRS:
 		case DIOCGETADDR:
 		case DIOCGETSTATE:
 		case DIOCGETSTATUS:
 		case DIOCGETSTATES:
 		case DIOCGETTIMEOUT:
 		case DIOCGETLIMIT:
 		case DIOCGETALTQS:
 		case DIOCGETALTQ:
 		case DIOCGETQSTATS:
 		case DIOCGETRULESETS:
 		case DIOCGETRULESET:
 		case DIOCNATLOOK:
 		case DIOCRGETTABLES:
 		case DIOCRGETTSTATS:
 		case DIOCRGETADDRS:
 		case DIOCRGETASTATS:
 		case DIOCRTSTADDRS:
 		case DIOCOSFPGET:
 		case DIOCGETSRCNODES:
 		case DIOCIGETIFACES:
 #ifdef __FreeBSD__
 		case DIOCGIFSPEED:
 #endif
 			break;
 		case DIOCRCLRTABLES:
 		case DIOCRADDTABLES:
 		case DIOCRDELTABLES:
 		case DIOCRCLRTSTATS:
 		case DIOCRCLRADDRS:
 		case DIOCRADDADDRS:
 		case DIOCRDELADDRS:
 		case DIOCRSETADDRS:
 		case DIOCRSETTFLAGS:
 			if (((struct pfioc_table *)addr)->pfrio_flags &
 			    PFR_FLAG_DUMMY) {
 				flags |= FWRITE; /* need write lock for dummy */
 				break; /* dummy operation ok */
 			}
 			return (EACCES);
 		case DIOCGETRULE:
 			if (((struct pfioc_rule *)addr)->action == PF_GET_CLR_CNTR)
 				return (EACCES);
 			break;
 		default:
 			return (EACCES);
 		}
 
 	if (flags & FWRITE)
 #ifdef __FreeBSD__
 		sx_xlock(&pf_consistency_lock);
 	else
 		sx_slock(&pf_consistency_lock);
 #else
 		rw_enter_write(&pf_consistency_lock);
 	else
 		rw_enter_read(&pf_consistency_lock);
 #endif
 
 #ifdef __FreeBSD__
 	PF_LOCK();
 #else
 	s = splsoftnet();
 #endif
 	switch (cmd) {
 
 	case DIOCSTART:
 		if (pf_status.running)
 			error = EEXIST;
 		else {
 #ifdef __FreeBSD__
 			PF_UNLOCK();
 			error = hook_pf();
 			PF_LOCK();
 			if (error) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: pfil registeration fail\n"));
 				break;
 			}
 #endif
 			pf_status.running = 1;
 			pf_status.since = time_second;
 			if (pf_status.stateid == 0) {
 				pf_status.stateid = time_second;
 				pf_status.stateid = pf_status.stateid << 32;
 			}
 			DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n"));
 		}
 		break;
 
 	case DIOCSTOP:
 		if (!pf_status.running)
 			error = ENOENT;
 		else {
 			pf_status.running = 0;
 #ifdef __FreeBSD__
 			PF_UNLOCK();
 			error = dehook_pf();
 			PF_LOCK();
 			if (error) {
 				pf_status.running = 1;
 				DPFPRINTF(PF_DEBUG_MISC,
 					("pf: pfil unregisteration failed\n"));
 			}
 #endif
 			pf_status.since = time_second;
 			DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n"));
 		}
 		break;
 
 	case DIOCADDRULE: {
 		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
 		struct pf_ruleset	*ruleset;
 		struct pf_rule		*rule, *tail;
 		struct pf_pooladdr	*pa;
 		int			 rs_num;
 
 		pr->anchor[sizeof(pr->anchor) - 1] = 0;
 		ruleset = pf_find_ruleset(pr->anchor);
 		if (ruleset == NULL) {
 			error = EINVAL;
 			break;
 		}
 		rs_num = pf_get_ruleset_number(pr->rule.action);
 		if (rs_num >= PF_RULESET_MAX) {
 			error = EINVAL;
 			break;
 		}
 		if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
 			error = EINVAL;
 			break;
 		}
 		if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) {
 #ifdef __FreeBSD__
 			DPFPRINTF(PF_DEBUG_MISC,
 			    ("ticket: %d != [%d]%d\n", pr->ticket, rs_num,
 			    ruleset->rules[rs_num].inactive.ticket));
 #endif
 			error = EBUSY;
 			break;
 		}
 		if (pr->pool_ticket != ticket_pabuf) {
 #ifdef __FreeBSD__
 			DPFPRINTF(PF_DEBUG_MISC,
 			    ("pool_ticket: %d != %d\n", pr->pool_ticket,
 			    ticket_pabuf));
 #endif
 			error = EBUSY;
 			break;
 		}
 		rule = pool_get(&pf_rule_pl, PR_NOWAIT);
 		if (rule == NULL) {
 			error = ENOMEM;
 			break;
 		}
 		bcopy(&pr->rule, rule, sizeof(struct pf_rule));
 #ifdef __FreeBSD__
 		rule->cuid = td->td_ucred->cr_ruid;
 		rule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
 #else
 		rule->cuid = p->p_cred->p_ruid;
 		rule->cpid = p->p_pid;
 #endif
 		rule->anchor = NULL;
 		rule->kif = NULL;
 		TAILQ_INIT(&rule->rpool.list);
 		/* initialize refcounting */
 		rule->states = 0;
 		rule->src_nodes = 0;
 		rule->entries.tqe_prev = NULL;
 #ifndef INET
 		if (rule->af == AF_INET) {
 			pool_put(&pf_rule_pl, rule);
 			error = EAFNOSUPPORT;
 			break;
 		}
 #endif /* INET */
 #ifndef INET6
 		if (rule->af == AF_INET6) {
 			pool_put(&pf_rule_pl, rule);
 			error = EAFNOSUPPORT;
 			break;
 		}
 #endif /* INET6 */
 		tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
 		    pf_rulequeue);
 		if (tail)
 			rule->nr = tail->nr + 1;
 		else
 			rule->nr = 0;
 		if (rule->ifname[0]) {
 			rule->kif = pfi_kif_get(rule->ifname);
 			if (rule->kif == NULL) {
 				pool_put(&pf_rule_pl, rule);
 				error = EINVAL;
 				break;
 			}
 			pfi_kif_ref(rule->kif, PFI_KIF_REF_RULE);
 		}
 
 #ifdef __FreeBSD__ /* ROUTEING */
-		if (rule->rtableid > 0)
+		if (rule->rtableid > 0 && rule->rtableid < rt_numfibs)
 #else
 		if (rule->rtableid > 0 && !rtable_exists(rule->rtableid))
 #endif
 			error = EBUSY;
 
 #ifdef ALTQ
 		/* set queue IDs */
 		if (rule->qname[0] != 0) {
 			if ((rule->qid = pf_qname2qid(rule->qname)) == 0)
 				error = EBUSY;
 			else if (rule->pqname[0] != 0) {
 				if ((rule->pqid =
 				    pf_qname2qid(rule->pqname)) == 0)
 					error = EBUSY;
 			} else
 				rule->pqid = rule->qid;
 		}
 #endif
 		if (rule->tagname[0])
 			if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
 				error = EBUSY;
 		if (rule->match_tagname[0])
 			if ((rule->match_tag =
 			    pf_tagname2tag(rule->match_tagname)) == 0)
 				error = EBUSY;
 		if (rule->rt && !rule->direction)
 			error = EINVAL;
 #if NPFLOG > 0
 #ifdef __FreeBSD__
 		if (!rule->log)
 			rule->logif = 0;
 #endif
 		if (rule->logif >= PFLOGIFS_MAX)
 			error = EINVAL;
 #endif
 		if (pf_rtlabel_add(&rule->src.addr) ||
 		    pf_rtlabel_add(&rule->dst.addr))
 			error = EBUSY;
 		if (pfi_dynaddr_setup(&rule->src.addr, rule->af))
 			error = EINVAL;
 		if (pfi_dynaddr_setup(&rule->dst.addr, rule->af))
 			error = EINVAL;
 		if (pf_tbladdr_setup(ruleset, &rule->src.addr))
 			error = EINVAL;
 		if (pf_tbladdr_setup(ruleset, &rule->dst.addr))
 			error = EINVAL;
 		if (pf_anchor_setup(rule, ruleset, pr->anchor_call))
 			error = EINVAL;
 		TAILQ_FOREACH(pa, &pf_pabuf, entries)
 			if (pf_tbladdr_setup(ruleset, &pa->addr))
 				error = EINVAL;
 
 		if (rule->overload_tblname[0]) {
 			if ((rule->overload_tbl = pfr_attach_table(ruleset,
 			    rule->overload_tblname)) == NULL)
 				error = EINVAL;
 			else
 				rule->overload_tbl->pfrkt_flags |=
 				    PFR_TFLAG_ACTIVE;
 		}
 
 		pf_mv_pool(&pf_pabuf, &rule->rpool.list);
 		if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) ||
 		    (rule->action == PF_BINAT)) && rule->anchor == NULL) ||
 		    (rule->rt > PF_FASTROUTE)) &&
 		    (TAILQ_FIRST(&rule->rpool.list) == NULL))
 			error = EINVAL;
 
 		if (error) {
 			pf_rm_rule(NULL, rule);
 			break;
 		}
 
 #ifdef __FreeBSD__
 		if (!debug_pfugidhack && (rule->uid.op || rule->gid.op ||
 		    rule->log & PF_LOG_SOCKET_LOOKUP)) {
 			DPFPRINTF(PF_DEBUG_MISC,
 			    ("pf: debug.pfugidhack enabled\n"));
 			debug_pfugidhack = 1;
 		}
 #endif
 
 		rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list);
 		rule->evaluations = rule->packets[0] = rule->packets[1] =
 		    rule->bytes[0] = rule->bytes[1] = 0;
 		TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr,
 		    rule, entries);
 		ruleset->rules[rs_num].inactive.rcount++;
 		break;
 	}
 
 	case DIOCGETRULES: {
 		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
 		struct pf_ruleset	*ruleset;
 		struct pf_rule		*tail;
 		int			 rs_num;
 
 		pr->anchor[sizeof(pr->anchor) - 1] = 0;
 		ruleset = pf_find_ruleset(pr->anchor);
 		if (ruleset == NULL) {
 			error = EINVAL;
 			break;
 		}
 		rs_num = pf_get_ruleset_number(pr->rule.action);
 		if (rs_num >= PF_RULESET_MAX) {
 			error = EINVAL;
 			break;
 		}
 		tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
 		    pf_rulequeue);
 		if (tail)
 			pr->nr = tail->nr + 1;
 		else
 			pr->nr = 0;
 		pr->ticket = ruleset->rules[rs_num].active.ticket;
 		break;
 	}
 
 	case DIOCGETRULE: {
 		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
 		struct pf_ruleset	*ruleset;
 		struct pf_rule		*rule;
 		int			 rs_num, i;
 
 		pr->anchor[sizeof(pr->anchor) - 1] = 0;
 		ruleset = pf_find_ruleset(pr->anchor);
 		if (ruleset == NULL) {
 			error = EINVAL;
 			break;
 		}
 		rs_num = pf_get_ruleset_number(pr->rule.action);
 		if (rs_num >= PF_RULESET_MAX) {
 			error = EINVAL;
 			break;
 		}
 		if (pr->ticket != ruleset->rules[rs_num].active.ticket) {
 			error = EBUSY;
 			break;
 		}
 		rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
 		while ((rule != NULL) && (rule->nr != pr->nr))
 			rule = TAILQ_NEXT(rule, entries);
 		if (rule == NULL) {
 			error = EBUSY;
 			break;
 		}
 		bcopy(rule, &pr->rule, sizeof(struct pf_rule));
 		if (pf_anchor_copyout(ruleset, rule, pr)) {
 			error = EBUSY;
 			break;
 		}
 		pfi_dynaddr_copyout(&pr->rule.src.addr);
 		pfi_dynaddr_copyout(&pr->rule.dst.addr);
 		pf_tbladdr_copyout(&pr->rule.src.addr);
 		pf_tbladdr_copyout(&pr->rule.dst.addr);
 		pf_rtlabel_copyout(&pr->rule.src.addr);
 		pf_rtlabel_copyout(&pr->rule.dst.addr);
 		for (i = 0; i < PF_SKIP_COUNT; ++i)
 			if (rule->skip[i].ptr == NULL)
 				pr->rule.skip[i].nr = -1;
 			else
 				pr->rule.skip[i].nr =
 				    rule->skip[i].ptr->nr;
 
 		if (pr->action == PF_GET_CLR_CNTR) {
 			rule->evaluations = 0;
 			rule->packets[0] = rule->packets[1] = 0;
 			rule->bytes[0] = rule->bytes[1] = 0;
 		}
 		break;
 	}
 
 	case DIOCCHANGERULE: {
 		struct pfioc_rule	*pcr = (struct pfioc_rule *)addr;
 		struct pf_ruleset	*ruleset;
 		struct pf_rule		*oldrule = NULL, *newrule = NULL;
 		u_int32_t		 nr = 0;
 		int			 rs_num;
 
 		if (!(pcr->action == PF_CHANGE_REMOVE ||
 		    pcr->action == PF_CHANGE_GET_TICKET) &&
 		    pcr->pool_ticket != ticket_pabuf) {
 			error = EBUSY;
 			break;
 		}
 
 		if (pcr->action < PF_CHANGE_ADD_HEAD ||
 		    pcr->action > PF_CHANGE_GET_TICKET) {
 			error = EINVAL;
 			break;
 		}
 		ruleset = pf_find_ruleset(pcr->anchor);
 		if (ruleset == NULL) {
 			error = EINVAL;
 			break;
 		}
 		rs_num = pf_get_ruleset_number(pcr->rule.action);
 		if (rs_num >= PF_RULESET_MAX) {
 			error = EINVAL;
 			break;
 		}
 
 		if (pcr->action == PF_CHANGE_GET_TICKET) {
 			pcr->ticket = ++ruleset->rules[rs_num].active.ticket;
 			break;
 		} else {
 			if (pcr->ticket !=
 			    ruleset->rules[rs_num].active.ticket) {
 				error = EINVAL;
 				break;
 			}
 			if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
 				error = EINVAL;
 				break;
 			}
 		}
 
 		if (pcr->action != PF_CHANGE_REMOVE) {
 			newrule = pool_get(&pf_rule_pl, PR_NOWAIT);
 			if (newrule == NULL) {
 				error = ENOMEM;
 				break;
 			}
 			bcopy(&pcr->rule, newrule, sizeof(struct pf_rule));
 #ifdef __FreeBSD__
 			newrule->cuid = td->td_ucred->cr_ruid;
 			newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
 #else
 			newrule->cuid = p->p_cred->p_ruid;
 			newrule->cpid = p->p_pid;
 #endif
 			TAILQ_INIT(&newrule->rpool.list);
 			/* initialize refcounting */
 			newrule->states = 0;
 			newrule->entries.tqe_prev = NULL;
 #ifndef INET
 			if (newrule->af == AF_INET) {
 				pool_put(&pf_rule_pl, newrule);
 				error = EAFNOSUPPORT;
 				break;
 			}
 #endif /* INET */
 #ifndef INET6
 			if (newrule->af == AF_INET6) {
 				pool_put(&pf_rule_pl, newrule);
 				error = EAFNOSUPPORT;
 				break;
 			}
 #endif /* INET6 */
 			if (newrule->ifname[0]) {
 				newrule->kif = pfi_kif_get(newrule->ifname);
 				if (newrule->kif == NULL) {
 					pool_put(&pf_rule_pl, newrule);
 					error = EINVAL;
 					break;
 				}
 				pfi_kif_ref(newrule->kif, PFI_KIF_REF_RULE);
 			} else
 				newrule->kif = NULL;
 
 			if (newrule->rtableid > 0 &&
 #ifdef __FreeBSD__ /* ROUTING */
-			    1)
+			    newrule->rtableid < rt_numfibs)
 #else
 			    !rtable_exists(newrule->rtableid))
 #endif
 				error = EBUSY;
 
 #ifdef ALTQ
 			/* set queue IDs */
 			if (newrule->qname[0] != 0) {
 				if ((newrule->qid =
 				    pf_qname2qid(newrule->qname)) == 0)
 					error = EBUSY;
 				else if (newrule->pqname[0] != 0) {
 					if ((newrule->pqid =
 					    pf_qname2qid(newrule->pqname)) == 0)
 						error = EBUSY;
 				} else
 					newrule->pqid = newrule->qid;
 			}
 #endif /* ALTQ */
 			if (newrule->tagname[0])
 				if ((newrule->tag =
 				    pf_tagname2tag(newrule->tagname)) == 0)
 					error = EBUSY;
 			if (newrule->match_tagname[0])
 				if ((newrule->match_tag = pf_tagname2tag(
 				    newrule->match_tagname)) == 0)
 					error = EBUSY;
 			if (newrule->rt && !newrule->direction)
 				error = EINVAL;
 #ifdef __FreeBSD__
 #if NPFLOG > 0
 			if (!newrule->log)
 				newrule->logif = 0;
 			if (newrule->logif >= PFLOGIFS_MAX)
 				error = EINVAL;
 #endif
 #endif
 			if (pf_rtlabel_add(&newrule->src.addr) ||
 			    pf_rtlabel_add(&newrule->dst.addr))
 				error = EBUSY;
 			if (pfi_dynaddr_setup(&newrule->src.addr, newrule->af))
 				error = EINVAL;
 			if (pfi_dynaddr_setup(&newrule->dst.addr, newrule->af))
 				error = EINVAL;
 			if (pf_tbladdr_setup(ruleset, &newrule->src.addr))
 				error = EINVAL;
 			if (pf_tbladdr_setup(ruleset, &newrule->dst.addr))
 				error = EINVAL;
 			if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call))
 				error = EINVAL;
 			TAILQ_FOREACH(pa, &pf_pabuf, entries)
 				if (pf_tbladdr_setup(ruleset, &pa->addr))
 					error = EINVAL;
 
 			if (newrule->overload_tblname[0]) {
 				if ((newrule->overload_tbl = pfr_attach_table(
 				    ruleset, newrule->overload_tblname)) ==
 				    NULL)
 					error = EINVAL;
 				else
 					newrule->overload_tbl->pfrkt_flags |=
 					    PFR_TFLAG_ACTIVE;
 			}
 
 			pf_mv_pool(&pf_pabuf, &newrule->rpool.list);
 			if (((((newrule->action == PF_NAT) ||
 			    (newrule->action == PF_RDR) ||
 			    (newrule->action == PF_BINAT) ||
 			    (newrule->rt > PF_FASTROUTE)) &&
 			    !newrule->anchor)) &&
 			    (TAILQ_FIRST(&newrule->rpool.list) == NULL))
 				error = EINVAL;
 
 			if (error) {
 				pf_rm_rule(NULL, newrule);
 				break;
 			}
 
 #ifdef __FreeBSD__
 			if (!debug_pfugidhack && (newrule->uid.op ||
 			    newrule->gid.op ||
 			    newrule->log & PF_LOG_SOCKET_LOOKUP)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: debug.pfugidhack enabled\n"));
 				debug_pfugidhack = 1;
 			}
 #endif
 
 			newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list);
 			newrule->evaluations = 0;
 			newrule->packets[0] = newrule->packets[1] = 0;
 			newrule->bytes[0] = newrule->bytes[1] = 0;
 		}
 		pf_empty_pool(&pf_pabuf);
 
 		if (pcr->action == PF_CHANGE_ADD_HEAD)
 			oldrule = TAILQ_FIRST(
 			    ruleset->rules[rs_num].active.ptr);
 		else if (pcr->action == PF_CHANGE_ADD_TAIL)
 			oldrule = TAILQ_LAST(
 			    ruleset->rules[rs_num].active.ptr, pf_rulequeue);
 		else {
 			oldrule = TAILQ_FIRST(
 			    ruleset->rules[rs_num].active.ptr);
 			while ((oldrule != NULL) && (oldrule->nr != pcr->nr))
 				oldrule = TAILQ_NEXT(oldrule, entries);
 			if (oldrule == NULL) {
 				if (newrule != NULL)
 					pf_rm_rule(NULL, newrule);
 				error = EINVAL;
 				break;
 			}
 		}
 
 		if (pcr->action == PF_CHANGE_REMOVE) {
 			pf_rm_rule(ruleset->rules[rs_num].active.ptr, oldrule);
 			ruleset->rules[rs_num].active.rcount--;
 		} else {
 			if (oldrule == NULL)
 				TAILQ_INSERT_TAIL(
 				    ruleset->rules[rs_num].active.ptr,
 				    newrule, entries);
 			else if (pcr->action == PF_CHANGE_ADD_HEAD ||
 			    pcr->action == PF_CHANGE_ADD_BEFORE)
 				TAILQ_INSERT_BEFORE(oldrule, newrule, entries);
 			else
 				TAILQ_INSERT_AFTER(
 				    ruleset->rules[rs_num].active.ptr,
 				    oldrule, newrule, entries);
 			ruleset->rules[rs_num].active.rcount++;
 		}
 
 		nr = 0;
 		TAILQ_FOREACH(oldrule,
 		    ruleset->rules[rs_num].active.ptr, entries)
 			oldrule->nr = nr++;
 
 		ruleset->rules[rs_num].active.ticket++;
 
 		pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr);
 		pf_remove_if_empty_ruleset(ruleset);
 
 		break;
 	}
 
 	case DIOCCLRSTATES: {
 		struct pf_state		*state, *nexts;
 		struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr;
 		int			 killed = 0;
 
 		for (state = RB_MIN(pf_state_tree_id, &tree_id); state;
 		    state = nexts) {
 			nexts = RB_NEXT(pf_state_tree_id, &tree_id, state);
 
 			if (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname,
 			    state->u.s.kif->pfik_name)) {
 #if NPFSYNC
 				/* don't send out individual delete messages */
 				state->sync_flags = PFSTATE_NOSYNC;
 #endif
 				pf_unlink_state(state);
 				killed++;
 			}
 		}
 		psk->psk_af = killed;
 #if NPFSYNC
 		pfsync_clear_states(pf_status.hostid, psk->psk_ifname);
 #endif
 		break;
 	}
 
 	case DIOCKILLSTATES: {
 		struct pf_state		*state, *nexts;
 		struct pf_state_host	*src, *dst;
 		struct pfioc_state_kill	*psk = (struct pfioc_state_kill *)addr;
 		int			 killed = 0;
 
 		for (state = RB_MIN(pf_state_tree_id, &tree_id); state;
 		    state = nexts) {
 			nexts = RB_NEXT(pf_state_tree_id, &tree_id, state);
 
 			if (state->direction == PF_OUT) {
 				src = &state->lan;
 				dst = &state->ext;
 			} else {
 				src = &state->ext;
 				dst = &state->lan;
 			}
 			if ((!psk->psk_af || state->af == psk->psk_af)
 			    && (!psk->psk_proto || psk->psk_proto ==
 			    state->proto) &&
 			    PF_MATCHA(psk->psk_src.neg,
 			    &psk->psk_src.addr.v.a.addr,
 			    &psk->psk_src.addr.v.a.mask,
 			    &src->addr, state->af) &&
 			    PF_MATCHA(psk->psk_dst.neg,
 			    &psk->psk_dst.addr.v.a.addr,
 			    &psk->psk_dst.addr.v.a.mask,
 			    &dst->addr, state->af) &&
 			    (psk->psk_src.port_op == 0 ||
 			    pf_match_port(psk->psk_src.port_op,
 			    psk->psk_src.port[0], psk->psk_src.port[1],
 			    src->port)) &&
 			    (psk->psk_dst.port_op == 0 ||
 			    pf_match_port(psk->psk_dst.port_op,
 			    psk->psk_dst.port[0], psk->psk_dst.port[1],
 			    dst->port)) &&
 			    (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname,
 			    state->u.s.kif->pfik_name))) {
 #if NPFSYNC > 0
 				/* send immediate delete of state */
 				pfsync_delete_state(state);
 				state->sync_flags |= PFSTATE_NOSYNC;
 #endif
 				pf_unlink_state(state);
 				killed++;
 			}
 		}
 		psk->psk_af = killed;
 		break;
 	}
 
 	/*
 	 * DIOCADDSTATE: insert a state entry supplied by userland.
 	 *
 	 * addr points to a struct pfioc_state.  The embedded state is copied
 	 * wholesale from the request, so the kernel-internal pointer fields
 	 * listed below are reset before the entry is inserted.  Sets EINVAL
 	 * for an out-of-range timeout, ENOENT when pfi_kif_get() returns NULL
 	 * and ENOMEM when allocation or pf_insert_state() fails.
 	 */
 	case DIOCADDSTATE: {
 		struct pfioc_state	*ps = (struct pfioc_state *)addr;
 		struct pf_state		*state;
 		struct pfi_kif		*kif;
 
 		if (ps->state.timeout >= PFTM_MAX &&
 		    ps->state.timeout != PFTM_UNTIL_PACKET) {
 			error = EINVAL;
 			break;
 		}
 		state = pool_get(&pf_state_pl, PR_NOWAIT);
 		if (state == NULL) {
 			error = ENOMEM;
 			break;
 		}
 		kif = pfi_kif_get(ps->state.u.ifname);
 		if (kif == NULL) {
 			pool_put(&pf_state_pl, state);
 			error = ENOENT;
 			break;
 		}
 		bcopy(&ps->state, state, sizeof(struct pf_state));
 		bzero(&state->u, sizeof(state->u));
 		state->rule.ptr = &pf_default_rule;
 		state->nat_rule.ptr = NULL;
 		state->anchor.ptr = NULL;
 		state->rt_kif = NULL;
 		/*
 		 * The source-node links came straight from the user buffer;
 		 * a state added this way is not attached to any source node,
 		 * so never let those values be used as kernel pointers.
 		 * NOTE(review): any other pointer members embedded in
 		 * struct pf_state should be audited the same way.
 		 */
 		state->src_node = NULL;
 		state->nat_src_node = NULL;
 		state->creation = time_second;
 		state->pfsync_time = 0;
 		state->packets[0] = state->packets[1] = 0;
 		state->bytes[0] = state->bytes[1] = 0;
 
 		if (pf_insert_state(kif, state)) {
 			/* Insertion failed: drop the kif and the state again. */
 			pfi_kif_unref(kif, PFI_KIF_REF_NONE);
 			pool_put(&pf_state_pl, state);
 			error = ENOMEM;
 		}
 		break;
 	}
 
 	/*
 	 * DIOCGETSTATE: export the ps->nr'th entry of the id-ordered state
 	 * tree into ps->state.  Sets EBUSY when the tree holds fewer entries
 	 * than requested.
 	 */
 	case DIOCGETSTATE: {
 		struct pfioc_state	*ps = (struct pfioc_state *)addr;
 		struct pf_state		*state;
 		u_int32_t		 idx;
 		int			 now;
 
 		/* Step forward ps->nr entries from the smallest id. */
 		state = RB_MIN(pf_state_tree_id, &tree_id);
 		for (idx = 0; state != NULL && idx < ps->nr; idx++)
 			state = RB_NEXT(pf_state_tree_id, &tree_id, state);
 		if (state == NULL) {
 			error = EBUSY;
 			break;
 		}
 
 		now = time_second;
 		bcopy(state, &ps->state, sizeof(ps->state));
 		strlcpy(ps->state.u.ifname, state->u.s.kif->pfik_name,
 		    sizeof(ps->state.u.ifname));
 
 		/* Replace rule pointers by rule numbers (-1 when unset). */
 		ps->state.rule.nr = state->rule.ptr->nr;
 		ps->state.nat_rule.nr = (state->nat_rule.ptr != NULL) ?
 		    state->nat_rule.ptr->nr : -1;
 		ps->state.anchor.nr = (state->anchor.ptr != NULL) ?
 		    state->anchor.ptr->nr : -1;
 
 		/* Report age and remaining lifetime relative to now. */
 		ps->state.creation = now - ps->state.creation;
 		ps->state.expire = pf_state_expires(state);
 		ps->state.expire = (ps->state.expire > now) ?
 		    ps->state.expire - now : 0;
 		break;
 	}
 
 	/*
 	 * DIOCGETSTATES: copy the state list out to the user buffer
 	 * ps->ps_states, which is ps->ps_len bytes long.
 	 *
 	 * With ps_len == 0 nothing is copied; ps_len is set to the size
 	 * needed for pf_status.states entries.  Otherwise ps_len is updated
 	 * to the number of bytes actually written.  A failing copyout jumps
 	 * to the fail label with error set.
 	 */
 	case DIOCGETSTATES: {
 		struct pfioc_states	*ps = (struct pfioc_states *)addr;
 		struct pf_state		*state;
 		struct pf_state		*p, *pstore;
 		u_int32_t		 nr = 0;
 		int			 space = ps->ps_len;
 
 		/* Size query only. */
 		if (space == 0) {
 			nr = pf_status.states;
 			ps->ps_len = sizeof(struct pf_state) * nr;
 			break;
 		}
 
 		/*
 		 * On FreeBSD the PF lock is released around the M_WAITOK
 		 * allocation (presumably because it may sleep -- confirm).
 		 */
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK);
 #ifdef __FreeBSD__
 		PF_LOCK();
 #endif
 
 		p = ps->ps_states;
 
 		state = TAILQ_FIRST(&state_list);
 		while (state) {
 			/* States marked PFTM_UNLINKED are not exported. */
 			if (state->timeout != PFTM_UNLINKED) {
 				int	secs = time_second;
 
 				/* Stop once the user buffer cannot hold one more. */
 				if ((nr+1) * sizeof(*p) > (unsigned)ps->ps_len)
 					break;
 
 				/*
 				 * Build the exported copy in pstore: interface
 				 * name and rule numbers replace the kernel
 				 * fields, and creation/expire become seconds
 				 * relative to now.
 				 */
 				bcopy(state, pstore, sizeof(*pstore));
 				strlcpy(pstore->u.ifname,
 				    state->u.s.kif->pfik_name,
 				    sizeof(pstore->u.ifname));
 				pstore->rule.nr = state->rule.ptr->nr;
 				pstore->nat_rule.nr = (state->nat_rule.ptr ==
 				    NULL) ? -1 : state->nat_rule.ptr->nr;
 				pstore->anchor.nr = (state->anchor.ptr ==
 				    NULL) ? -1 : state->anchor.ptr->nr;
 				pstore->creation = secs - pstore->creation;
 				pstore->expire = pf_state_expires(state);
 				if (pstore->expire > secs)
 					pstore->expire -= secs;
 				else
 					pstore->expire = 0;
 #ifdef __FreeBSD__
 				PF_COPYOUT(pstore, p, sizeof(*p), error);
 #else
 				error = copyout(pstore, p, sizeof(*p));
 #endif
 				if (error) {
 					free(pstore, M_TEMP);
 					goto fail;
 				}
 				p++;
 				nr++;
 			}
 			state = TAILQ_NEXT(state, u.s.entry_list);
 		}
 
 		/* Tell the caller how many bytes were written. */
 		ps->ps_len = sizeof(struct pf_state) * nr;
 
 		free(pstore, M_TEMP);
 		break;
 	}
 
 	case DIOCGETSTATUS: {
 		struct pf_status *s = (struct pf_status *)addr;
 		bcopy(&pf_status, s, sizeof(struct pf_status));
 		pfi_fill_oldstatus(s);
 		break;
 	}
 
 	case DIOCSETSTATUSIF: {
 		struct pfioc_if	*pi = (struct pfioc_if *)addr;
 
 		if (pi->ifname[0] == 0) {
 			bzero(pf_status.ifname, IFNAMSIZ);
 			break;
 		}
 		if (ifunit(pi->ifname) == NULL) {
 			error = EINVAL;
 			break;
 		}
 		strlcpy(pf_status.ifname, pi->ifname, IFNAMSIZ);
 		break;
 	}
 
 	case DIOCCLRSTATUS: {
 		bzero(pf_status.counters, sizeof(pf_status.counters));
 		bzero(pf_status.fcounters, sizeof(pf_status.fcounters));
 		bzero(pf_status.scounters, sizeof(pf_status.scounters));
 		pf_status.since = time_second;
 		if (*pf_status.ifname)
 			pfi_clr_istats(pf_status.ifname);
 		break;
 	}
 
 	/*
 	 * DIOCNATLOOK: find the state matching the connection described by
 	 * the struct pfioc_natlook request and fill in the rs*/rd* result
 	 * fields of the request from it.
 	 *
 	 * Sets EINVAL when the protocol or either address is zero, or when
 	 * a TCP/UDP request lacks a port; E2BIG when more than one state
 	 * matches; ENOENT when none does.
 	 */
 	case DIOCNATLOOK: {
 		struct pfioc_natlook	*pnl = (struct pfioc_natlook *)addr;
 		struct pf_state		*state;
 		struct pf_state_cmp	 key;
 		int			 m = 0, direction = pnl->direction;
 
 		key.af = pnl->af;
 		key.proto = pnl->proto;
 
 		if (!pnl->proto ||
 		    PF_AZERO(&pnl->saddr, pnl->af) ||
 		    PF_AZERO(&pnl->daddr, pnl->af) ||
 		    ((pnl->proto == IPPROTO_TCP ||
 		    pnl->proto == IPPROTO_UDP) &&
 		    (!pnl->dport || !pnl->sport)))
 			error = EINVAL;
 		else {
 			/*
 			 * userland gives us source and dest of connection,
 			 * reverse the lookup so we ask for what happens with
 			 * the return traffic, enabling us to find it in the
 			 * state tree.
 			 */
 			if (direction == PF_IN) {
 				/* Inbound: search by (ext, gwy) address pair. */
 				PF_ACPY(&key.ext.addr, &pnl->daddr, pnl->af);
 				key.ext.port = pnl->dport;
 				PF_ACPY(&key.gwy.addr, &pnl->saddr, pnl->af);
 				key.gwy.port = pnl->sport;
 				state = pf_find_state_all(&key, PF_EXT_GWY, &m);
 			} else {
 				/* Otherwise: search by (lan, ext) address pair. */
 				PF_ACPY(&key.lan.addr, &pnl->daddr, pnl->af);
 				key.lan.port = pnl->dport;
 				PF_ACPY(&key.ext.addr, &pnl->saddr, pnl->af);
 				key.ext.port = pnl->sport;
 				state = pf_find_state_all(&key, PF_LAN_EXT, &m);
 			}
 			/* m is the match count reported by pf_find_state_all(). */
 			if (m > 1)
 				error = E2BIG;	/* more than one state */
 			else if (state != NULL) {
 				if (direction == PF_IN) {
 					/*
 					 * Result source comes from the state's
 					 * lan side; destination is echoed back
 					 * from the request.
 					 */
 					PF_ACPY(&pnl->rsaddr, &state->lan.addr,
 					    state->af);
 					pnl->rsport = state->lan.port;
 					PF_ACPY(&pnl->rdaddr, &pnl->daddr,
 					    pnl->af);
 					pnl->rdport = pnl->dport;
 				} else {
 					/*
 					 * Result destination comes from the
 					 * state's gwy side; source is echoed
 					 * back from the request.
 					 */
 					PF_ACPY(&pnl->rdaddr, &state->gwy.addr,
 					    state->af);
 					pnl->rdport = state->gwy.port;
 					PF_ACPY(&pnl->rsaddr, &pnl->saddr,
 					    pnl->af);
 					pnl->rsport = pnl->sport;
 				}
 			} else
 				error = ENOENT;
 		}
 		break;
 	}
 
 	/*
 	 * DIOCSETTIMEOUT: store a new value in the default rule's timeout
 	 * table and hand the previous value back in pt->seconds.  Sets
 	 * EINVAL for an index outside [0, PFTM_MAX) or a negative duration.
 	 */
 	case DIOCSETTIMEOUT: {
 		struct pfioc_tm	*pt = (struct pfioc_tm *)addr;
 		int		 prev;
 
 		if (pt->seconds < 0 ||
 		    pt->timeout < 0 || pt->timeout >= PFTM_MAX) {
 			error = EINVAL;
 			goto fail;
 		}
 
 		prev = pf_default_rule.timeout[pt->timeout];
 		if (pt->timeout != PFTM_INTERVAL) {
 			pf_default_rule.timeout[pt->timeout] = pt->seconds;
 		} else {
 			/* An interval of zero is bumped to one second. */
 			if (pt->seconds == 0)
 				pt->seconds = 1;
 			pf_default_rule.timeout[PFTM_INTERVAL] = pt->seconds;
 			/* Interval got shorter: issue a wakeup on pf_purge_thread. */
 			if (pt->seconds < prev)
 				wakeup(pf_purge_thread);
 		}
 		pt->seconds = prev;
 		break;
 	}
 
 	case DIOCGETTIMEOUT: {
 		struct pfioc_tm	*pt = (struct pfioc_tm *)addr;
 
 		if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) {
 			error = EINVAL;
 			goto fail;
 		}
 		pt->seconds = pf_default_rule.timeout[pt->timeout];
 		break;
 	}
 
 	case DIOCGETLIMIT: {
 		struct pfioc_limit	*pl = (struct pfioc_limit *)addr;
 
 		if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) {
 			error = EINVAL;
 			goto fail;
 		}
 		pl->limit = pf_pool_limits[pl->index].limit;
 		break;
 	}
 
 	case DIOCSETLIMIT: {
 		struct pfioc_limit	*pl = (struct pfioc_limit *)addr;
 		int			 old_limit;
 
 		if (pl->index < 0 || pl->index >= PF_LIMIT_MAX ||
 		    pf_pool_limits[pl->index].pp == NULL) {
 			error = EINVAL;
 			goto fail;
 		}
 #ifdef __FreeBSD__
 		uma_zone_set_max(pf_pool_limits[pl->index].pp, pl->limit);
 #else
 		if (pool_sethardlimit(pf_pool_limits[pl->index].pp,
 		    pl->limit, NULL, 0) != 0) {
 			error = EBUSY;
 			goto fail;
 		}
 #endif
 		old_limit = pf_pool_limits[pl->index].limit;
 		pf_pool_limits[pl->index].limit = pl->limit;
 		pl->limit = old_limit;
 		break;
 	}
 
 	case DIOCSETDEBUG: {
 		u_int32_t	*level = (u_int32_t *)addr;
 
 		pf_status.debug = *level;
 		break;
 	}
 
 	case DIOCCLRRULECTRS: {
 		/* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */
 		struct pf_ruleset	*ruleset = &pf_main_ruleset;
 		struct pf_rule		*rule;
 
 		TAILQ_FOREACH(rule,
 		    ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) {
 			rule->evaluations = 0;
 			rule->packets[0] = rule->packets[1] = 0;
 			rule->bytes[0] = rule->bytes[1] = 0;
 		}
 		break;
 	}
 
 #ifdef __FreeBSD__
 	case DIOCGIFSPEED: {
 		struct pf_ifspeed	*psp = (struct pf_ifspeed *)addr;
 		struct pf_ifspeed	ps;
 		struct ifnet		*ifp;
 
 		if (psp->ifname[0] != 0) {
 			/* Can we completely trust user-land? */
 			strlcpy(ps.ifname, psp->ifname, IFNAMSIZ);
 			ifp = ifunit(ps.ifname);
 			if (ifp != NULL)
 				psp->baudrate = ifp->if_baudrate;
 			else
 				error = EINVAL;
 		} else
 			error = EINVAL;
 		break;
 	}
 #endif /* __FreeBSD__ */
 
 #ifdef ALTQ
 	case DIOCSTARTALTQ: {
 		struct pf_altq		*altq;
 
 		/* enable all altq interfaces on active list */
 		TAILQ_FOREACH(altq, pf_altqs_active, entries) {
 #ifdef __FreeBSD__
 			if (altq->qname[0] == 0 && (altq->local_flags &
 			    PFALTQ_FLAG_IF_REMOVED) == 0) {
 #else
 			if (altq->qname[0] == 0) {
 #endif
 				error = pf_enable_altq(altq);
 				if (error != 0)
 					break;
 			}
 		}
 		if (error == 0)
 			pf_altq_running = 1;
 		DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n"));
 		break;
 	}
 
 	case DIOCSTOPALTQ: {
 		struct pf_altq		*altq;
 
 		/* disable all altq interfaces on active list */
 		TAILQ_FOREACH(altq, pf_altqs_active, entries) {
 #ifdef __FreeBSD__
 			if (altq->qname[0] == 0 && (altq->local_flags &
 			    PFALTQ_FLAG_IF_REMOVED) == 0) {
 #else
 			if (altq->qname[0] == 0) {
 #endif
 				error = pf_disable_altq(altq);
 				if (error != 0)
 					break;
 			}
 		}
 		if (error == 0)
 			pf_altq_running = 0;
 		DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n"));
 		break;
 	}
 
 	case DIOCADDALTQ: {
 		struct pfioc_altq	*pa = (struct pfioc_altq *)addr;
 		struct pf_altq		*altq, *a;
 
 		if (pa->ticket != ticket_altqs_inactive) {
 			error = EBUSY;
 			break;
 		}
 		altq = pool_get(&pf_altq_pl, PR_NOWAIT);
 		if (altq == NULL) {
 			error = ENOMEM;
 			break;
 		}
 		bcopy(&pa->altq, altq, sizeof(struct pf_altq));
 #ifdef __FreeBSD__
 		altq->local_flags = 0;
 #endif
 
 		/*
 		 * if this is for a queue, find the discipline and
 		 * copy the necessary fields
 		 */
 		if (altq->qname[0] != 0) {
 			if ((altq->qid = pf_qname2qid(altq->qname)) == 0) {
 				error = EBUSY;
 				pool_put(&pf_altq_pl, altq);
 				break;
 			}
 			altq->altq_disc = NULL;
 			TAILQ_FOREACH(a, pf_altqs_inactive, entries) {
 				if (strncmp(a->ifname, altq->ifname,
 				    IFNAMSIZ) == 0 && a->qname[0] == 0) {
 					altq->altq_disc = a->altq_disc;
 					break;
 				}
 			}
 		}
 
 #ifdef __FreeBSD__
 		struct ifnet *ifp;
 
 		if ((ifp = ifunit(altq->ifname)) == NULL) {
 			altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
 		} else {
 			PF_UNLOCK();
 #endif		
 		error = altq_add(altq);
 #ifdef __FreeBSD__
 			PF_LOCK();
 		}
 #endif
 		if (error) {
 			pool_put(&pf_altq_pl, altq);
 			break;
 		}
 
 		TAILQ_INSERT_TAIL(pf_altqs_inactive, altq, entries);
 		bcopy(altq, &pa->altq, sizeof(struct pf_altq));
 		break;
 	}
 
 	case DIOCGETALTQS: {
 		struct pfioc_altq	*pa = (struct pfioc_altq *)addr;
 		struct pf_altq		*altq;
 
 		pa->nr = 0;
 		TAILQ_FOREACH(altq, pf_altqs_active, entries)
 			pa->nr++;
 		pa->ticket = ticket_altqs_active;
 		break;
 	}
 
 	case DIOCGETALTQ: {
 		struct pfioc_altq	*pa = (struct pfioc_altq *)addr;
 		struct pf_altq		*altq;
 		u_int32_t		 nr;
 
 		if (pa->ticket != ticket_altqs_active) {
 			error = EBUSY;
 			break;
 		}
 		nr = 0;
 		altq = TAILQ_FIRST(pf_altqs_active);
 		while ((altq != NULL) && (nr < pa->nr)) {
 			altq = TAILQ_NEXT(altq, entries);
 			nr++;
 		}
 		if (altq == NULL) {
 			error = EBUSY;
 			break;
 		}
 		bcopy(altq, &pa->altq, sizeof(struct pf_altq));
 		break;
 	}
 
 	case DIOCCHANGEALTQ:
 		/* CHANGEALTQ not supported yet! */
 		error = ENODEV;
 		break;
 
 	case DIOCGETQSTATS: {
 		struct pfioc_qstats	*pq = (struct pfioc_qstats *)addr;
 		struct pf_altq		*altq;
 		u_int32_t		 nr;
 		int			 nbytes;
 
 		if (pq->ticket != ticket_altqs_active) {
 			error = EBUSY;
 			break;
 		}
 		nbytes = pq->nbytes;
 		nr = 0;
 		altq = TAILQ_FIRST(pf_altqs_active);
 		while ((altq != NULL) && (nr < pq->nr)) {
 			altq = TAILQ_NEXT(altq, entries);
 			nr++;
 		}
 		if (altq == NULL) {
 			error = EBUSY;
 			break;
 		}
 #ifdef __FreeBSD__
 		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) {
 			error = ENXIO;
 			break;
 		}
 		PF_UNLOCK();
 #endif
 		error = altq_getqstats(altq, pq->buf, &nbytes);
 #ifdef __FreeBSD__
 		PF_LOCK();
 #endif
 		if (error == 0) {
 			pq->scheduler = altq->scheduler;
 			pq->nbytes = nbytes;
 		}
 		break;
 	}
 #endif /* ALTQ */
 
 	case DIOCBEGINADDRS: {
 		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
 
 		pf_empty_pool(&pf_pabuf);
 		pp->ticket = ++ticket_pabuf;
 		break;
 	}
 
 	/*
 	 * DIOCADDADDR: append one pool address from the request to the
 	 * pf_pabuf staging list.
 	 *
 	 * The request ticket must match ticket_pabuf (EBUSY otherwise).
 	 * Sets EAFNOSUPPORT for an address family that is not compiled in,
 	 * EINVAL for an unsupported address type, an unknown interface name
 	 * or a failing pfi_dynaddr_setup(), and ENOMEM when no pool entry
 	 * can be allocated.  pa is declared outside this case arm.
 	 */
 	case DIOCADDADDR: {
 		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
 
 		if (pp->ticket != ticket_pabuf) {
 			error = EBUSY;
 			break;
 		}
 #ifndef INET
 		if (pp->af == AF_INET) {
 			error = EAFNOSUPPORT;
 			break;
 		}
 #endif /* INET */
 #ifndef INET6
 		if (pp->af == AF_INET6) {
 			error = EAFNOSUPPORT;
 			break;
 		}
 #endif /* INET6 */
 		if (pp->addr.addr.type != PF_ADDR_ADDRMASK &&
 		    pp->addr.addr.type != PF_ADDR_DYNIFTL &&
 		    pp->addr.addr.type != PF_ADDR_TABLE) {
 			error = EINVAL;
 			break;
 		}
 		pa = pool_get(&pf_pooladdr_pl, PR_NOWAIT);
 		if (pa == NULL) {
 			error = ENOMEM;
 			break;
 		}
 		bcopy(&pp->addr, pa, sizeof(struct pf_pooladdr));
 		if (pa->ifname[0]) {
 			pa->kif = pfi_kif_get(pa->ifname);
 			if (pa->kif == NULL) {
 				pool_put(&pf_pooladdr_pl, pa);
 				error = EINVAL;
 				break;
 			}
 			pfi_kif_ref(pa->kif, PFI_KIF_REF_RULE);
 		} else {
 			/*
 			 * Without an interface name the kif field still holds
 			 * whatever the user copied in; clear it so that it is
 			 * never handed to pfi_kif_unref() below or kept in the
 			 * pool entry as a kernel pointer.
 			 */
 			pa->kif = NULL;
 		}
 		if (pfi_dynaddr_setup(&pa->addr, pp->af)) {
 			pfi_dynaddr_remove(&pa->addr);
 			pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE);
 			pool_put(&pf_pooladdr_pl, pa);
 			error = EINVAL;
 			break;
 		}
 		TAILQ_INSERT_TAIL(&pf_pabuf, pa, entries);
 		break;
 	}
 
 	case DIOCGETADDRS: {
 		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
 
 		pp->nr = 0;
 		pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action,
 		    pp->r_num, 0, 1, 0);
 		if (pool == NULL) {
 			error = EBUSY;
 			break;
 		}
 		TAILQ_FOREACH(pa, &pool->list, entries)
 			pp->nr++;
 		break;
 	}
 
 	case DIOCGETADDR: {
 		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
 		u_int32_t		 nr = 0;
 
 		pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action,
 		    pp->r_num, 0, 1, 1);
 		if (pool == NULL) {
 			error = EBUSY;
 			break;
 		}
 		pa = TAILQ_FIRST(&pool->list);
 		while ((pa != NULL) && (nr < pp->nr)) {
 			pa = TAILQ_NEXT(pa, entries);
 			nr++;
 		}
 		if (pa == NULL) {
 			error = EBUSY;
 			break;
 		}
 		bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr));
 		pfi_dynaddr_copyout(&pp->addr.addr);
 		pf_tbladdr_copyout(&pp->addr.addr);
 		pf_rtlabel_copyout(&pp->addr.addr);
 		break;
 	}
 
 	/*
 	 * DIOCCHANGEADDR: add an address to, or remove one from, the address
 	 * pool of an existing rule.
 	 *
 	 * pca->action selects the operation (PF_CHANGE_ADD_HEAD through
 	 * PF_CHANGE_REMOVE); for the positional actions pca->nr is the index
 	 * of the reference entry in the pool list.  Sets EINVAL for a bad
 	 * action, address type, interface name, dynaddr/table setup failure
 	 * or missing reference entry, EBUSY when the ruleset or pool cannot
 	 * be found, EAFNOSUPPORT for a family that is not compiled in and
 	 * ENOMEM when no pool entry can be allocated.  pool is declared
 	 * outside this case arm.
 	 */
 	case DIOCCHANGEADDR: {
 		struct pfioc_pooladdr	*pca = (struct pfioc_pooladdr *)addr;
 		struct pf_pooladdr	*oldpa = NULL, *newpa = NULL;
 		struct pf_ruleset	*ruleset;
 
 		if (pca->action < PF_CHANGE_ADD_HEAD ||
 		    pca->action > PF_CHANGE_REMOVE) {
 			error = EINVAL;
 			break;
 		}
 		if (pca->addr.addr.type != PF_ADDR_ADDRMASK &&
 		    pca->addr.addr.type != PF_ADDR_DYNIFTL &&
 		    pca->addr.addr.type != PF_ADDR_TABLE) {
 			error = EINVAL;
 			break;
 		}
 
 		ruleset = pf_find_ruleset(pca->anchor);
 		if (ruleset == NULL) {
 			error = EBUSY;
 			break;
 		}
 		pool = pf_get_pool(pca->anchor, pca->ticket, pca->r_action,
 		    pca->r_num, pca->r_last, 1, 1);
 		if (pool == NULL) {
 			error = EBUSY;
 			break;
 		}
 		/* Every action except REMOVE needs a freshly built entry. */
 		if (pca->action != PF_CHANGE_REMOVE) {
 			newpa = pool_get(&pf_pooladdr_pl, PR_NOWAIT);
 			if (newpa == NULL) {
 				error = ENOMEM;
 				break;
 			}
 			bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr));
 #ifndef INET
 			if (pca->af == AF_INET) {
 				pool_put(&pf_pooladdr_pl, newpa);
 				error = EAFNOSUPPORT;
 				break;
 			}
 #endif /* INET */
 #ifndef INET6
 			if (pca->af == AF_INET6) {
 				pool_put(&pf_pooladdr_pl, newpa);
 				error = EAFNOSUPPORT;
 				break;
 			}
 #endif /* INET6 */
 			if (newpa->ifname[0]) {
 				newpa->kif = pfi_kif_get(newpa->ifname);
 				if (newpa->kif == NULL) {
 					pool_put(&pf_pooladdr_pl, newpa);
 					error = EINVAL;
 					break;
 				}
 				pfi_kif_ref(newpa->kif, PFI_KIF_REF_RULE);
 			} else
 				newpa->kif = NULL;
 			if (pfi_dynaddr_setup(&newpa->addr, pca->af) ||
 			    pf_tbladdr_setup(ruleset, &newpa->addr)) {
 				pfi_dynaddr_remove(&newpa->addr);
 				pfi_kif_unref(newpa->kif, PFI_KIF_REF_RULE);
 				pool_put(&pf_pooladdr_pl, newpa);
 				error = EINVAL;
 				break;
 			}
 		}
 
 		/* Locate the reference entry the action is relative to. */
 		if (pca->action == PF_CHANGE_ADD_HEAD)
 			oldpa = TAILQ_FIRST(&pool->list);
 		else if (pca->action == PF_CHANGE_ADD_TAIL)
 			oldpa = TAILQ_LAST(&pool->list, pf_palist);
 		else {
 			int	i = 0;
 
 			oldpa = TAILQ_FIRST(&pool->list);
 			while ((oldpa != NULL) && (i < pca->nr)) {
 				oldpa = TAILQ_NEXT(oldpa, entries);
 				i++;
 			}
 			if (oldpa == NULL) {
 				/*
 				 * No entry at index pca->nr.  For the
 				 * ADD_BEFORE/ADD_AFTER actions newpa has
 				 * already been set up above; undo that so the
 				 * entry, its kif reference and its dynaddr or
 				 * table attachment are not leaked.
 				 */
 				if (newpa != NULL) {
 					pfi_dynaddr_remove(&newpa->addr);
 					pf_tbladdr_remove(&newpa->addr);
 					pfi_kif_unref(newpa->kif,
 					    PFI_KIF_REF_RULE);
 					pool_put(&pf_pooladdr_pl, newpa);
 				}
 				error = EINVAL;
 				break;
 			}
 		}
 
 		if (pca->action == PF_CHANGE_REMOVE) {
 			TAILQ_REMOVE(&pool->list, oldpa, entries);
 			pfi_dynaddr_remove(&oldpa->addr);
 			pf_tbladdr_remove(&oldpa->addr);
 			pfi_kif_unref(oldpa->kif, PFI_KIF_REF_RULE);
 			pool_put(&pf_pooladdr_pl, oldpa);
 		} else {
 			/* oldpa is NULL here only when the list was empty. */
 			if (oldpa == NULL)
 				TAILQ_INSERT_TAIL(&pool->list, newpa, entries);
 			else if (pca->action == PF_CHANGE_ADD_HEAD ||
 			    pca->action == PF_CHANGE_ADD_BEFORE)
 				TAILQ_INSERT_BEFORE(oldpa, newpa, entries);
 			else
 				TAILQ_INSERT_AFTER(&pool->list, oldpa,
 				    newpa, entries);
 		}
 
 		/*
 		 * Restart the pool at its first entry.  Removing the last
 		 * address leaves the list empty, in which case there is no
 		 * entry to seed the counter from and cur stays NULL.
 		 */
 		pool->cur = TAILQ_FIRST(&pool->list);
 		if (pool->cur != NULL) {
 			PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr,
 			    pca->af);
 		}
 		break;
 	}
 
 	case DIOCGETRULESETS: {
 		struct pfioc_ruleset	*pr = (struct pfioc_ruleset *)addr;
 		struct pf_ruleset	*ruleset;
 		struct pf_anchor	*anchor;
 
 		pr->path[sizeof(pr->path) - 1] = 0;
 		if ((ruleset = pf_find_ruleset(pr->path)) == NULL) {
 			error = EINVAL;
 			break;
 		}
 		pr->nr = 0;
 		if (ruleset->anchor == NULL) {
 			/* XXX kludge for pf_main_ruleset */
 			RB_FOREACH(anchor, pf_anchor_global, &pf_anchors)
 				if (anchor->parent == NULL)
 					pr->nr++;
 		} else {
 			RB_FOREACH(anchor, pf_anchor_node,
 			    &ruleset->anchor->children)
 				pr->nr++;
 		}
 		break;
 	}
 
 	case DIOCGETRULESET: {
 		struct pfioc_ruleset	*pr = (struct pfioc_ruleset *)addr;
 		struct pf_ruleset	*ruleset;
 		struct pf_anchor	*anchor;
 		u_int32_t		 nr = 0;
 
 		pr->path[sizeof(pr->path) - 1] = 0;
 		if ((ruleset = pf_find_ruleset(pr->path)) == NULL) {
 			error = EINVAL;
 			break;
 		}
 		pr->name[0] = 0;
 		if (ruleset->anchor == NULL) {
 			/* XXX kludge for pf_main_ruleset */
 			RB_FOREACH(anchor, pf_anchor_global, &pf_anchors)
 				if (anchor->parent == NULL && nr++ == pr->nr) {
 					strlcpy(pr->name, anchor->name,
 					    sizeof(pr->name));
 					break;
 				}
 		} else {
 			RB_FOREACH(anchor, pf_anchor_node,
 			    &ruleset->anchor->children)
 				if (nr++ == pr->nr) {
 					strlcpy(pr->name, anchor->name,
 					    sizeof(pr->name));
 					break;
 				}
 		}
 		if (!pr->name[0])
 			error = EBUSY;
 		break;
 	}
 
 	case DIOCRCLRTABLES: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != 0) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel,
 		    io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCRADDTABLES: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_add_tables(io->pfrio_buffer, io->pfrio_size,
 		    &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCRDELTABLES: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_del_tables(io->pfrio_buffer, io->pfrio_size,
 		    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCRGETTABLES: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_get_tables(&io->pfrio_table, io->pfrio_buffer,
 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCRGETTSTATS: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != sizeof(struct pfr_tstats)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_get_tstats(&io->pfrio_table, io->pfrio_buffer,
 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCRCLRTSTATS: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_clr_tstats(io->pfrio_buffer, io->pfrio_size,
 		    &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCRSETTFLAGS: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != sizeof(struct pfr_table)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_set_tflags(io->pfrio_buffer, io->pfrio_size,
 		    io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange,
 		    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCRCLRADDRS: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != 0) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel,
 		    io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCRADDADDRS: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_add_addrs(&io->pfrio_table, io->pfrio_buffer,
 		    io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags |
 		    PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCRDELADDRS: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_del_addrs(&io->pfrio_table, io->pfrio_buffer,
 		    io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags |
 		    PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCRSETADDRS: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_set_addrs(&io->pfrio_table, io->pfrio_buffer,
 		    io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd,
 		    &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags |
 		    PFR_FLAG_USERIOCTL, 0);
 		break;
 	}
 
 	case DIOCRGETADDRS: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_get_addrs(&io->pfrio_table, io->pfrio_buffer,
 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCRGETASTATS: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != sizeof(struct pfr_astats)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_get_astats(&io->pfrio_table, io->pfrio_buffer,
 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCRCLRASTATS: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_clr_astats(&io->pfrio_table, io->pfrio_buffer,
 		    io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags |
 		    PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCRTSTADDRS: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_tst_addrs(&io->pfrio_table, io->pfrio_buffer,
 		    io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags |
 		    PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCRINADEFINE: {
 		struct pfioc_table *io = (struct pfioc_table *)addr;
 
 		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfr_ina_define(&io->pfrio_table, io->pfrio_buffer,
 		    io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr,
 		    io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
 	}
 
 	case DIOCOSFPADD: {
 		struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
 		error = pf_osfp_add(io);
 		break;
 	}
 
 	case DIOCOSFPGET: {
 		struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
 		error = pf_osfp_get(io);
 		break;
 	}
 
 	case DIOCXBEGIN: {
 		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
 		struct pfioc_trans_e	*ioe;
 		struct pfr_table	*table;
 		int			 i;
 
 		if (io->esize != sizeof(*ioe)) {
 			error = ENODEV;
 			goto fail;
 		}
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe),
 		    M_TEMP, M_WAITOK);
 		table = (struct pfr_table *)malloc(sizeof(*table),
 		    M_TEMP, M_WAITOK);
 #ifdef __FreeBSD__
 		PF_LOCK();
 #endif
 		for (i = 0; i < io->size; i++) {
 #ifdef __FreeBSD__
 			PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error);
 			if (error) {
 #else
 			if (copyin(io->array+i, ioe, sizeof(*ioe))) {
 #endif
 				free(table, M_TEMP);
 				free(ioe, M_TEMP);
 				error = EFAULT;
 				goto fail;
 			}
 			switch (ioe->rs_num) {
 #ifdef ALTQ
 			case PF_RULESET_ALTQ:
 				if (ioe->anchor[0]) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					error = EINVAL;
 					goto fail;
 				}
 				if ((error = pf_begin_altq(&ioe->ticket))) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					goto fail;
 				}
 				break;
 #endif /* ALTQ */
 			case PF_RULESET_TABLE:
 				bzero(table, sizeof(*table));
 				strlcpy(table->pfrt_anchor, ioe->anchor,
 				    sizeof(table->pfrt_anchor));
 				if ((error = pfr_ina_begin(table,
 				    &ioe->ticket, NULL, 0))) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					goto fail;
 				}
 				break;
 			default:
 				if ((error = pf_begin_rules(&ioe->ticket,
 				    ioe->rs_num, ioe->anchor))) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					goto fail;
 				}
 				break;
 			}
 #ifdef __FreeBSD__
 			PF_COPYOUT(ioe, io->array+i, sizeof(io->array[i]),
 			    error);
 			if (error) {
 #else
 			if (copyout(ioe, io->array+i, sizeof(io->array[i]))) {
 #endif
 				free(table, M_TEMP);
 				free(ioe, M_TEMP);
 				error = EFAULT;
 				goto fail;
 			}
 		}
 		free(table, M_TEMP);
 		free(ioe, M_TEMP);
 		break;
 	}
 
 	case DIOCXROLLBACK: {
 		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
 		struct pfioc_trans_e	*ioe;
 		struct pfr_table	*table;
 		int			 i;
 
 		if (io->esize != sizeof(*ioe)) {
 			error = ENODEV;
 			goto fail;
 		}
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe),
 		    M_TEMP, M_WAITOK);
 		table = (struct pfr_table *)malloc(sizeof(*table),
 		    M_TEMP, M_WAITOK);
 #ifdef __FreeBSD__
 		PF_LOCK();
 #endif
 		for (i = 0; i < io->size; i++) {
 #ifdef __FreeBSD__
 			PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error);
 			if (error) {
 #else
 			if (copyin(io->array+i, ioe, sizeof(*ioe))) {
 #endif
 				free(table, M_TEMP);
 				free(ioe, M_TEMP);
 				error = EFAULT;
 				goto fail;
 			}
 			switch (ioe->rs_num) {
 #ifdef ALTQ
 			case PF_RULESET_ALTQ:
 				if (ioe->anchor[0]) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					error = EINVAL;
 					goto fail;
 				}
 				if ((error = pf_rollback_altq(ioe->ticket))) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					goto fail; /* really bad */
 				}
 				break;
 #endif /* ALTQ */
 			case PF_RULESET_TABLE:
 				bzero(table, sizeof(*table));
 				strlcpy(table->pfrt_anchor, ioe->anchor,
 				    sizeof(table->pfrt_anchor));
 				if ((error = pfr_ina_rollback(table,
 				    ioe->ticket, NULL, 0))) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					goto fail; /* really bad */
 				}
 				break;
 			default:
 				if ((error = pf_rollback_rules(ioe->ticket,
 				    ioe->rs_num, ioe->anchor))) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					goto fail; /* really bad */
 				}
 				break;
 			}
 		}
 		free(table, M_TEMP);
 		free(ioe, M_TEMP);
 		break;
 	}
 
 	case DIOCXCOMMIT: {
 		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
 		struct pfioc_trans_e	*ioe;
 		struct pfr_table	*table;
 		struct pf_ruleset	*rs;
 		int			 i;
 
 		if (io->esize != sizeof(*ioe)) {
 			error = ENODEV;
 			goto fail;
 		}
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		ioe = (struct pfioc_trans_e *)malloc(sizeof(*ioe),
 		    M_TEMP, M_WAITOK);
 		table = (struct pfr_table *)malloc(sizeof(*table),
 		    M_TEMP, M_WAITOK);
 #ifdef __FreeBSD__
 		PF_LOCK();
 #endif
 		/* first makes sure everything will succeed */
 		for (i = 0; i < io->size; i++) {
 #ifdef __FreeBSD__
 			PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error);
 			if (error) {
 #else
 			if (copyin(io->array+i, ioe, sizeof(*ioe))) {
 #endif
 				free(table, M_TEMP);
 				free(ioe, M_TEMP);
 				error = EFAULT;
 				goto fail;
 			}
 			switch (ioe->rs_num) {
 #ifdef ALTQ
 			case PF_RULESET_ALTQ:
 				if (ioe->anchor[0]) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					error = EINVAL;
 					goto fail;
 				}
 				if (!altqs_inactive_open || ioe->ticket !=
 				    ticket_altqs_inactive) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					error = EBUSY;
 					goto fail;
 				}
 				break;
 #endif /* ALTQ */
 			case PF_RULESET_TABLE:
 				rs = pf_find_ruleset(ioe->anchor);
 				if (rs == NULL || !rs->topen || ioe->ticket !=
 				     rs->tticket) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					error = EBUSY;
 					goto fail;
 				}
 				break;
 			default:
 				if (ioe->rs_num < 0 || ioe->rs_num >=
 				    PF_RULESET_MAX) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					error = EINVAL;
 					goto fail;
 				}
 				rs = pf_find_ruleset(ioe->anchor);
 				if (rs == NULL ||
 				    !rs->rules[ioe->rs_num].inactive.open ||
 				    rs->rules[ioe->rs_num].inactive.ticket !=
 				    ioe->ticket) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					error = EBUSY;
 					goto fail;
 				}
 				break;
 			}
 		}
 		/* now do the commit - no errors should happen here */
 		for (i = 0; i < io->size; i++) {
 #ifdef __FreeBSD__
 			PF_COPYIN(io->array+i, ioe, sizeof(*ioe), error);
 			if (error) {
 #else
 			if (copyin(io->array+i, ioe, sizeof(*ioe))) {
 #endif
 				free(table, M_TEMP);
 				free(ioe, M_TEMP);
 				error = EFAULT;
 				goto fail;
 			}
 			switch (ioe->rs_num) {
 #ifdef ALTQ
 			case PF_RULESET_ALTQ:
 				if ((error = pf_commit_altq(ioe->ticket))) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					goto fail; /* really bad */
 				}
 				break;
 #endif /* ALTQ */
 			case PF_RULESET_TABLE:
 				bzero(table, sizeof(*table));
 				strlcpy(table->pfrt_anchor, ioe->anchor,
 				    sizeof(table->pfrt_anchor));
 				if ((error = pfr_ina_commit(table, ioe->ticket,
 				    NULL, NULL, 0))) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					goto fail; /* really bad */
 				}
 				break;
 			default:
 				if ((error = pf_commit_rules(ioe->ticket,
 				    ioe->rs_num, ioe->anchor))) {
 					free(table, M_TEMP);
 					free(ioe, M_TEMP);
 					goto fail; /* really bad */
 				}
 				break;
 			}
 		}
 		free(table, M_TEMP);
 		free(ioe, M_TEMP);
 		break;
 	}
 
 	/*
 	 * DIOCGETSRCNODES: copy the source-tracking nodes out to the user
 	 * buffer psn->psn_src_nodes, which is psn->psn_len bytes long.
 	 *
 	 * With psn_len == 0 nothing is copied; psn_len is set to the size
 	 * needed for all nodes currently in the tree.  Otherwise psn_len is
 	 * updated to the number of bytes actually written.  A failing
 	 * copyout jumps to the fail label with error set.
 	 */
 	case DIOCGETSRCNODES: {
 		struct pfioc_src_nodes	*psn = (struct pfioc_src_nodes *)addr;
 		struct pf_src_node	*n, *p, *pstore;
 		u_int32_t		 nr = 0;
 		int			 space = psn->psn_len;
 
 		/* Size query only: count the nodes. */
 		if (space == 0) {
 			RB_FOREACH(n, pf_src_tree, &tree_src_tracking)
 				nr++;
 			psn->psn_len = sizeof(struct pf_src_node) * nr;
 			break;
 		}
 
 		/*
 		 * On FreeBSD the PF lock is released around the M_WAITOK
 		 * allocation (presumably because it may sleep -- confirm).
 		 */
 #ifdef __FreeBSD__
 		PF_UNLOCK();
 #endif
 		pstore = malloc(sizeof(*pstore), M_TEMP, M_WAITOK);
 #ifdef __FreeBSD__
 		PF_LOCK();
 #endif
 
 		p = psn->psn_src_nodes;
 		RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
 			int	secs = time_second, diff;
 
 			/* Stop once the user buffer cannot hold one more. */
 			if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len)
 				break;
 
 			/*
 			 * Build the exported copy in pstore: the rule number
 			 * replaces the rule pointer when one is set, and
 			 * creation/expire become seconds relative to now.
 			 */
 			bcopy(n, pstore, sizeof(*pstore));
 			if (n->rule.ptr != NULL)
 				pstore->rule.nr = n->rule.ptr->nr;
 			pstore->creation = secs - pstore->creation;
 			if (pstore->expire > secs)
 				pstore->expire -= secs;
 			else
 				pstore->expire = 0;
 
 			/* adjust the connection rate estimate */
 			/*
 			 * The exported count is zero once a full
 			 * conn_rate.seconds window has passed since
 			 * conn_rate.last, and is otherwise reduced in
 			 * proportion to the elapsed part of the window.
 			 */
 			diff = secs - n->conn_rate.last;
 			if (diff >= n->conn_rate.seconds)
 				pstore->conn_rate.count = 0;
 			else
 				pstore->conn_rate.count -=
 				    n->conn_rate.count * diff /
 				    n->conn_rate.seconds;
 
 #ifdef __FreeBSD__
 			PF_COPYOUT(pstore, p, sizeof(*p), error);
 #else
 			error = copyout(pstore, p, sizeof(*p));
 #endif
 			if (error) {
 				free(pstore, M_TEMP);
 				goto fail;
 			}
 			p++;
 			nr++;
 		}
 		/* Tell the caller how many bytes were written. */
 		psn->psn_len = sizeof(struct pf_src_node) * nr;
 
 		free(pstore, M_TEMP);
 		break;
 	}
 
 	case DIOCCLRSRCNODES: {
 		struct pf_src_node	*n;
 		struct pf_state		*state;
 
 		RB_FOREACH(state, pf_state_tree_id, &tree_id) {
 			state->src_node = NULL;
 			state->nat_src_node = NULL;
 		}
 		RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
 			n->expire = 1;
 			n->states = 0;
 		}
 		pf_purge_expired_src_nodes(1);
 		pf_status.src_nodes = 0;
 		break;
 	}
 
 	case DIOCKILLSRCNODES: {
 		struct pf_src_node	*sn;
 		struct pf_state		*s;
 		struct pfioc_src_node_kill *psnk = \
 			(struct pfioc_src_node_kill *) addr;
 		int			killed = 0;
 
 		RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) {
         		if (PF_MATCHA(psnk->psnk_src.neg, \
 				      &psnk->psnk_src.addr.v.a.addr, \
 				      &psnk->psnk_src.addr.v.a.mask, \
 				      &sn->addr, sn->af) &&
 			    PF_MATCHA(psnk->psnk_dst.neg, \
 				      &psnk->psnk_dst.addr.v.a.addr, \
 				      &psnk->psnk_dst.addr.v.a.mask, \
 				      &sn->raddr, sn->af)) {
 				/* Handle state to src_node linkage */
 				if (sn->states != 0) {
 					RB_FOREACH(s, pf_state_tree_id, 
 					    &tree_id) {
 						if (s->src_node == sn)
 							s->src_node = NULL;
 						if (s->nat_src_node == sn)
 							s->nat_src_node = NULL;
 					}
 					sn->states = 0;
 				}
 				sn->expire = 1;
 				killed++;
 			}
 		}
 
 		if (killed > 0)
 			pf_purge_expired_src_nodes(1);
 
 		psnk->psnk_af = killed;
 		break;
 	}
 
 	case DIOCSETHOSTID: {
 		u_int32_t	*hostid = (u_int32_t *)addr;
 
 		if (*hostid == 0)
 			pf_status.hostid = arc4random();
 		else
 			pf_status.hostid = *hostid;
 		break;
 	}
 
 	case DIOCOSFPFLUSH:
 		pf_osfp_flush();
 		break;
 
 	case DIOCIGETIFACES: {
 		struct pfioc_iface *io = (struct pfioc_iface *)addr;
 
 		if (io->pfiio_esize != sizeof(struct pfi_kif)) {
 			error = ENODEV;
 			break;
 		}
 		error = pfi_get_ifaces(io->pfiio_name, io->pfiio_buffer,
 		    &io->pfiio_size);
 		break;
 	}
 
 	case DIOCSETIFFLAG: {
 		struct pfioc_iface *io = (struct pfioc_iface *)addr;
 
 		error = pfi_set_flags(io->pfiio_name, io->pfiio_flags);
 		break;
 	}
 
 	case DIOCCLRIFFLAG: {
 		struct pfioc_iface *io = (struct pfioc_iface *)addr;
 
 		error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags);
 		break;
 	}
 
 	default:
 		error = ENODEV;
 		break;
 	}
 fail:
 #ifdef __FreeBSD__
 	PF_UNLOCK();
 
 	if (flags & FWRITE)
 		sx_xunlock(&pf_consistency_lock);
 	else
 		sx_sunlock(&pf_consistency_lock);
 #else
 	splx(s);
 	/* XXX: Lock order? */
 	if (flags & FWRITE)
 		rw_exit_write(&pf_consistency_lock);
 	else
 		rw_exit_read(&pf_consistency_lock);
 #endif
 	return (error);
 }
 
 #ifdef __FreeBSD__
 /*
  * XXX - Check for version mismatch!!!
  */
 static void
 pf_clear_states(void)
 {
 	struct pf_state		*state;
 
 	RB_FOREACH(state, pf_state_tree_id, &tree_id) {
 		state->timeout = PFTM_PURGE;
 #if NPFSYNC
 		/* don't send out individual delete messages */
 		state->sync_flags = PFSTATE_NOSYNC;
 #endif
 		pf_unlink_state(state);
 	}
 
 #if 0 /* NPFSYNC */
 /*
  * XXX This is called on module unload, we do not want to sync that over? */
  */
 	pfsync_clear_states(pf_status.hostid, psk->psk_ifname);
 #endif
 }
 
 static int
 pf_clear_tables(void)
 {
 	struct pfioc_table io;
 	int error;
 
 	bzero(&io, sizeof(io));
 
 	error = pfr_clr_tables(&io.pfrio_table, &io.pfrio_ndel,
 	    io.pfrio_flags);
 
 	return (error);
 }
 
 /*
  * Detach every state from its source nodes and mark every source node as
  * expired with a zero state count.
  */
 static void
 pf_clear_srcnodes(void)
 {
 	struct pf_state		*st;
 	struct pf_src_node	*sn;
 
 	/* Drop both source-node pointers held by each state. */
 	RB_FOREACH(st, pf_state_tree_id, &tree_id) {
 		st->src_node = NULL;
 		st->nat_src_node = NULL;
 	}
 
 	/*
 	 * expire = 1 with states = 0; presumably this makes the nodes
 	 * eligible for the next source-node purge -- confirm against the
 	 * purge code.
 	 */
 	RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) {
 		sn->expire = 1;
 		sn->states = 0;
 	}
 }
 /*
  * XXX - Check for version mismatch!!!
  */
 
 /*
  * Duplicate pfctl -Fa operation to get rid of as much as we can.
  */
 static int
 shutdown_pf(void)
 {
 	int error = 0;
 	u_int32_t t[5];
 	char nn = '\0';
 
 	pf_status.running = 0;
 	do {
 		if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn))
 		    != 0) {
 			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: SCRUB\n"));
 			break;
 		}
 		if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn))
 		    != 0) {
 			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n"));
 			break;		/* XXX: rollback? */
 		}
 		if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn))
 		    != 0) {
 			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n"));
 			break;		/* XXX: rollback? */
 		}
 		if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn))
 		    != 0) {
 			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n"));
 			break;		/* XXX: rollback? */
 		}
 		if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn))
 		    != 0) {
 			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n"));
 			break;		/* XXX: rollback? */
 		}
 
 		/* XXX: these should always succeed here */
 		pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn);
 		pf_commit_rules(t[1], PF_RULESET_FILTER, &nn);
 		pf_commit_rules(t[2], PF_RULESET_NAT, &nn);
 		pf_commit_rules(t[3], PF_RULESET_BINAT, &nn);
 		pf_commit_rules(t[4], PF_RULESET_RDR, &nn);
 
 		if ((error = pf_clear_tables()) != 0)
 			break;
 
 #ifdef ALTQ
 		if ((error = pf_begin_altq(&t[0])) != 0) {
 			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n"));
 			break;
 		}
 		pf_commit_altq(t[0]);
 #endif
 
 		pf_clear_states();
 
 		pf_clear_srcnodes();
 
 		/* status does not use malloced mem so no need to cleanup */
 		/* fingerprints and interfaces have thier own cleanup code */
 	} while(0);
 
         return (error);
 }
 
 /*
  * Inbound IPv4 packet hook (registered by hook_pf() with PFIL_IN).
  *
  * pf follows the OpenBSD convention of ip_len/ip_off in network byte
  * order, while this stack hands them over in host byte order.  The two
  * fields are therefore swapped before pf_test() and swapped back on
  * whatever mbuf survives.
  *
  * Returns the pf_test() verdict.  On a non-zero verdict any remaining mbuf
  * is freed and *m is set to NULL.
  */
 static int
 pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
     struct inpcb *inp)
 {
 	struct ip *iph;
 	int verdict;
 
 	/* Packets shorter than an IP header are left for pf to deal with. */
 	if ((*m)->m_pkthdr.len >= (int)sizeof(struct ip)) {
 		iph = mtod(*m, struct ip *);
 		HTONS(iph->ip_len);
 		HTONS(iph->ip_off);
 	}
 
 	verdict = pf_test(PF_IN, ifp, m, NULL, inp);
 
 	/* pf may already have consumed the packet. */
 	if (*m == NULL)
 		return verdict;
 
 	if (verdict) {
 		m_freem(*m);
 		*m = NULL;
 		return verdict;
 	}
 
 	/* pf_test can change ip header location, so look it up again. */
 	iph = mtod(*m, struct ip *);
 	NTOHS(iph->ip_len);
 	NTOHS(iph->ip_off);
 	return verdict;
 }
 
 /*
  * Outbound IPv4 packet hook (registered by hook_pf() with PFIL_OUT).
  *
  * Any delayed data checksum is completed first, since pf needs a proper
  * checksum before it starts (see OpenBSD ip_output).  As on the inbound
  * side, ip_len/ip_off are converted to network byte order for pf_test()
  * and converted back on whatever mbuf survives.
  *
  * Returns the pf_test() verdict.  On a non-zero verdict any remaining mbuf
  * is freed and *m is set to NULL.
  */
 static int
 pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
     struct inpcb *inp)
 {
 	struct ip *iph;
 	int verdict;
 
 	if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 		in_delayed_cksum(*m);
 		(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 
 	/* Packets shorter than an IP header are left for pf to deal with. */
 	if ((*m)->m_pkthdr.len >= (int)sizeof(*iph)) {
 		iph = mtod(*m, struct ip *);
 		HTONS(iph->ip_len);
 		HTONS(iph->ip_off);
 	}
 
 	verdict = pf_test(PF_OUT, ifp, m, NULL, inp);
 
 	/* pf may already have consumed the packet. */
 	if (*m == NULL)
 		return verdict;
 
 	if (verdict) {
 		m_freem(*m);
 		*m = NULL;
 		return verdict;
 	}
 
 	/* pf_test can change ip header location, so look it up again. */
 	iph = mtod(*m, struct ip *);
 	NTOHS(iph->ip_len);
 	NTOHS(iph->ip_off);
 	return verdict;
 }
 
 #ifdef INET6
 /*
  * Inbound IPv6 packet hook (registered by hook_pf() with PFIL_IN).
  *
  * IPv6 is not affected by the ip_len/ip_off byte order changes, so no
  * header fix-up is needed around pf_test6().
  *
  * Returns the pf_test6() verdict.  On a non-zero verdict any remaining
  * mbuf is freed and *m is set to NULL.
  */
 static int
 pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
     struct inpcb *inp)
 {
 	struct ifnet *rcvif;
 	int verdict;
 
 	/*
 	 * Loopback IPv6 traffic arrives tagged with the real interface so
 	 * that scoped addresses work.  Stateful filtering needs it to look
 	 * like lo0, as is the case for IPv4, so substitute loif[0] for
 	 * packets flagged M_LOOP.
 	 */
 	rcvif = ((*m)->m_flags & M_LOOP) ? &loif[0] : ifp;
 
 	verdict = pf_test6(PF_IN, rcvif, m, NULL, inp);
 	if (verdict != 0 && *m != NULL) {
 		m_freem(*m);
 		*m = NULL;
 	}
 	return verdict;
 }
 
 /*
  * Outbound IPv6 packet hook (registered by hook_pf() with PFIL_OUT).
  *
  * IPv6 is not affected by the ip_len/ip_off byte order changes.  A pending
  * delayed data checksum is completed before the packet is shown to pf,
  * which needs a proper checksum before it starts (see OpenBSD ip_output).
  *
  * Returns the pf_test6() verdict.  On a non-zero verdict any remaining
  * mbuf is freed and *m is set to NULL.
  */
 static int
 pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
     struct inpcb *inp)
 {
 	int verdict;
 
 	if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 		in_delayed_cksum(*m);
 		(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 
 	verdict = pf_test6(PF_OUT, ifp, m, NULL, inp);
 	if (verdict != 0 && *m != NULL) {
 		m_freem(*m);
 		*m = NULL;
 	}
 	return verdict;
 }
 #endif /* INET6 */
 
 /*
  * Register pf's packet hooks on the AF_INET (and, with INET6, AF_INET6)
  * pfil heads.
  *
  * Must be called without the pf lock held.  Does nothing if the hooks are
  * already installed.  Returns 0 on success or ESRCH when a pfil head cannot
  * be found; in the INET6 case the IPv4 hooks added so far are removed again
  * before returning.
  */
 static int
 hook_pf(void)
 {
 	struct pfil_head *head4;
 #ifdef INET6
 	struct pfil_head *head6;
 #endif
 
 	PF_ASSERT(MA_NOTOWNED);
 
 	if (pf_pfil_hooked != 0)
 		return (0);
 
 	head4 = pfil_head_get(PFIL_TYPE_AF, AF_INET);
 	if (head4 == NULL)
 		return (ESRCH); /* XXX */
 	pfil_add_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, head4);
 	pfil_add_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, head4);
 
 #ifdef INET6
 	head6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
 	if (head6 == NULL) {
 		/* Undo the IPv4 registration so nothing is left half-hooked. */
 		pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK,
 		    head4);
 		pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK,
 		    head4);
 		return (ESRCH); /* XXX */
 	}
 	pfil_add_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, head6);
 	pfil_add_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, head6);
 #endif
 
 	pf_pfil_hooked = 1;
 	return (0);
 }
 
 /*
  * Remove pf's packet hooks from the AF_INET (and, with INET6, AF_INET6)
  * pfil heads.
  *
  * Must be called without the pf lock held.  Does nothing if the hooks are
  * not installed.  Returns 0 on success or ESRCH when a pfil head cannot be
  * found.
  *
  * All heads are looked up before any hook is removed, so a failed lookup
  * leaves every hook in place and pf_pfil_hooked still accurate, instead of
  * removing the IPv4 hooks and then bailing out with the flag still set.
  */
 static int
 dehook_pf(void)
 {
 	struct pfil_head *pfh_inet;
 #ifdef INET6
 	struct pfil_head *pfh_inet6;
 #endif
 
 	PF_ASSERT(MA_NOTOWNED);
 
 	if (pf_pfil_hooked == 0)
 		return (0);
 
 	pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
 	if (pfh_inet == NULL)
 		return (ESRCH); /* XXX */
 #ifdef INET6
 	pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
 	if (pfh_inet6 == NULL)
 		return (ESRCH); /* XXX */
 #endif
 
 	pfil_remove_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK,
 	    pfh_inet);
 	pfil_remove_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK,
 	    pfh_inet);
 #ifdef INET6
 	pfil_remove_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK,
 	    pfh_inet6);
 	pfil_remove_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK,
 	    pfh_inet6);
 #endif
 
 	pf_pfil_hooked = 0;
 	return (0);
 }
 
 /*
  * Module load handler: set up the zone variables and the pf mutex, create
  * the pf device node and attach pf.
  *
  * Returns 0 on success.  If pfattach() reports failure (a negative value)
  * the device and the mutex are destroyed again and ENOMEM is returned.
  */
 static int
 pf_load(void)
 {
 	init_zone_var();
 	init_pf_mutex();
 	pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME);
 
 	if (pfattach() >= 0)
 		return (0);
 
 	/* Attach failed: tear down what was created above. */
 	destroy_dev(pf_dev);
 	destroy_pf_mutex();
 	return (ENOMEM);
 }
 
 /*
  * Module unload handler: stop pf, remove its packet hooks, flush its
  * state and release its resources.
  *
  * Returns 0 on success, or the error from dehook_pf() if the hooks could
  * not be removed; in that case nothing else is torn down.
  */
 static int
 pf_unload(void)
 {
 	int error = 0;
 
 	/* Mark pf as stopped while holding the pf lock. */
 	PF_LOCK();
 	pf_status.running = 0;
 	PF_UNLOCK();
 	/* dehook_pf() asserts that the pf lock is NOT held, hence the unlock. */
 	error = dehook_pf();
 	if (error) {
 		/*
 		 * Should not happen!
 		 * XXX Due to error code ESRCH, kldunload will show
 		 * a message like 'No such process'.
 		 */
 		printf("%s : pfil unregisteration fail\n", __FUNCTION__);
 		return error;
 	}
 	PF_LOCK();
 	/* The return value of shutdown_pf() is not checked here. */
 	shutdown_pf();
 	/*
 	 * Request purge-thread termination and wait until pf_end_threads
 	 * reaches 2, waking the thread and sleeping up to hz ticks per
 	 * iteration.
 	 * NOTE(review): presumably the purge thread itself sets
 	 * pf_end_threads to 2 when it exits, and pf_task_mtx is the mutex
 	 * taken by PF_LOCK() -- confirm.
 	 */
 	pf_end_threads = 1;
 	while (pf_end_threads < 2) {
 		wakeup_one(pf_purge_thread);
 		msleep(pf_purge_thread, &pf_task_mtx, 0, "pftmo", hz);
 	}
 	/* Release interface, OS-fingerprint and zone resources. */
 	pfi_cleanup();
 	pf_osfp_flush();
 	pf_osfp_cleanup();
 	cleanup_pf_zone();
 	PF_UNLOCK();
 	/* Undo what pf_load() created: the device node and the mutex. */
 	destroy_dev(pf_dev);
 	destroy_pf_mutex();
 	return error;
 }
 
 /*
  * Module event handler: dispatch MOD_LOAD to pf_load() and MOD_UNLOAD to
  * pf_unload().  Every other event type is rejected with EINVAL.
  */
 static int
 pf_modevent(module_t mod, int type, void *data)
 {
 	if (type == MOD_LOAD)
 		return (pf_load());
 	if (type == MOD_UNLOAD)
 		return (pf_unload());
 
 	return (EINVAL);
 }
 
 /*
  * Module description handed to DECLARE_MODULE() below: the module name
  * "pf", pf_modevent() as its event handler, and 0 for the final member.
  */
 static moduledata_t pf_mod = {
 	"pf",
 	pf_modevent,
 	0
 };
 
 /*
  * Declare the module with SI_SUB_PROTO_IFATTACHDOMAIN / SI_ORDER_FIRST and
  * tag it with version PF_MODVER.
  */
 DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST);
 MODULE_VERSION(pf, PF_MODVER);
 #endif	/* __FreeBSD__ */
Index: head/sys/kern/init_sysent.c
===================================================================
--- head/sys/kern/init_sysent.c	(revision 178887)
+++ head/sys/kern/init_sysent.c	(revision 178888)
@@ -1,535 +1,535 @@
 /*
  * System call switch table.
  *
  * DO NOT EDIT-- this file is automatically generated.
  * $FreeBSD$
  * created from FreeBSD: src/sys/kern/syscalls.master,v 1.242 2008/03/31 12:06:55 kib Exp 
  */
 
 #include "opt_compat.h"
 
 #include <sys/param.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 
 /* Size of argument structure `struct name`, in units of register_t. */
 #define AS(name) (sizeof(struct name) / sizeof(register_t))
 
 /*
  * compat(n, name) supplies the first two initializers of a sysent entry:
  * with COMPAT_43 the count n and the handler o<name>, otherwise a zero
  * count and nosys.
  */
 #ifdef COMPAT_43
 #define compat(n, name) n, (sy_call_t *)__CONCAT(o,name)
 #else
 #define compat(n, name) 0, (sy_call_t *)nosys
 #endif
 
 /*
  * compat4(n, name) does the same for FreeBSD 4 compatibility entries: with
  * COMPAT_FREEBSD4 the count n and the handler freebsd4_<name>, otherwise a
  * zero count and nosys.
  */
 #ifdef COMPAT_FREEBSD4
 #define compat4(n, name) n, (sy_call_t *)__CONCAT(freebsd4_,name)
 #else
 #define compat4(n, name) 0, (sy_call_t *)nosys
 #endif
 
 /* The casts are bogus but will do for now. */
 struct sysent sysent[] = {
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },		/* 0 = syscall */
 	{ AS(sys_exit_args), (sy_call_t *)sys_exit, AUE_EXIT, NULL, 0, 0 },	/* 1 = exit */
 	{ 0, (sy_call_t *)fork, AUE_FORK, NULL, 0, 0 },		/* 2 = fork */
 	{ AS(read_args), (sy_call_t *)read, AUE_NULL, NULL, 0, 0 },	/* 3 = read */
 	{ AS(write_args), (sy_call_t *)write, AUE_NULL, NULL, 0, 0 },	/* 4 = write */
 	{ AS(open_args), (sy_call_t *)open, AUE_OPEN_RWTC, NULL, 0, 0 },	/* 5 = open */
 	{ AS(close_args), (sy_call_t *)close, AUE_CLOSE, NULL, 0, 0 },	/* 6 = close */
 	{ AS(wait_args), (sy_call_t *)wait4, AUE_WAIT4, NULL, 0, 0 },	/* 7 = wait4 */
 	{ compat(AS(ocreat_args),creat), AUE_CREAT, NULL, 0, 0 },	/* 8 = old creat */
 	{ AS(link_args), (sy_call_t *)link, AUE_LINK, NULL, 0, 0 },	/* 9 = link */
 	{ AS(unlink_args), (sy_call_t *)unlink, AUE_UNLINK, NULL, 0, 0 },	/* 10 = unlink */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 11 = obsolete execv */
 	{ AS(chdir_args), (sy_call_t *)chdir, AUE_CHDIR, NULL, 0, 0 },	/* 12 = chdir */
 	{ AS(fchdir_args), (sy_call_t *)fchdir, AUE_FCHDIR, NULL, 0, 0 },	/* 13 = fchdir */
 	{ AS(mknod_args), (sy_call_t *)mknod, AUE_MKNOD, NULL, 0, 0 },	/* 14 = mknod */
 	{ AS(chmod_args), (sy_call_t *)chmod, AUE_CHMOD, NULL, 0, 0 },	/* 15 = chmod */
 	{ AS(chown_args), (sy_call_t *)chown, AUE_CHOWN, NULL, 0, 0 },	/* 16 = chown */
 	{ AS(obreak_args), (sy_call_t *)obreak, AUE_NULL, NULL, 0, 0 },	/* 17 = break */
 	{ compat4(AS(freebsd4_getfsstat_args),getfsstat), AUE_GETFSSTAT, NULL, 0, 0 },	/* 18 = old getfsstat */
 	{ compat(AS(olseek_args),lseek), AUE_LSEEK, NULL, 0, 0 },	/* 19 = old lseek */
 	{ 0, (sy_call_t *)getpid, AUE_GETPID, NULL, 0, 0 },		/* 20 = getpid */
 	{ AS(mount_args), (sy_call_t *)mount, AUE_MOUNT, NULL, 0, 0 },	/* 21 = mount */
 	{ AS(unmount_args), (sy_call_t *)unmount, AUE_UMOUNT, NULL, 0, 0 },	/* 22 = unmount */
 	{ AS(setuid_args), (sy_call_t *)setuid, AUE_SETUID, NULL, 0, 0 },	/* 23 = setuid */
 	{ 0, (sy_call_t *)getuid, AUE_GETUID, NULL, 0, 0 },		/* 24 = getuid */
 	{ 0, (sy_call_t *)geteuid, AUE_GETEUID, NULL, 0, 0 },		/* 25 = geteuid */
 	{ AS(ptrace_args), (sy_call_t *)ptrace, AUE_PTRACE, NULL, 0, 0 },	/* 26 = ptrace */
 	{ AS(recvmsg_args), (sy_call_t *)recvmsg, AUE_RECVMSG, NULL, 0, 0 },	/* 27 = recvmsg */
 	{ AS(sendmsg_args), (sy_call_t *)sendmsg, AUE_SENDMSG, NULL, 0, 0 },	/* 28 = sendmsg */
 	{ AS(recvfrom_args), (sy_call_t *)recvfrom, AUE_RECVFROM, NULL, 0, 0 },	/* 29 = recvfrom */
 	{ AS(accept_args), (sy_call_t *)accept, AUE_ACCEPT, NULL, 0, 0 },	/* 30 = accept */
 	{ AS(getpeername_args), (sy_call_t *)getpeername, AUE_GETPEERNAME, NULL, 0, 0 },	/* 31 = getpeername */
 	{ AS(getsockname_args), (sy_call_t *)getsockname, AUE_GETSOCKNAME, NULL, 0, 0 },	/* 32 = getsockname */
 	{ AS(access_args), (sy_call_t *)access, AUE_ACCESS, NULL, 0, 0 },	/* 33 = access */
 	{ AS(chflags_args), (sy_call_t *)chflags, AUE_CHFLAGS, NULL, 0, 0 },	/* 34 = chflags */
 	{ AS(fchflags_args), (sy_call_t *)fchflags, AUE_FCHFLAGS, NULL, 0, 0 },	/* 35 = fchflags */
 	{ 0, (sy_call_t *)sync, AUE_SYNC, NULL, 0, 0 },		/* 36 = sync */
 	{ AS(kill_args), (sy_call_t *)kill, AUE_KILL, NULL, 0, 0 },	/* 37 = kill */
 	{ compat(AS(ostat_args),stat), AUE_STAT, NULL, 0, 0 },	/* 38 = old stat */
 	{ 0, (sy_call_t *)getppid, AUE_GETPPID, NULL, 0, 0 },		/* 39 = getppid */
 	{ compat(AS(olstat_args),lstat), AUE_LSTAT, NULL, 0, 0 },	/* 40 = old lstat */
 	{ AS(dup_args), (sy_call_t *)dup, AUE_DUP, NULL, 0, 0 },	/* 41 = dup */
 	{ 0, (sy_call_t *)pipe, AUE_PIPE, NULL, 0, 0 },		/* 42 = pipe */
 	{ 0, (sy_call_t *)getegid, AUE_GETEGID, NULL, 0, 0 },		/* 43 = getegid */
 	{ AS(profil_args), (sy_call_t *)profil, AUE_PROFILE, NULL, 0, 0 },	/* 44 = profil */
 	{ AS(ktrace_args), (sy_call_t *)ktrace, AUE_KTRACE, NULL, 0, 0 },	/* 45 = ktrace */
 	{ compat(AS(osigaction_args),sigaction), AUE_SIGACTION, NULL, 0, 0 },	/* 46 = old sigaction */
 	{ 0, (sy_call_t *)getgid, AUE_GETGID, NULL, 0, 0 },		/* 47 = getgid */
 	{ compat(AS(osigprocmask_args),sigprocmask), AUE_SIGPROCMASK, NULL, 0, 0 },	/* 48 = old sigprocmask */
 	{ AS(getlogin_args), (sy_call_t *)getlogin, AUE_GETLOGIN, NULL, 0, 0 },	/* 49 = getlogin */
 	{ AS(setlogin_args), (sy_call_t *)setlogin, AUE_SETLOGIN, NULL, 0, 0 },	/* 50 = setlogin */
 	{ AS(acct_args), (sy_call_t *)acct, AUE_ACCT, NULL, 0, 0 },	/* 51 = acct */
 	{ compat(0,sigpending), AUE_SIGPENDING, NULL, 0, 0 },		/* 52 = old sigpending */
 	{ AS(sigaltstack_args), (sy_call_t *)sigaltstack, AUE_SIGALTSTACK, NULL, 0, 0 },	/* 53 = sigaltstack */
 	{ AS(ioctl_args), (sy_call_t *)ioctl, AUE_IOCTL, NULL, 0, 0 },	/* 54 = ioctl */
 	{ AS(reboot_args), (sy_call_t *)reboot, AUE_REBOOT, NULL, 0, 0 },	/* 55 = reboot */
 	{ AS(revoke_args), (sy_call_t *)revoke, AUE_REVOKE, NULL, 0, 0 },	/* 56 = revoke */
 	{ AS(symlink_args), (sy_call_t *)symlink, AUE_SYMLINK, NULL, 0, 0 },	/* 57 = symlink */
 	{ AS(readlink_args), (sy_call_t *)readlink, AUE_READLINK, NULL, 0, 0 },	/* 58 = readlink */
 	{ AS(execve_args), (sy_call_t *)execve, AUE_EXECVE, NULL, 0, 0 },	/* 59 = execve */
 	{ AS(umask_args), (sy_call_t *)umask, AUE_UMASK, NULL, 0, 0 },	/* 60 = umask */
 	{ AS(chroot_args), (sy_call_t *)chroot, AUE_CHROOT, NULL, 0, 0 },	/* 61 = chroot */
 	{ compat(AS(ofstat_args),fstat), AUE_FSTAT, NULL, 0, 0 },	/* 62 = old fstat */
 	{ compat(AS(getkerninfo_args),getkerninfo), AUE_NULL, NULL, 0, 0 },	/* 63 = old getkerninfo */
 	{ compat(0,getpagesize), AUE_NULL, NULL, 0, 0 },		/* 64 = old getpagesize */
 	{ AS(msync_args), (sy_call_t *)msync, AUE_MSYNC, NULL, 0, 0 },	/* 65 = msync */
 	{ 0, (sy_call_t *)vfork, AUE_VFORK, NULL, 0, 0 },		/* 66 = vfork */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 67 = obsolete vread */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 68 = obsolete vwrite */
 	{ AS(sbrk_args), (sy_call_t *)sbrk, AUE_SBRK, NULL, 0, 0 },	/* 69 = sbrk */
 	{ AS(sstk_args), (sy_call_t *)sstk, AUE_SSTK, NULL, 0, 0 },	/* 70 = sstk */
 	{ compat(AS(ommap_args),mmap), AUE_MMAP, NULL, 0, 0 },	/* 71 = old mmap */
 	{ AS(ovadvise_args), (sy_call_t *)ovadvise, AUE_O_VADVISE, NULL, 0, 0 },	/* 72 = vadvise */
 	{ AS(munmap_args), (sy_call_t *)munmap, AUE_MUNMAP, NULL, 0, 0 },	/* 73 = munmap */
 	{ AS(mprotect_args), (sy_call_t *)mprotect, AUE_MPROTECT, NULL, 0, 0 },	/* 74 = mprotect */
 	{ AS(madvise_args), (sy_call_t *)madvise, AUE_MADVISE, NULL, 0, 0 },	/* 75 = madvise */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 76 = obsolete vhangup */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 77 = obsolete vlimit */
 	{ AS(mincore_args), (sy_call_t *)mincore, AUE_MINCORE, NULL, 0, 0 },	/* 78 = mincore */
 	{ AS(getgroups_args), (sy_call_t *)getgroups, AUE_GETGROUPS, NULL, 0, 0 },	/* 79 = getgroups */
 	{ AS(setgroups_args), (sy_call_t *)setgroups, AUE_SETGROUPS, NULL, 0, 0 },	/* 80 = setgroups */
 	{ 0, (sy_call_t *)getpgrp, AUE_GETPGRP, NULL, 0, 0 },		/* 81 = getpgrp */
 	{ AS(setpgid_args), (sy_call_t *)setpgid, AUE_SETPGRP, NULL, 0, 0 },	/* 82 = setpgid */
 	{ AS(setitimer_args), (sy_call_t *)setitimer, AUE_SETITIMER, NULL, 0, 0 },	/* 83 = setitimer */
 	{ compat(0,wait), AUE_WAIT4, NULL, 0, 0 },			/* 84 = old wait */
 	{ AS(swapon_args), (sy_call_t *)swapon, AUE_SWAPON, NULL, 0, 0 },	/* 85 = swapon */
 	{ AS(getitimer_args), (sy_call_t *)getitimer, AUE_GETITIMER, NULL, 0, 0 },	/* 86 = getitimer */
 	{ compat(AS(gethostname_args),gethostname), AUE_SYSCTL, NULL, 0, 0 },	/* 87 = old gethostname */
 	{ compat(AS(sethostname_args),sethostname), AUE_SYSCTL, NULL, 0, 0 },	/* 88 = old sethostname */
 	{ 0, (sy_call_t *)getdtablesize, AUE_GETDTABLESIZE, NULL, 0, 0 },	/* 89 = getdtablesize */
 	{ AS(dup2_args), (sy_call_t *)dup2, AUE_DUP2, NULL, 0, 0 },	/* 90 = dup2 */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 91 = getdopt */
 	{ AS(fcntl_args), (sy_call_t *)fcntl, AUE_FCNTL, NULL, 0, 0 },	/* 92 = fcntl */
 	{ AS(select_args), (sy_call_t *)select, AUE_SELECT, NULL, 0, 0 },	/* 93 = select */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 94 = setdopt */
 	{ AS(fsync_args), (sy_call_t *)fsync, AUE_FSYNC, NULL, 0, 0 },	/* 95 = fsync */
 	{ AS(setpriority_args), (sy_call_t *)setpriority, AUE_SETPRIORITY, NULL, 0, 0 },	/* 96 = setpriority */
 	{ AS(socket_args), (sy_call_t *)socket, AUE_SOCKET, NULL, 0, 0 },	/* 97 = socket */
 	{ AS(connect_args), (sy_call_t *)connect, AUE_CONNECT, NULL, 0, 0 },	/* 98 = connect */
 	{ compat(AS(accept_args),accept), AUE_ACCEPT, NULL, 0, 0 },	/* 99 = old accept */
 	{ AS(getpriority_args), (sy_call_t *)getpriority, AUE_GETPRIORITY, NULL, 0, 0 },	/* 100 = getpriority */
 	{ compat(AS(osend_args),send), AUE_SEND, NULL, 0, 0 },	/* 101 = old send */
 	{ compat(AS(orecv_args),recv), AUE_RECV, NULL, 0, 0 },	/* 102 = old recv */
 	{ compat(AS(osigreturn_args),sigreturn), AUE_SIGRETURN, NULL, 0, 0 },	/* 103 = old sigreturn */
 	{ AS(bind_args), (sy_call_t *)bind, AUE_BIND, NULL, 0, 0 },	/* 104 = bind */
 	{ AS(setsockopt_args), (sy_call_t *)setsockopt, AUE_SETSOCKOPT, NULL, 0, 0 },	/* 105 = setsockopt */
 	{ AS(listen_args), (sy_call_t *)listen, AUE_LISTEN, NULL, 0, 0 },	/* 106 = listen */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 107 = obsolete vtimes */
 	{ compat(AS(osigvec_args),sigvec), AUE_NULL, NULL, 0, 0 },	/* 108 = old sigvec */
 	{ compat(AS(osigblock_args),sigblock), AUE_NULL, NULL, 0, 0 },	/* 109 = old sigblock */
 	{ compat(AS(osigsetmask_args),sigsetmask), AUE_NULL, NULL, 0, 0 },	/* 110 = old sigsetmask */
 	{ compat(AS(osigsuspend_args),sigsuspend), AUE_NULL, NULL, 0, 0 },	/* 111 = old sigsuspend */
 	{ compat(AS(osigstack_args),sigstack), AUE_NULL, NULL, 0, 0 },	/* 112 = old sigstack */
 	{ compat(AS(orecvmsg_args),recvmsg), AUE_RECVMSG, NULL, 0, 0 },	/* 113 = old recvmsg */
 	{ compat(AS(osendmsg_args),sendmsg), AUE_SENDMSG, NULL, 0, 0 },	/* 114 = old sendmsg */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 115 = obsolete vtrace */
 	{ AS(gettimeofday_args), (sy_call_t *)gettimeofday, AUE_GETTIMEOFDAY, NULL, 0, 0 },	/* 116 = gettimeofday */
 	{ AS(getrusage_args), (sy_call_t *)getrusage, AUE_GETRUSAGE, NULL, 0, 0 },	/* 117 = getrusage */
 	{ AS(getsockopt_args), (sy_call_t *)getsockopt, AUE_GETSOCKOPT, NULL, 0, 0 },	/* 118 = getsockopt */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 119 = resuba */
 	{ AS(readv_args), (sy_call_t *)readv, AUE_READV, NULL, 0, 0 },	/* 120 = readv */
 	{ AS(writev_args), (sy_call_t *)writev, AUE_WRITEV, NULL, 0, 0 },	/* 121 = writev */
 	{ AS(settimeofday_args), (sy_call_t *)settimeofday, AUE_SETTIMEOFDAY, NULL, 0, 0 },	/* 122 = settimeofday */
 	{ AS(fchown_args), (sy_call_t *)fchown, AUE_FCHOWN, NULL, 0, 0 },	/* 123 = fchown */
 	{ AS(fchmod_args), (sy_call_t *)fchmod, AUE_FCHMOD, NULL, 0, 0 },	/* 124 = fchmod */
 	{ compat(AS(recvfrom_args),recvfrom), AUE_RECVFROM, NULL, 0, 0 },	/* 125 = old recvfrom */
 	{ AS(setreuid_args), (sy_call_t *)setreuid, AUE_SETREUID, NULL, 0, 0 },	/* 126 = setreuid */
 	{ AS(setregid_args), (sy_call_t *)setregid, AUE_SETREGID, NULL, 0, 0 },	/* 127 = setregid */
 	{ AS(rename_args), (sy_call_t *)rename, AUE_RENAME, NULL, 0, 0 },	/* 128 = rename */
 	{ compat(AS(otruncate_args),truncate), AUE_TRUNCATE, NULL, 0, 0 },	/* 129 = old truncate */
 	{ compat(AS(oftruncate_args),ftruncate), AUE_FTRUNCATE, NULL, 0, 0 },	/* 130 = old ftruncate */
 	{ AS(flock_args), (sy_call_t *)flock, AUE_FLOCK, NULL, 0, 0 },	/* 131 = flock */
 	{ AS(mkfifo_args), (sy_call_t *)mkfifo, AUE_MKFIFO, NULL, 0, 0 },	/* 132 = mkfifo */
 	{ AS(sendto_args), (sy_call_t *)sendto, AUE_SENDTO, NULL, 0, 0 },	/* 133 = sendto */
 	{ AS(shutdown_args), (sy_call_t *)shutdown, AUE_SHUTDOWN, NULL, 0, 0 },	/* 134 = shutdown */
 	{ AS(socketpair_args), (sy_call_t *)socketpair, AUE_SOCKETPAIR, NULL, 0, 0 },	/* 135 = socketpair */
 	{ AS(mkdir_args), (sy_call_t *)mkdir, AUE_MKDIR, NULL, 0, 0 },	/* 136 = mkdir */
 	{ AS(rmdir_args), (sy_call_t *)rmdir, AUE_RMDIR, NULL, 0, 0 },	/* 137 = rmdir */
 	{ AS(utimes_args), (sy_call_t *)utimes, AUE_UTIMES, NULL, 0, 0 },	/* 138 = utimes */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 139 = obsolete 4.2 sigreturn */
 	{ AS(adjtime_args), (sy_call_t *)adjtime, AUE_ADJTIME, NULL, 0, 0 },	/* 140 = adjtime */
 	{ compat(AS(ogetpeername_args),getpeername), AUE_GETPEERNAME, NULL, 0, 0 },	/* 141 = old getpeername */
 	{ compat(0,gethostid), AUE_SYSCTL, NULL, 0, 0 },		/* 142 = old gethostid */
 	{ compat(AS(osethostid_args),sethostid), AUE_SYSCTL, NULL, 0, 0 },	/* 143 = old sethostid */
 	{ compat(AS(ogetrlimit_args),getrlimit), AUE_GETRLIMIT, NULL, 0, 0 },	/* 144 = old getrlimit */
 	{ compat(AS(osetrlimit_args),setrlimit), AUE_SETRLIMIT, NULL, 0, 0 },	/* 145 = old setrlimit */
 	{ compat(AS(okillpg_args),killpg), AUE_KILLPG, NULL, 0, 0 },	/* 146 = old killpg */
 	{ 0, (sy_call_t *)setsid, AUE_SETSID, NULL, 0, 0 },		/* 147 = setsid */
 	{ AS(quotactl_args), (sy_call_t *)quotactl, AUE_QUOTACTL, NULL, 0, 0 },	/* 148 = quotactl */
 	{ compat(0,quota), AUE_O_QUOTA, NULL, 0, 0 },			/* 149 = old quota */
 	{ compat(AS(getsockname_args),getsockname), AUE_GETSOCKNAME, NULL, 0, 0 },	/* 150 = old getsockname */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 151 = sem_lock */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 152 = sem_wakeup */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 153 = asyncdaemon */
 	{ AS(nlm_syscall_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 154 = nlm_syscall */
 	{ AS(nfssvc_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 155 = nfssvc */
 	{ compat(AS(ogetdirentries_args),getdirentries), AUE_GETDIRENTRIES, NULL, 0, 0 },	/* 156 = old getdirentries */
 	{ compat4(AS(freebsd4_statfs_args),statfs), AUE_STATFS, NULL, 0, 0 },	/* 157 = old statfs */
 	{ compat4(AS(freebsd4_fstatfs_args),fstatfs), AUE_FSTATFS, NULL, 0, 0 },	/* 158 = old fstatfs */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 159 = nosys */
 	{ AS(lgetfh_args), (sy_call_t *)lgetfh, AUE_LGETFH, NULL, 0, 0 },	/* 160 = lgetfh */
 	{ AS(getfh_args), (sy_call_t *)getfh, AUE_NFS_GETFH, NULL, 0, 0 },	/* 161 = getfh */
 	{ AS(getdomainname_args), (sy_call_t *)getdomainname, AUE_SYSCTL, NULL, 0, 0 },	/* 162 = getdomainname */
 	{ AS(setdomainname_args), (sy_call_t *)setdomainname, AUE_SYSCTL, NULL, 0, 0 },	/* 163 = setdomainname */
 	{ AS(uname_args), (sy_call_t *)uname, AUE_NULL, NULL, 0, 0 },	/* 164 = uname */
 	{ AS(sysarch_args), (sy_call_t *)sysarch, AUE_SYSARCH, NULL, 0, 0 },	/* 165 = sysarch */
 	{ AS(rtprio_args), (sy_call_t *)rtprio, AUE_RTPRIO, NULL, 0, 0 },	/* 166 = rtprio */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 167 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 168 = nosys */
 	{ AS(semsys_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 169 = semsys */
 	{ AS(msgsys_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 170 = msgsys */
 	{ AS(shmsys_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 171 = shmsys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 172 = nosys */
 	{ AS(freebsd6_pread_args), (sy_call_t *)freebsd6_pread, AUE_PREAD, NULL, 0, 0 },	/* 173 = freebsd6_pread */
 	{ AS(freebsd6_pwrite_args), (sy_call_t *)freebsd6_pwrite, AUE_PWRITE, NULL, 0, 0 },	/* 174 = freebsd6_pwrite */
-	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 175 = nosys */
+	{ AS(setfib_args), (sy_call_t *)setfib, AUE_NULL, NULL, 0, 0 },	/* 175 = setfib */
 	{ AS(ntp_adjtime_args), (sy_call_t *)ntp_adjtime, AUE_NTP_ADJTIME, NULL, 0, 0 },	/* 176 = ntp_adjtime */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 177 = sfork */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 178 = getdescriptor */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 179 = setdescriptor */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 180 = nosys */
 	{ AS(setgid_args), (sy_call_t *)setgid, AUE_SETGID, NULL, 0, 0 },	/* 181 = setgid */
 	{ AS(setegid_args), (sy_call_t *)setegid, AUE_SETEGID, NULL, 0, 0 },	/* 182 = setegid */
 	{ AS(seteuid_args), (sy_call_t *)seteuid, AUE_SETEUID, NULL, 0, 0 },	/* 183 = seteuid */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 184 = lfs_bmapv */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 185 = lfs_markv */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 186 = lfs_segclean */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 187 = lfs_segwait */
 	{ AS(stat_args), (sy_call_t *)stat, AUE_STAT, NULL, 0, 0 },	/* 188 = stat */
 	{ AS(fstat_args), (sy_call_t *)fstat, AUE_FSTAT, NULL, 0, 0 },	/* 189 = fstat */
 	{ AS(lstat_args), (sy_call_t *)lstat, AUE_LSTAT, NULL, 0, 0 },	/* 190 = lstat */
 	{ AS(pathconf_args), (sy_call_t *)pathconf, AUE_PATHCONF, NULL, 0, 0 },	/* 191 = pathconf */
 	{ AS(fpathconf_args), (sy_call_t *)fpathconf, AUE_FPATHCONF, NULL, 0, 0 },	/* 192 = fpathconf */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 193 = nosys */
 	{ AS(__getrlimit_args), (sy_call_t *)getrlimit, AUE_GETRLIMIT, NULL, 0, 0 },	/* 194 = getrlimit */
 	{ AS(__setrlimit_args), (sy_call_t *)setrlimit, AUE_SETRLIMIT, NULL, 0, 0 },	/* 195 = setrlimit */
 	{ AS(getdirentries_args), (sy_call_t *)getdirentries, AUE_GETDIRENTRIES, NULL, 0, 0 },	/* 196 = getdirentries */
 	{ AS(freebsd6_mmap_args), (sy_call_t *)freebsd6_mmap, AUE_MMAP, NULL, 0, 0 },	/* 197 = freebsd6_mmap */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },		/* 198 = __syscall */
 	{ AS(freebsd6_lseek_args), (sy_call_t *)freebsd6_lseek, AUE_LSEEK, NULL, 0, 0 },	/* 199 = freebsd6_lseek */
 	{ AS(freebsd6_truncate_args), (sy_call_t *)freebsd6_truncate, AUE_TRUNCATE, NULL, 0, 0 },	/* 200 = freebsd6_truncate */
 	{ AS(freebsd6_ftruncate_args), (sy_call_t *)freebsd6_ftruncate, AUE_FTRUNCATE, NULL, 0, 0 },	/* 201 = freebsd6_ftruncate */
 	{ AS(sysctl_args), (sy_call_t *)__sysctl, AUE_SYSCTL, NULL, 0, 0 },	/* 202 = __sysctl */
 	{ AS(mlock_args), (sy_call_t *)mlock, AUE_MLOCK, NULL, 0, 0 },	/* 203 = mlock */
 	{ AS(munlock_args), (sy_call_t *)munlock, AUE_MUNLOCK, NULL, 0, 0 },	/* 204 = munlock */
 	{ AS(undelete_args), (sy_call_t *)undelete, AUE_UNDELETE, NULL, 0, 0 },	/* 205 = undelete */
 	{ AS(futimes_args), (sy_call_t *)futimes, AUE_FUTIMES, NULL, 0, 0 },	/* 206 = futimes */
 	{ AS(getpgid_args), (sy_call_t *)getpgid, AUE_GETPGID, NULL, 0, 0 },	/* 207 = getpgid */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 208 = newreboot */
 	{ AS(poll_args), (sy_call_t *)poll, AUE_POLL, NULL, 0, 0 },	/* 209 = poll */
 	{ AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0 },	/* 210 = lkmnosys */
 	{ AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0 },	/* 211 = lkmnosys */
 	{ AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0 },	/* 212 = lkmnosys */
 	{ AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0 },	/* 213 = lkmnosys */
 	{ AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0 },	/* 214 = lkmnosys */
 	{ AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0 },	/* 215 = lkmnosys */
 	{ AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0 },	/* 216 = lkmnosys */
 	{ AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0 },	/* 217 = lkmnosys */
 	{ AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0 },	/* 218 = lkmnosys */
 	{ AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0 },	/* 219 = lkmnosys */
 	{ AS(__semctl_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 220 = __semctl */
 	{ AS(semget_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 221 = semget */
 	{ AS(semop_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 222 = semop */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 223 = semconfig */
 	{ AS(msgctl_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 224 = msgctl */
 	{ AS(msgget_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 225 = msgget */
 	{ AS(msgsnd_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 226 = msgsnd */
 	{ AS(msgrcv_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 227 = msgrcv */
 	{ AS(shmat_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 228 = shmat */
 	{ AS(shmctl_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 229 = shmctl */
 	{ AS(shmdt_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 230 = shmdt */
 	{ AS(shmget_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 231 = shmget */
 	{ AS(clock_gettime_args), (sy_call_t *)clock_gettime, AUE_NULL, NULL, 0, 0 },	/* 232 = clock_gettime */
 	{ AS(clock_settime_args), (sy_call_t *)clock_settime, AUE_CLOCK_SETTIME, NULL, 0, 0 },	/* 233 = clock_settime */
 	{ AS(clock_getres_args), (sy_call_t *)clock_getres, AUE_NULL, NULL, 0, 0 },	/* 234 = clock_getres */
 	{ AS(ktimer_create_args), (sy_call_t *)ktimer_create, AUE_NULL, NULL, 0, 0 },	/* 235 = ktimer_create */
 	{ AS(ktimer_delete_args), (sy_call_t *)ktimer_delete, AUE_NULL, NULL, 0, 0 },	/* 236 = ktimer_delete */
 	{ AS(ktimer_settime_args), (sy_call_t *)ktimer_settime, AUE_NULL, NULL, 0, 0 },	/* 237 = ktimer_settime */
 	{ AS(ktimer_gettime_args), (sy_call_t *)ktimer_gettime, AUE_NULL, NULL, 0, 0 },	/* 238 = ktimer_gettime */
 	{ AS(ktimer_getoverrun_args), (sy_call_t *)ktimer_getoverrun, AUE_NULL, NULL, 0, 0 },	/* 239 = ktimer_getoverrun */
 	{ AS(nanosleep_args), (sy_call_t *)nanosleep, AUE_NULL, NULL, 0, 0 },	/* 240 = nanosleep */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 241 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 242 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 243 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 244 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 245 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 246 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 247 = nosys */
 	{ AS(ntp_gettime_args), (sy_call_t *)ntp_gettime, AUE_NULL, NULL, 0, 0 },	/* 248 = ntp_gettime */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 249 = nosys */
 	{ AS(minherit_args), (sy_call_t *)minherit, AUE_MINHERIT, NULL, 0, 0 },	/* 250 = minherit */
 	{ AS(rfork_args), (sy_call_t *)rfork, AUE_RFORK, NULL, 0, 0 },	/* 251 = rfork */
 	{ AS(openbsd_poll_args), (sy_call_t *)openbsd_poll, AUE_POLL, NULL, 0, 0 },	/* 252 = openbsd_poll */
 	{ 0, (sy_call_t *)issetugid, AUE_ISSETUGID, NULL, 0, 0 },	/* 253 = issetugid */
 	{ AS(lchown_args), (sy_call_t *)lchown, AUE_LCHOWN, NULL, 0, 0 },	/* 254 = lchown */
 	{ AS(aio_read_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 255 = aio_read */
 	{ AS(aio_write_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 256 = aio_write */
 	{ AS(lio_listio_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 257 = lio_listio */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 258 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 259 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 260 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 261 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 262 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 263 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 264 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 265 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 266 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 267 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 268 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 269 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 270 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 271 = nosys */
 	{ AS(getdents_args), (sy_call_t *)getdents, AUE_O_GETDENTS, NULL, 0, 0 },	/* 272 = getdents */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 273 = nosys */
 	{ AS(lchmod_args), (sy_call_t *)lchmod, AUE_LCHMOD, NULL, 0, 0 },	/* 274 = lchmod */
 	{ AS(lchown_args), (sy_call_t *)lchown, AUE_LCHOWN, NULL, 0, 0 },	/* 275 = netbsd_lchown */
 	{ AS(lutimes_args), (sy_call_t *)lutimes, AUE_LUTIMES, NULL, 0, 0 },	/* 276 = lutimes */
 	{ AS(msync_args), (sy_call_t *)msync, AUE_MSYNC, NULL, 0, 0 },	/* 277 = netbsd_msync */
 	{ AS(nstat_args), (sy_call_t *)nstat, AUE_STAT, NULL, 0, 0 },	/* 278 = nstat */
 	{ AS(nfstat_args), (sy_call_t *)nfstat, AUE_FSTAT, NULL, 0, 0 },	/* 279 = nfstat */
 	{ AS(nlstat_args), (sy_call_t *)nlstat, AUE_LSTAT, NULL, 0, 0 },	/* 280 = nlstat */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 281 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 282 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 283 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 284 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 285 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 286 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 287 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 288 = nosys */
 	{ AS(preadv_args), (sy_call_t *)preadv, AUE_PREADV, NULL, 0, 0 },	/* 289 = preadv */
 	{ AS(pwritev_args), (sy_call_t *)pwritev, AUE_PWRITEV, NULL, 0, 0 },	/* 290 = pwritev */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 291 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 292 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 293 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 294 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 295 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 296 = nosys */
 	{ compat4(AS(freebsd4_fhstatfs_args),fhstatfs), AUE_FHSTATFS, NULL, 0, 0 },	/* 297 = old fhstatfs */
 	{ AS(fhopen_args), (sy_call_t *)fhopen, AUE_FHOPEN, NULL, 0, 0 },	/* 298 = fhopen */
 	{ AS(fhstat_args), (sy_call_t *)fhstat, AUE_FHSTAT, NULL, 0, 0 },	/* 299 = fhstat */
 	{ AS(modnext_args), (sy_call_t *)modnext, AUE_NULL, NULL, 0, 0 },	/* 300 = modnext */
 	{ AS(modstat_args), (sy_call_t *)modstat, AUE_NULL, NULL, 0, 0 },	/* 301 = modstat */
 	{ AS(modfnext_args), (sy_call_t *)modfnext, AUE_NULL, NULL, 0, 0 },	/* 302 = modfnext */
 	{ AS(modfind_args), (sy_call_t *)modfind, AUE_NULL, NULL, 0, 0 },	/* 303 = modfind */
 	{ AS(kldload_args), (sy_call_t *)kldload, AUE_MODLOAD, NULL, 0, 0 },	/* 304 = kldload */
 	{ AS(kldunload_args), (sy_call_t *)kldunload, AUE_MODUNLOAD, NULL, 0, 0 },	/* 305 = kldunload */
 	{ AS(kldfind_args), (sy_call_t *)kldfind, AUE_NULL, NULL, 0, 0 },	/* 306 = kldfind */
 	{ AS(kldnext_args), (sy_call_t *)kldnext, AUE_NULL, NULL, 0, 0 },	/* 307 = kldnext */
 	{ AS(kldstat_args), (sy_call_t *)kldstat, AUE_NULL, NULL, 0, 0 },	/* 308 = kldstat */
 	{ AS(kldfirstmod_args), (sy_call_t *)kldfirstmod, AUE_NULL, NULL, 0, 0 },	/* 309 = kldfirstmod */
 	{ AS(getsid_args), (sy_call_t *)getsid, AUE_GETSID, NULL, 0, 0 },	/* 310 = getsid */
 	{ AS(setresuid_args), (sy_call_t *)setresuid, AUE_SETRESUID, NULL, 0, 0 },	/* 311 = setresuid */
 	{ AS(setresgid_args), (sy_call_t *)setresgid, AUE_SETRESGID, NULL, 0, 0 },	/* 312 = setresgid */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 313 = obsolete signanosleep */
 	{ AS(aio_return_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 314 = aio_return */
 	{ AS(aio_suspend_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 315 = aio_suspend */
 	{ AS(aio_cancel_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 316 = aio_cancel */
 	{ AS(aio_error_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 317 = aio_error */
 	{ AS(oaio_read_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 318 = oaio_read */
 	{ AS(oaio_write_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 319 = oaio_write */
 	{ AS(olio_listio_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 320 = olio_listio */
 	{ 0, (sy_call_t *)yield, AUE_NULL, NULL, 0, 0 },		/* 321 = yield */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 322 = obsolete thr_sleep */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 323 = obsolete thr_wakeup */
 	{ AS(mlockall_args), (sy_call_t *)mlockall, AUE_MLOCKALL, NULL, 0, 0 },	/* 324 = mlockall */
 	{ 0, (sy_call_t *)munlockall, AUE_MUNLOCKALL, NULL, 0, 0 },	/* 325 = munlockall */
 	{ AS(__getcwd_args), (sy_call_t *)__getcwd, AUE_GETCWD, NULL, 0, 0 },	/* 326 = __getcwd */
 	{ AS(sched_setparam_args), (sy_call_t *)sched_setparam, AUE_NULL, NULL, 0, 0 },	/* 327 = sched_setparam */
 	{ AS(sched_getparam_args), (sy_call_t *)sched_getparam, AUE_NULL, NULL, 0, 0 },	/* 328 = sched_getparam */
 	{ AS(sched_setscheduler_args), (sy_call_t *)sched_setscheduler, AUE_NULL, NULL, 0, 0 },	/* 329 = sched_setscheduler */
 	{ AS(sched_getscheduler_args), (sy_call_t *)sched_getscheduler, AUE_NULL, NULL, 0, 0 },	/* 330 = sched_getscheduler */
 	{ 0, (sy_call_t *)sched_yield, AUE_NULL, NULL, 0, 0 },	/* 331 = sched_yield */
 	{ AS(sched_get_priority_max_args), (sy_call_t *)sched_get_priority_max, AUE_NULL, NULL, 0, 0 },	/* 332 = sched_get_priority_max */
 	{ AS(sched_get_priority_min_args), (sy_call_t *)sched_get_priority_min, AUE_NULL, NULL, 0, 0 },	/* 333 = sched_get_priority_min */
 	{ AS(sched_rr_get_interval_args), (sy_call_t *)sched_rr_get_interval, AUE_NULL, NULL, 0, 0 },	/* 334 = sched_rr_get_interval */
 	{ AS(utrace_args), (sy_call_t *)utrace, AUE_NULL, NULL, 0, 0 },	/* 335 = utrace */
 	{ compat4(AS(freebsd4_sendfile_args),sendfile), AUE_SENDFILE, NULL, 0, 0 },	/* 336 = old sendfile */
 	{ AS(kldsym_args), (sy_call_t *)kldsym, AUE_NULL, NULL, 0, 0 },	/* 337 = kldsym */
 	{ AS(jail_args), (sy_call_t *)jail, AUE_JAIL, NULL, 0, 0 },	/* 338 = jail */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 339 = pioctl */
 	{ AS(sigprocmask_args), (sy_call_t *)sigprocmask, AUE_SIGPROCMASK, NULL, 0, 0 },	/* 340 = sigprocmask */
 	{ AS(sigsuspend_args), (sy_call_t *)sigsuspend, AUE_SIGSUSPEND, NULL, 0, 0 },	/* 341 = sigsuspend */
 	{ compat4(AS(freebsd4_sigaction_args),sigaction), AUE_SIGACTION, NULL, 0, 0 },	/* 342 = old sigaction */
 	{ AS(sigpending_args), (sy_call_t *)sigpending, AUE_SIGPENDING, NULL, 0, 0 },	/* 343 = sigpending */
 	{ compat4(AS(freebsd4_sigreturn_args),sigreturn), AUE_SIGRETURN, NULL, 0, 0 },	/* 344 = old sigreturn */
 	{ AS(sigtimedwait_args), (sy_call_t *)sigtimedwait, AUE_SIGWAIT, NULL, 0, 0 },	/* 345 = sigtimedwait */
 	{ AS(sigwaitinfo_args), (sy_call_t *)sigwaitinfo, AUE_NULL, NULL, 0, 0 },	/* 346 = sigwaitinfo */
 	{ AS(__acl_get_file_args), (sy_call_t *)__acl_get_file, AUE_NULL, NULL, 0, 0 },	/* 347 = __acl_get_file */
 	{ AS(__acl_set_file_args), (sy_call_t *)__acl_set_file, AUE_NULL, NULL, 0, 0 },	/* 348 = __acl_set_file */
 	{ AS(__acl_get_fd_args), (sy_call_t *)__acl_get_fd, AUE_NULL, NULL, 0, 0 },	/* 349 = __acl_get_fd */
 	{ AS(__acl_set_fd_args), (sy_call_t *)__acl_set_fd, AUE_NULL, NULL, 0, 0 },	/* 350 = __acl_set_fd */
 	{ AS(__acl_delete_file_args), (sy_call_t *)__acl_delete_file, AUE_NULL, NULL, 0, 0 },	/* 351 = __acl_delete_file */
 	{ AS(__acl_delete_fd_args), (sy_call_t *)__acl_delete_fd, AUE_NULL, NULL, 0, 0 },	/* 352 = __acl_delete_fd */
 	{ AS(__acl_aclcheck_file_args), (sy_call_t *)__acl_aclcheck_file, AUE_NULL, NULL, 0, 0 },	/* 353 = __acl_aclcheck_file */
 	{ AS(__acl_aclcheck_fd_args), (sy_call_t *)__acl_aclcheck_fd, AUE_NULL, NULL, 0, 0 },	/* 354 = __acl_aclcheck_fd */
 	{ AS(extattrctl_args), (sy_call_t *)extattrctl, AUE_EXTATTRCTL, NULL, 0, 0 },	/* 355 = extattrctl */
 	{ AS(extattr_set_file_args), (sy_call_t *)extattr_set_file, AUE_EXTATTR_SET_FILE, NULL, 0, 0 },	/* 356 = extattr_set_file */
 	{ AS(extattr_get_file_args), (sy_call_t *)extattr_get_file, AUE_EXTATTR_GET_FILE, NULL, 0, 0 },	/* 357 = extattr_get_file */
 	{ AS(extattr_delete_file_args), (sy_call_t *)extattr_delete_file, AUE_EXTATTR_DELETE_FILE, NULL, 0, 0 },	/* 358 = extattr_delete_file */
 	{ AS(aio_waitcomplete_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 359 = aio_waitcomplete */
 	{ AS(getresuid_args), (sy_call_t *)getresuid, AUE_GETRESUID, NULL, 0, 0 },	/* 360 = getresuid */
 	{ AS(getresgid_args), (sy_call_t *)getresgid, AUE_GETRESGID, NULL, 0, 0 },	/* 361 = getresgid */
 	{ 0, (sy_call_t *)kqueue, AUE_KQUEUE, NULL, 0, 0 },		/* 362 = kqueue */
 	{ AS(kevent_args), (sy_call_t *)kevent, AUE_NULL, NULL, 0, 0 },	/* 363 = kevent */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 364 = __cap_get_proc */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 365 = __cap_set_proc */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 366 = __cap_get_fd */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 367 = __cap_get_file */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 368 = __cap_set_fd */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 369 = __cap_set_file */
 	{ AS(nosys_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 370 = lkmressys */
 	{ AS(extattr_set_fd_args), (sy_call_t *)extattr_set_fd, AUE_EXTATTR_SET_FD, NULL, 0, 0 },	/* 371 = extattr_set_fd */
 	{ AS(extattr_get_fd_args), (sy_call_t *)extattr_get_fd, AUE_EXTATTR_GET_FD, NULL, 0, 0 },	/* 372 = extattr_get_fd */
 	{ AS(extattr_delete_fd_args), (sy_call_t *)extattr_delete_fd, AUE_EXTATTR_DELETE_FD, NULL, 0, 0 },	/* 373 = extattr_delete_fd */
 	{ AS(__setugid_args), (sy_call_t *)__setugid, AUE_NULL, NULL, 0, 0 },	/* 374 = __setugid */
 	{ AS(nfsclnt_args), (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },	/* 375 = nfsclnt */
 	{ AS(eaccess_args), (sy_call_t *)eaccess, AUE_EACCESS, NULL, 0, 0 },	/* 376 = eaccess */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 377 = afs_syscall */
 	{ AS(nmount_args), (sy_call_t *)nmount, AUE_NMOUNT, NULL, 0, 0 },	/* 378 = nmount */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 379 = kse_exit */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 380 = kse_wakeup */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 381 = kse_create */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 382 = kse_thr_interrupt */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 383 = kse_release */
 	{ AS(__mac_get_proc_args), (sy_call_t *)__mac_get_proc, AUE_NULL, NULL, 0, 0 },	/* 384 = __mac_get_proc */
 	{ AS(__mac_set_proc_args), (sy_call_t *)__mac_set_proc, AUE_NULL, NULL, 0, 0 },	/* 385 = __mac_set_proc */
 	{ AS(__mac_get_fd_args), (sy_call_t *)__mac_get_fd, AUE_NULL, NULL, 0, 0 },	/* 386 = __mac_get_fd */
 	{ AS(__mac_get_file_args), (sy_call_t *)__mac_get_file, AUE_NULL, NULL, 0, 0 },	/* 387 = __mac_get_file */
 	{ AS(__mac_set_fd_args), (sy_call_t *)__mac_set_fd, AUE_NULL, NULL, 0, 0 },	/* 388 = __mac_set_fd */
 	{ AS(__mac_set_file_args), (sy_call_t *)__mac_set_file, AUE_NULL, NULL, 0, 0 },	/* 389 = __mac_set_file */
 	{ AS(kenv_args), (sy_call_t *)kenv, AUE_NULL, NULL, 0, 0 },	/* 390 = kenv */
 	{ AS(lchflags_args), (sy_call_t *)lchflags, AUE_LCHFLAGS, NULL, 0, 0 },	/* 391 = lchflags */
 	{ AS(uuidgen_args), (sy_call_t *)uuidgen, AUE_NULL, NULL, 0, 0 },	/* 392 = uuidgen */
 	{ AS(sendfile_args), (sy_call_t *)sendfile, AUE_SENDFILE, NULL, 0, 0 },	/* 393 = sendfile */
 	{ AS(mac_syscall_args), (sy_call_t *)mac_syscall, AUE_NULL, NULL, 0, 0 },	/* 394 = mac_syscall */
 	{ AS(getfsstat_args), (sy_call_t *)getfsstat, AUE_GETFSSTAT, NULL, 0, 0 },	/* 395 = getfsstat */
 	{ AS(statfs_args), (sy_call_t *)statfs, AUE_STATFS, NULL, 0, 0 },	/* 396 = statfs */
 	{ AS(fstatfs_args), (sy_call_t *)fstatfs, AUE_FSTATFS, NULL, 0, 0 },	/* 397 = fstatfs */
 	{ AS(fhstatfs_args), (sy_call_t *)fhstatfs, AUE_FHSTATFS, NULL, 0, 0 },	/* 398 = fhstatfs */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 399 = nosys */
 	{ AS(ksem_close_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 400 = ksem_close */
 	{ AS(ksem_post_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 401 = ksem_post */
 	{ AS(ksem_wait_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 402 = ksem_wait */
 	{ AS(ksem_trywait_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 403 = ksem_trywait */
 	{ AS(ksem_init_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 404 = ksem_init */
 	{ AS(ksem_open_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 405 = ksem_open */
 	{ AS(ksem_unlink_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 406 = ksem_unlink */
 	{ AS(ksem_getvalue_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 407 = ksem_getvalue */
 	{ AS(ksem_destroy_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 408 = ksem_destroy */
 	{ AS(__mac_get_pid_args), (sy_call_t *)__mac_get_pid, AUE_NULL, NULL, 0, 0 },	/* 409 = __mac_get_pid */
 	{ AS(__mac_get_link_args), (sy_call_t *)__mac_get_link, AUE_NULL, NULL, 0, 0 },	/* 410 = __mac_get_link */
 	{ AS(__mac_set_link_args), (sy_call_t *)__mac_set_link, AUE_NULL, NULL, 0, 0 },	/* 411 = __mac_set_link */
 	{ AS(extattr_set_link_args), (sy_call_t *)extattr_set_link, AUE_EXTATTR_SET_LINK, NULL, 0, 0 },	/* 412 = extattr_set_link */
 	{ AS(extattr_get_link_args), (sy_call_t *)extattr_get_link, AUE_EXTATTR_GET_LINK, NULL, 0, 0 },	/* 413 = extattr_get_link */
 	{ AS(extattr_delete_link_args), (sy_call_t *)extattr_delete_link, AUE_EXTATTR_DELETE_LINK, NULL, 0, 0 },	/* 414 = extattr_delete_link */
 	{ AS(__mac_execve_args), (sy_call_t *)__mac_execve, AUE_NULL, NULL, 0, 0 },	/* 415 = __mac_execve */
 	{ AS(sigaction_args), (sy_call_t *)sigaction, AUE_SIGACTION, NULL, 0, 0 },	/* 416 = sigaction */
 	{ AS(sigreturn_args), (sy_call_t *)sigreturn, AUE_SIGRETURN, NULL, 0, 0 },	/* 417 = sigreturn */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 418 = __xstat */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 419 = __xfstat */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 420 = __xlstat */
 	{ AS(getcontext_args), (sy_call_t *)getcontext, AUE_NULL, NULL, 0, 0 },	/* 421 = getcontext */
 	{ AS(setcontext_args), (sy_call_t *)setcontext, AUE_NULL, NULL, 0, 0 },	/* 422 = setcontext */
 	{ AS(swapcontext_args), (sy_call_t *)swapcontext, AUE_NULL, NULL, 0, 0 },	/* 423 = swapcontext */
 	{ AS(swapoff_args), (sy_call_t *)swapoff, AUE_SWAPOFF, NULL, 0, 0 },	/* 424 = swapoff */
 	{ AS(__acl_get_link_args), (sy_call_t *)__acl_get_link, AUE_NULL, NULL, 0, 0 },	/* 425 = __acl_get_link */
 	{ AS(__acl_set_link_args), (sy_call_t *)__acl_set_link, AUE_NULL, NULL, 0, 0 },	/* 426 = __acl_set_link */
 	{ AS(__acl_delete_link_args), (sy_call_t *)__acl_delete_link, AUE_NULL, NULL, 0, 0 },	/* 427 = __acl_delete_link */
 	{ AS(__acl_aclcheck_link_args), (sy_call_t *)__acl_aclcheck_link, AUE_NULL, NULL, 0, 0 },	/* 428 = __acl_aclcheck_link */
 	{ AS(sigwait_args), (sy_call_t *)sigwait, AUE_SIGWAIT, NULL, 0, 0 },	/* 429 = sigwait */
 	{ AS(thr_create_args), (sy_call_t *)thr_create, AUE_NULL, NULL, 0, 0 },	/* 430 = thr_create */
 	{ AS(thr_exit_args), (sy_call_t *)thr_exit, AUE_NULL, NULL, 0, 0 },	/* 431 = thr_exit */
 	{ AS(thr_self_args), (sy_call_t *)thr_self, AUE_NULL, NULL, 0, 0 },	/* 432 = thr_self */
 	{ AS(thr_kill_args), (sy_call_t *)thr_kill, AUE_NULL, NULL, 0, 0 },	/* 433 = thr_kill */
 	{ AS(_umtx_lock_args), (sy_call_t *)_umtx_lock, AUE_NULL, NULL, 0, 0 },	/* 434 = _umtx_lock */
 	{ AS(_umtx_unlock_args), (sy_call_t *)_umtx_unlock, AUE_NULL, NULL, 0, 0 },	/* 435 = _umtx_unlock */
 	{ AS(jail_attach_args), (sy_call_t *)jail_attach, AUE_NULL, NULL, 0, 0 },	/* 436 = jail_attach */
 	{ AS(extattr_list_fd_args), (sy_call_t *)extattr_list_fd, AUE_EXTATTR_LIST_FD, NULL, 0, 0 },	/* 437 = extattr_list_fd */
 	{ AS(extattr_list_file_args), (sy_call_t *)extattr_list_file, AUE_EXTATTR_LIST_FILE, NULL, 0, 0 },	/* 438 = extattr_list_file */
 	{ AS(extattr_list_link_args), (sy_call_t *)extattr_list_link, AUE_EXTATTR_LIST_LINK, NULL, 0, 0 },	/* 439 = extattr_list_link */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 440 = kse_switchin */
 	{ AS(ksem_timedwait_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 441 = ksem_timedwait */
 	{ AS(thr_suspend_args), (sy_call_t *)thr_suspend, AUE_NULL, NULL, 0, 0 },	/* 442 = thr_suspend */
 	{ AS(thr_wake_args), (sy_call_t *)thr_wake, AUE_NULL, NULL, 0, 0 },	/* 443 = thr_wake */
 	{ AS(kldunloadf_args), (sy_call_t *)kldunloadf, AUE_MODUNLOAD, NULL, 0, 0 },	/* 444 = kldunloadf */
 	{ AS(audit_args), (sy_call_t *)audit, AUE_AUDIT, NULL, 0, 0 },	/* 445 = audit */
 	{ AS(auditon_args), (sy_call_t *)auditon, AUE_AUDITON, NULL, 0, 0 },	/* 446 = auditon */
 	{ AS(getauid_args), (sy_call_t *)getauid, AUE_GETAUID, NULL, 0, 0 },	/* 447 = getauid */
 	{ AS(setauid_args), (sy_call_t *)setauid, AUE_SETAUID, NULL, 0, 0 },	/* 448 = setauid */
 	{ AS(getaudit_args), (sy_call_t *)getaudit, AUE_GETAUDIT, NULL, 0, 0 },	/* 449 = getaudit */
 	{ AS(setaudit_args), (sy_call_t *)setaudit, AUE_SETAUDIT, NULL, 0, 0 },	/* 450 = setaudit */
 	{ AS(getaudit_addr_args), (sy_call_t *)getaudit_addr, AUE_GETAUDIT_ADDR, NULL, 0, 0 },	/* 451 = getaudit_addr */
 	{ AS(setaudit_addr_args), (sy_call_t *)setaudit_addr, AUE_SETAUDIT_ADDR, NULL, 0, 0 },	/* 452 = setaudit_addr */
 	{ AS(auditctl_args), (sy_call_t *)auditctl, AUE_AUDITCTL, NULL, 0, 0 },	/* 453 = auditctl */
 	{ AS(_umtx_op_args), (sy_call_t *)_umtx_op, AUE_NULL, NULL, 0, 0 },	/* 454 = _umtx_op */
 	{ AS(thr_new_args), (sy_call_t *)thr_new, AUE_NULL, NULL, 0, 0 },	/* 455 = thr_new */
 	{ AS(sigqueue_args), (sy_call_t *)sigqueue, AUE_NULL, NULL, 0, 0 },	/* 456 = sigqueue */
 	{ AS(kmq_open_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 457 = kmq_open */
 	{ AS(kmq_setattr_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 458 = kmq_setattr */
 	{ AS(kmq_timedreceive_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 459 = kmq_timedreceive */
 	{ AS(kmq_timedsend_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 460 = kmq_timedsend */
 	{ AS(kmq_notify_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 461 = kmq_notify */
 	{ AS(kmq_unlink_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 462 = kmq_unlink */
 	{ AS(abort2_args), (sy_call_t *)abort2, AUE_NULL, NULL, 0, 0 },	/* 463 = abort2 */
 	{ AS(thr_set_name_args), (sy_call_t *)thr_set_name, AUE_NULL, NULL, 0, 0 },	/* 464 = thr_set_name */
 	{ AS(aio_fsync_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0 },	/* 465 = aio_fsync */
 	{ AS(rtprio_thread_args), (sy_call_t *)rtprio_thread, AUE_RTPRIO, NULL, 0, 0 },	/* 466 = rtprio_thread */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 467 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 468 = nosys */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 469 = __getpath_fromfd */
 	{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0 },			/* 470 = __getpath_fromaddr */
 	{ AS(sctp_peeloff_args), (sy_call_t *)sctp_peeloff, AUE_NULL, NULL, 0, 0 },	/* 471 = sctp_peeloff */
 	{ AS(sctp_generic_sendmsg_args), (sy_call_t *)sctp_generic_sendmsg, AUE_NULL, NULL, 0, 0 },	/* 472 = sctp_generic_sendmsg */
 	{ AS(sctp_generic_sendmsg_iov_args), (sy_call_t *)sctp_generic_sendmsg_iov, AUE_NULL, NULL, 0, 0 },	/* 473 = sctp_generic_sendmsg_iov */
 	{ AS(sctp_generic_recvmsg_args), (sy_call_t *)sctp_generic_recvmsg, AUE_NULL, NULL, 0, 0 },	/* 474 = sctp_generic_recvmsg */
 	{ AS(pread_args), (sy_call_t *)pread, AUE_PREAD, NULL, 0, 0 },	/* 475 = pread */
 	{ AS(pwrite_args), (sy_call_t *)pwrite, AUE_PWRITE, NULL, 0, 0 },	/* 476 = pwrite */
 	{ AS(mmap_args), (sy_call_t *)mmap, AUE_MMAP, NULL, 0, 0 },	/* 477 = mmap */
 	{ AS(lseek_args), (sy_call_t *)lseek, AUE_LSEEK, NULL, 0, 0 },	/* 478 = lseek */
 	{ AS(truncate_args), (sy_call_t *)truncate, AUE_TRUNCATE, NULL, 0, 0 },	/* 479 = truncate */
 	{ AS(ftruncate_args), (sy_call_t *)ftruncate, AUE_FTRUNCATE, NULL, 0, 0 },	/* 480 = ftruncate */
 	{ AS(thr_kill2_args), (sy_call_t *)thr_kill2, AUE_KILL, NULL, 0, 0 },	/* 481 = thr_kill2 */
 	{ AS(shm_open_args), (sy_call_t *)shm_open, AUE_SHMOPEN, NULL, 0, 0 },	/* 482 = shm_open */
 	{ AS(shm_unlink_args), (sy_call_t *)shm_unlink, AUE_SHMUNLINK, NULL, 0, 0 },	/* 483 = shm_unlink */
 	{ AS(cpuset_args), (sy_call_t *)cpuset, AUE_NULL, NULL, 0, 0 },	/* 484 = cpuset */
 	{ AS(cpuset_setid_args), (sy_call_t *)cpuset_setid, AUE_NULL, NULL, 0, 0 },	/* 485 = cpuset_setid */
 	{ AS(cpuset_getid_args), (sy_call_t *)cpuset_getid, AUE_NULL, NULL, 0, 0 },	/* 486 = cpuset_getid */
 	{ AS(cpuset_getaffinity_args), (sy_call_t *)cpuset_getaffinity, AUE_NULL, NULL, 0, 0 },	/* 487 = cpuset_getaffinity */
 	{ AS(cpuset_setaffinity_args), (sy_call_t *)cpuset_setaffinity, AUE_NULL, NULL, 0, 0 },	/* 488 = cpuset_setaffinity */
 	{ AS(faccessat_args), (sy_call_t *)faccessat, AUE_FACCESSAT, NULL, 0, 0 },	/* 489 = faccessat */
 	{ AS(fchmodat_args), (sy_call_t *)fchmodat, AUE_FCHMODAT, NULL, 0, 0 },	/* 490 = fchmodat */
 	{ AS(fchownat_args), (sy_call_t *)fchownat, AUE_FCHOWNAT, NULL, 0, 0 },	/* 491 = fchownat */
 	{ AS(fexecve_args), (sy_call_t *)fexecve, AUE_FEXECVE, NULL, 0, 0 },	/* 492 = fexecve */
 	{ AS(fstatat_args), (sy_call_t *)fstatat, AUE_FSTATAT, NULL, 0, 0 },	/* 493 = fstatat */
 	{ AS(futimesat_args), (sy_call_t *)futimesat, AUE_FUTIMESAT, NULL, 0, 0 },	/* 494 = futimesat */
 	{ AS(linkat_args), (sy_call_t *)linkat, AUE_LINKAT, NULL, 0, 0 },	/* 495 = linkat */
 	{ AS(mkdirat_args), (sy_call_t *)mkdirat, AUE_MKDIRAT, NULL, 0, 0 },	/* 496 = mkdirat */
 	{ AS(mkfifoat_args), (sy_call_t *)mkfifoat, AUE_MKFIFOAT, NULL, 0, 0 },	/* 497 = mkfifoat */
 	{ AS(mknodat_args), (sy_call_t *)mknodat, AUE_MKNODAT, NULL, 0, 0 },	/* 498 = mknodat */
 	{ AS(openat_args), (sy_call_t *)openat, AUE_OPENAT_RWTC, NULL, 0, 0 },	/* 499 = openat */
 	{ AS(readlinkat_args), (sy_call_t *)readlinkat, AUE_READLINKAT, NULL, 0, 0 },	/* 500 = readlinkat */
 	{ AS(renameat_args), (sy_call_t *)renameat, AUE_RENAMEAT, NULL, 0, 0 },	/* 501 = renameat */
 	{ AS(symlinkat_args), (sy_call_t *)symlinkat, AUE_SYMLINKAT, NULL, 0, 0 },	/* 502 = symlinkat */
 	{ AS(unlinkat_args), (sy_call_t *)unlinkat, AUE_UNLINKAT, NULL, 0, 0 },	/* 503 = unlinkat */
 };
Index: head/sys/kern/sys_socket.c
===================================================================
--- head/sys/kern/sys_socket.c	(revision 178887)
+++ head/sys/kern/sys_socket.c	(revision 178888)
@@ -1,285 +1,285 @@
 /*-
  * Copyright (c) 1982, 1986, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)sys_socket.c	8.1 (Berkeley) 6/10/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/sigio.h>
 #include <sys/signal.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/filio.h>			/* XXX */
 #include <sys/sockio.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
 #include <sys/ucred.h>
 
 #include <net/if.h>
 #include <net/route.h>
 
 #include <security/mac/mac_framework.h>
 
 struct fileops	socketops = {
 	.fo_read = soo_read,
 	.fo_write = soo_write,
 	.fo_truncate = soo_truncate,
 	.fo_ioctl = soo_ioctl,
 	.fo_poll = soo_poll,
 	.fo_kqfilter = soo_kqfilter,
 	.fo_stat = soo_stat,
 	.fo_close = soo_close,
 	.fo_flags = DFLAG_PASSABLE
 };
 
 /* ARGSUSED */
 int
 soo_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	struct socket *so = fp->f_data;
 #ifdef MAC
 	int error;
 
 	SOCK_LOCK(so);
 	error = mac_socket_check_receive(active_cred, so);
 	SOCK_UNLOCK(so);
 	if (error)
 		return (error);
 #endif
 	return (soreceive(so, 0, uio, 0, 0, 0));
 }
 
 /* ARGSUSED */
 int
 soo_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	struct socket *so = fp->f_data;
 	int error;
 
 #ifdef MAC
 	SOCK_LOCK(so);
 	error = mac_socket_check_send(active_cred, so);
 	SOCK_UNLOCK(so);
 	if (error)
 		return (error);
 #endif
 	error = sosend(so, 0, uio, 0, 0, 0, uio->uio_td);
 	if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) {
 		PROC_LOCK(uio->uio_td->td_proc);
 		psignal(uio->uio_td->td_proc, SIGPIPE);
 		PROC_UNLOCK(uio->uio_td->td_proc);
 	}
 	return (error);
 }
 
 int
 soo_truncate(struct file *fp, off_t length, struct ucred *active_cred,
     struct thread *td)
 {
 
 	return (EINVAL);
 }
 
 int
 soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
     struct thread *td)
 {
 	struct socket *so = fp->f_data;
 	int error = 0;
 
 	switch (cmd) {
 	case FIONBIO:
 		SOCK_LOCK(so);
 		if (*(int *)data)
 			so->so_state |= SS_NBIO;
 		else
 			so->so_state &= ~SS_NBIO;
 		SOCK_UNLOCK(so);
 		break;
 
 	case FIOASYNC:
 		/*
 		 * XXXRW: This code separately acquires SOCK_LOCK(so) and
 		 * SOCKBUF_LOCK(&so->so_rcv) even though they are the same
 		 * mutex to avoid introducing the assumption that they are
 		 * the same.
 		 */
 		if (*(int *)data) {
 			SOCK_LOCK(so);
 			so->so_state |= SS_ASYNC;
 			SOCK_UNLOCK(so);
 			SOCKBUF_LOCK(&so->so_rcv);
 			so->so_rcv.sb_flags |= SB_ASYNC;
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			SOCKBUF_LOCK(&so->so_snd);
 			so->so_snd.sb_flags |= SB_ASYNC;
 			SOCKBUF_UNLOCK(&so->so_snd);
 		} else {
 			SOCK_LOCK(so);
 			so->so_state &= ~SS_ASYNC;
 			SOCK_UNLOCK(so);
 			SOCKBUF_LOCK(&so->so_rcv);
 			so->so_rcv.sb_flags &= ~SB_ASYNC;
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			SOCKBUF_LOCK(&so->so_snd);
 			so->so_snd.sb_flags &= ~SB_ASYNC;
 			SOCKBUF_UNLOCK(&so->so_snd);
 		}
 		break;
 
 	case FIONREAD:
 		/* Unlocked read. */
 		*(int *)data = so->so_rcv.sb_cc;
 		break;
 
 	case FIOSETOWN:
 		error = fsetown(*(int *)data, &so->so_sigio);
 		break;
 
 	case FIOGETOWN:
 		*(int *)data = fgetown(&so->so_sigio);
 		break;
 
 	case SIOCSPGRP:
 		error = fsetown(-(*(int *)data), &so->so_sigio);
 		break;
 
 	case SIOCGPGRP:
 		*(int *)data = -fgetown(&so->so_sigio);
 		break;
 
 	case SIOCATMARK:
 		/* Unlocked read. */
 		*(int *)data = (so->so_rcv.sb_state & SBS_RCVATMARK) != 0;
 		break;
 	default:
 		/*
 		 * Interface/routing/protocol specific ioctls: interface and
 		 * routing ioctls should have a different entry since a
 		 * socket is unnecessary.
 		 */
 		if (IOCGROUP(cmd) == 'i')
 			error = ifioctl(so, cmd, data, td);
 		else if (IOCGROUP(cmd) == 'r')
-			error = rtioctl(cmd, data);
+			error = rtioctl_fib(cmd, data, so->so_fibnum);
 		else
 			error = ((*so->so_proto->pr_usrreqs->pru_control)
 			    (so, cmd, data, 0, td));
 		break;
 	}
 	return (error);
 }
 
 int
 soo_poll(struct file *fp, int events, struct ucred *active_cred,
     struct thread *td)
 {
 	struct socket *so = fp->f_data;
 #ifdef MAC
 	int error;
 
 	SOCK_LOCK(so);
 	error = mac_socket_check_poll(active_cred, so);
 	SOCK_UNLOCK(so);
 	if (error)
 		return (error);
 #endif
 	return (sopoll(so, events, fp->f_cred, td));
 }
 
 int
 soo_stat(struct file *fp, struct stat *ub, struct ucred *active_cred,
     struct thread *td)
 {
 	struct socket *so = fp->f_data;
 #ifdef MAC
 	int error;
 #endif
 
 	bzero((caddr_t)ub, sizeof (*ub));
 	ub->st_mode = S_IFSOCK;
 #ifdef MAC
 	SOCK_LOCK(so);
 	error = mac_socket_check_stat(active_cred, so);
 	SOCK_UNLOCK(so);
 	if (error)
 		return (error);
 #endif
 	/*
 	 * If SBS_CANTRCVMORE is set, but there's still data left in the
 	 * receive buffer, the socket is still readable.
 	 *
 	 * XXXRW: perhaps should lock socket buffer so st_size result is
 	 * consistent.
 	 */
 	/* Unlocked read. */
 	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0 ||
 	    so->so_rcv.sb_cc != 0)
 		ub->st_mode |= S_IRUSR | S_IRGRP | S_IROTH;
 	if ((so->so_snd.sb_state & SBS_CANTSENDMORE) == 0)
 		ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH;
 	ub->st_size = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
 	ub->st_uid = so->so_cred->cr_uid;
 	ub->st_gid = so->so_cred->cr_gid;
 	return (*so->so_proto->pr_usrreqs->pru_sense)(so, ub);
 }
 
 /*
  * API socket close on file pointer.  We call soclose() to close the socket
  * (including initiating closing protocols).  soclose() will sorele() the
  * file reference but the actual socket will not go away until the socket's
  * ref count hits 0.
  */
 /* ARGSUSED */
 int
 soo_close(struct file *fp, struct thread *td)
 {
 	int error = 0;
 	struct socket *so;
 
 	so = fp->f_data;
 	fp->f_ops = &badfileops;
 	fp->f_data = NULL;
 
 	if (so)
 		error = soclose(so);
 	return (error);
 }
Index: head/sys/kern/syscalls.c
===================================================================
--- head/sys/kern/syscalls.c	(revision 178887)
+++ head/sys/kern/syscalls.c	(revision 178888)
@@ -1,514 +1,514 @@
 /*
  * System call names.
  *
  * DO NOT EDIT-- this file is automatically generated.
  * $FreeBSD$
  * created from FreeBSD: src/sys/kern/syscalls.master,v 1.242 2008/03/31 12:06:55 kib Exp 
  */
 
 const char *syscallnames[] = {
 	"syscall",			/* 0 = syscall */
 	"exit",			/* 1 = exit */
 	"fork",			/* 2 = fork */
 	"read",			/* 3 = read */
 	"write",			/* 4 = write */
 	"open",			/* 5 = open */
 	"close",			/* 6 = close */
 	"wait4",			/* 7 = wait4 */
 	"compat.creat",		/* 8 = old creat */
 	"link",			/* 9 = link */
 	"unlink",			/* 10 = unlink */
 	"obs_execv",			/* 11 = obsolete execv */
 	"chdir",			/* 12 = chdir */
 	"fchdir",			/* 13 = fchdir */
 	"mknod",			/* 14 = mknod */
 	"chmod",			/* 15 = chmod */
 	"chown",			/* 16 = chown */
 	"break",			/* 17 = break */
 	"compat4.getfsstat",		/* 18 = old getfsstat */
 	"compat.lseek",		/* 19 = old lseek */
 	"getpid",			/* 20 = getpid */
 	"mount",			/* 21 = mount */
 	"unmount",			/* 22 = unmount */
 	"setuid",			/* 23 = setuid */
 	"getuid",			/* 24 = getuid */
 	"geteuid",			/* 25 = geteuid */
 	"ptrace",			/* 26 = ptrace */
 	"recvmsg",			/* 27 = recvmsg */
 	"sendmsg",			/* 28 = sendmsg */
 	"recvfrom",			/* 29 = recvfrom */
 	"accept",			/* 30 = accept */
 	"getpeername",			/* 31 = getpeername */
 	"getsockname",			/* 32 = getsockname */
 	"access",			/* 33 = access */
 	"chflags",			/* 34 = chflags */
 	"fchflags",			/* 35 = fchflags */
 	"sync",			/* 36 = sync */
 	"kill",			/* 37 = kill */
 	"compat.stat",		/* 38 = old stat */
 	"getppid",			/* 39 = getppid */
 	"compat.lstat",		/* 40 = old lstat */
 	"dup",			/* 41 = dup */
 	"pipe",			/* 42 = pipe */
 	"getegid",			/* 43 = getegid */
 	"profil",			/* 44 = profil */
 	"ktrace",			/* 45 = ktrace */
 	"compat.sigaction",		/* 46 = old sigaction */
 	"getgid",			/* 47 = getgid */
 	"compat.sigprocmask",		/* 48 = old sigprocmask */
 	"getlogin",			/* 49 = getlogin */
 	"setlogin",			/* 50 = setlogin */
 	"acct",			/* 51 = acct */
 	"compat.sigpending",		/* 52 = old sigpending */
 	"sigaltstack",			/* 53 = sigaltstack */
 	"ioctl",			/* 54 = ioctl */
 	"reboot",			/* 55 = reboot */
 	"revoke",			/* 56 = revoke */
 	"symlink",			/* 57 = symlink */
 	"readlink",			/* 58 = readlink */
 	"execve",			/* 59 = execve */
 	"umask",			/* 60 = umask */
 	"chroot",			/* 61 = chroot */
 	"compat.fstat",		/* 62 = old fstat */
 	"compat.getkerninfo",		/* 63 = old getkerninfo */
 	"compat.getpagesize",		/* 64 = old getpagesize */
 	"msync",			/* 65 = msync */
 	"vfork",			/* 66 = vfork */
 	"obs_vread",			/* 67 = obsolete vread */
 	"obs_vwrite",			/* 68 = obsolete vwrite */
 	"sbrk",			/* 69 = sbrk */
 	"sstk",			/* 70 = sstk */
 	"compat.mmap",		/* 71 = old mmap */
 	"vadvise",			/* 72 = vadvise */
 	"munmap",			/* 73 = munmap */
 	"mprotect",			/* 74 = mprotect */
 	"madvise",			/* 75 = madvise */
 	"obs_vhangup",			/* 76 = obsolete vhangup */
 	"obs_vlimit",			/* 77 = obsolete vlimit */
 	"mincore",			/* 78 = mincore */
 	"getgroups",			/* 79 = getgroups */
 	"setgroups",			/* 80 = setgroups */
 	"getpgrp",			/* 81 = getpgrp */
 	"setpgid",			/* 82 = setpgid */
 	"setitimer",			/* 83 = setitimer */
 	"compat.wait",		/* 84 = old wait */
 	"swapon",			/* 85 = swapon */
 	"getitimer",			/* 86 = getitimer */
 	"compat.gethostname",		/* 87 = old gethostname */
 	"compat.sethostname",		/* 88 = old sethostname */
 	"getdtablesize",			/* 89 = getdtablesize */
 	"dup2",			/* 90 = dup2 */
 	"#91",			/* 91 = getdopt */
 	"fcntl",			/* 92 = fcntl */
 	"select",			/* 93 = select */
 	"#94",			/* 94 = setdopt */
 	"fsync",			/* 95 = fsync */
 	"setpriority",			/* 96 = setpriority */
 	"socket",			/* 97 = socket */
 	"connect",			/* 98 = connect */
 	"compat.accept",		/* 99 = old accept */
 	"getpriority",			/* 100 = getpriority */
 	"compat.send",		/* 101 = old send */
 	"compat.recv",		/* 102 = old recv */
 	"compat.sigreturn",		/* 103 = old sigreturn */
 	"bind",			/* 104 = bind */
 	"setsockopt",			/* 105 = setsockopt */
 	"listen",			/* 106 = listen */
 	"obs_vtimes",			/* 107 = obsolete vtimes */
 	"compat.sigvec",		/* 108 = old sigvec */
 	"compat.sigblock",		/* 109 = old sigblock */
 	"compat.sigsetmask",		/* 110 = old sigsetmask */
 	"compat.sigsuspend",		/* 111 = old sigsuspend */
 	"compat.sigstack",		/* 112 = old sigstack */
 	"compat.recvmsg",		/* 113 = old recvmsg */
 	"compat.sendmsg",		/* 114 = old sendmsg */
 	"obs_vtrace",			/* 115 = obsolete vtrace */
 	"gettimeofday",			/* 116 = gettimeofday */
 	"getrusage",			/* 117 = getrusage */
 	"getsockopt",			/* 118 = getsockopt */
 	"#119",			/* 119 = resuba */
 	"readv",			/* 120 = readv */
 	"writev",			/* 121 = writev */
 	"settimeofday",			/* 122 = settimeofday */
 	"fchown",			/* 123 = fchown */
 	"fchmod",			/* 124 = fchmod */
 	"compat.recvfrom",		/* 125 = old recvfrom */
 	"setreuid",			/* 126 = setreuid */
 	"setregid",			/* 127 = setregid */
 	"rename",			/* 128 = rename */
 	"compat.truncate",		/* 129 = old truncate */
 	"compat.ftruncate",		/* 130 = old ftruncate */
 	"flock",			/* 131 = flock */
 	"mkfifo",			/* 132 = mkfifo */
 	"sendto",			/* 133 = sendto */
 	"shutdown",			/* 134 = shutdown */
 	"socketpair",			/* 135 = socketpair */
 	"mkdir",			/* 136 = mkdir */
 	"rmdir",			/* 137 = rmdir */
 	"utimes",			/* 138 = utimes */
 	"obs_4.2",			/* 139 = obsolete 4.2 sigreturn */
 	"adjtime",			/* 140 = adjtime */
 	"compat.getpeername",		/* 141 = old getpeername */
 	"compat.gethostid",		/* 142 = old gethostid */
 	"compat.sethostid",		/* 143 = old sethostid */
 	"compat.getrlimit",		/* 144 = old getrlimit */
 	"compat.setrlimit",		/* 145 = old setrlimit */
 	"compat.killpg",		/* 146 = old killpg */
 	"setsid",			/* 147 = setsid */
 	"quotactl",			/* 148 = quotactl */
 	"compat.quota",		/* 149 = old quota */
 	"compat.getsockname",		/* 150 = old getsockname */
 	"#151",			/* 151 = sem_lock */
 	"#152",			/* 152 = sem_wakeup */
 	"#153",			/* 153 = asyncdaemon */
 	"nlm_syscall",			/* 154 = nlm_syscall */
 	"nfssvc",			/* 155 = nfssvc */
 	"compat.getdirentries",		/* 156 = old getdirentries */
 	"compat4.statfs",		/* 157 = old statfs */
 	"compat4.fstatfs",		/* 158 = old fstatfs */
 	"#159",			/* 159 = nosys */
 	"lgetfh",			/* 160 = lgetfh */
 	"getfh",			/* 161 = getfh */
 	"getdomainname",			/* 162 = getdomainname */
 	"setdomainname",			/* 163 = setdomainname */
 	"uname",			/* 164 = uname */
 	"sysarch",			/* 165 = sysarch */
 	"rtprio",			/* 166 = rtprio */
 	"#167",			/* 167 = nosys */
 	"#168",			/* 168 = nosys */
 	"semsys",			/* 169 = semsys */
 	"msgsys",			/* 170 = msgsys */
 	"shmsys",			/* 171 = shmsys */
 	"#172",			/* 172 = nosys */
 	"freebsd6_pread",			/* 173 = freebsd6_pread */
 	"freebsd6_pwrite",			/* 174 = freebsd6_pwrite */
-	"#175",			/* 175 = nosys */
+	"setfib",			/* 175 = setfib */
 	"ntp_adjtime",			/* 176 = ntp_adjtime */
 	"#177",			/* 177 = sfork */
 	"#178",			/* 178 = getdescriptor */
 	"#179",			/* 179 = setdescriptor */
 	"#180",			/* 180 = nosys */
 	"setgid",			/* 181 = setgid */
 	"setegid",			/* 182 = setegid */
 	"seteuid",			/* 183 = seteuid */
 	"#184",			/* 184 = lfs_bmapv */
 	"#185",			/* 185 = lfs_markv */
 	"#186",			/* 186 = lfs_segclean */
 	"#187",			/* 187 = lfs_segwait */
 	"stat",			/* 188 = stat */
 	"fstat",			/* 189 = fstat */
 	"lstat",			/* 190 = lstat */
 	"pathconf",			/* 191 = pathconf */
 	"fpathconf",			/* 192 = fpathconf */
 	"#193",			/* 193 = nosys */
 	"getrlimit",			/* 194 = getrlimit */
 	"setrlimit",			/* 195 = setrlimit */
 	"getdirentries",			/* 196 = getdirentries */
 	"freebsd6_mmap",			/* 197 = freebsd6_mmap */
 	"__syscall",			/* 198 = __syscall */
 	"freebsd6_lseek",			/* 199 = freebsd6_lseek */
 	"freebsd6_truncate",			/* 200 = freebsd6_truncate */
 	"freebsd6_ftruncate",			/* 201 = freebsd6_ftruncate */
 	"__sysctl",			/* 202 = __sysctl */
 	"mlock",			/* 203 = mlock */
 	"munlock",			/* 204 = munlock */
 	"undelete",			/* 205 = undelete */
 	"futimes",			/* 206 = futimes */
 	"getpgid",			/* 207 = getpgid */
 	"#208",			/* 208 = newreboot */
 	"poll",			/* 209 = poll */
 	"lkmnosys",			/* 210 = lkmnosys */
 	"lkmnosys",			/* 211 = lkmnosys */
 	"lkmnosys",			/* 212 = lkmnosys */
 	"lkmnosys",			/* 213 = lkmnosys */
 	"lkmnosys",			/* 214 = lkmnosys */
 	"lkmnosys",			/* 215 = lkmnosys */
 	"lkmnosys",			/* 216 = lkmnosys */
 	"lkmnosys",			/* 217 = lkmnosys */
 	"lkmnosys",			/* 218 = lkmnosys */
 	"lkmnosys",			/* 219 = lkmnosys */
 	"__semctl",			/* 220 = __semctl */
 	"semget",			/* 221 = semget */
 	"semop",			/* 222 = semop */
 	"#223",			/* 223 = semconfig */
 	"msgctl",			/* 224 = msgctl */
 	"msgget",			/* 225 = msgget */
 	"msgsnd",			/* 226 = msgsnd */
 	"msgrcv",			/* 227 = msgrcv */
 	"shmat",			/* 228 = shmat */
 	"shmctl",			/* 229 = shmctl */
 	"shmdt",			/* 230 = shmdt */
 	"shmget",			/* 231 = shmget */
 	"clock_gettime",			/* 232 = clock_gettime */
 	"clock_settime",			/* 233 = clock_settime */
 	"clock_getres",			/* 234 = clock_getres */
 	"ktimer_create",			/* 235 = ktimer_create */
 	"ktimer_delete",			/* 236 = ktimer_delete */
 	"ktimer_settime",			/* 237 = ktimer_settime */
 	"ktimer_gettime",			/* 238 = ktimer_gettime */
 	"ktimer_getoverrun",			/* 239 = ktimer_getoverrun */
 	"nanosleep",			/* 240 = nanosleep */
 	"#241",			/* 241 = nosys */
 	"#242",			/* 242 = nosys */
 	"#243",			/* 243 = nosys */
 	"#244",			/* 244 = nosys */
 	"#245",			/* 245 = nosys */
 	"#246",			/* 246 = nosys */
 	"#247",			/* 247 = nosys */
 	"ntp_gettime",			/* 248 = ntp_gettime */
 	"#249",			/* 249 = nosys */
 	"minherit",			/* 250 = minherit */
 	"rfork",			/* 251 = rfork */
 	"openbsd_poll",			/* 252 = openbsd_poll */
 	"issetugid",			/* 253 = issetugid */
 	"lchown",			/* 254 = lchown */
 	"aio_read",			/* 255 = aio_read */
 	"aio_write",			/* 256 = aio_write */
 	"lio_listio",			/* 257 = lio_listio */
 	"#258",			/* 258 = nosys */
 	"#259",			/* 259 = nosys */
 	"#260",			/* 260 = nosys */
 	"#261",			/* 261 = nosys */
 	"#262",			/* 262 = nosys */
 	"#263",			/* 263 = nosys */
 	"#264",			/* 264 = nosys */
 	"#265",			/* 265 = nosys */
 	"#266",			/* 266 = nosys */
 	"#267",			/* 267 = nosys */
 	"#268",			/* 268 = nosys */
 	"#269",			/* 269 = nosys */
 	"#270",			/* 270 = nosys */
 	"#271",			/* 271 = nosys */
 	"getdents",			/* 272 = getdents */
 	"#273",			/* 273 = nosys */
 	"lchmod",			/* 274 = lchmod */
 	"netbsd_lchown",			/* 275 = netbsd_lchown */
 	"lutimes",			/* 276 = lutimes */
 	"netbsd_msync",			/* 277 = netbsd_msync */
 	"nstat",			/* 278 = nstat */
 	"nfstat",			/* 279 = nfstat */
 	"nlstat",			/* 280 = nlstat */
 	"#281",			/* 281 = nosys */
 	"#282",			/* 282 = nosys */
 	"#283",			/* 283 = nosys */
 	"#284",			/* 284 = nosys */
 	"#285",			/* 285 = nosys */
 	"#286",			/* 286 = nosys */
 	"#287",			/* 287 = nosys */
 	"#288",			/* 288 = nosys */
 	"preadv",			/* 289 = preadv */
 	"pwritev",			/* 290 = pwritev */
 	"#291",			/* 291 = nosys */
 	"#292",			/* 292 = nosys */
 	"#293",			/* 293 = nosys */
 	"#294",			/* 294 = nosys */
 	"#295",			/* 295 = nosys */
 	"#296",			/* 296 = nosys */
 	"compat4.fhstatfs",		/* 297 = old fhstatfs */
 	"fhopen",			/* 298 = fhopen */
 	"fhstat",			/* 299 = fhstat */
 	"modnext",			/* 300 = modnext */
 	"modstat",			/* 301 = modstat */
 	"modfnext",			/* 302 = modfnext */
 	"modfind",			/* 303 = modfind */
 	"kldload",			/* 304 = kldload */
 	"kldunload",			/* 305 = kldunload */
 	"kldfind",			/* 306 = kldfind */
 	"kldnext",			/* 307 = kldnext */
 	"kldstat",			/* 308 = kldstat */
 	"kldfirstmod",			/* 309 = kldfirstmod */
 	"getsid",			/* 310 = getsid */
 	"setresuid",			/* 311 = setresuid */
 	"setresgid",			/* 312 = setresgid */
 	"obs_signanosleep",			/* 313 = obsolete signanosleep */
 	"aio_return",			/* 314 = aio_return */
 	"aio_suspend",			/* 315 = aio_suspend */
 	"aio_cancel",			/* 316 = aio_cancel */
 	"aio_error",			/* 317 = aio_error */
 	"oaio_read",			/* 318 = oaio_read */
 	"oaio_write",			/* 319 = oaio_write */
 	"olio_listio",			/* 320 = olio_listio */
 	"yield",			/* 321 = yield */
 	"obs_thr_sleep",			/* 322 = obsolete thr_sleep */
 	"obs_thr_wakeup",			/* 323 = obsolete thr_wakeup */
 	"mlockall",			/* 324 = mlockall */
 	"munlockall",			/* 325 = munlockall */
 	"__getcwd",			/* 326 = __getcwd */
 	"sched_setparam",			/* 327 = sched_setparam */
 	"sched_getparam",			/* 328 = sched_getparam */
 	"sched_setscheduler",			/* 329 = sched_setscheduler */
 	"sched_getscheduler",			/* 330 = sched_getscheduler */
 	"sched_yield",			/* 331 = sched_yield */
 	"sched_get_priority_max",			/* 332 = sched_get_priority_max */
 	"sched_get_priority_min",			/* 333 = sched_get_priority_min */
 	"sched_rr_get_interval",			/* 334 = sched_rr_get_interval */
 	"utrace",			/* 335 = utrace */
 	"compat4.sendfile",		/* 336 = old sendfile */
 	"kldsym",			/* 337 = kldsym */
 	"jail",			/* 338 = jail */
 	"#339",			/* 339 = pioctl */
 	"sigprocmask",			/* 340 = sigprocmask */
 	"sigsuspend",			/* 341 = sigsuspend */
 	"compat4.sigaction",		/* 342 = old sigaction */
 	"sigpending",			/* 343 = sigpending */
 	"compat4.sigreturn",		/* 344 = old sigreturn */
 	"sigtimedwait",			/* 345 = sigtimedwait */
 	"sigwaitinfo",			/* 346 = sigwaitinfo */
 	"__acl_get_file",			/* 347 = __acl_get_file */
 	"__acl_set_file",			/* 348 = __acl_set_file */
 	"__acl_get_fd",			/* 349 = __acl_get_fd */
 	"__acl_set_fd",			/* 350 = __acl_set_fd */
 	"__acl_delete_file",			/* 351 = __acl_delete_file */
 	"__acl_delete_fd",			/* 352 = __acl_delete_fd */
 	"__acl_aclcheck_file",			/* 353 = __acl_aclcheck_file */
 	"__acl_aclcheck_fd",			/* 354 = __acl_aclcheck_fd */
 	"extattrctl",			/* 355 = extattrctl */
 	"extattr_set_file",			/* 356 = extattr_set_file */
 	"extattr_get_file",			/* 357 = extattr_get_file */
 	"extattr_delete_file",			/* 358 = extattr_delete_file */
 	"aio_waitcomplete",			/* 359 = aio_waitcomplete */
 	"getresuid",			/* 360 = getresuid */
 	"getresgid",			/* 361 = getresgid */
 	"kqueue",			/* 362 = kqueue */
 	"kevent",			/* 363 = kevent */
 	"#364",			/* 364 = __cap_get_proc */
 	"#365",			/* 365 = __cap_set_proc */
 	"#366",			/* 366 = __cap_get_fd */
 	"#367",			/* 367 = __cap_get_file */
 	"#368",			/* 368 = __cap_set_fd */
 	"#369",			/* 369 = __cap_set_file */
 	"lkmressys",			/* 370 = lkmressys */
 	"extattr_set_fd",			/* 371 = extattr_set_fd */
 	"extattr_get_fd",			/* 372 = extattr_get_fd */
 	"extattr_delete_fd",			/* 373 = extattr_delete_fd */
 	"__setugid",			/* 374 = __setugid */
 	"nfsclnt",			/* 375 = nfsclnt */
 	"eaccess",			/* 376 = eaccess */
 	"#377",			/* 377 = afs_syscall */
 	"nmount",			/* 378 = nmount */
 	"#379",			/* 379 = kse_exit */
 	"#380",			/* 380 = kse_wakeup */
 	"#381",			/* 381 = kse_create */
 	"#382",			/* 382 = kse_thr_interrupt */
 	"#383",			/* 383 = kse_release */
 	"__mac_get_proc",			/* 384 = __mac_get_proc */
 	"__mac_set_proc",			/* 385 = __mac_set_proc */
 	"__mac_get_fd",			/* 386 = __mac_get_fd */
 	"__mac_get_file",			/* 387 = __mac_get_file */
 	"__mac_set_fd",			/* 388 = __mac_set_fd */
 	"__mac_set_file",			/* 389 = __mac_set_file */
 	"kenv",			/* 390 = kenv */
 	"lchflags",			/* 391 = lchflags */
 	"uuidgen",			/* 392 = uuidgen */
 	"sendfile",			/* 393 = sendfile */
 	"mac_syscall",			/* 394 = mac_syscall */
 	"getfsstat",			/* 395 = getfsstat */
 	"statfs",			/* 396 = statfs */
 	"fstatfs",			/* 397 = fstatfs */
 	"fhstatfs",			/* 398 = fhstatfs */
 	"#399",			/* 399 = nosys */
 	"ksem_close",			/* 400 = ksem_close */
 	"ksem_post",			/* 401 = ksem_post */
 	"ksem_wait",			/* 402 = ksem_wait */
 	"ksem_trywait",			/* 403 = ksem_trywait */
 	"ksem_init",			/* 404 = ksem_init */
 	"ksem_open",			/* 405 = ksem_open */
 	"ksem_unlink",			/* 406 = ksem_unlink */
 	"ksem_getvalue",			/* 407 = ksem_getvalue */
 	"ksem_destroy",			/* 408 = ksem_destroy */
 	"__mac_get_pid",			/* 409 = __mac_get_pid */
 	"__mac_get_link",			/* 410 = __mac_get_link */
 	"__mac_set_link",			/* 411 = __mac_set_link */
 	"extattr_set_link",			/* 412 = extattr_set_link */
 	"extattr_get_link",			/* 413 = extattr_get_link */
 	"extattr_delete_link",			/* 414 = extattr_delete_link */
 	"__mac_execve",			/* 415 = __mac_execve */
 	"sigaction",			/* 416 = sigaction */
 	"sigreturn",			/* 417 = sigreturn */
 	"#418",			/* 418 = __xstat */
 	"#419",			/* 419 = __xfstat */
 	"#420",			/* 420 = __xlstat */
 	"getcontext",			/* 421 = getcontext */
 	"setcontext",			/* 422 = setcontext */
 	"swapcontext",			/* 423 = swapcontext */
 	"swapoff",			/* 424 = swapoff */
 	"__acl_get_link",			/* 425 = __acl_get_link */
 	"__acl_set_link",			/* 426 = __acl_set_link */
 	"__acl_delete_link",			/* 427 = __acl_delete_link */
 	"__acl_aclcheck_link",			/* 428 = __acl_aclcheck_link */
 	"sigwait",			/* 429 = sigwait */
 	"thr_create",			/* 430 = thr_create */
 	"thr_exit",			/* 431 = thr_exit */
 	"thr_self",			/* 432 = thr_self */
 	"thr_kill",			/* 433 = thr_kill */
 	"_umtx_lock",			/* 434 = _umtx_lock */
 	"_umtx_unlock",			/* 435 = _umtx_unlock */
 	"jail_attach",			/* 436 = jail_attach */
 	"extattr_list_fd",			/* 437 = extattr_list_fd */
 	"extattr_list_file",			/* 438 = extattr_list_file */
 	"extattr_list_link",			/* 439 = extattr_list_link */
 	"#440",			/* 440 = kse_switchin */
 	"ksem_timedwait",			/* 441 = ksem_timedwait */
 	"thr_suspend",			/* 442 = thr_suspend */
 	"thr_wake",			/* 443 = thr_wake */
 	"kldunloadf",			/* 444 = kldunloadf */
 	"audit",			/* 445 = audit */
 	"auditon",			/* 446 = auditon */
 	"getauid",			/* 447 = getauid */
 	"setauid",			/* 448 = setauid */
 	"getaudit",			/* 449 = getaudit */
 	"setaudit",			/* 450 = setaudit */
 	"getaudit_addr",			/* 451 = getaudit_addr */
 	"setaudit_addr",			/* 452 = setaudit_addr */
 	"auditctl",			/* 453 = auditctl */
 	"_umtx_op",			/* 454 = _umtx_op */
 	"thr_new",			/* 455 = thr_new */
 	"sigqueue",			/* 456 = sigqueue */
 	"kmq_open",			/* 457 = kmq_open */
 	"kmq_setattr",			/* 458 = kmq_setattr */
 	"kmq_timedreceive",			/* 459 = kmq_timedreceive */
 	"kmq_timedsend",			/* 460 = kmq_timedsend */
 	"kmq_notify",			/* 461 = kmq_notify */
 	"kmq_unlink",			/* 462 = kmq_unlink */
 	"abort2",			/* 463 = abort2 */
 	"thr_set_name",			/* 464 = thr_set_name */
 	"aio_fsync",			/* 465 = aio_fsync */
 	"rtprio_thread",			/* 466 = rtprio_thread */
 	"#467",			/* 467 = nosys */
 	"#468",			/* 468 = nosys */
 	"#469",			/* 469 = __getpath_fromfd */
 	"#470",			/* 470 = __getpath_fromaddr */
 	"sctp_peeloff",			/* 471 = sctp_peeloff */
 	"sctp_generic_sendmsg",			/* 472 = sctp_generic_sendmsg */
 	"sctp_generic_sendmsg_iov",			/* 473 = sctp_generic_sendmsg_iov */
 	"sctp_generic_recvmsg",			/* 474 = sctp_generic_recvmsg */
 	"pread",			/* 475 = pread */
 	"pwrite",			/* 476 = pwrite */
 	"mmap",			/* 477 = mmap */
 	"lseek",			/* 478 = lseek */
 	"truncate",			/* 479 = truncate */
 	"ftruncate",			/* 480 = ftruncate */
 	"thr_kill2",			/* 481 = thr_kill2 */
 	"shm_open",			/* 482 = shm_open */
 	"shm_unlink",			/* 483 = shm_unlink */
 	"cpuset",			/* 484 = cpuset */
 	"cpuset_setid",			/* 485 = cpuset_setid */
 	"cpuset_getid",			/* 486 = cpuset_getid */
 	"cpuset_getaffinity",			/* 487 = cpuset_getaffinity */
 	"cpuset_setaffinity",			/* 488 = cpuset_setaffinity */
 	"faccessat",			/* 489 = faccessat */
 	"fchmodat",			/* 490 = fchmodat */
 	"fchownat",			/* 491 = fchownat */
 	"fexecve",			/* 492 = fexecve */
 	"fstatat",			/* 493 = fstatat */
 	"futimesat",			/* 494 = futimesat */
 	"linkat",			/* 495 = linkat */
 	"mkdirat",			/* 496 = mkdirat */
 	"mkfifoat",			/* 497 = mkfifoat */
 	"mknodat",			/* 498 = mknodat */
 	"openat",			/* 499 = openat */
 	"readlinkat",			/* 500 = readlinkat */
 	"renameat",			/* 501 = renameat */
 	"symlinkat",			/* 502 = symlinkat */
 	"unlinkat",			/* 503 = unlinkat */
 };
Index: head/sys/kern/syscalls.master
===================================================================
--- head/sys/kern/syscalls.master	(revision 178887)
+++ head/sys/kern/syscalls.master	(revision 178888)
@@ -1,890 +1,890 @@
  $FreeBSD$
 ;	from: @(#)syscalls.master	8.2 (Berkeley) 1/13/94
 ;
 ; System call name/number master file.
 ; Processed to create init_sysent.c, syscalls.c and syscall.h.
 
 ; Columns: number audit type name alt{name,tag,rtyp}/comments
 ;	number	system call number, must be in order
 ;	audit	the audit event associated with the system call
 ;		A value of AUE_NULL means no auditing, but it also means that
 ;		there is no audit event for the call at this time. For the
 ;		case where the event exists, but we don't want auditing, the
 ;		event should be #defined to AUE_NULL in audit_kevents.h.
 ;	type	one of STD, OBSOL, UNIMPL, COMPAT, CPT_NOA, LIBCOMPAT,
 ;		NODEF, NOARGS, NOPROTO, NOIMPL, NOSTD, COMPAT4
 ;	name	pseudo-prototype of syscall routine
 ;		If one of the following alts is different, then all appear:
 ;	altname	name of system call if different
 ;	alttag	name of args struct tag if different from [o]`name'"_args"
 ;	altrtyp	return type if not int (bogus - syscalls always return int)
 ;		for UNIMPL/OBSOL, name continues with comments
 
 ; types:
 ;	STD	always included
 ;	COMPAT	included on COMPAT #ifdef
 ;	COMPAT4	included on COMPAT4 #ifdef (FreeBSD 4 compat)
 ;	LIBCOMPAT included on COMPAT #ifdef, and placed in syscall.h
 ;	OBSOL	obsolete, not included in system, only specifies name
 ;	UNIMPL	not implemented, placeholder only
 ;	NOSTD	implemented but as a lkm that can be statically
 ;		compiled in; sysent entry will be filled with lkmsys
 ;		so the SYSCALL_MODULE macro works
 ;
 ; Please copy any additions and changes to the following compatibility tables:
 ; sys/compat/freebsd32/syscalls.master
 
 ; #ifdef's, etc. may be included, and are copied to the output files.
 
 #include <sys/param.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 
 ; Reserved/unimplemented system calls in the range 0-150 inclusive
 ; are reserved for use in future Berkeley releases.
 ; Additional system calls implemented in vendor and other
 ; redistributions should be placed in the reserved range at the end
 ; of the current calls.
 
 0	AUE_NULL	STD	{ int nosys(void); } syscall nosys_args int
 1	AUE_EXIT	STD	{ void sys_exit(int rval); } exit \
 				    sys_exit_args void
 2	AUE_FORK	STD	{ int fork(void); }
 3	AUE_NULL	STD	{ ssize_t read(int fd, void *buf, \
 				    size_t nbyte); }
 4	AUE_NULL	STD	{ ssize_t write(int fd, const void *buf, \
 				    size_t nbyte); }
 5	AUE_OPEN_RWTC	STD	{ int open(char *path, int flags, int mode); }
 ; XXX should be		{ int open(const char *path, int flags, ...); }
 ; but we're not ready for `const' or varargs.
 ; XXX man page says `mode_t mode'.
 6	AUE_CLOSE	STD	{ int close(int fd); }
 7	AUE_WAIT4	STD	{ int wait4(int pid, int *status, \
 				    int options, struct rusage *rusage); } \
 				    wait4 wait_args int
 8	AUE_CREAT	COMPAT	{ int creat(char *path, int mode); }
 9	AUE_LINK	STD	{ int link(char *path, char *link); }
 10	AUE_UNLINK	STD	{ int unlink(char *path); }
 11	AUE_NULL	OBSOL	execv
 12	AUE_CHDIR	STD	{ int chdir(char *path); }
 13	AUE_FCHDIR	STD	{ int fchdir(int fd); }
 14	AUE_MKNOD	STD	{ int mknod(char *path, int mode, int dev); }
 15	AUE_CHMOD	STD	{ int chmod(char *path, int mode); }
 16	AUE_CHOWN	STD	{ int chown(char *path, int uid, int gid); }
 17	AUE_NULL	STD	{ int obreak(char *nsize); } break \
 				    obreak_args int
 18	AUE_GETFSSTAT	COMPAT4	{ int getfsstat(struct ostatfs *buf, \
 				    long bufsize, int flags); }
 19	AUE_LSEEK	COMPAT	{ long lseek(int fd, long offset, \
 				    int whence); }
 20	AUE_GETPID	STD	{ pid_t getpid(void); }
 21	AUE_MOUNT	STD	{ int mount(char *type, char *path, \
 				    int flags, caddr_t data); }
 ; XXX `path' should have type `const char *' but we're not ready for that.
 22	AUE_UMOUNT	STD	{ int unmount(char *path, int flags); }
 23	AUE_SETUID	STD	{ int setuid(uid_t uid); }
 24	AUE_GETUID	STD	{ uid_t getuid(void); }
 25	AUE_GETEUID	STD	{ uid_t geteuid(void); }
 26	AUE_PTRACE	STD	{ int ptrace(int req, pid_t pid, \
 				    caddr_t addr, int data); }
 27	AUE_RECVMSG	STD	{ int recvmsg(int s, struct msghdr *msg, \
 				    int flags); }
 28	AUE_SENDMSG	STD	{ int sendmsg(int s, struct msghdr *msg, \
 				    int flags); }
 29	AUE_RECVFROM	STD	{ int recvfrom(int s, caddr_t buf, \
 				    size_t len, int flags, \
 				    struct sockaddr * __restrict from, \
 				    __socklen_t * __restrict fromlenaddr); }
 30	AUE_ACCEPT	STD	{ int accept(int s, \
 				    struct sockaddr * __restrict name, \
 				    __socklen_t * __restrict anamelen); }
 31	AUE_GETPEERNAME	STD	{ int getpeername(int fdes, \
 				    struct sockaddr * __restrict asa, \
 				    __socklen_t * __restrict alen); }
 32	AUE_GETSOCKNAME	STD	{ int getsockname(int fdes, \
 				    struct sockaddr * __restrict asa, \
 				    __socklen_t * __restrict alen); }
 33	AUE_ACCESS	STD	{ int access(char *path, int flags); }
 34	AUE_CHFLAGS	STD	{ int chflags(char *path, int flags); }
 35	AUE_FCHFLAGS	STD	{ int fchflags(int fd, int flags); }
 36	AUE_SYNC	STD	{ int sync(void); }
 37	AUE_KILL	STD	{ int kill(int pid, int signum); }
 38	AUE_STAT	COMPAT	{ int stat(char *path, struct ostat *ub); }
 39	AUE_GETPPID	STD	{ pid_t getppid(void); }
 40	AUE_LSTAT	COMPAT	{ int lstat(char *path, struct ostat *ub); }
 41	AUE_DUP		STD	{ int dup(u_int fd); }
 42	AUE_PIPE	STD	{ int pipe(void); }
 43	AUE_GETEGID	STD	{ gid_t getegid(void); }
 44	AUE_PROFILE	STD	{ int profil(caddr_t samples, size_t size, \
 				    size_t offset, u_int scale); }
 45	AUE_KTRACE	STD	{ int ktrace(const char *fname, int ops, \
 				    int facs, int pid); }
 46	AUE_SIGACTION	COMPAT	{ int sigaction(int signum, \
 				    struct osigaction *nsa, \
 				    struct osigaction *osa); }
 47	AUE_GETGID	STD	{ gid_t getgid(void); }
 48	AUE_SIGPROCMASK	COMPAT	{ int sigprocmask(int how, osigset_t mask); }
 ; XXX note nonstandard (bogus) calling convention - the libc stub passes
 ; us the mask, not a pointer to it, and we return the old mask as the
 ; (int) return value.
 49	AUE_GETLOGIN	STD	{ int getlogin(char *namebuf, u_int \
 				    namelen); }
 50	AUE_SETLOGIN	STD	{ int setlogin(char *namebuf); }
 51	AUE_ACCT	STD	{ int acct(char *path); }
 52	AUE_SIGPENDING	COMPAT	{ int sigpending(void); }
 53	AUE_SIGALTSTACK	STD	{ int sigaltstack(stack_t *ss, \
 				    stack_t *oss); }
 54	AUE_IOCTL	STD	{ int ioctl(int fd, u_long com, \
 				    caddr_t data); }
 55	AUE_REBOOT	STD	{ int reboot(int opt); }
 56	AUE_REVOKE	STD	{ int revoke(char *path); }
 57	AUE_SYMLINK	STD	{ int symlink(char *path, char *link); }
 58	AUE_READLINK	STD	{ ssize_t readlink(char *path, char *buf, \
 				    size_t count); }
 59	AUE_EXECVE	STD	{ int execve(char *fname, char **argv, \
 				    char **envv); }
 60	AUE_UMASK	STD	{ int umask(int newmask); } umask umask_args \
 				    int
 61	AUE_CHROOT	STD	{ int chroot(char *path); }
 62	AUE_FSTAT	COMPAT	{ int fstat(int fd, struct ostat *sb); }
 63	AUE_NULL	COMPAT	{ int getkerninfo(int op, char *where, \
 				    size_t *size, int arg); } getkerninfo \
 				    getkerninfo_args int
 64	AUE_NULL	COMPAT	{ int getpagesize(void); } getpagesize \
 				    getpagesize_args int
 65	AUE_MSYNC	STD	{ int msync(void *addr, size_t len, \
 				    int flags); }
 66	AUE_VFORK	STD	{ int vfork(void); }
 67	AUE_NULL	OBSOL	vread
 68	AUE_NULL	OBSOL	vwrite
 69	AUE_SBRK	STD	{ int sbrk(int incr); }
 70	AUE_SSTK	STD	{ int sstk(int incr); }
 71	AUE_MMAP	COMPAT	{ int mmap(void *addr, int len, int prot, \
 				    int flags, int fd, long pos); }
 72	AUE_O_VADVISE	STD	{ int ovadvise(int anom); } vadvise \
 				    ovadvise_args int
 73	AUE_MUNMAP	STD	{ int munmap(void *addr, size_t len); }
 74	AUE_MPROTECT	STD	{ int mprotect(const void *addr, size_t len, \
 				    int prot); }
 75	AUE_MADVISE	STD	{ int madvise(void *addr, size_t len, \
 				    int behav); }
 76	AUE_NULL	OBSOL	vhangup
 77	AUE_NULL	OBSOL	vlimit
 78	AUE_MINCORE	STD	{ int mincore(const void *addr, size_t len, \
 				    char *vec); }
 79	AUE_GETGROUPS	STD	{ int getgroups(u_int gidsetsize, \
 				    gid_t *gidset); }
 80	AUE_SETGROUPS	STD	{ int setgroups(u_int gidsetsize, \
 				    gid_t *gidset); }
 81	AUE_GETPGRP	STD	{ int getpgrp(void); }
 82	AUE_SETPGRP	STD	{ int setpgid(int pid, int pgid); }
 83	AUE_SETITIMER	STD	{ int setitimer(u_int which, struct \
 				    itimerval *itv, struct itimerval *oitv); }
 84	AUE_WAIT4	COMPAT	{ int wait(void); }
 85	AUE_SWAPON	STD	{ int swapon(char *name); }
 86	AUE_GETITIMER	STD	{ int getitimer(u_int which, \
 				    struct itimerval *itv); }
 87	AUE_SYSCTL	COMPAT	{ int gethostname(char *hostname, \
 				    u_int len); } gethostname \
 				    gethostname_args int
 88	AUE_SYSCTL	COMPAT	{ int sethostname(char *hostname, \
 				    u_int len); } sethostname \
 				    sethostname_args int
 89	AUE_GETDTABLESIZE	STD	{ int getdtablesize(void); }
 90	AUE_DUP2	STD	{ int dup2(u_int from, u_int to); }
 91	AUE_NULL	UNIMPL	getdopt
 92	AUE_FCNTL	STD	{ int fcntl(int fd, int cmd, long arg); }
 ; XXX should be	{ int fcntl(int fd, int cmd, ...); }
 ; but we're not ready for varargs.
 93	AUE_SELECT	STD	{ int select(int nd, fd_set *in, fd_set *ou, \
 				    fd_set *ex, struct timeval *tv); }
 94	AUE_NULL	UNIMPL	setdopt
 95	AUE_FSYNC	STD	{ int fsync(int fd); }
 96	AUE_SETPRIORITY	STD	{ int setpriority(int which, int who, \
 				    int prio); }
 97	AUE_SOCKET	STD	{ int socket(int domain, int type, \
 				    int protocol); }
 98	AUE_CONNECT	STD	{ int connect(int s, caddr_t name, \
 				    int namelen); }
 99	AUE_ACCEPT	CPT_NOA	{ int accept(int s, caddr_t name, \
 				    int *anamelen); } accept accept_args int
 100	AUE_GETPRIORITY	STD	{ int getpriority(int which, int who); }
 101	AUE_SEND	COMPAT	{ int send(int s, caddr_t buf, int len, \
 				    int flags); }
 102	AUE_RECV	COMPAT	{ int recv(int s, caddr_t buf, int len, \
 				    int flags); }
 103	AUE_SIGRETURN	COMPAT	{ int sigreturn( \
 				    struct osigcontext *sigcntxp); }
 104	AUE_BIND	STD	{ int bind(int s, caddr_t name, \
 				    int namelen); }
 105	AUE_SETSOCKOPT	STD	{ int setsockopt(int s, int level, int name, \
 				    caddr_t val, int valsize); }
 106	AUE_LISTEN	STD	{ int listen(int s, int backlog); }
 107	AUE_NULL	OBSOL	vtimes
 108	AUE_NULL	COMPAT	{ int sigvec(int signum, struct sigvec *nsv, \
 				    struct sigvec *osv); }
 109	AUE_NULL	COMPAT	{ int sigblock(int mask); }
 110	AUE_NULL	COMPAT	{ int sigsetmask(int mask); }
 111	AUE_NULL	COMPAT	{ int sigsuspend(osigset_t mask); }
 ; XXX note nonstandard (bogus) calling convention - the libc stub passes
 ; us the mask, not a pointer to it.
 112	AUE_NULL	COMPAT	{ int sigstack(struct sigstack *nss, \
 				    struct sigstack *oss); }
 113	AUE_RECVMSG	COMPAT	{ int recvmsg(int s, struct omsghdr *msg, \
 				    int flags); }
 114	AUE_SENDMSG	COMPAT	{ int sendmsg(int s, caddr_t msg, \
 				    int flags); }
 115	AUE_NULL	OBSOL	vtrace
 116	AUE_GETTIMEOFDAY	STD	{ int gettimeofday(struct timeval *tp, \
 				    struct timezone *tzp); }
 117	AUE_GETRUSAGE	STD	{ int getrusage(int who, \
 				    struct rusage *rusage); }
 118	AUE_GETSOCKOPT	STD	{ int getsockopt(int s, int level, int name, \
 				    caddr_t val, int *avalsize); }
 119	AUE_NULL	UNIMPL	resuba (BSD/OS 2.x)
 120	AUE_READV	STD	{ int readv(int fd, struct iovec *iovp, \
 				    u_int iovcnt); }
 121	AUE_WRITEV	STD	{ int writev(int fd, struct iovec *iovp, \
 				    u_int iovcnt); }
 122	AUE_SETTIMEOFDAY	STD	{ int settimeofday(struct timeval *tv, \
 				    struct timezone *tzp); }
 123	AUE_FCHOWN	STD	{ int fchown(int fd, int uid, int gid); }
 124	AUE_FCHMOD	STD	{ int fchmod(int fd, int mode); }
 125	AUE_RECVFROM	CPT_NOA	{ int recvfrom(int s, caddr_t buf, \
 				    size_t len, int flags, caddr_t from, int \
 				    *fromlenaddr); } recvfrom recvfrom_args \
 				    int
 126	AUE_SETREUID	STD	{ int setreuid(int ruid, int euid); }
 127	AUE_SETREGID	STD	{ int setregid(int rgid, int egid); }
 128	AUE_RENAME	STD	{ int rename(char *from, char *to); }
 129	AUE_TRUNCATE	COMPAT	{ int truncate(char *path, long length); }
 130	AUE_FTRUNCATE	COMPAT	{ int ftruncate(int fd, long length); }
 131	AUE_FLOCK	STD	{ int flock(int fd, int how); }
 132	AUE_MKFIFO	STD	{ int mkfifo(char *path, int mode); }
 133	AUE_SENDTO	STD	{ int sendto(int s, caddr_t buf, size_t len, \
 				    int flags, caddr_t to, int tolen); }
 134	AUE_SHUTDOWN	STD	{ int shutdown(int s, int how); }
 135	AUE_SOCKETPAIR	STD	{ int socketpair(int domain, int type, \
 				    int protocol, int *rsv); }
 136	AUE_MKDIR	STD	{ int mkdir(char *path, int mode); }
 137	AUE_RMDIR	STD	{ int rmdir(char *path); }
 138	AUE_UTIMES	STD	{ int utimes(char *path, \
 				    struct timeval *tptr); }
 139	AUE_NULL	OBSOL	4.2 sigreturn
 140	AUE_ADJTIME	STD	{ int adjtime(struct timeval *delta, \
 				    struct timeval *olddelta); }
 141	AUE_GETPEERNAME	COMPAT	{ int getpeername(int fdes, caddr_t asa, \
 				    int *alen); }
 142	AUE_SYSCTL	COMPAT	{ long gethostid(void); }
 143	AUE_SYSCTL	COMPAT	{ int sethostid(long hostid); }
 144	AUE_GETRLIMIT	COMPAT	{ int getrlimit(u_int which, struct \
 				    orlimit *rlp); }
 145	AUE_SETRLIMIT	COMPAT	{ int setrlimit(u_int which, \
 				    struct orlimit *rlp); }
 146	AUE_KILLPG	COMPAT	{ int killpg(int pgid, int signum); }
 147	AUE_SETSID	STD	{ int setsid(void); }
 148	AUE_QUOTACTL	STD	{ int quotactl(char *path, int cmd, int uid, \
 				    caddr_t arg); }
 149	AUE_O_QUOTA	COMPAT	{ int quota(void); }
 150	AUE_GETSOCKNAME	CPT_NOA	{ int getsockname(int fdec, \
 				    caddr_t asa, int *alen); } getsockname \
 				    getsockname_args int
 
 ; Syscalls 151-180 inclusive are reserved for vendor-specific
 ; system calls.  (This includes various calls added for compatibility
 ; with other Unix variants.)
 ; Some of these calls are now supported by BSD...
 151	AUE_NULL	UNIMPL	sem_lock (BSD/OS 2.x)
 152	AUE_NULL	UNIMPL	sem_wakeup (BSD/OS 2.x)
 153	AUE_NULL	UNIMPL	asyncdaemon (BSD/OS 2.x)
 ; 154 is initialised by the NLM code, if present.
 154	AUE_NULL	NOSTD	{ int nlm_syscall(int debug_level, int grace_period, int addr_count, char **addrs); }
 ; 155 is initialized by the NFS code, if present.
 155	AUE_NFS_SVC	NOSTD	{ int nfssvc(int flag, caddr_t argp); }
 156	AUE_GETDIRENTRIES	COMPAT	{ int getdirentries(int fd, char *buf, \
 				    u_int count, long *basep); }
 157	AUE_STATFS	COMPAT4	{ int statfs(char *path, \
 				    struct ostatfs *buf); }
 158	AUE_FSTATFS	COMPAT4	{ int fstatfs(int fd, \
 				    struct ostatfs *buf); }
 159	AUE_NULL	UNIMPL	nosys
 160	AUE_LGETFH	STD	{ int lgetfh(char *fname, \
 				    struct fhandle *fhp); }
 161	AUE_NFS_GETFH	STD	{ int getfh(char *fname, \
 				    struct fhandle *fhp); }
 162	AUE_SYSCTL	STD	{ int getdomainname(char *domainname, \
 				    int len); }
 163	AUE_SYSCTL	STD	{ int setdomainname(char *domainname, \
 				    int len); }
 164	AUE_NULL	STD	{ int uname(struct utsname *name); }
 165	AUE_SYSARCH	STD	{ int sysarch(int op, char *parms); }
 166	AUE_RTPRIO	STD	{ int rtprio(int function, pid_t pid, \
 				    struct rtprio *rtp); }
 167	AUE_NULL	UNIMPL	nosys
 168	AUE_NULL	UNIMPL	nosys
 ; 169 is initialized by the SYSVSEM code if present or loaded
 169	AUE_SEMSYS	NOSTD	{ int semsys(int which, int a2, int a3, \
 				    int a4, int a5); }
 ; XXX should be	{ int semsys(int which, ...); }
 ; 170 is initialized by the SYSVMSG code if present or loaded
 170	AUE_MSGSYS	NOSTD	{ int msgsys(int which, int a2, int a3, \
 				    int a4, int a5, int a6); }
 ; XXX should be	{ int msgsys(int which, ...); }
 ; 171 is initialized by the SYSVSHM code if present or loaded
 171	AUE_SHMSYS	NOSTD	{ int shmsys(int which, int a2, int a3, \
 				    int a4); }
 ; XXX should be	{ int shmsys(int which, ...); }
 172	AUE_NULL	UNIMPL	nosys
 173	AUE_PREAD	STD	{ ssize_t freebsd6_pread(int fd, void *buf, \
 				    size_t nbyte, int pad, off_t offset); }
 174	AUE_PWRITE	STD	{ ssize_t freebsd6_pwrite(int fd, \
 				    const void *buf, \
 				    size_t nbyte, int pad, off_t offset); }
-175	AUE_NULL	UNIMPL	nosys
+175	AUE_NULL	STD	{ int setfib(int fibnum); }
 176	AUE_NTP_ADJTIME	STD	{ int ntp_adjtime(struct timex *tp); }
 177	AUE_NULL	UNIMPL	sfork (BSD/OS 2.x)
 178	AUE_NULL	UNIMPL	getdescriptor (BSD/OS 2.x)
 179	AUE_NULL	UNIMPL	setdescriptor (BSD/OS 2.x)
 180	AUE_NULL	UNIMPL	nosys
 
 ; Syscalls 181-199 are used by/reserved for BSD
 181	AUE_SETGID	STD	{ int setgid(gid_t gid); }
 182	AUE_SETEGID	STD	{ int setegid(gid_t egid); }
 183	AUE_SETEUID	STD	{ int seteuid(uid_t euid); }
 184	AUE_NULL	UNIMPL	lfs_bmapv
 185	AUE_NULL	UNIMPL	lfs_markv
 186	AUE_NULL	UNIMPL	lfs_segclean
 187	AUE_NULL	UNIMPL	lfs_segwait
 188	AUE_STAT	STD	{ int stat(char *path, struct stat *ub); }
 189	AUE_FSTAT	STD	{ int fstat(int fd, struct stat *sb); }
 190	AUE_LSTAT	STD	{ int lstat(char *path, struct stat *ub); }
 191	AUE_PATHCONF	STD	{ int pathconf(char *path, int name); }
 192	AUE_FPATHCONF	STD	{ int fpathconf(int fd, int name); }
 193	AUE_NULL	UNIMPL	nosys
 194	AUE_GETRLIMIT	STD	{ int getrlimit(u_int which, \
 				    struct rlimit *rlp); } getrlimit \
 				    __getrlimit_args int
 195	AUE_SETRLIMIT	STD	{ int setrlimit(u_int which, \
 				    struct rlimit *rlp); } setrlimit \
 				    __setrlimit_args int
 196	AUE_GETDIRENTRIES	STD	{ int getdirentries(int fd, char *buf, \
 				    u_int count, long *basep); }
 197	AUE_MMAP	STD	{ caddr_t freebsd6_mmap(caddr_t addr, \
 				    size_t len, int prot, int flags, int fd, \
 				    int pad, off_t pos); }
 198	AUE_NULL	STD	{ int nosys(void); } __syscall \
 				    __syscall_args int
 199	AUE_LSEEK	STD	{ off_t freebsd6_lseek(int fd, int pad, \
 				    off_t offset, int whence); }
 200	AUE_TRUNCATE	STD	{ int freebsd6_truncate(char *path, int pad, \
 				    off_t length); }
 201	AUE_FTRUNCATE	STD	{ int freebsd6_ftruncate(int fd, int pad, \
 				    off_t length); }
 202	AUE_SYSCTL	STD	{ int __sysctl(int *name, u_int namelen, \
 				    void *old, size_t *oldlenp, void *new, \
 				    size_t newlen); } __sysctl sysctl_args int
 203	AUE_MLOCK	STD	{ int mlock(const void *addr, size_t len); }
 204	AUE_MUNLOCK	STD	{ int munlock(const void *addr, size_t len); }
 205	AUE_UNDELETE	STD	{ int undelete(char *path); }
 206	AUE_FUTIMES	STD	{ int futimes(int fd, struct timeval *tptr); }
 207	AUE_GETPGID	STD	{ int getpgid(pid_t pid); }
 208	AUE_NULL	UNIMPL	newreboot (NetBSD)
 209	AUE_POLL	STD	{ int poll(struct pollfd *fds, u_int nfds, \
 				    int timeout); }
 
 ;
 ; The following are reserved for loadable syscalls
 ;
 210	AUE_NULL	NODEF	lkmnosys lkmnosys nosys_args int
 211	AUE_NULL	NODEF	lkmnosys lkmnosys nosys_args int
 212	AUE_NULL	NODEF	lkmnosys lkmnosys nosys_args int
 213	AUE_NULL	NODEF	lkmnosys lkmnosys nosys_args int
 214	AUE_NULL	NODEF	lkmnosys lkmnosys nosys_args int
 215	AUE_NULL	NODEF	lkmnosys lkmnosys nosys_args int
 216	AUE_NULL	NODEF	lkmnosys lkmnosys nosys_args int
 217	AUE_NULL	NODEF	lkmnosys lkmnosys nosys_args int
 218	AUE_NULL	NODEF	lkmnosys lkmnosys nosys_args int
 219	AUE_NULL	NODEF	lkmnosys lkmnosys nosys_args int
 
 ;
 ; The following were introduced with NetBSD/4.4Lite-2
 220	AUE_SEMCTL	NOSTD	{ int __semctl(int semid, int semnum, \
 				    int cmd, union semun *arg); }
 221	AUE_SEMGET	NOSTD	{ int semget(key_t key, int nsems, \
 				    int semflg); }
 222	AUE_SEMOP	NOSTD	{ int semop(int semid, struct sembuf *sops, \
 				    size_t nsops); }
 223	AUE_NULL	UNIMPL	semconfig
 224	AUE_MSGCTL	NOSTD	{ int msgctl(int msqid, int cmd, \
 				    struct msqid_ds *buf); }
 225	AUE_MSGGET	NOSTD	{ int msgget(key_t key, int msgflg); }
 226	AUE_MSGSND	NOSTD	{ int msgsnd(int msqid, const void *msgp, \
 				    size_t msgsz, int msgflg); }
 227	AUE_MSGRCV	NOSTD	{ int msgrcv(int msqid, void *msgp, \
 				    size_t msgsz, long msgtyp, int msgflg); }
 228	AUE_SHMAT	NOSTD	{ int shmat(int shmid, const void *shmaddr, \
 				    int shmflg); }
 229	AUE_SHMCTL	NOSTD	{ int shmctl(int shmid, int cmd, \
 				    struct shmid_ds *buf); }
 230	AUE_SHMDT	NOSTD	{ int shmdt(const void *shmaddr); }
 231	AUE_SHMGET	NOSTD	{ int shmget(key_t key, size_t size, \
 				    int shmflg); }
 ;
 232	AUE_NULL	STD	{ int clock_gettime(clockid_t clock_id, \
 				    struct timespec *tp); }
 233	AUE_CLOCK_SETTIME	STD	{ int clock_settime( \
 				    clockid_t clock_id, \
 				    const struct timespec *tp); }
 234	AUE_NULL	STD	{ int clock_getres(clockid_t clock_id, \
 				    struct timespec *tp); }
 235	AUE_NULL	STD	{ int ktimer_create(clockid_t clock_id, \
 				    struct sigevent *evp, int *timerid); }
 236	AUE_NULL	STD	{ int ktimer_delete(int timerid); }
 237	AUE_NULL	STD	{ int ktimer_settime(int timerid, int flags, \
 				    const struct itimerspec *value, \
 				    struct itimerspec *ovalue); }
 238	AUE_NULL	STD	{ int ktimer_gettime(int timerid, struct \
 				    itimerspec *value); }
 239	AUE_NULL	STD	{ int ktimer_getoverrun(int timerid); }
 240	AUE_NULL	STD	{ int nanosleep(const struct timespec *rqtp, \
 				    struct timespec *rmtp); }
 241	AUE_NULL	UNIMPL	nosys
 242	AUE_NULL	UNIMPL	nosys
 243	AUE_NULL	UNIMPL	nosys
 244	AUE_NULL	UNIMPL	nosys
 245	AUE_NULL	UNIMPL	nosys
 246	AUE_NULL	UNIMPL	nosys
 247	AUE_NULL	UNIMPL	nosys
 248	AUE_NULL	STD	{ int ntp_gettime(struct ntptimeval *ntvp); }
 249	AUE_NULL	UNIMPL	nosys
 ; syscall numbers initially used in OpenBSD
 250	AUE_MINHERIT	STD	{ int minherit(void *addr, size_t len, \
 				    int inherit); }
 251	AUE_RFORK	STD	{ int rfork(int flags); }
 252	AUE_POLL	STD	{ int openbsd_poll(struct pollfd *fds, \
 				    u_int nfds, int timeout); }
 253	AUE_ISSETUGID	STD	{ int issetugid(void); }
 254	AUE_LCHOWN	STD	{ int lchown(char *path, int uid, int gid); }
 255	AUE_NULL	NOSTD	{ int aio_read(struct aiocb *aiocbp); }
 256	AUE_NULL	NOSTD	{ int aio_write(struct aiocb *aiocbp); }
 257	AUE_NULL	NOSTD	{ int lio_listio(int mode, \
 				    struct aiocb * const *acb_list, \
 				    int nent, struct sigevent *sig); }
 258	AUE_NULL	UNIMPL	nosys
 259	AUE_NULL	UNIMPL	nosys
 260	AUE_NULL	UNIMPL	nosys
 261	AUE_NULL	UNIMPL	nosys
 262	AUE_NULL	UNIMPL	nosys
 263	AUE_NULL	UNIMPL	nosys
 264	AUE_NULL	UNIMPL	nosys
 265	AUE_NULL	UNIMPL	nosys
 266	AUE_NULL	UNIMPL	nosys
 267	AUE_NULL	UNIMPL	nosys
 268	AUE_NULL	UNIMPL	nosys
 269	AUE_NULL	UNIMPL	nosys
 270	AUE_NULL	UNIMPL	nosys
 271	AUE_NULL	UNIMPL	nosys
 272	AUE_O_GETDENTS	STD	{ int getdents(int fd, char *buf, \
 				    size_t count); }
 273	AUE_NULL	UNIMPL	nosys
 274	AUE_LCHMOD	STD	{ int lchmod(char *path, mode_t mode); }
 275	AUE_LCHOWN	NOPROTO	{ int lchown(char *path, uid_t uid, \
 				    gid_t gid); } netbsd_lchown lchown_args \
 				    int
 276	AUE_LUTIMES	STD	{ int lutimes(char *path, \
 				    struct timeval *tptr); }
 277	AUE_MSYNC	NOPROTO	{ int msync(void *addr, size_t len, \
 				    int flags); } netbsd_msync msync_args int
 278	AUE_STAT	STD	{ int nstat(char *path, struct nstat *ub); }
 279	AUE_FSTAT	STD	{ int nfstat(int fd, struct nstat *sb); }
 280	AUE_LSTAT	STD	{ int nlstat(char *path, struct nstat *ub); }
 281	AUE_NULL	UNIMPL	nosys
 282	AUE_NULL	UNIMPL	nosys
 283	AUE_NULL	UNIMPL	nosys
 284	AUE_NULL	UNIMPL	nosys
 285	AUE_NULL	UNIMPL	nosys
 286	AUE_NULL	UNIMPL	nosys
 287	AUE_NULL	UNIMPL	nosys
 288	AUE_NULL	UNIMPL	nosys
 ; 289 and 290 from NetBSD (OpenBSD: 267 and 268)
 289	AUE_PREADV	STD	{ ssize_t preadv(int fd, struct iovec *iovp, \
 					u_int iovcnt, off_t offset); }
 290	AUE_PWRITEV	STD	{ ssize_t pwritev(int fd, struct iovec *iovp, \
 					u_int iovcnt, off_t offset); }
 291	AUE_NULL	UNIMPL	nosys
 292	AUE_NULL	UNIMPL	nosys
 293	AUE_NULL	UNIMPL	nosys
 294	AUE_NULL	UNIMPL	nosys
 295	AUE_NULL	UNIMPL	nosys
 296	AUE_NULL	UNIMPL	nosys
 ; XXX 297 is 300 in NetBSD 
 297	AUE_FHSTATFS	COMPAT4	{ int fhstatfs( \
 				    const struct fhandle *u_fhp, \
 				    struct ostatfs *buf); }
 298	AUE_FHOPEN	STD	{ int fhopen(const struct fhandle *u_fhp, \
 				    int flags); }
 299	AUE_FHSTAT	STD	{ int fhstat(const struct fhandle *u_fhp, \
 				    struct stat *sb); }
 ; syscall numbers for FreeBSD
 300	AUE_NULL	STD	{ int modnext(int modid); }
 301	AUE_NULL	STD	{ int modstat(int modid, \
 				    struct module_stat *stat); }
 302	AUE_NULL	STD	{ int modfnext(int modid); }
 303	AUE_NULL	STD	{ int modfind(const char *name); }
 304	AUE_MODLOAD	STD	{ int kldload(const char *file); }
 305	AUE_MODUNLOAD	STD	{ int kldunload(int fileid); }
 306	AUE_NULL	STD	{ int kldfind(const char *file); }
 307	AUE_NULL	STD	{ int kldnext(int fileid); }
 308	AUE_NULL	STD	{ int kldstat(int fileid, struct \
 				    kld_file_stat* stat); }
 309	AUE_NULL	STD	{ int kldfirstmod(int fileid); }
 310	AUE_GETSID	STD	{ int getsid(pid_t pid); }
 311	AUE_SETRESUID	STD	{ int setresuid(uid_t ruid, uid_t euid, \
 				    uid_t suid); }
 312	AUE_SETRESGID	STD	{ int setresgid(gid_t rgid, gid_t egid, \
 				    gid_t sgid); }
 313	AUE_NULL	OBSOL	signanosleep
 314	AUE_NULL	NOSTD	{ int aio_return(struct aiocb *aiocbp); }
 315	AUE_NULL	NOSTD	{ int aio_suspend( \
 				    struct aiocb * const * aiocbp, int nent, \
 				    const struct timespec *timeout); }
 316	AUE_NULL	NOSTD	{ int aio_cancel(int fd, \
 				    struct aiocb *aiocbp); }
 317	AUE_NULL	NOSTD	{ int aio_error(struct aiocb *aiocbp); }
 318	AUE_NULL	NOSTD	{ int oaio_read(struct oaiocb *aiocbp); }
 319	AUE_NULL	NOSTD	{ int oaio_write(struct oaiocb *aiocbp); }
 320	AUE_NULL	NOSTD	{ int olio_listio(int mode, \
 				    struct oaiocb * const *acb_list, \
 				    int nent, struct osigevent *sig); }
 321	AUE_NULL	STD	{ int yield(void); }
 322	AUE_NULL	OBSOL	thr_sleep
 323	AUE_NULL	OBSOL	thr_wakeup
 324	AUE_MLOCKALL	STD	{ int mlockall(int how); }
 325	AUE_MUNLOCKALL	STD	{ int munlockall(void); }
 326	AUE_GETCWD	STD	{ int __getcwd(u_char *buf, u_int buflen); }
 
 327	AUE_NULL	STD	{ int sched_setparam (pid_t pid, \
 				    const struct sched_param *param); }
 328	AUE_NULL	STD	{ int sched_getparam (pid_t pid, struct \
 				    sched_param *param); }
 
 329	AUE_NULL	STD	{ int sched_setscheduler (pid_t pid, int \
 				    policy, const struct sched_param \
 				    *param); }
 330	AUE_NULL	STD	{ int sched_getscheduler (pid_t pid); }
 
 331	AUE_NULL	STD	{ int sched_yield (void); }
 332	AUE_NULL	STD	{ int sched_get_priority_max (int policy); }
 333	AUE_NULL	STD	{ int sched_get_priority_min (int policy); }
 334	AUE_NULL	STD	{ int sched_rr_get_interval (pid_t pid, \
 				    struct timespec *interval); }
 335	AUE_NULL	STD	{ int utrace(const void *addr, size_t len); }
 336	AUE_SENDFILE	COMPAT4	{ int sendfile(int fd, int s, \
 				    off_t offset, size_t nbytes, \
 				    struct sf_hdtr *hdtr, off_t *sbytes, \
 				    int flags); }
 337	AUE_NULL	STD	{ int kldsym(int fileid, int cmd, \
 				    void *data); }
 338	AUE_JAIL	STD	{ int jail(struct jail *jail); }
 339	AUE_NULL	UNIMPL	pioctl
 340	AUE_SIGPROCMASK	STD	{ int sigprocmask(int how, \
 				    const sigset_t *set, sigset_t *oset); }
 341	AUE_SIGSUSPEND	STD	{ int sigsuspend(const sigset_t *sigmask); }
 342	AUE_SIGACTION	COMPAT4	{ int sigaction(int sig, const \
 				    struct sigaction *act, \
 				    struct sigaction *oact); }
 343	AUE_SIGPENDING	STD	{ int sigpending(sigset_t *set); }
 344	AUE_SIGRETURN	COMPAT4	{ int sigreturn( \
 				    const struct ucontext4 *sigcntxp); }
 345	AUE_SIGWAIT	STD	{ int sigtimedwait(const sigset_t *set, \
 				    siginfo_t *info, \
 				    const struct timespec *timeout); }
 346	AUE_NULL	STD	{ int sigwaitinfo(const sigset_t *set, \
 				    siginfo_t *info); }
 347	AUE_NULL	STD	{ int __acl_get_file(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 348	AUE_NULL	STD	{ int __acl_set_file(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 349	AUE_NULL	STD	{ int __acl_get_fd(int filedes, \
 				    acl_type_t type, struct acl *aclp); }
 350	AUE_NULL	STD	{ int __acl_set_fd(int filedes, \
 				    acl_type_t type, struct acl *aclp); }
 351	AUE_NULL	STD	{ int __acl_delete_file(const char *path, \
 				    acl_type_t type); }
 352	AUE_NULL	STD	{ int __acl_delete_fd(int filedes, \
 				    acl_type_t type); }
 353	AUE_NULL	STD	{ int __acl_aclcheck_file(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 354	AUE_NULL	STD	{ int __acl_aclcheck_fd(int filedes, \
 				    acl_type_t type, struct acl *aclp); }
 355	AUE_EXTATTRCTL	STD	{ int extattrctl(const char *path, int cmd, \
 				    const char *filename, int attrnamespace, \
 				    const char *attrname); }
 356	AUE_EXTATTR_SET_FILE	STD	{ int extattr_set_file( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 357	AUE_EXTATTR_GET_FILE	STD	{ ssize_t extattr_get_file( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 358	AUE_EXTATTR_DELETE_FILE	STD	{ int extattr_delete_file(const char *path, \
 				    int attrnamespace, \
 				    const char *attrname); }
 359	AUE_NULL	NOSTD	{ int aio_waitcomplete( \
 				    struct aiocb **aiocbp, \
 				    struct timespec *timeout); }
 360	AUE_GETRESUID	STD	{ int getresuid(uid_t *ruid, uid_t *euid, \
 				    uid_t *suid); }
 361	AUE_GETRESGID	STD	{ int getresgid(gid_t *rgid, gid_t *egid, \
 				    gid_t *sgid); }
 362	AUE_KQUEUE	STD	{ int kqueue(void); }
 363	AUE_NULL	STD	{ int kevent(int fd, \
 				    struct kevent *changelist, int nchanges, \
 				    struct kevent *eventlist, int nevents, \
 				    const struct timespec *timeout); }
 364	AUE_NULL	UNIMPL	__cap_get_proc
 365	AUE_NULL	UNIMPL	__cap_set_proc
 366	AUE_NULL	UNIMPL	__cap_get_fd
 367	AUE_NULL	UNIMPL	__cap_get_file
 368	AUE_NULL	UNIMPL	__cap_set_fd
 369	AUE_NULL	UNIMPL	__cap_set_file
 370	AUE_NULL	NODEF	lkmressys lkmressys nosys_args int
 371	AUE_EXTATTR_SET_FD	STD	{ int extattr_set_fd(int fd, \
 				    int attrnamespace, const char *attrname, \
 				    void *data, size_t nbytes); }
 372	AUE_EXTATTR_GET_FD	STD	{ ssize_t extattr_get_fd(int fd, \
 				    int attrnamespace, const char *attrname, \
 				    void *data, size_t nbytes); }
 373	AUE_EXTATTR_DELETE_FD	STD	{ int extattr_delete_fd(int fd, \
 				    int attrnamespace, \
 				    const char *attrname); }
 374	AUE_NULL	STD	{ int __setugid(int flag); }
 375	AUE_NULL	NOIMPL	{ int nfsclnt(int flag, caddr_t argp); }
 376	AUE_EACCESS	STD	{ int eaccess(char *path, int flags); }
 377	AUE_NULL	UNIMPL	afs_syscall
 378	AUE_NMOUNT	STD	{ int nmount(struct iovec *iovp, \
 				    unsigned int iovcnt, int flags); }
 379	AUE_NULL	UNIMPL	kse_exit
 380	AUE_NULL	UNIMPL	kse_wakeup
 381	AUE_NULL	UNIMPL	kse_create
 382	AUE_NULL	UNIMPL	kse_thr_interrupt
 383	AUE_NULL	UNIMPL	kse_release
 384	AUE_NULL	STD	{ int __mac_get_proc(struct mac *mac_p); }
 385	AUE_NULL	STD	{ int __mac_set_proc(struct mac *mac_p); }
 386	AUE_NULL	STD	{ int __mac_get_fd(int fd, \
 				    struct mac *mac_p); }
 387	AUE_NULL	STD	{ int __mac_get_file(const char *path_p, \
 				    struct mac *mac_p); }
 388	AUE_NULL	STD	{ int __mac_set_fd(int fd, \
 				    struct mac *mac_p); }
 389	AUE_NULL	STD	{ int __mac_set_file(const char *path_p, \
 				    struct mac *mac_p); }
 390	AUE_NULL	STD	{ int kenv(int what, const char *name, \
 				    char *value, int len); }
 391	AUE_LCHFLAGS	STD	{ int lchflags(const char *path, int flags); }
 392	AUE_NULL	STD	{ int uuidgen(struct uuid *store, \
 				    int count); }
 393	AUE_SENDFILE	STD	{ int sendfile(int fd, int s, off_t offset, \
 				    size_t nbytes, struct sf_hdtr *hdtr, \
 				    off_t *sbytes, int flags); }
 394	AUE_NULL	STD	{ int mac_syscall(const char *policy, \
 				    int call, void *arg); }
 395	AUE_GETFSSTAT	STD	{ int getfsstat(struct statfs *buf, \
 				    long bufsize, int flags); }
 396	AUE_STATFS	STD	{ int statfs(char *path, \
 				    struct statfs *buf); }
 397	AUE_FSTATFS	STD	{ int fstatfs(int fd, struct statfs *buf); }
 398	AUE_FHSTATFS	STD	{ int fhstatfs(const struct fhandle *u_fhp, \
 				    struct statfs *buf); }
 399	AUE_NULL	UNIMPL	nosys
 400	AUE_NULL	NOSTD	{ int ksem_close(semid_t id); }
 401	AUE_NULL	NOSTD	{ int ksem_post(semid_t id); }
 402	AUE_NULL	NOSTD	{ int ksem_wait(semid_t id); }
 403	AUE_NULL	NOSTD	{ int ksem_trywait(semid_t id); }
 404	AUE_NULL	NOSTD	{ int ksem_init(semid_t *idp, \
 				    unsigned int value); }
 405	AUE_NULL	NOSTD	{ int ksem_open(semid_t *idp, \
 				    const char *name, int oflag, \
 				    mode_t mode, unsigned int value); }
 406	AUE_NULL	NOSTD	{ int ksem_unlink(const char *name); }
 407	AUE_NULL	NOSTD	{ int ksem_getvalue(semid_t id, int *val); }
 408	AUE_NULL	NOSTD	{ int ksem_destroy(semid_t id); }
 409	AUE_NULL	STD	{ int __mac_get_pid(pid_t pid, \
 				    struct mac *mac_p); }
 410	AUE_NULL	STD	{ int __mac_get_link(const char *path_p, \
 				    struct mac *mac_p); }
 411	AUE_NULL	STD	{ int __mac_set_link(const char *path_p, \
 				    struct mac *mac_p); }
 412	AUE_EXTATTR_SET_LINK	STD	{ int extattr_set_link( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 413	AUE_EXTATTR_GET_LINK	STD	{ ssize_t extattr_get_link( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 414	AUE_EXTATTR_DELETE_LINK	STD	{ int extattr_delete_link( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname); }
 415	AUE_NULL	STD	{ int __mac_execve(char *fname, char **argv, \
 				    char **envv, struct mac *mac_p); }
 416	AUE_SIGACTION	STD	{ int sigaction(int sig, \
 				    const struct sigaction *act, \
 				    struct sigaction *oact); }
 417	AUE_SIGRETURN	STD	{ int sigreturn( \
 				    const struct __ucontext *sigcntxp); }
 418	AUE_NULL	UNIMPL	__xstat
 419	AUE_NULL	UNIMPL	__xfstat
 420	AUE_NULL	UNIMPL	__xlstat
 421	AUE_NULL	STD	{ int getcontext(struct __ucontext *ucp); }
 422	AUE_NULL	STD	{ int setcontext( \
 				    const struct __ucontext *ucp); }
 423	AUE_NULL	STD	{ int swapcontext(struct __ucontext *oucp, \
 				    const struct __ucontext *ucp); }
 424	AUE_SWAPOFF	STD	{ int swapoff(const char *name); }
 425	AUE_NULL	STD	{ int __acl_get_link(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 426	AUE_NULL	STD	{ int __acl_set_link(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 427	AUE_NULL	STD	{ int __acl_delete_link(const char *path, \
 				    acl_type_t type); }
 428	AUE_NULL	STD	{ int __acl_aclcheck_link(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 429	AUE_SIGWAIT	STD	{ int sigwait(const sigset_t *set, \
 				    int *sig); }
 430	AUE_NULL	STD	{ int thr_create(ucontext_t *ctx, long *id, \
 				    int flags); }
 431	AUE_NULL	STD	{ void thr_exit(long *state); }
 432	AUE_NULL	STD	{ int thr_self(long *id); }
 433	AUE_NULL	STD	{ int thr_kill(long id, int sig); }
 434	AUE_NULL	STD	{ int _umtx_lock(struct umtx *umtx); }
 435	AUE_NULL	STD	{ int _umtx_unlock(struct umtx *umtx); }
 436	AUE_NULL	STD	{ int jail_attach(int jid); }
 437	AUE_EXTATTR_LIST_FD	STD	{ ssize_t extattr_list_fd(int fd, \
 				    int attrnamespace, void *data, \
 				    size_t nbytes); }
 438	AUE_EXTATTR_LIST_FILE	STD	{ ssize_t extattr_list_file( \
 				    const char *path, int attrnamespace, \
 				    void *data, size_t nbytes); }
 439	AUE_EXTATTR_LIST_LINK	STD	{ ssize_t extattr_list_link( \
 				    const char *path, int attrnamespace, \
 				    void *data, size_t nbytes); }
 440	AUE_NULL	UNIMPL	kse_switchin
 441	AUE_NULL	NOSTD	{ int ksem_timedwait(semid_t id, \
 				    const struct timespec *abstime); }
 442	AUE_NULL	STD	{ int thr_suspend( \
 				    const struct timespec *timeout); }
 443	AUE_NULL	STD	{ int thr_wake(long id); }
 444	AUE_MODUNLOAD	STD	{ int kldunloadf(int fileid, int flags); }
 445	AUE_AUDIT	STD	{ int audit(const void *record, \
 				    u_int length); }
 446	AUE_AUDITON	STD	{ int auditon(int cmd, void *data, \
 				    u_int length); }
 447	AUE_GETAUID	STD	{ int getauid(uid_t *auid); }
 448	AUE_SETAUID	STD	{ int setauid(uid_t *auid); }
 449	AUE_GETAUDIT	STD	{ int getaudit(struct auditinfo *auditinfo); }
 450	AUE_SETAUDIT	STD	{ int setaudit(struct auditinfo *auditinfo); }
 451	AUE_GETAUDIT_ADDR	STD	{ int getaudit_addr( \
 				    struct auditinfo_addr *auditinfo_addr, \
 				    u_int length); }
 452	AUE_SETAUDIT_ADDR	STD	{ int setaudit_addr( \
 				    struct auditinfo_addr *auditinfo_addr, \
 				    u_int length); }
 453	AUE_AUDITCTL	STD	{ int auditctl(char *path); }
 454	AUE_NULL	STD	{ int _umtx_op(void *obj, int op, \
 				    u_long val, void *uaddr1, void *uaddr2); }
 455	AUE_NULL	STD	{ int thr_new(struct thr_param *param, \
 				    int param_size); }
 456	AUE_NULL	STD	{ int sigqueue(pid_t pid, int signum, void *value); }
 457	AUE_NULL	NOSTD	{ int kmq_open(const char *path, int flags, \
 				    mode_t mode, const struct mq_attr *attr); }
 458	AUE_NULL	NOSTD	{ int kmq_setattr(int mqd,		\
 				    const struct mq_attr *attr,		\
 				    struct mq_attr *oattr); }
 459	AUE_NULL	NOSTD	{ int kmq_timedreceive(int mqd,	\
 				    char *msg_ptr, size_t msg_len,	\
 				    unsigned *msg_prio,			\
 				    const struct timespec *abs_timeout); }
 460	AUE_NULL	NOSTD	{ int kmq_timedsend(int mqd,		\
 				    const char *msg_ptr, size_t msg_len,\
 				    unsigned msg_prio,			\
 				    const struct timespec *abs_timeout);}
 461	AUE_NULL	NOSTD	{ int kmq_notify(int mqd,		\
 				    const struct sigevent *sigev); }
 462	AUE_NULL	NOSTD	{ int kmq_unlink(const char *path); }
 463	AUE_NULL	STD	{ int abort2(const char *why, int nargs, void **args); }
 464	AUE_NULL	STD	{ int thr_set_name(long id, const char *name); }
 465	AUE_NULL	NOSTD	{ int aio_fsync(int op, struct aiocb *aiocbp); }
 466	AUE_RTPRIO	STD	{ int rtprio_thread(int function, \
 				    lwpid_t lwpid, struct rtprio *rtp); }
 467	AUE_NULL	UNIMPL	nosys
 468	AUE_NULL	UNIMPL	nosys
 469	AUE_NULL	UNIMPL	__getpath_fromfd
 470	AUE_NULL	UNIMPL	__getpath_fromaddr
 471	AUE_NULL	STD	{ int sctp_peeloff(int sd, uint32_t name); }
 472     AUE_NULL        STD    { int sctp_generic_sendmsg(int sd, caddr_t msg, int mlen, \
 	                            caddr_t to, __socklen_t tolen, \
 				    struct sctp_sndrcvinfo *sinfo, int flags); }
 473     AUE_NULL        STD    { int sctp_generic_sendmsg_iov(int sd, struct iovec *iov, int iovlen, \
 	                            caddr_t to, __socklen_t tolen, \
 				    struct sctp_sndrcvinfo *sinfo, int flags); }
 474     AUE_NULL        STD    { int sctp_generic_recvmsg(int sd, struct iovec *iov, int iovlen, \
 				    struct sockaddr * from, __socklen_t *fromlenaddr, \
 				    struct sctp_sndrcvinfo *sinfo, int *msg_flags); }
 475	AUE_PREAD	STD	{ ssize_t pread(int fd, void *buf, \
 				    size_t nbyte, off_t offset); }
 476	AUE_PWRITE	STD	{ ssize_t pwrite(int fd, const void *buf, \
 				    size_t nbyte, off_t offset); }
 477	AUE_MMAP	STD	{ caddr_t mmap(caddr_t addr, size_t len, \
 				    int prot, int flags, int fd, off_t pos); }
 478	AUE_LSEEK	STD	{ off_t lseek(int fd, off_t offset, \
 				    int whence); }
 479	AUE_TRUNCATE	STD	{ int truncate(char *path, off_t length); }
 480	AUE_FTRUNCATE	STD	{ int ftruncate(int fd, off_t length); }
 481	AUE_KILL	STD	{ int thr_kill2(pid_t pid, long id, int sig); }
 482	AUE_SHMOPEN	STD	{ int shm_open(const char *path, int flags, \
 				    mode_t mode); }
 483	AUE_SHMUNLINK	STD	{ int shm_unlink(const char *path); }
 484	AUE_NULL	STD	{ int cpuset(cpusetid_t *setid); }
 485	AUE_NULL	STD	{ int cpuset_setid(cpuwhich_t which, id_t id, \
 				    cpusetid_t setid); }
 486	AUE_NULL	STD	{ int cpuset_getid(cpulevel_t level, \
 				    cpuwhich_t which, id_t id, \
 				    cpusetid_t *setid); }
 487	AUE_NULL	STD	{ int cpuset_getaffinity(cpulevel_t level, \
 				    cpuwhich_t which, id_t id, size_t cpusetsize, \
 				    cpuset_t *mask); }
 488	AUE_NULL	STD	{ int cpuset_setaffinity(cpulevel_t level, \
 				    cpuwhich_t which, id_t id, size_t cpusetsize, \
 				    const cpuset_t *mask); }
 489	AUE_FACCESSAT	STD	{ int faccessat(int fd, char *path, int mode, \
 				    int flag); }
 490	AUE_FCHMODAT	STD	{ int fchmodat(int fd, char *path, mode_t mode, \
 				    int flag); }
 491	AUE_FCHOWNAT	STD	{ int fchownat(int fd, char *path, uid_t uid, \
 				    gid_t gid, int flag); }
 492	AUE_FEXECVE	STD	{ int fexecve(int fd, char **argv, \
 				    char **envv); }
 493	AUE_FSTATAT	STD	{ int fstatat(int fd, char *path, \
 				    struct stat *buf, int flag); }
 494	AUE_FUTIMESAT	STD	{ int futimesat(int fd, char *path, \
 				    struct timeval *times); }
 495	AUE_LINKAT	STD	{ int linkat(int fd1, char *path1, int fd2, \
 				    char *path2, int flag); }
 496	AUE_MKDIRAT	STD	{ int mkdirat(int fd, char *path, mode_t mode); }
 497	AUE_MKFIFOAT	STD	{ int mkfifoat(int fd, char *path, mode_t mode); }
 498	AUE_MKNODAT	STD	{ int mknodat(int fd, char *path, mode_t mode, \
 				    dev_t dev); }
 ; XXX: see the comment for open
 499	AUE_OPENAT_RWTC	STD	{ int openat(int fd, char *path, int flag, \
 				    mode_t mode); }
 500	AUE_READLINKAT	STD	{ int readlinkat(int fd, char *path, char *buf, \
 				    size_t bufsize); }
 501	AUE_RENAMEAT	STD	{ int renameat(int oldfd, char *old, int newfd, \
 				     char *new); }
 502	AUE_SYMLINKAT	STD	{ int symlinkat(char *path1, int fd, \
 				     char *path2); }
 503	AUE_UNLINKAT	STD	{ int unlinkat(int fd, char *path, int flag); }
 ; Please copy any additions and changes to the following compatibility tables:
 ; sys/compat/freebsd32/syscalls.master
Index: head/sys/kern/systrace_args.c
===================================================================
--- head/sys/kern/systrace_args.c	(revision 178887)
+++ head/sys/kern/systrace_args.c	(revision 178888)
@@ -1,8094 +1,8101 @@
 /*
  * System call argument to DTrace register array converstion.
  *
  * DO NOT EDIT-- this file is automatically generated.
  * $FreeBSD$
  * This file is part of the DTrace syscall provider.
  */
 
 static void
 systrace_args(int sysnum, void *params, u_int64_t *uarg, int *n_args)
 {
 	int64_t *iarg  = (int64_t *) uarg;
 	switch (sysnum) {
 	/* nosys */
 	case 0: {
 		*n_args = 0;
 		break;
 	}
 	/* sys_exit */
 	case 1: {
 		struct sys_exit_args *p = params;
 		iarg[0] = p->rval; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* fork */
 	case 2: {
 		*n_args = 0;
 		break;
 	}
 	/* read */
 	case 3: {
 		struct read_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->buf; /* void * */
 		uarg[2] = p->nbyte; /* size_t */
 		*n_args = 3;
 		break;
 	}
 	/* write */
 	case 4: {
 		struct write_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->buf; /* const void * */
 		uarg[2] = p->nbyte; /* size_t */
 		*n_args = 3;
 		break;
 	}
 	/* open */
 	case 5: {
 		struct open_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->flags; /* int */
 		iarg[2] = p->mode; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* close */
 	case 6: {
 		struct close_args *p = params;
 		iarg[0] = p->fd; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* wait4 */
 	case 7: {
 		struct wait_args *p = params;
 		iarg[0] = p->pid; /* int */
 		uarg[1] = (intptr_t) p->status; /* int * */
 		iarg[2] = p->options; /* int */
 		uarg[3] = (intptr_t) p->rusage; /* struct rusage * */
 		*n_args = 4;
 		break;
 	}
 	/* link */
 	case 9: {
 		struct link_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		uarg[1] = (intptr_t) p->link; /* char * */
 		*n_args = 2;
 		break;
 	}
 	/* unlink */
 	case 10: {
 		struct unlink_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		*n_args = 1;
 		break;
 	}
 	/* chdir */
 	case 12: {
 		struct chdir_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		*n_args = 1;
 		break;
 	}
 	/* fchdir */
 	case 13: {
 		struct fchdir_args *p = params;
 		iarg[0] = p->fd; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* mknod */
 	case 14: {
 		struct mknod_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->mode; /* int */
 		iarg[2] = p->dev; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* chmod */
 	case 15: {
 		struct chmod_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->mode; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* chown */
 	case 16: {
 		struct chown_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->uid; /* int */
 		iarg[2] = p->gid; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* obreak */
 	case 17: {
 		struct obreak_args *p = params;
 		uarg[0] = (intptr_t) p->nsize; /* char * */
 		*n_args = 1;
 		break;
 	}
 	/* getpid */
 	case 20: {
 		*n_args = 0;
 		break;
 	}
 	/* mount */
 	case 21: {
 		struct mount_args *p = params;
 		uarg[0] = (intptr_t) p->type; /* char * */
 		uarg[1] = (intptr_t) p->path; /* char * */
 		iarg[2] = p->flags; /* int */
 		uarg[3] = (intptr_t) p->data; /* caddr_t */
 		*n_args = 4;
 		break;
 	}
 	/* unmount */
 	case 22: {
 		struct unmount_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->flags; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* setuid */
 	case 23: {
 		struct setuid_args *p = params;
 		uarg[0] = p->uid; /* uid_t */
 		*n_args = 1;
 		break;
 	}
 	/* getuid */
 	case 24: {
 		*n_args = 0;
 		break;
 	}
 	/* geteuid */
 	case 25: {
 		*n_args = 0;
 		break;
 	}
 	/* ptrace */
 	case 26: {
 		struct ptrace_args *p = params;
 		iarg[0] = p->req; /* int */
 		iarg[1] = p->pid; /* pid_t */
 		uarg[2] = (intptr_t) p->addr; /* caddr_t */
 		iarg[3] = p->data; /* int */
 		*n_args = 4;
 		break;
 	}
 	/* recvmsg */
 	case 27: {
 		struct recvmsg_args *p = params;
 		iarg[0] = p->s; /* int */
 		uarg[1] = (intptr_t) p->msg; /* struct msghdr * */
 		iarg[2] = p->flags; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* sendmsg */
 	case 28: {
 		struct sendmsg_args *p = params;
 		iarg[0] = p->s; /* int */
 		uarg[1] = (intptr_t) p->msg; /* struct msghdr * */
 		iarg[2] = p->flags; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* recvfrom */
 	case 29: {
 		struct recvfrom_args *p = params;
 		iarg[0] = p->s; /* int */
 		uarg[1] = (intptr_t) p->buf; /* caddr_t */
 		uarg[2] = p->len; /* size_t */
 		iarg[3] = p->flags; /* int */
 		uarg[4] = (intptr_t) p->from; /* struct sockaddr *__restrict */
 		uarg[5] = (intptr_t) p->fromlenaddr; /* __socklen_t *__restrict */
 		*n_args = 6;
 		break;
 	}
 	/* accept */
 	case 30: {
 		struct accept_args *p = params;
 		iarg[0] = p->s; /* int */
 		uarg[1] = (intptr_t) p->name; /* struct sockaddr *__restrict */
 		uarg[2] = (intptr_t) p->anamelen; /* __socklen_t *__restrict */
 		*n_args = 3;
 		break;
 	}
 	/* getpeername */
 	case 31: {
 		struct getpeername_args *p = params;
 		iarg[0] = p->fdes; /* int */
 		uarg[1] = (intptr_t) p->asa; /* struct sockaddr *__restrict */
 		uarg[2] = (intptr_t) p->alen; /* __socklen_t *__restrict */
 		*n_args = 3;
 		break;
 	}
 	/* getsockname */
 	case 32: {
 		struct getsockname_args *p = params;
 		iarg[0] = p->fdes; /* int */
 		uarg[1] = (intptr_t) p->asa; /* struct sockaddr *__restrict */
 		uarg[2] = (intptr_t) p->alen; /* __socklen_t *__restrict */
 		*n_args = 3;
 		break;
 	}
 	/* access */
 	case 33: {
 		struct access_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->flags; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* chflags */
 	case 34: {
 		struct chflags_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->flags; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* fchflags */
 	case 35: {
 		struct fchflags_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->flags; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* sync */
 	case 36: {
 		*n_args = 0;
 		break;
 	}
 	/* kill */
 	case 37: {
 		struct kill_args *p = params;
 		iarg[0] = p->pid; /* int */
 		iarg[1] = p->signum; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* getppid */
 	case 39: {
 		*n_args = 0;
 		break;
 	}
 	/* dup */
 	case 41: {
 		struct dup_args *p = params;
 		uarg[0] = p->fd; /* u_int */
 		*n_args = 1;
 		break;
 	}
 	/* pipe */
 	case 42: {
 		*n_args = 0;
 		break;
 	}
 	/* getegid */
 	case 43: {
 		*n_args = 0;
 		break;
 	}
 	/* profil */
 	case 44: {
 		struct profil_args *p = params;
 		uarg[0] = (intptr_t) p->samples; /* caddr_t */
 		uarg[1] = p->size; /* size_t */
 		uarg[2] = p->offset; /* size_t */
 		uarg[3] = p->scale; /* u_int */
 		*n_args = 4;
 		break;
 	}
 	/* ktrace */
 	case 45: {
 		struct ktrace_args *p = params;
 		uarg[0] = (intptr_t) p->fname; /* const char * */
 		iarg[1] = p->ops; /* int */
 		iarg[2] = p->facs; /* int */
 		iarg[3] = p->pid; /* int */
 		*n_args = 4;
 		break;
 	}
 	/* getgid */
 	case 47: {
 		*n_args = 0;
 		break;
 	}
 	/* getlogin */
 	case 49: {
 		struct getlogin_args *p = params;
 		uarg[0] = (intptr_t) p->namebuf; /* char * */
 		uarg[1] = p->namelen; /* u_int */
 		*n_args = 2;
 		break;
 	}
 	/* setlogin */
 	case 50: {
 		struct setlogin_args *p = params;
 		uarg[0] = (intptr_t) p->namebuf; /* char * */
 		*n_args = 1;
 		break;
 	}
 	/* acct */
 	case 51: {
 		struct acct_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		*n_args = 1;
 		break;
 	}
 	/* sigaltstack */
 	case 53: {
 		struct sigaltstack_args *p = params;
 		uarg[0] = (intptr_t) p->ss; /* stack_t * */
 		uarg[1] = (intptr_t) p->oss; /* stack_t * */
 		*n_args = 2;
 		break;
 	}
 	/* ioctl */
 	case 54: {
 		struct ioctl_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = p->com; /* u_long */
 		uarg[2] = (intptr_t) p->data; /* caddr_t */
 		*n_args = 3;
 		break;
 	}
 	/* reboot */
 	case 55: {
 		struct reboot_args *p = params;
 		iarg[0] = p->opt; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* revoke */
 	case 56: {
 		struct revoke_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		*n_args = 1;
 		break;
 	}
 	/* symlink */
 	case 57: {
 		struct symlink_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		uarg[1] = (intptr_t) p->link; /* char * */
 		*n_args = 2;
 		break;
 	}
 	/* readlink */
 	case 58: {
 		struct readlink_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		uarg[1] = (intptr_t) p->buf; /* char * */
 		uarg[2] = p->count; /* size_t */
 		*n_args = 3;
 		break;
 	}
 	/* execve */
 	case 59: {
 		struct execve_args *p = params;
 		uarg[0] = (intptr_t) p->fname; /* char * */
 		uarg[1] = (intptr_t) p->argv; /* char ** */
 		uarg[2] = (intptr_t) p->envv; /* char ** */
 		*n_args = 3;
 		break;
 	}
 	/* umask */
 	case 60: {
 		struct umask_args *p = params;
 		iarg[0] = p->newmask; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* chroot */
 	case 61: {
 		struct chroot_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		*n_args = 1;
 		break;
 	}
 	/* msync */
 	case 65: {
 		struct msync_args *p = params;
 		uarg[0] = (intptr_t) p->addr; /* void * */
 		uarg[1] = p->len; /* size_t */
 		iarg[2] = p->flags; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* vfork */
 	case 66: {
 		*n_args = 0;
 		break;
 	}
 	/* sbrk */
 	case 69: {
 		struct sbrk_args *p = params;
 		iarg[0] = p->incr; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* sstk */
 	case 70: {
 		struct sstk_args *p = params;
 		iarg[0] = p->incr; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* ovadvise */
 	case 72: {
 		struct ovadvise_args *p = params;
 		iarg[0] = p->anom; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* munmap */
 	case 73: {
 		struct munmap_args *p = params;
 		uarg[0] = (intptr_t) p->addr; /* void * */
 		uarg[1] = p->len; /* size_t */
 		*n_args = 2;
 		break;
 	}
 	/* mprotect */
 	case 74: {
 		struct mprotect_args *p = params;
 		uarg[0] = (intptr_t) p->addr; /* const void * */
 		uarg[1] = p->len; /* size_t */
 		iarg[2] = p->prot; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* madvise */
 	case 75: {
 		struct madvise_args *p = params;
 		uarg[0] = (intptr_t) p->addr; /* void * */
 		uarg[1] = p->len; /* size_t */
 		iarg[2] = p->behav; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* mincore */
 	case 78: {
 		struct mincore_args *p = params;
 		uarg[0] = (intptr_t) p->addr; /* const void * */
 		uarg[1] = p->len; /* size_t */
 		uarg[2] = (intptr_t) p->vec; /* char * */
 		*n_args = 3;
 		break;
 	}
 	/* getgroups */
 	case 79: {
 		struct getgroups_args *p = params;
 		uarg[0] = p->gidsetsize; /* u_int */
 		uarg[1] = (intptr_t) p->gidset; /* gid_t * */
 		*n_args = 2;
 		break;
 	}
 	/* setgroups */
 	case 80: {
 		struct setgroups_args *p = params;
 		uarg[0] = p->gidsetsize; /* u_int */
 		uarg[1] = (intptr_t) p->gidset; /* gid_t * */
 		*n_args = 2;
 		break;
 	}
 	/* getpgrp */
 	case 81: {
 		*n_args = 0;
 		break;
 	}
 	/* setpgid */
 	case 82: {
 		struct setpgid_args *p = params;
 		iarg[0] = p->pid; /* int */
 		iarg[1] = p->pgid; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* setitimer */
 	case 83: {
 		struct setitimer_args *p = params;
 		uarg[0] = p->which; /* u_int */
 		uarg[1] = (intptr_t) p->itv; /* struct itimerval * */
 		uarg[2] = (intptr_t) p->oitv; /* struct itimerval * */
 		*n_args = 3;
 		break;
 	}
 	/* swapon */
 	case 85: {
 		struct swapon_args *p = params;
 		uarg[0] = (intptr_t) p->name; /* char * */
 		*n_args = 1;
 		break;
 	}
 	/* getitimer */
 	case 86: {
 		struct getitimer_args *p = params;
 		uarg[0] = p->which; /* u_int */
 		uarg[1] = (intptr_t) p->itv; /* struct itimerval * */
 		*n_args = 2;
 		break;
 	}
 	/* getdtablesize */
 	case 89: {
 		*n_args = 0;
 		break;
 	}
 	/* dup2 */
 	case 90: {
 		struct dup2_args *p = params;
 		uarg[0] = p->from; /* u_int */
 		uarg[1] = p->to; /* u_int */
 		*n_args = 2;
 		break;
 	}
 	/* fcntl */
 	case 92: {
 		struct fcntl_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->cmd; /* int */
 		iarg[2] = p->arg; /* long */
 		*n_args = 3;
 		break;
 	}
 	/* select */
 	case 93: {
 		struct select_args *p = params;
 		iarg[0] = p->nd; /* int */
 		uarg[1] = (intptr_t) p->in; /* fd_set * */
 		uarg[2] = (intptr_t) p->ou; /* fd_set * */
 		uarg[3] = (intptr_t) p->ex; /* fd_set * */
 		uarg[4] = (intptr_t) p->tv; /* struct timeval * */
 		*n_args = 5;
 		break;
 	}
 	/* fsync */
 	case 95: {
 		struct fsync_args *p = params;
 		iarg[0] = p->fd; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* setpriority */
 	case 96: {
 		struct setpriority_args *p = params;
 		iarg[0] = p->which; /* int */
 		iarg[1] = p->who; /* int */
 		iarg[2] = p->prio; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* socket */
 	case 97: {
 		struct socket_args *p = params;
 		iarg[0] = p->domain; /* int */
 		iarg[1] = p->type; /* int */
 		iarg[2] = p->protocol; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* connect */
 	case 98: {
 		struct connect_args *p = params;
 		iarg[0] = p->s; /* int */
 		uarg[1] = (intptr_t) p->name; /* caddr_t */
 		iarg[2] = p->namelen; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* getpriority */
 	case 100: {
 		struct getpriority_args *p = params;
 		iarg[0] = p->which; /* int */
 		iarg[1] = p->who; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* bind */
 	case 104: {
 		struct bind_args *p = params;
 		iarg[0] = p->s; /* int */
 		uarg[1] = (intptr_t) p->name; /* caddr_t */
 		iarg[2] = p->namelen; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* setsockopt */
 	case 105: {
 		struct setsockopt_args *p = params;
 		iarg[0] = p->s; /* int */
 		iarg[1] = p->level; /* int */
 		iarg[2] = p->name; /* int */
 		uarg[3] = (intptr_t) p->val; /* caddr_t */
 		iarg[4] = p->valsize; /* int */
 		*n_args = 5;
 		break;
 	}
 	/* listen */
 	case 106: {
 		struct listen_args *p = params;
 		iarg[0] = p->s; /* int */
 		iarg[1] = p->backlog; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* gettimeofday */
 	case 116: {
 		struct gettimeofday_args *p = params;
 		uarg[0] = (intptr_t) p->tp; /* struct timeval * */
 		uarg[1] = (intptr_t) p->tzp; /* struct timezone * */
 		*n_args = 2;
 		break;
 	}
 	/* getrusage */
 	case 117: {
 		struct getrusage_args *p = params;
 		iarg[0] = p->who; /* int */
 		uarg[1] = (intptr_t) p->rusage; /* struct rusage * */
 		*n_args = 2;
 		break;
 	}
 	/* getsockopt */
 	case 118: {
 		struct getsockopt_args *p = params;
 		iarg[0] = p->s; /* int */
 		iarg[1] = p->level; /* int */
 		iarg[2] = p->name; /* int */
 		uarg[3] = (intptr_t) p->val; /* caddr_t */
 		uarg[4] = (intptr_t) p->avalsize; /* int * */
 		*n_args = 5;
 		break;
 	}
 	/* readv */
 	case 120: {
 		struct readv_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->iovp; /* struct iovec * */
 		uarg[2] = p->iovcnt; /* u_int */
 		*n_args = 3;
 		break;
 	}
 	/* writev */
 	case 121: {
 		struct writev_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->iovp; /* struct iovec * */
 		uarg[2] = p->iovcnt; /* u_int */
 		*n_args = 3;
 		break;
 	}
 	/* settimeofday */
 	case 122: {
 		struct settimeofday_args *p = params;
 		uarg[0] = (intptr_t) p->tv; /* struct timeval * */
 		uarg[1] = (intptr_t) p->tzp; /* struct timezone * */
 		*n_args = 2;
 		break;
 	}
 	/* fchown */
 	case 123: {
 		struct fchown_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->uid; /* int */
 		iarg[2] = p->gid; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* fchmod */
 	case 124: {
 		struct fchmod_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->mode; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* setreuid */
 	case 126: {
 		struct setreuid_args *p = params;
 		iarg[0] = p->ruid; /* int */
 		iarg[1] = p->euid; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* setregid */
 	case 127: {
 		struct setregid_args *p = params;
 		iarg[0] = p->rgid; /* int */
 		iarg[1] = p->egid; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* rename */
 	case 128: {
 		struct rename_args *p = params;
 		uarg[0] = (intptr_t) p->from; /* char * */
 		uarg[1] = (intptr_t) p->to; /* char * */
 		*n_args = 2;
 		break;
 	}
 	/* flock */
 	case 131: {
 		struct flock_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->how; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* mkfifo */
 	case 132: {
 		struct mkfifo_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->mode; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* sendto */
 	case 133: {
 		struct sendto_args *p = params;
 		iarg[0] = p->s; /* int */
 		uarg[1] = (intptr_t) p->buf; /* caddr_t */
 		uarg[2] = p->len; /* size_t */
 		iarg[3] = p->flags; /* int */
 		uarg[4] = (intptr_t) p->to; /* caddr_t */
 		iarg[5] = p->tolen; /* int */
 		*n_args = 6;
 		break;
 	}
 	/* shutdown */
 	case 134: {
 		struct shutdown_args *p = params;
 		iarg[0] = p->s; /* int */
 		iarg[1] = p->how; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* socketpair */
 	case 135: {
 		struct socketpair_args *p = params;
 		iarg[0] = p->domain; /* int */
 		iarg[1] = p->type; /* int */
 		iarg[2] = p->protocol; /* int */
 		uarg[3] = (intptr_t) p->rsv; /* int * */
 		*n_args = 4;
 		break;
 	}
 	/* mkdir */
 	case 136: {
 		struct mkdir_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->mode; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* rmdir */
 	case 137: {
 		struct rmdir_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		*n_args = 1;
 		break;
 	}
 	/* utimes */
 	case 138: {
 		struct utimes_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		uarg[1] = (intptr_t) p->tptr; /* struct timeval * */
 		*n_args = 2;
 		break;
 	}
 	/* adjtime */
 	case 140: {
 		struct adjtime_args *p = params;
 		uarg[0] = (intptr_t) p->delta; /* struct timeval * */
 		uarg[1] = (intptr_t) p->olddelta; /* struct timeval * */
 		*n_args = 2;
 		break;
 	}
 	/* setsid */
 	case 147: {
 		*n_args = 0;
 		break;
 	}
 	/* quotactl */
 	case 148: {
 		struct quotactl_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->cmd; /* int */
 		iarg[2] = p->uid; /* int */
 		uarg[3] = (intptr_t) p->arg; /* caddr_t */
 		*n_args = 4;
 		break;
 	}
 	/* nlm_syscall */
 	case 154: {
 		struct nlm_syscall_args *p = params;
 		iarg[0] = p->debug_level; /* int */
 		iarg[1] = p->grace_period; /* int */
 		iarg[2] = p->addr_count; /* int */
 		uarg[3] = (intptr_t) p->addrs; /* char ** */
 		*n_args = 4;
 		break;
 	}
 	/* nfssvc */
 	case 155: {
 		struct nfssvc_args *p = params;
 		iarg[0] = p->flag; /* int */
 		uarg[1] = (intptr_t) p->argp; /* caddr_t */
 		*n_args = 2;
 		break;
 	}
 	/* lgetfh */
 	case 160: {
 		struct lgetfh_args *p = params;
 		uarg[0] = (intptr_t) p->fname; /* char * */
 		uarg[1] = (intptr_t) p->fhp; /* struct fhandle * */
 		*n_args = 2;
 		break;
 	}
 	/* getfh */
 	case 161: {
 		struct getfh_args *p = params;
 		uarg[0] = (intptr_t) p->fname; /* char * */
 		uarg[1] = (intptr_t) p->fhp; /* struct fhandle * */
 		*n_args = 2;
 		break;
 	}
 	/* getdomainname */
 	case 162: {
 		struct getdomainname_args *p = params;
 		uarg[0] = (intptr_t) p->domainname; /* char * */
 		iarg[1] = p->len; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* setdomainname */
 	case 163: {
 		struct setdomainname_args *p = params;
 		uarg[0] = (intptr_t) p->domainname; /* char * */
 		iarg[1] = p->len; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* uname */
 	case 164: {
 		struct uname_args *p = params;
 		uarg[0] = (intptr_t) p->name; /* struct utsname * */
 		*n_args = 1;
 		break;
 	}
 	/* sysarch */
 	case 165: {
 		struct sysarch_args *p = params;
 		iarg[0] = p->op; /* int */
 		uarg[1] = (intptr_t) p->parms; /* char * */
 		*n_args = 2;
 		break;
 	}
 	/* rtprio */
 	case 166: {
 		struct rtprio_args *p = params;
 		iarg[0] = p->function; /* int */
 		iarg[1] = p->pid; /* pid_t */
 		uarg[2] = (intptr_t) p->rtp; /* struct rtprio * */
 		*n_args = 3;
 		break;
 	}
 	/* semsys */
 	case 169: {
 		struct semsys_args *p = params;
 		iarg[0] = p->which; /* int */
 		iarg[1] = p->a2; /* int */
 		iarg[2] = p->a3; /* int */
 		iarg[3] = p->a4; /* int */
 		iarg[4] = p->a5; /* int */
 		*n_args = 5;
 		break;
 	}
 	/* msgsys */
 	case 170: {
 		struct msgsys_args *p = params;
 		iarg[0] = p->which; /* int */
 		iarg[1] = p->a2; /* int */
 		iarg[2] = p->a3; /* int */
 		iarg[3] = p->a4; /* int */
 		iarg[4] = p->a5; /* int */
 		iarg[5] = p->a6; /* int */
 		*n_args = 6;
 		break;
 	}
 	/* shmsys */
 	case 171: {
 		struct shmsys_args *p = params;
 		iarg[0] = p->which; /* int */
 		iarg[1] = p->a2; /* int */
 		iarg[2] = p->a3; /* int */
 		iarg[3] = p->a4; /* int */
 		*n_args = 4;
 		break;
 	}
 	/* freebsd6_pread */
 	case 173: {
 		struct freebsd6_pread_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->buf; /* void * */
 		uarg[2] = p->nbyte; /* size_t */
 		iarg[3] = p->pad; /* int */
 		iarg[4] = p->offset; /* off_t */
 		*n_args = 5;
 		break;
 	}
 	/* freebsd6_pwrite */
 	case 174: {
 		struct freebsd6_pwrite_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->buf; /* const void * */
 		uarg[2] = p->nbyte; /* size_t */
 		iarg[3] = p->pad; /* int */
 		iarg[4] = p->offset; /* off_t */
 		*n_args = 5;
 		break;
 	}
+	/* setfib */
+	case 175: {
+		struct setfib_args *p = params;
+		iarg[0] = p->fibnum; /* int */
+		*n_args = 1;
+		break;
+	}
 	/* ntp_adjtime */
 	case 176: {
 		struct ntp_adjtime_args *p = params;
 		uarg[0] = (intptr_t) p->tp; /* struct timex * */
 		*n_args = 1;
 		break;
 	}
 	/* setgid */
 	case 181: {
 		struct setgid_args *p = params;
 		iarg[0] = p->gid; /* gid_t */
 		*n_args = 1;
 		break;
 	}
 	/* setegid */
 	case 182: {
 		struct setegid_args *p = params;
 		iarg[0] = p->egid; /* gid_t */
 		*n_args = 1;
 		break;
 	}
 	/* seteuid */
 	case 183: {
 		struct seteuid_args *p = params;
 		uarg[0] = p->euid; /* uid_t */
 		*n_args = 1;
 		break;
 	}
 	/* stat */
 	case 188: {
 		struct stat_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		uarg[1] = (intptr_t) p->ub; /* struct stat * */
 		*n_args = 2;
 		break;
 	}
 	/* fstat */
 	case 189: {
 		struct fstat_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->sb; /* struct stat * */
 		*n_args = 2;
 		break;
 	}
 	/* lstat */
 	case 190: {
 		struct lstat_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		uarg[1] = (intptr_t) p->ub; /* struct stat * */
 		*n_args = 2;
 		break;
 	}
 	/* pathconf */
 	case 191: {
 		struct pathconf_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->name; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* fpathconf */
 	case 192: {
 		struct fpathconf_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->name; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* getrlimit */
 	case 194: {
 		struct __getrlimit_args *p = params;
 		uarg[0] = p->which; /* u_int */
 		uarg[1] = (intptr_t) p->rlp; /* struct rlimit * */
 		*n_args = 2;
 		break;
 	}
 	/* setrlimit */
 	case 195: {
 		struct __setrlimit_args *p = params;
 		uarg[0] = p->which; /* u_int */
 		uarg[1] = (intptr_t) p->rlp; /* struct rlimit * */
 		*n_args = 2;
 		break;
 	}
 	/* getdirentries */
 	case 196: {
 		struct getdirentries_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->buf; /* char * */
 		uarg[2] = p->count; /* u_int */
 		uarg[3] = (intptr_t) p->basep; /* long * */
 		*n_args = 4;
 		break;
 	}
 	/* freebsd6_mmap */
 	case 197: {
 		struct freebsd6_mmap_args *p = params;
 		uarg[0] = (intptr_t) p->addr; /* caddr_t */
 		uarg[1] = p->len; /* size_t */
 		iarg[2] = p->prot; /* int */
 		iarg[3] = p->flags; /* int */
 		iarg[4] = p->fd; /* int */
 		iarg[5] = p->pad; /* int */
 		iarg[6] = p->pos; /* off_t */
 		*n_args = 7;
 		break;
 	}
 	/* nosys */
 	case 198: {
 		*n_args = 0;
 		break;
 	}
 	/* freebsd6_lseek */
 	case 199: {
 		struct freebsd6_lseek_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->pad; /* int */
 		iarg[2] = p->offset; /* off_t */
 		iarg[3] = p->whence; /* int */
 		*n_args = 4;
 		break;
 	}
 	/* freebsd6_truncate */
 	case 200: {
 		struct freebsd6_truncate_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->pad; /* int */
 		iarg[2] = p->length; /* off_t */
 		*n_args = 3;
 		break;
 	}
 	/* freebsd6_ftruncate */
 	case 201: {
 		struct freebsd6_ftruncate_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->pad; /* int */
 		iarg[2] = p->length; /* off_t */
 		*n_args = 3;
 		break;
 	}
 	/* __sysctl */
 	case 202: {
 		struct sysctl_args *p = params;
 		uarg[0] = (intptr_t) p->name; /* int * */
 		uarg[1] = p->namelen; /* u_int */
 		uarg[2] = (intptr_t) p->old; /* void * */
 		uarg[3] = (intptr_t) p->oldlenp; /* size_t * */
 		uarg[4] = (intptr_t) p->new; /* void * */
 		uarg[5] = p->newlen; /* size_t */
 		*n_args = 6;
 		break;
 	}
 	/* mlock */
 	case 203: {
 		struct mlock_args *p = params;
 		uarg[0] = (intptr_t) p->addr; /* const void * */
 		uarg[1] = p->len; /* size_t */
 		*n_args = 2;
 		break;
 	}
 	/* munlock */
 	case 204: {
 		struct munlock_args *p = params;
 		uarg[0] = (intptr_t) p->addr; /* const void * */
 		uarg[1] = p->len; /* size_t */
 		*n_args = 2;
 		break;
 	}
 	/* undelete */
 	case 205: {
 		struct undelete_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		*n_args = 1;
 		break;
 	}
 	/* futimes */
 	case 206: {
 		struct futimes_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->tptr; /* struct timeval * */
 		*n_args = 2;
 		break;
 	}
 	/* getpgid */
 	case 207: {
 		struct getpgid_args *p = params;
 		iarg[0] = p->pid; /* pid_t */
 		*n_args = 1;
 		break;
 	}
 	/* poll */
 	case 209: {
 		struct poll_args *p = params;
 		uarg[0] = (intptr_t) p->fds; /* struct pollfd * */
 		uarg[1] = p->nfds; /* u_int */
 		iarg[2] = p->timeout; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* lkmnosys */
 	case 210: {
 		*n_args = 0;
 		break;
 	}
 	/* lkmnosys */
 	case 211: {
 		*n_args = 0;
 		break;
 	}
 	/* lkmnosys */
 	case 212: {
 		*n_args = 0;
 		break;
 	}
 	/* lkmnosys */
 	case 213: {
 		*n_args = 0;
 		break;
 	}
 	/* lkmnosys */
 	case 214: {
 		*n_args = 0;
 		break;
 	}
 	/* lkmnosys */
 	case 215: {
 		*n_args = 0;
 		break;
 	}
 	/* lkmnosys */
 	case 216: {
 		*n_args = 0;
 		break;
 	}
 	/* lkmnosys */
 	case 217: {
 		*n_args = 0;
 		break;
 	}
 	/* lkmnosys */
 	case 218: {
 		*n_args = 0;
 		break;
 	}
 	/* lkmnosys */
 	case 219: {
 		*n_args = 0;
 		break;
 	}
 	/* __semctl */
 	case 220: {
 		struct __semctl_args *p = params;
 		iarg[0] = p->semid; /* int */
 		iarg[1] = p->semnum; /* int */
 		iarg[2] = p->cmd; /* int */
 		uarg[3] = (intptr_t) p->arg; /* union semun * */
 		*n_args = 4;
 		break;
 	}
 	/* semget */
 	case 221: {
 		struct semget_args *p = params;
 		iarg[0] = p->key; /* key_t */
 		iarg[1] = p->nsems; /* int */
 		iarg[2] = p->semflg; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* semop */
 	case 222: {
 		struct semop_args *p = params;
 		iarg[0] = p->semid; /* int */
 		uarg[1] = (intptr_t) p->sops; /* struct sembuf * */
 		uarg[2] = p->nsops; /* size_t */
 		*n_args = 3;
 		break;
 	}
 	/* msgctl */
 	case 224: {
 		struct msgctl_args *p = params;
 		iarg[0] = p->msqid; /* int */
 		iarg[1] = p->cmd; /* int */
 		uarg[2] = (intptr_t) p->buf; /* struct msqid_ds * */
 		*n_args = 3;
 		break;
 	}
 	/* msgget */
 	case 225: {
 		struct msgget_args *p = params;
 		iarg[0] = p->key; /* key_t */
 		iarg[1] = p->msgflg; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* msgsnd */
 	case 226: {
 		struct msgsnd_args *p = params;
 		iarg[0] = p->msqid; /* int */
 		uarg[1] = (intptr_t) p->msgp; /* const void * */
 		uarg[2] = p->msgsz; /* size_t */
 		iarg[3] = p->msgflg; /* int */
 		*n_args = 4;
 		break;
 	}
 	/* msgrcv */
 	case 227: {
 		struct msgrcv_args *p = params;
 		iarg[0] = p->msqid; /* int */
 		uarg[1] = (intptr_t) p->msgp; /* void * */
 		uarg[2] = p->msgsz; /* size_t */
 		iarg[3] = p->msgtyp; /* long */
 		iarg[4] = p->msgflg; /* int */
 		*n_args = 5;
 		break;
 	}
 	/* shmat */
 	case 228: {
 		struct shmat_args *p = params;
 		iarg[0] = p->shmid; /* int */
 		uarg[1] = (intptr_t) p->shmaddr; /* const void * */
 		iarg[2] = p->shmflg; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* shmctl */
 	case 229: {
 		struct shmctl_args *p = params;
 		iarg[0] = p->shmid; /* int */
 		iarg[1] = p->cmd; /* int */
 		uarg[2] = (intptr_t) p->buf; /* struct shmid_ds * */
 		*n_args = 3;
 		break;
 	}
 	/* shmdt */
 	case 230: {
 		struct shmdt_args *p = params;
 		uarg[0] = (intptr_t) p->shmaddr; /* const void * */
 		*n_args = 1;
 		break;
 	}
 	/* shmget */
 	case 231: {
 		struct shmget_args *p = params;
 		iarg[0] = p->key; /* key_t */
 		uarg[1] = p->size; /* size_t */
 		iarg[2] = p->shmflg; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* clock_gettime */
 	case 232: {
 		struct clock_gettime_args *p = params;
 		iarg[0] = p->clock_id; /* clockid_t */
 		uarg[1] = (intptr_t) p->tp; /* struct timespec * */
 		*n_args = 2;
 		break;
 	}
 	/* clock_settime */
 	case 233: {
 		struct clock_settime_args *p = params;
 		iarg[0] = p->clock_id; /* clockid_t */
 		uarg[1] = (intptr_t) p->tp; /* const struct timespec * */
 		*n_args = 2;
 		break;
 	}
 	/* clock_getres */
 	case 234: {
 		struct clock_getres_args *p = params;
 		iarg[0] = p->clock_id; /* clockid_t */
 		uarg[1] = (intptr_t) p->tp; /* struct timespec * */
 		*n_args = 2;
 		break;
 	}
 	/* ktimer_create */
 	case 235: {
 		struct ktimer_create_args *p = params;
 		iarg[0] = p->clock_id; /* clockid_t */
 		uarg[1] = (intptr_t) p->evp; /* struct sigevent * */
 		uarg[2] = (intptr_t) p->timerid; /* int * */
 		*n_args = 3;
 		break;
 	}
 	/* ktimer_delete */
 	case 236: {
 		struct ktimer_delete_args *p = params;
 		iarg[0] = p->timerid; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* ktimer_settime */
 	case 237: {
 		struct ktimer_settime_args *p = params;
 		iarg[0] = p->timerid; /* int */
 		iarg[1] = p->flags; /* int */
 		uarg[2] = (intptr_t) p->value; /* const struct itimerspec * */
 		uarg[3] = (intptr_t) p->ovalue; /* struct itimerspec * */
 		*n_args = 4;
 		break;
 	}
 	/* ktimer_gettime */
 	case 238: {
 		struct ktimer_gettime_args *p = params;
 		iarg[0] = p->timerid; /* int */
 		uarg[1] = (intptr_t) p->value; /* struct itimerspec * */
 		*n_args = 2;
 		break;
 	}
 	/* ktimer_getoverrun */
 	case 239: {
 		struct ktimer_getoverrun_args *p = params;
 		iarg[0] = p->timerid; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* nanosleep */
 	case 240: {
 		struct nanosleep_args *p = params;
 		uarg[0] = (intptr_t) p->rqtp; /* const struct timespec * */
 		uarg[1] = (intptr_t) p->rmtp; /* struct timespec * */
 		*n_args = 2;
 		break;
 	}
 	/* ntp_gettime */
 	case 248: {
 		struct ntp_gettime_args *p = params;
 		uarg[0] = (intptr_t) p->ntvp; /* struct ntptimeval * */
 		*n_args = 1;
 		break;
 	}
 	/* minherit */
 	case 250: {
 		struct minherit_args *p = params;
 		uarg[0] = (intptr_t) p->addr; /* void * */
 		uarg[1] = p->len; /* size_t */
 		iarg[2] = p->inherit; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* rfork */
 	case 251: {
 		struct rfork_args *p = params;
 		iarg[0] = p->flags; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* openbsd_poll */
 	case 252: {
 		struct openbsd_poll_args *p = params;
 		uarg[0] = (intptr_t) p->fds; /* struct pollfd * */
 		uarg[1] = p->nfds; /* u_int */
 		iarg[2] = p->timeout; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* issetugid */
 	case 253: {
 		*n_args = 0;
 		break;
 	}
 	/* lchown */
 	case 254: {
 		struct lchown_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->uid; /* int */
 		iarg[2] = p->gid; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* aio_read */
 	case 255: {
 		struct aio_read_args *p = params;
 		uarg[0] = (intptr_t) p->aiocbp; /* struct aiocb * */
 		*n_args = 1;
 		break;
 	}
 	/* aio_write */
 	case 256: {
 		struct aio_write_args *p = params;
 		uarg[0] = (intptr_t) p->aiocbp; /* struct aiocb * */
 		*n_args = 1;
 		break;
 	}
 	/* lio_listio */
 	case 257: {
 		struct lio_listio_args *p = params;
 		iarg[0] = p->mode; /* int */
 		uarg[1] = (intptr_t) p->acb_list; /* struct aiocb *const * */
 		iarg[2] = p->nent; /* int */
 		uarg[3] = (intptr_t) p->sig; /* struct sigevent * */
 		*n_args = 4;
 		break;
 	}
 	/* getdents */
 	case 272: {
 		struct getdents_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->buf; /* char * */
 		uarg[2] = p->count; /* size_t */
 		*n_args = 3;
 		break;
 	}
 	/* lchmod */
 	case 274: {
 		struct lchmod_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->mode; /* mode_t */
 		*n_args = 2;
 		break;
 	}
 	/* lchown */
 	case 275: {
 		struct lchown_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		uarg[1] = p->uid; /* uid_t */
 		iarg[2] = p->gid; /* gid_t */
 		*n_args = 3;
 		break;
 	}
 	/* lutimes */
 	case 276: {
 		struct lutimes_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		uarg[1] = (intptr_t) p->tptr; /* struct timeval * */
 		*n_args = 2;
 		break;
 	}
 	/* msync */
 	case 277: {
 		struct msync_args *p = params;
 		uarg[0] = (intptr_t) p->addr; /* void * */
 		uarg[1] = p->len; /* size_t */
 		iarg[2] = p->flags; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* nstat */
 	case 278: {
 		struct nstat_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		uarg[1] = (intptr_t) p->ub; /* struct nstat * */
 		*n_args = 2;
 		break;
 	}
 	/* nfstat */
 	case 279: {
 		struct nfstat_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->sb; /* struct nstat * */
 		*n_args = 2;
 		break;
 	}
 	/* nlstat */
 	case 280: {
 		struct nlstat_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		uarg[1] = (intptr_t) p->ub; /* struct nstat * */
 		*n_args = 2;
 		break;
 	}
 	/* preadv */
 	case 289: {
 		struct preadv_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->iovp; /* struct iovec * */
 		uarg[2] = p->iovcnt; /* u_int */
 		iarg[3] = p->offset; /* off_t */
 		*n_args = 4;
 		break;
 	}
 	/* pwritev */
 	case 290: {
 		struct pwritev_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->iovp; /* struct iovec * */
 		uarg[2] = p->iovcnt; /* u_int */
 		iarg[3] = p->offset; /* off_t */
 		*n_args = 4;
 		break;
 	}
 	/* fhopen */
 	case 298: {
 		struct fhopen_args *p = params;
 		uarg[0] = (intptr_t) p->u_fhp; /* const struct fhandle * */
 		iarg[1] = p->flags; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* fhstat */
 	case 299: {
 		struct fhstat_args *p = params;
 		uarg[0] = (intptr_t) p->u_fhp; /* const struct fhandle * */
 		uarg[1] = (intptr_t) p->sb; /* struct stat * */
 		*n_args = 2;
 		break;
 	}
 	/* modnext */
 	case 300: {
 		struct modnext_args *p = params;
 		iarg[0] = p->modid; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* modstat */
 	case 301: {
 		struct modstat_args *p = params;
 		iarg[0] = p->modid; /* int */
 		uarg[1] = (intptr_t) p->stat; /* struct module_stat * */
 		*n_args = 2;
 		break;
 	}
 	/* modfnext */
 	case 302: {
 		struct modfnext_args *p = params;
 		iarg[0] = p->modid; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* modfind */
 	case 303: {
 		struct modfind_args *p = params;
 		uarg[0] = (intptr_t) p->name; /* const char * */
 		*n_args = 1;
 		break;
 	}
 	/* kldload */
 	case 304: {
 		struct kldload_args *p = params;
 		uarg[0] = (intptr_t) p->file; /* const char * */
 		*n_args = 1;
 		break;
 	}
 	/* kldunload */
 	case 305: {
 		struct kldunload_args *p = params;
 		iarg[0] = p->fileid; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* kldfind */
 	case 306: {
 		struct kldfind_args *p = params;
 		uarg[0] = (intptr_t) p->file; /* const char * */
 		*n_args = 1;
 		break;
 	}
 	/* kldnext */
 	case 307: {
 		struct kldnext_args *p = params;
 		iarg[0] = p->fileid; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* kldstat */
 	case 308: {
 		struct kldstat_args *p = params;
 		iarg[0] = p->fileid; /* int */
 		uarg[1] = (intptr_t) p->stat; /* struct kld_file_stat * */
 		*n_args = 2;
 		break;
 	}
 	/* kldfirstmod */
 	case 309: {
 		struct kldfirstmod_args *p = params;
 		iarg[0] = p->fileid; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* getsid */
 	case 310: {
 		struct getsid_args *p = params;
 		iarg[0] = p->pid; /* pid_t */
 		*n_args = 1;
 		break;
 	}
 	/* setresuid */
 	case 311: {
 		struct setresuid_args *p = params;
 		uarg[0] = p->ruid; /* uid_t */
 		uarg[1] = p->euid; /* uid_t */
 		uarg[2] = p->suid; /* uid_t */
 		*n_args = 3;
 		break;
 	}
 	/* setresgid */
 	case 312: {
 		struct setresgid_args *p = params;
 		iarg[0] = p->rgid; /* gid_t */
 		iarg[1] = p->egid; /* gid_t */
 		iarg[2] = p->sgid; /* gid_t */
 		*n_args = 3;
 		break;
 	}
 	/* aio_return */
 	case 314: {
 		struct aio_return_args *p = params;
 		uarg[0] = (intptr_t) p->aiocbp; /* struct aiocb * */
 		*n_args = 1;
 		break;
 	}
 	/* aio_suspend */
 	case 315: {
 		struct aio_suspend_args *p = params;
 		uarg[0] = (intptr_t) p->aiocbp; /* struct aiocb *const * */
 		iarg[1] = p->nent; /* int */
 		uarg[2] = (intptr_t) p->timeout; /* const struct timespec * */
 		*n_args = 3;
 		break;
 	}
 	/* aio_cancel */
 	case 316: {
 		struct aio_cancel_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->aiocbp; /* struct aiocb * */
 		*n_args = 2;
 		break;
 	}
 	/* aio_error */
 	case 317: {
 		struct aio_error_args *p = params;
 		uarg[0] = (intptr_t) p->aiocbp; /* struct aiocb * */
 		*n_args = 1;
 		break;
 	}
 	/* oaio_read */
 	case 318: {
 		struct oaio_read_args *p = params;
 		uarg[0] = (intptr_t) p->aiocbp; /* struct oaiocb * */
 		*n_args = 1;
 		break;
 	}
 	/* oaio_write */
 	case 319: {
 		struct oaio_write_args *p = params;
 		uarg[0] = (intptr_t) p->aiocbp; /* struct oaiocb * */
 		*n_args = 1;
 		break;
 	}
 	/* olio_listio */
 	case 320: {
 		struct olio_listio_args *p = params;
 		iarg[0] = p->mode; /* int */
 		uarg[1] = (intptr_t) p->acb_list; /* struct oaiocb *const * */
 		iarg[2] = p->nent; /* int */
 		uarg[3] = (intptr_t) p->sig; /* struct osigevent * */
 		*n_args = 4;
 		break;
 	}
 	/* yield */
 	case 321: {
 		*n_args = 0;
 		break;
 	}
 	/* mlockall */
 	case 324: {
 		struct mlockall_args *p = params;
 		iarg[0] = p->how; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* munlockall */
 	case 325: {
 		*n_args = 0;
 		break;
 	}
 	/* __getcwd */
 	case 326: {
 		struct __getcwd_args *p = params;
 		uarg[0] = (intptr_t) p->buf; /* u_char * */
 		uarg[1] = p->buflen; /* u_int */
 		*n_args = 2;
 		break;
 	}
 	/* sched_setparam */
 	case 327: {
 		struct sched_setparam_args *p = params;
 		iarg[0] = p->pid; /* pid_t */
 		uarg[1] = (intptr_t) p->param; /* const struct sched_param * */
 		*n_args = 2;
 		break;
 	}
 	/* sched_getparam */
 	case 328: {
 		struct sched_getparam_args *p = params;
 		iarg[0] = p->pid; /* pid_t */
 		uarg[1] = (intptr_t) p->param; /* struct sched_param * */
 		*n_args = 2;
 		break;
 	}
 	/* sched_setscheduler */
 	case 329: {
 		struct sched_setscheduler_args *p = params;
 		iarg[0] = p->pid; /* pid_t */
 		iarg[1] = p->policy; /* int */
 		uarg[2] = (intptr_t) p->param; /* const struct sched_param * */
 		*n_args = 3;
 		break;
 	}
 	/* sched_getscheduler */
 	case 330: {
 		struct sched_getscheduler_args *p = params;
 		iarg[0] = p->pid; /* pid_t */
 		*n_args = 1;
 		break;
 	}
 	/* sched_yield */
 	case 331: {
 		*n_args = 0;
 		break;
 	}
 	/* sched_get_priority_max */
 	case 332: {
 		struct sched_get_priority_max_args *p = params;
 		iarg[0] = p->policy; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* sched_get_priority_min */
 	case 333: {
 		struct sched_get_priority_min_args *p = params;
 		iarg[0] = p->policy; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* sched_rr_get_interval */
 	case 334: {
 		struct sched_rr_get_interval_args *p = params;
 		iarg[0] = p->pid; /* pid_t */
 		uarg[1] = (intptr_t) p->interval; /* struct timespec * */
 		*n_args = 2;
 		break;
 	}
 	/* utrace */
 	case 335: {
 		struct utrace_args *p = params;
 		uarg[0] = (intptr_t) p->addr; /* const void * */
 		uarg[1] = p->len; /* size_t */
 		*n_args = 2;
 		break;
 	}
 	/* kldsym */
 	case 337: {
 		struct kldsym_args *p = params;
 		iarg[0] = p->fileid; /* int */
 		iarg[1] = p->cmd; /* int */
 		uarg[2] = (intptr_t) p->data; /* void * */
 		*n_args = 3;
 		break;
 	}
 	/* jail */
 	case 338: {
 		struct jail_args *p = params;
 		uarg[0] = (intptr_t) p->jail; /* struct jail * */
 		*n_args = 1;
 		break;
 	}
 	/* sigprocmask */
 	case 340: {
 		struct sigprocmask_args *p = params;
 		iarg[0] = p->how; /* int */
 		uarg[1] = (intptr_t) p->set; /* const sigset_t * */
 		uarg[2] = (intptr_t) p->oset; /* sigset_t * */
 		*n_args = 3;
 		break;
 	}
 	/* sigsuspend */
 	case 341: {
 		struct sigsuspend_args *p = params;
 		uarg[0] = (intptr_t) p->sigmask; /* const sigset_t * */
 		*n_args = 1;
 		break;
 	}
 	/* sigpending */
 	case 343: {
 		struct sigpending_args *p = params;
 		uarg[0] = (intptr_t) p->set; /* sigset_t * */
 		*n_args = 1;
 		break;
 	}
 	/* sigtimedwait */
 	case 345: {
 		struct sigtimedwait_args *p = params;
 		uarg[0] = (intptr_t) p->set; /* const sigset_t * */
 		uarg[1] = (intptr_t) p->info; /* siginfo_t * */
 		uarg[2] = (intptr_t) p->timeout; /* const struct timespec * */
 		*n_args = 3;
 		break;
 	}
 	/* sigwaitinfo */
 	case 346: {
 		struct sigwaitinfo_args *p = params;
 		uarg[0] = (intptr_t) p->set; /* const sigset_t * */
 		uarg[1] = (intptr_t) p->info; /* siginfo_t * */
 		*n_args = 2;
 		break;
 	}
 	/* __acl_get_file */
 	case 347: {
 		struct __acl_get_file_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->type; /* acl_type_t */
 		uarg[2] = (intptr_t) p->aclp; /* struct acl * */
 		*n_args = 3;
 		break;
 	}
 	/* __acl_set_file */
 	case 348: {
 		struct __acl_set_file_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->type; /* acl_type_t */
 		uarg[2] = (intptr_t) p->aclp; /* struct acl * */
 		*n_args = 3;
 		break;
 	}
 	/* __acl_get_fd */
 	case 349: {
 		struct __acl_get_fd_args *p = params;
 		iarg[0] = p->filedes; /* int */
 		iarg[1] = p->type; /* acl_type_t */
 		uarg[2] = (intptr_t) p->aclp; /* struct acl * */
 		*n_args = 3;
 		break;
 	}
 	/* __acl_set_fd */
 	case 350: {
 		struct __acl_set_fd_args *p = params;
 		iarg[0] = p->filedes; /* int */
 		iarg[1] = p->type; /* acl_type_t */
 		uarg[2] = (intptr_t) p->aclp; /* struct acl * */
 		*n_args = 3;
 		break;
 	}
 	/* __acl_delete_file */
 	case 351: {
 		struct __acl_delete_file_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->type; /* acl_type_t */
 		*n_args = 2;
 		break;
 	}
 	/* __acl_delete_fd */
 	case 352: {
 		struct __acl_delete_fd_args *p = params;
 		iarg[0] = p->filedes; /* int */
 		iarg[1] = p->type; /* acl_type_t */
 		*n_args = 2;
 		break;
 	}
 	/* __acl_aclcheck_file */
 	case 353: {
 		struct __acl_aclcheck_file_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->type; /* acl_type_t */
 		uarg[2] = (intptr_t) p->aclp; /* struct acl * */
 		*n_args = 3;
 		break;
 	}
 	/* __acl_aclcheck_fd */
 	case 354: {
 		struct __acl_aclcheck_fd_args *p = params;
 		iarg[0] = p->filedes; /* int */
 		iarg[1] = p->type; /* acl_type_t */
 		uarg[2] = (intptr_t) p->aclp; /* struct acl * */
 		*n_args = 3;
 		break;
 	}
 	/* extattrctl */
 	case 355: {
 		struct extattrctl_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->cmd; /* int */
 		uarg[2] = (intptr_t) p->filename; /* const char * */
 		iarg[3] = p->attrnamespace; /* int */
 		uarg[4] = (intptr_t) p->attrname; /* const char * */
 		*n_args = 5;
 		break;
 	}
 	/* extattr_set_file */
 	case 356: {
 		struct extattr_set_file_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->attrnamespace; /* int */
 		uarg[2] = (intptr_t) p->attrname; /* const char * */
 		uarg[3] = (intptr_t) p->data; /* void * */
 		uarg[4] = p->nbytes; /* size_t */
 		*n_args = 5;
 		break;
 	}
 	/* extattr_get_file */
 	case 357: {
 		struct extattr_get_file_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->attrnamespace; /* int */
 		uarg[2] = (intptr_t) p->attrname; /* const char * */
 		uarg[3] = (intptr_t) p->data; /* void * */
 		uarg[4] = p->nbytes; /* size_t */
 		*n_args = 5;
 		break;
 	}
 	/* extattr_delete_file */
 	case 358: {
 		struct extattr_delete_file_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->attrnamespace; /* int */
 		uarg[2] = (intptr_t) p->attrname; /* const char * */
 		*n_args = 3;
 		break;
 	}
 	/* aio_waitcomplete */
 	case 359: {
 		struct aio_waitcomplete_args *p = params;
 		uarg[0] = (intptr_t) p->aiocbp; /* struct aiocb ** */
 		uarg[1] = (intptr_t) p->timeout; /* struct timespec * */
 		*n_args = 2;
 		break;
 	}
 	/* getresuid */
 	case 360: {
 		struct getresuid_args *p = params;
 		uarg[0] = (intptr_t) p->ruid; /* uid_t * */
 		uarg[1] = (intptr_t) p->euid; /* uid_t * */
 		uarg[2] = (intptr_t) p->suid; /* uid_t * */
 		*n_args = 3;
 		break;
 	}
 	/* getresgid */
 	case 361: {
 		struct getresgid_args *p = params;
 		uarg[0] = (intptr_t) p->rgid; /* gid_t * */
 		uarg[1] = (intptr_t) p->egid; /* gid_t * */
 		uarg[2] = (intptr_t) p->sgid; /* gid_t * */
 		*n_args = 3;
 		break;
 	}
 	/* kqueue */
 	case 362: {
 		*n_args = 0;
 		break;
 	}
 	/* kevent */
 	case 363: {
 		struct kevent_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->changelist; /* struct kevent * */
 		iarg[2] = p->nchanges; /* int */
 		uarg[3] = (intptr_t) p->eventlist; /* struct kevent * */
 		iarg[4] = p->nevents; /* int */
 		uarg[5] = (intptr_t) p->timeout; /* const struct timespec * */
 		*n_args = 6;
 		break;
 	}
 	/* lkmressys */
 	case 370: {
 		*n_args = 0;
 		break;
 	}
 	/* extattr_set_fd */
 	case 371: {
 		struct extattr_set_fd_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->attrnamespace; /* int */
 		uarg[2] = (intptr_t) p->attrname; /* const char * */
 		uarg[3] = (intptr_t) p->data; /* void * */
 		uarg[4] = p->nbytes; /* size_t */
 		*n_args = 5;
 		break;
 	}
 	/* extattr_get_fd */
 	case 372: {
 		struct extattr_get_fd_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->attrnamespace; /* int */
 		uarg[2] = (intptr_t) p->attrname; /* const char * */
 		uarg[3] = (intptr_t) p->data; /* void * */
 		uarg[4] = p->nbytes; /* size_t */
 		*n_args = 5;
 		break;
 	}
 	/* extattr_delete_fd */
 	case 373: {
 		struct extattr_delete_fd_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->attrnamespace; /* int */
 		uarg[2] = (intptr_t) p->attrname; /* const char * */
 		*n_args = 3;
 		break;
 	}
 	/* __setugid */
 	case 374: {
 		struct __setugid_args *p = params;
 		iarg[0] = p->flag; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* nfsclnt */
 	case 375: {
 		struct nfsclnt_args *p = params;
 		iarg[0] = p->flag; /* int */
 		uarg[1] = (intptr_t) p->argp; /* caddr_t */
 		*n_args = 2;
 		break;
 	}
 	/* eaccess */
 	case 376: {
 		struct eaccess_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->flags; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* nmount */
 	case 378: {
 		struct nmount_args *p = params;
 		uarg[0] = (intptr_t) p->iovp; /* struct iovec * */
 		uarg[1] = p->iovcnt; /* unsigned int */
 		iarg[2] = p->flags; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* __mac_get_proc */
 	case 384: {
 		struct __mac_get_proc_args *p = params;
 		uarg[0] = (intptr_t) p->mac_p; /* struct mac * */
 		*n_args = 1;
 		break;
 	}
 	/* __mac_set_proc */
 	case 385: {
 		struct __mac_set_proc_args *p = params;
 		uarg[0] = (intptr_t) p->mac_p; /* struct mac * */
 		*n_args = 1;
 		break;
 	}
 	/* __mac_get_fd */
 	case 386: {
 		struct __mac_get_fd_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->mac_p; /* struct mac * */
 		*n_args = 2;
 		break;
 	}
 	/* __mac_get_file */
 	case 387: {
 		struct __mac_get_file_args *p = params;
 		uarg[0] = (intptr_t) p->path_p; /* const char * */
 		uarg[1] = (intptr_t) p->mac_p; /* struct mac * */
 		*n_args = 2;
 		break;
 	}
 	/* __mac_set_fd */
 	case 388: {
 		struct __mac_set_fd_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->mac_p; /* struct mac * */
 		*n_args = 2;
 		break;
 	}
 	/* __mac_set_file */
 	case 389: {
 		struct __mac_set_file_args *p = params;
 		uarg[0] = (intptr_t) p->path_p; /* const char * */
 		uarg[1] = (intptr_t) p->mac_p; /* struct mac * */
 		*n_args = 2;
 		break;
 	}
 	/* kenv */
 	case 390: {
 		struct kenv_args *p = params;
 		iarg[0] = p->what; /* int */
 		uarg[1] = (intptr_t) p->name; /* const char * */
 		uarg[2] = (intptr_t) p->value; /* char * */
 		iarg[3] = p->len; /* int */
 		*n_args = 4;
 		break;
 	}
 	/* lchflags */
 	case 391: {
 		struct lchflags_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->flags; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* uuidgen */
 	case 392: {
 		struct uuidgen_args *p = params;
 		uarg[0] = (intptr_t) p->store; /* struct uuid * */
 		iarg[1] = p->count; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* sendfile */
 	case 393: {
 		struct sendfile_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->s; /* int */
 		iarg[2] = p->offset; /* off_t */
 		uarg[3] = p->nbytes; /* size_t */
 		uarg[4] = (intptr_t) p->hdtr; /* struct sf_hdtr * */
 		uarg[5] = (intptr_t) p->sbytes; /* off_t * */
 		iarg[6] = p->flags; /* int */
 		*n_args = 7;
 		break;
 	}
 	/* mac_syscall */
 	case 394: {
 		struct mac_syscall_args *p = params;
 		uarg[0] = (intptr_t) p->policy; /* const char * */
 		iarg[1] = p->call; /* int */
 		uarg[2] = (intptr_t) p->arg; /* void * */
 		*n_args = 3;
 		break;
 	}
 	/* getfsstat */
 	case 395: {
 		struct getfsstat_args *p = params;
 		uarg[0] = (intptr_t) p->buf; /* struct statfs * */
 		iarg[1] = p->bufsize; /* long */
 		iarg[2] = p->flags; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* statfs */
 	case 396: {
 		struct statfs_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		uarg[1] = (intptr_t) p->buf; /* struct statfs * */
 		*n_args = 2;
 		break;
 	}
 	/* fstatfs */
 	case 397: {
 		struct fstatfs_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->buf; /* struct statfs * */
 		*n_args = 2;
 		break;
 	}
 	/* fhstatfs */
 	case 398: {
 		struct fhstatfs_args *p = params;
 		uarg[0] = (intptr_t) p->u_fhp; /* const struct fhandle * */
 		uarg[1] = (intptr_t) p->buf; /* struct statfs * */
 		*n_args = 2;
 		break;
 	}
 	/* ksem_close */
 	case 400: {
 		struct ksem_close_args *p = params;
 		iarg[0] = p->id; /* semid_t */
 		*n_args = 1;
 		break;
 	}
 	/* ksem_post */
 	case 401: {
 		struct ksem_post_args *p = params;
 		iarg[0] = p->id; /* semid_t */
 		*n_args = 1;
 		break;
 	}
 	/* ksem_wait */
 	case 402: {
 		struct ksem_wait_args *p = params;
 		iarg[0] = p->id; /* semid_t */
 		*n_args = 1;
 		break;
 	}
 	/* ksem_trywait */
 	case 403: {
 		struct ksem_trywait_args *p = params;
 		iarg[0] = p->id; /* semid_t */
 		*n_args = 1;
 		break;
 	}
 	/* ksem_init */
 	case 404: {
 		struct ksem_init_args *p = params;
 		uarg[0] = (intptr_t) p->idp; /* semid_t * */
 		uarg[1] = p->value; /* unsigned int */
 		*n_args = 2;
 		break;
 	}
 	/* ksem_open */
 	case 405: {
 		struct ksem_open_args *p = params;
 		uarg[0] = (intptr_t) p->idp; /* semid_t * */
 		uarg[1] = (intptr_t) p->name; /* const char * */
 		iarg[2] = p->oflag; /* int */
 		iarg[3] = p->mode; /* mode_t */
 		uarg[4] = p->value; /* unsigned int */
 		*n_args = 5;
 		break;
 	}
 	/* ksem_unlink */
 	case 406: {
 		struct ksem_unlink_args *p = params;
 		uarg[0] = (intptr_t) p->name; /* const char * */
 		*n_args = 1;
 		break;
 	}
 	/* ksem_getvalue */
 	case 407: {
 		struct ksem_getvalue_args *p = params;
 		iarg[0] = p->id; /* semid_t */
 		uarg[1] = (intptr_t) p->val; /* int * */
 		*n_args = 2;
 		break;
 	}
 	/* ksem_destroy */
 	case 408: {
 		struct ksem_destroy_args *p = params;
 		iarg[0] = p->id; /* semid_t */
 		*n_args = 1;
 		break;
 	}
 	/* __mac_get_pid */
 	case 409: {
 		struct __mac_get_pid_args *p = params;
 		iarg[0] = p->pid; /* pid_t */
 		uarg[1] = (intptr_t) p->mac_p; /* struct mac * */
 		*n_args = 2;
 		break;
 	}
 	/* __mac_get_link */
 	case 410: {
 		struct __mac_get_link_args *p = params;
 		uarg[0] = (intptr_t) p->path_p; /* const char * */
 		uarg[1] = (intptr_t) p->mac_p; /* struct mac * */
 		*n_args = 2;
 		break;
 	}
 	/* __mac_set_link */
 	case 411: {
 		struct __mac_set_link_args *p = params;
 		uarg[0] = (intptr_t) p->path_p; /* const char * */
 		uarg[1] = (intptr_t) p->mac_p; /* struct mac * */
 		*n_args = 2;
 		break;
 	}
 	/* extattr_set_link */
 	case 412: {
 		struct extattr_set_link_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->attrnamespace; /* int */
 		uarg[2] = (intptr_t) p->attrname; /* const char * */
 		uarg[3] = (intptr_t) p->data; /* void * */
 		uarg[4] = p->nbytes; /* size_t */
 		*n_args = 5;
 		break;
 	}
 	/* extattr_get_link */
 	case 413: {
 		struct extattr_get_link_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->attrnamespace; /* int */
 		uarg[2] = (intptr_t) p->attrname; /* const char * */
 		uarg[3] = (intptr_t) p->data; /* void * */
 		uarg[4] = p->nbytes; /* size_t */
 		*n_args = 5;
 		break;
 	}
 	/* extattr_delete_link */
 	case 414: {
 		struct extattr_delete_link_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->attrnamespace; /* int */
 		uarg[2] = (intptr_t) p->attrname; /* const char * */
 		*n_args = 3;
 		break;
 	}
 	/* __mac_execve */
 	case 415: {
 		struct __mac_execve_args *p = params;
 		uarg[0] = (intptr_t) p->fname; /* char * */
 		uarg[1] = (intptr_t) p->argv; /* char ** */
 		uarg[2] = (intptr_t) p->envv; /* char ** */
 		uarg[3] = (intptr_t) p->mac_p; /* struct mac * */
 		*n_args = 4;
 		break;
 	}
 	/* sigaction */
 	case 416: {
 		struct sigaction_args *p = params;
 		iarg[0] = p->sig; /* int */
 		uarg[1] = (intptr_t) p->act; /* const struct sigaction * */
 		uarg[2] = (intptr_t) p->oact; /* struct sigaction * */
 		*n_args = 3;
 		break;
 	}
 	/* sigreturn */
 	case 417: {
 		struct sigreturn_args *p = params;
 		uarg[0] = (intptr_t) p->sigcntxp; /* const struct __ucontext * */
 		*n_args = 1;
 		break;
 	}
 	/* getcontext */
 	case 421: {
 		struct getcontext_args *p = params;
 		uarg[0] = (intptr_t) p->ucp; /* struct __ucontext * */
 		*n_args = 1;
 		break;
 	}
 	/* setcontext */
 	case 422: {
 		struct setcontext_args *p = params;
 		uarg[0] = (intptr_t) p->ucp; /* const struct __ucontext * */
 		*n_args = 1;
 		break;
 	}
 	/* swapcontext */
 	case 423: {
 		struct swapcontext_args *p = params;
 		uarg[0] = (intptr_t) p->oucp; /* struct __ucontext * */
 		uarg[1] = (intptr_t) p->ucp; /* const struct __ucontext * */
 		*n_args = 2;
 		break;
 	}
 	/* swapoff */
 	case 424: {
 		struct swapoff_args *p = params;
 		uarg[0] = (intptr_t) p->name; /* const char * */
 		*n_args = 1;
 		break;
 	}
 	/* __acl_get_link */
 	case 425: {
 		struct __acl_get_link_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->type; /* acl_type_t */
 		uarg[2] = (intptr_t) p->aclp; /* struct acl * */
 		*n_args = 3;
 		break;
 	}
 	/* __acl_set_link */
 	case 426: {
 		struct __acl_set_link_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->type; /* acl_type_t */
 		uarg[2] = (intptr_t) p->aclp; /* struct acl * */
 		*n_args = 3;
 		break;
 	}
 	/* __acl_delete_link */
 	case 427: {
 		struct __acl_delete_link_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->type; /* acl_type_t */
 		*n_args = 2;
 		break;
 	}
 	/* __acl_aclcheck_link */
 	case 428: {
 		struct __acl_aclcheck_link_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->type; /* acl_type_t */
 		uarg[2] = (intptr_t) p->aclp; /* struct acl * */
 		*n_args = 3;
 		break;
 	}
 	/* sigwait */
 	case 429: {
 		struct sigwait_args *p = params;
 		uarg[0] = (intptr_t) p->set; /* const sigset_t * */
 		uarg[1] = (intptr_t) p->sig; /* int * */
 		*n_args = 2;
 		break;
 	}
 	/* thr_create */
 	case 430: {
 		struct thr_create_args *p = params;
 		uarg[0] = (intptr_t) p->ctx; /* ucontext_t * */
 		uarg[1] = (intptr_t) p->id; /* long * */
 		iarg[2] = p->flags; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* thr_exit */
 	case 431: {
 		struct thr_exit_args *p = params;
 		uarg[0] = (intptr_t) p->state; /* long * */
 		*n_args = 1;
 		break;
 	}
 	/* thr_self */
 	case 432: {
 		struct thr_self_args *p = params;
 		uarg[0] = (intptr_t) p->id; /* long * */
 		*n_args = 1;
 		break;
 	}
 	/* thr_kill */
 	case 433: {
 		struct thr_kill_args *p = params;
 		iarg[0] = p->id; /* long */
 		iarg[1] = p->sig; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* _umtx_lock */
 	case 434: {
 		struct _umtx_lock_args *p = params;
 		uarg[0] = (intptr_t) p->umtx; /* struct umtx * */
 		*n_args = 1;
 		break;
 	}
 	/* _umtx_unlock */
 	case 435: {
 		struct _umtx_unlock_args *p = params;
 		uarg[0] = (intptr_t) p->umtx; /* struct umtx * */
 		*n_args = 1;
 		break;
 	}
 	/* jail_attach */
 	case 436: {
 		struct jail_attach_args *p = params;
 		iarg[0] = p->jid; /* int */
 		*n_args = 1;
 		break;
 	}
 	/* extattr_list_fd */
 	case 437: {
 		struct extattr_list_fd_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->attrnamespace; /* int */
 		uarg[2] = (intptr_t) p->data; /* void * */
 		uarg[3] = p->nbytes; /* size_t */
 		*n_args = 4;
 		break;
 	}
 	/* extattr_list_file */
 	case 438: {
 		struct extattr_list_file_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->attrnamespace; /* int */
 		uarg[2] = (intptr_t) p->data; /* void * */
 		uarg[3] = p->nbytes; /* size_t */
 		*n_args = 4;
 		break;
 	}
 	/* extattr_list_link */
 	case 439: {
 		struct extattr_list_link_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->attrnamespace; /* int */
 		uarg[2] = (intptr_t) p->data; /* void * */
 		uarg[3] = p->nbytes; /* size_t */
 		*n_args = 4;
 		break;
 	}
 	/* ksem_timedwait */
 	case 441: {
 		struct ksem_timedwait_args *p = params;
 		iarg[0] = p->id; /* semid_t */
 		uarg[1] = (intptr_t) p->abstime; /* const struct timespec * */
 		*n_args = 2;
 		break;
 	}
 	/* thr_suspend */
 	case 442: {
 		struct thr_suspend_args *p = params;
 		uarg[0] = (intptr_t) p->timeout; /* const struct timespec * */
 		*n_args = 1;
 		break;
 	}
 	/* thr_wake */
 	case 443: {
 		struct thr_wake_args *p = params;
 		iarg[0] = p->id; /* long */
 		*n_args = 1;
 		break;
 	}
 	/* kldunloadf */
 	case 444: {
 		struct kldunloadf_args *p = params;
 		iarg[0] = p->fileid; /* int */
 		iarg[1] = p->flags; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* audit */
 	case 445: {
 		struct audit_args *p = params;
 		uarg[0] = (intptr_t) p->record; /* const void * */
 		uarg[1] = p->length; /* u_int */
 		*n_args = 2;
 		break;
 	}
 	/* auditon */
 	case 446: {
 		struct auditon_args *p = params;
 		iarg[0] = p->cmd; /* int */
 		uarg[1] = (intptr_t) p->data; /* void * */
 		uarg[2] = p->length; /* u_int */
 		*n_args = 3;
 		break;
 	}
 	/* getauid */
 	case 447: {
 		struct getauid_args *p = params;
 		uarg[0] = (intptr_t) p->auid; /* uid_t * */
 		*n_args = 1;
 		break;
 	}
 	/* setauid */
 	case 448: {
 		struct setauid_args *p = params;
 		uarg[0] = (intptr_t) p->auid; /* uid_t * */
 		*n_args = 1;
 		break;
 	}
 	/* getaudit */
 	case 449: {
 		struct getaudit_args *p = params;
 		uarg[0] = (intptr_t) p->auditinfo; /* struct auditinfo * */
 		*n_args = 1;
 		break;
 	}
 	/* setaudit */
 	case 450: {
 		struct setaudit_args *p = params;
 		uarg[0] = (intptr_t) p->auditinfo; /* struct auditinfo * */
 		*n_args = 1;
 		break;
 	}
 	/* getaudit_addr */
 	case 451: {
 		struct getaudit_addr_args *p = params;
 		uarg[0] = (intptr_t) p->auditinfo_addr; /* struct auditinfo_addr * */
 		uarg[1] = p->length; /* u_int */
 		*n_args = 2;
 		break;
 	}
 	/* setaudit_addr */
 	case 452: {
 		struct setaudit_addr_args *p = params;
 		uarg[0] = (intptr_t) p->auditinfo_addr; /* struct auditinfo_addr * */
 		uarg[1] = p->length; /* u_int */
 		*n_args = 2;
 		break;
 	}
 	/* auditctl */
 	case 453: {
 		struct auditctl_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		*n_args = 1;
 		break;
 	}
 	/* _umtx_op */
 	case 454: {
 		struct _umtx_op_args *p = params;
 		uarg[0] = (intptr_t) p->obj; /* void * */
 		iarg[1] = p->op; /* int */
 		uarg[2] = p->val; /* u_long */
 		uarg[3] = (intptr_t) p->uaddr1; /* void * */
 		uarg[4] = (intptr_t) p->uaddr2; /* void * */
 		*n_args = 5;
 		break;
 	}
 	/* thr_new */
 	case 455: {
 		struct thr_new_args *p = params;
 		uarg[0] = (intptr_t) p->param; /* struct thr_param * */
 		iarg[1] = p->param_size; /* int */
 		*n_args = 2;
 		break;
 	}
 	/* sigqueue */
 	case 456: {
 		struct sigqueue_args *p = params;
 		iarg[0] = p->pid; /* pid_t */
 		iarg[1] = p->signum; /* int */
 		uarg[2] = (intptr_t) p->value; /* void * */
 		*n_args = 3;
 		break;
 	}
 	/* kmq_open */
 	case 457: {
 		struct kmq_open_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->flags; /* int */
 		iarg[2] = p->mode; /* mode_t */
 		uarg[3] = (intptr_t) p->attr; /* const struct mq_attr * */
 		*n_args = 4;
 		break;
 	}
 	/* kmq_setattr */
 	case 458: {
 		struct kmq_setattr_args *p = params;
 		iarg[0] = p->mqd; /* int */
 		uarg[1] = (intptr_t) p->attr; /* const struct mq_attr * */
 		uarg[2] = (intptr_t) p->oattr; /* struct mq_attr * */
 		*n_args = 3;
 		break;
 	}
 	/* kmq_timedreceive */
 	case 459: {
 		struct kmq_timedreceive_args *p = params;
 		iarg[0] = p->mqd; /* int */
 		uarg[1] = (intptr_t) p->msg_ptr; /* char * */
 		uarg[2] = p->msg_len; /* size_t */
 		uarg[3] = (intptr_t) p->msg_prio; /* unsigned * */
 		uarg[4] = (intptr_t) p->abs_timeout; /* const struct timespec * */
 		*n_args = 5;
 		break;
 	}
 	/* kmq_timedsend */
 	case 460: {
 		struct kmq_timedsend_args *p = params;
 		iarg[0] = p->mqd; /* int */
 		uarg[1] = (intptr_t) p->msg_ptr; /* const char * */
 		uarg[2] = p->msg_len; /* size_t */
 		uarg[3] = p->msg_prio; /* unsigned */
 		uarg[4] = (intptr_t) p->abs_timeout; /* const struct timespec * */
 		*n_args = 5;
 		break;
 	}
 	/* kmq_notify */
 	case 461: {
 		struct kmq_notify_args *p = params;
 		iarg[0] = p->mqd; /* int */
 		uarg[1] = (intptr_t) p->sigev; /* const struct sigevent * */
 		*n_args = 2;
 		break;
 	}
 	/* kmq_unlink */
 	case 462: {
 		struct kmq_unlink_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		*n_args = 1;
 		break;
 	}
 	/* abort2 */
 	case 463: {
 		struct abort2_args *p = params;
 		uarg[0] = (intptr_t) p->why; /* const char * */
 		iarg[1] = p->nargs; /* int */
 		uarg[2] = (intptr_t) p->args; /* void ** */
 		*n_args = 3;
 		break;
 	}
 	/* thr_set_name */
 	case 464: {
 		struct thr_set_name_args *p = params;
 		iarg[0] = p->id; /* long */
 		uarg[1] = (intptr_t) p->name; /* const char * */
 		*n_args = 2;
 		break;
 	}
 	/* aio_fsync */
 	case 465: {
 		struct aio_fsync_args *p = params;
 		iarg[0] = p->op; /* int */
 		uarg[1] = (intptr_t) p->aiocbp; /* struct aiocb * */
 		*n_args = 2;
 		break;
 	}
 	/* rtprio_thread */
 	case 466: {
 		struct rtprio_thread_args *p = params;
 		iarg[0] = p->function; /* int */
 		iarg[1] = p->lwpid; /* lwpid_t */
 		uarg[2] = (intptr_t) p->rtp; /* struct rtprio * */
 		*n_args = 3;
 		break;
 	}
 	/* sctp_peeloff */
 	case 471: {
 		struct sctp_peeloff_args *p = params;
 		iarg[0] = p->sd; /* int */
 		uarg[1] = p->name; /* uint32_t */
 		*n_args = 2;
 		break;
 	}
 	/* sctp_generic_sendmsg */
 	case 472: {
 		struct sctp_generic_sendmsg_args *p = params;
 		iarg[0] = p->sd; /* int */
 		uarg[1] = (intptr_t) p->msg; /* caddr_t */
 		iarg[2] = p->mlen; /* int */
 		uarg[3] = (intptr_t) p->to; /* caddr_t */
 		iarg[4] = p->tolen; /* __socklen_t */
 		uarg[5] = (intptr_t) p->sinfo; /* struct sctp_sndrcvinfo * */
 		iarg[6] = p->flags; /* int */
 		*n_args = 7;
 		break;
 	}
 	/* sctp_generic_sendmsg_iov */
 	case 473: {
 		struct sctp_generic_sendmsg_iov_args *p = params;
 		iarg[0] = p->sd; /* int */
 		uarg[1] = (intptr_t) p->iov; /* struct iovec * */
 		iarg[2] = p->iovlen; /* int */
 		uarg[3] = (intptr_t) p->to; /* caddr_t */
 		iarg[4] = p->tolen; /* __socklen_t */
 		uarg[5] = (intptr_t) p->sinfo; /* struct sctp_sndrcvinfo * */
 		iarg[6] = p->flags; /* int */
 		*n_args = 7;
 		break;
 	}
 	/* sctp_generic_recvmsg */
 	case 474: {
 		struct sctp_generic_recvmsg_args *p = params;
 		iarg[0] = p->sd; /* int */
 		uarg[1] = (intptr_t) p->iov; /* struct iovec * */
 		iarg[2] = p->iovlen; /* int */
 		uarg[3] = (intptr_t) p->from; /* struct sockaddr * */
 		uarg[4] = (intptr_t) p->fromlenaddr; /* __socklen_t * */
 		uarg[5] = (intptr_t) p->sinfo; /* struct sctp_sndrcvinfo * */
 		uarg[6] = (intptr_t) p->msg_flags; /* int * */
 		*n_args = 7;
 		break;
 	}
 	/* pread */
 	case 475: {
 		struct pread_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->buf; /* void * */
 		uarg[2] = p->nbyte; /* size_t */
 		iarg[3] = p->offset; /* off_t */
 		*n_args = 4;
 		break;
 	}
 	/* pwrite */
 	case 476: {
 		struct pwrite_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->buf; /* const void * */
 		uarg[2] = p->nbyte; /* size_t */
 		iarg[3] = p->offset; /* off_t */
 		*n_args = 4;
 		break;
 	}
 	/* mmap */
 	case 477: {
 		struct mmap_args *p = params;
 		uarg[0] = (intptr_t) p->addr; /* caddr_t */
 		uarg[1] = p->len; /* size_t */
 		iarg[2] = p->prot; /* int */
 		iarg[3] = p->flags; /* int */
 		iarg[4] = p->fd; /* int */
 		iarg[5] = p->pos; /* off_t */
 		*n_args = 6;
 		break;
 	}
 	/* lseek */
 	case 478: {
 		struct lseek_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->offset; /* off_t */
 		iarg[2] = p->whence; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* truncate */
 	case 479: {
 		struct truncate_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* char * */
 		iarg[1] = p->length; /* off_t */
 		*n_args = 2;
 		break;
 	}
 	/* ftruncate */
 	case 480: {
 		struct ftruncate_args *p = params;
 		iarg[0] = p->fd; /* int */
 		iarg[1] = p->length; /* off_t */
 		*n_args = 2;
 		break;
 	}
 	/* thr_kill2 */
 	case 481: {
 		struct thr_kill2_args *p = params;
 		iarg[0] = p->pid; /* pid_t */
 		iarg[1] = p->id; /* long */
 		iarg[2] = p->sig; /* int */
 		*n_args = 3;
 		break;
 	}
 	/* shm_open */
 	case 482: {
 		struct shm_open_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		iarg[1] = p->flags; /* int */
 		iarg[2] = p->mode; /* mode_t */
 		*n_args = 3;
 		break;
 	}
 	/* shm_unlink */
 	case 483: {
 		struct shm_unlink_args *p = params;
 		uarg[0] = (intptr_t) p->path; /* const char * */
 		*n_args = 1;
 		break;
 	}
 	/* cpuset */
 	case 484: {
 		struct cpuset_args *p = params;
 		uarg[0] = (intptr_t) p->setid; /* cpusetid_t * */
 		*n_args = 1;
 		break;
 	}
 	/* cpuset_setid */
 	case 485: {
 		struct cpuset_setid_args *p = params;
 		iarg[0] = p->which; /* cpuwhich_t */
 		iarg[1] = p->id; /* id_t */
 		iarg[2] = p->setid; /* cpusetid_t */
 		*n_args = 3;
 		break;
 	}
 	/* cpuset_getid */
 	case 486: {
 		struct cpuset_getid_args *p = params;
 		iarg[0] = p->level; /* cpulevel_t */
 		iarg[1] = p->which; /* cpuwhich_t */
 		iarg[2] = p->id; /* id_t */
 		uarg[3] = (intptr_t) p->setid; /* cpusetid_t * */
 		*n_args = 4;
 		break;
 	}
 	/* cpuset_getaffinity */
 	case 487: {
 		struct cpuset_getaffinity_args *p = params;
 		iarg[0] = p->level; /* cpulevel_t */
 		iarg[1] = p->which; /* cpuwhich_t */
 		iarg[2] = p->id; /* id_t */
 		uarg[3] = p->cpusetsize; /* size_t */
 		uarg[4] = (intptr_t) p->mask; /* cpuset_t * */
 		*n_args = 5;
 		break;
 	}
 	/* cpuset_setaffinity */
 	case 488: {
 		struct cpuset_setaffinity_args *p = params;
 		iarg[0] = p->level; /* cpulevel_t */
 		iarg[1] = p->which; /* cpuwhich_t */
 		iarg[2] = p->id; /* id_t */
 		uarg[3] = p->cpusetsize; /* size_t */
 		uarg[4] = (intptr_t) p->mask; /* const cpuset_t * */
 		*n_args = 5;
 		break;
 	}
 	/* faccessat */
 	case 489: {
 		struct faccessat_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->path; /* char * */
 		iarg[2] = p->mode; /* int */
 		iarg[3] = p->flag; /* int */
 		*n_args = 4;
 		break;
 	}
 	/* fchmodat */
 	case 490: {
 		struct fchmodat_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->path; /* char * */
 		iarg[2] = p->mode; /* mode_t */
 		iarg[3] = p->flag; /* int */
 		*n_args = 4;
 		break;
 	}
 	/* fchownat */
 	case 491: {
 		struct fchownat_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->path; /* char * */
 		uarg[2] = p->uid; /* uid_t */
 		iarg[3] = p->gid; /* gid_t */
 		iarg[4] = p->flag; /* int */
 		*n_args = 5;
 		break;
 	}
 	/* fexecve */
 	case 492: {
 		struct fexecve_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->argv; /* char ** */
 		uarg[2] = (intptr_t) p->envv; /* char ** */
 		*n_args = 3;
 		break;
 	}
 	/* fstatat */
 	case 493: {
 		struct fstatat_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->path; /* char * */
 		uarg[2] = (intptr_t) p->buf; /* struct stat * */
 		iarg[3] = p->flag; /* int */
 		*n_args = 4;
 		break;
 	}
 	/* futimesat */
 	case 494: {
 		struct futimesat_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->path; /* char * */
 		uarg[2] = (intptr_t) p->times; /* struct timeval * */
 		*n_args = 3;
 		break;
 	}
 	/* linkat */
 	case 495: {
 		struct linkat_args *p = params;
 		iarg[0] = p->fd1; /* int */
 		uarg[1] = (intptr_t) p->path1; /* char * */
 		iarg[2] = p->fd2; /* int */
 		uarg[3] = (intptr_t) p->path2; /* char * */
 		iarg[4] = p->flag; /* int */
 		*n_args = 5;
 		break;
 	}
 	/* mkdirat */
 	case 496: {
 		struct mkdirat_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->path; /* char * */
 		iarg[2] = p->mode; /* mode_t */
 		*n_args = 3;
 		break;
 	}
 	/* mkfifoat */
 	case 497: {
 		struct mkfifoat_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->path; /* char * */
 		iarg[2] = p->mode; /* mode_t */
 		*n_args = 3;
 		break;
 	}
 	/* mknodat */
 	case 498: {
 		struct mknodat_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->path; /* char * */
 		iarg[2] = p->mode; /* mode_t */
 		iarg[3] = p->dev; /* dev_t */
 		*n_args = 4;
 		break;
 	}
 	/* openat */
 	case 499: {
 		struct openat_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->path; /* char * */
 		iarg[2] = p->flag; /* int */
 		iarg[3] = p->mode; /* mode_t */
 		*n_args = 4;
 		break;
 	}
 	/* readlinkat */
 	case 500: {
 		struct readlinkat_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->path; /* char * */
 		uarg[2] = (intptr_t) p->buf; /* char * */
 		uarg[3] = p->bufsize; /* size_t */
 		*n_args = 4;
 		break;
 	}
 	/* renameat */
 	case 501: {
 		struct renameat_args *p = params;
 		iarg[0] = p->oldfd; /* int */
 		uarg[1] = (intptr_t) p->old; /* char * */
 		iarg[2] = p->newfd; /* int */
 		uarg[3] = (intptr_t) p->new; /* char * */
 		*n_args = 4;
 		break;
 	}
 	/* symlinkat */
 	case 502: {
 		struct symlinkat_args *p = params;
 		uarg[0] = (intptr_t) p->path1; /* char * */
 		iarg[1] = p->fd; /* int */
 		uarg[2] = (intptr_t) p->path2; /* char * */
 		*n_args = 3;
 		break;
 	}
 	/* unlinkat */
 	case 503: {
 		struct unlinkat_args *p = params;
 		iarg[0] = p->fd; /* int */
 		uarg[1] = (intptr_t) p->path; /* char * */
 		iarg[2] = p->flag; /* int */
 		*n_args = 3;
 		break;
 	}
 	default:
 		*n_args = 0;
 		break;
 	};
 }
 static void
 systrace_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
 {
 	const char *p = NULL;
 	switch (sysnum) {
 	/* nosys */
 	case 0:
 		break;
 	/* sys_exit */
 	case 1:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fork */
 	case 2:
 		break;
 	/* read */
 	case 3:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "void *";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* write */
 	case 4:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "const void *";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* open */
 	case 5:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* close */
 	case 6:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* wait4 */
 	case 7:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "struct rusage *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* link */
 	case 9:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* unlink */
 	case 10:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* chdir */
 	case 12:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fchdir */
 	case 13:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* mknod */
 	case 14:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* chmod */
 	case 15:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* chown */
 	case 16:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* obreak */
 	case 17:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getpid */
 	case 20:
 		break;
 	/* mount */
 	case 21:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "caddr_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* unmount */
 	case 22:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setuid */
 	case 23:
 		switch(ndx) {
 		case 0:
 			p = "uid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getuid */
 	case 24:
 		break;
 	/* geteuid */
 	case 25:
 		break;
 	/* ptrace */
 	case 26:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "pid_t";
 			break;
 		case 2:
 			p = "caddr_t";
 			break;
 		case 3:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* recvmsg */
 	case 27:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct msghdr *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sendmsg */
 	case 28:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct msghdr *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* recvfrom */
 	case 29:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "caddr_t";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		case 3:
 			p = "int";
 			break;
 		case 4:
 			p = "struct sockaddr *__restrict";
 			break;
 		case 5:
 			p = "__socklen_t *__restrict";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* accept */
 	case 30:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct sockaddr *__restrict";
 			break;
 		case 2:
 			p = "__socklen_t *__restrict";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getpeername */
 	case 31:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct sockaddr *__restrict";
 			break;
 		case 2:
 			p = "__socklen_t *__restrict";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getsockname */
 	case 32:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct sockaddr *__restrict";
 			break;
 		case 2:
 			p = "__socklen_t *__restrict";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* access */
 	case 33:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* chflags */
 	case 34:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fchflags */
 	case 35:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sync */
 	case 36:
 		break;
 	/* kill */
 	case 37:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getppid */
 	case 39:
 		break;
 	/* dup */
 	case 41:
 		switch(ndx) {
 		case 0:
 			p = "u_int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* pipe */
 	case 42:
 		break;
 	/* getegid */
 	case 43:
 		break;
 	/* profil */
 	case 44:
 		switch(ndx) {
 		case 0:
 			p = "caddr_t";
 			break;
 		case 1:
 			p = "size_t";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		case 3:
 			p = "u_int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ktrace */
 	case 45:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getgid */
 	case 47:
 		break;
 	/* getlogin */
 	case 49:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "u_int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setlogin */
 	case 50:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* acct */
 	case 51:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sigaltstack */
 	case 53:
 		switch(ndx) {
 		case 0:
 			p = "stack_t *";
 			break;
 		case 1:
 			p = "stack_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ioctl */
 	case 54:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "u_long";
 			break;
 		case 2:
 			p = "caddr_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* reboot */
 	case 55:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* revoke */
 	case 56:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* symlink */
 	case 57:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* readlink */
 	case 58:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* execve */
 	case 59:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "char **";
 			break;
 		case 2:
 			p = "char **";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* umask */
 	case 60:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* chroot */
 	case 61:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* msync */
 	case 65:
 		switch(ndx) {
 		case 0:
 			p = "void *";
 			break;
 		case 1:
 			p = "size_t";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* vfork */
 	case 66:
 		break;
 	/* sbrk */
 	case 69:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sstk */
 	case 70:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ovadvise */
 	case 72:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* munmap */
 	case 73:
 		switch(ndx) {
 		case 0:
 			p = "void *";
 			break;
 		case 1:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* mprotect */
 	case 74:
 		switch(ndx) {
 		case 0:
 			p = "const void *";
 			break;
 		case 1:
 			p = "size_t";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* madvise */
 	case 75:
 		switch(ndx) {
 		case 0:
 			p = "void *";
 			break;
 		case 1:
 			p = "size_t";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* mincore */
 	case 78:
 		switch(ndx) {
 		case 0:
 			p = "const void *";
 			break;
 		case 1:
 			p = "size_t";
 			break;
 		case 2:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getgroups */
 	case 79:
 		switch(ndx) {
 		case 0:
 			p = "u_int";
 			break;
 		case 1:
 			p = "gid_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setgroups */
 	case 80:
 		switch(ndx) {
 		case 0:
 			p = "u_int";
 			break;
 		case 1:
 			p = "gid_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getpgrp */
 	case 81:
 		break;
 	/* setpgid */
 	case 82:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setitimer */
 	case 83:
 		switch(ndx) {
 		case 0:
 			p = "u_int";
 			break;
 		case 1:
 			p = "struct itimerval *";
 			break;
 		case 2:
 			p = "struct itimerval *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* swapon */
 	case 85:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getitimer */
 	case 86:
 		switch(ndx) {
 		case 0:
 			p = "u_int";
 			break;
 		case 1:
 			p = "struct itimerval *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getdtablesize */
 	case 89:
 		break;
 	/* dup2 */
 	case 90:
 		switch(ndx) {
 		case 0:
 			p = "u_int";
 			break;
 		case 1:
 			p = "u_int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fcntl */
 	case 92:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "long";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* select */
 	case 93:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "fd_set *";
 			break;
 		case 2:
 			p = "fd_set *";
 			break;
 		case 3:
 			p = "fd_set *";
 			break;
 		case 4:
 			p = "struct timeval *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fsync */
 	case 95:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setpriority */
 	case 96:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* socket */
 	case 97:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* connect */
 	case 98:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "caddr_t";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getpriority */
 	case 100:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* bind */
 	case 104:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "caddr_t";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setsockopt */
 	case 105:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "caddr_t";
 			break;
 		case 4:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* listen */
 	case 106:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* gettimeofday */
 	case 116:
 		switch(ndx) {
 		case 0:
 			p = "struct timeval *";
 			break;
 		case 1:
 			p = "struct timezone *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getrusage */
 	case 117:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct rusage *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getsockopt */
 	case 118:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "caddr_t";
 			break;
 		case 4:
 			p = "int *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* readv */
 	case 120:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct iovec *";
 			break;
 		case 2:
 			p = "u_int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* writev */
 	case 121:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct iovec *";
 			break;
 		case 2:
 			p = "u_int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* settimeofday */
 	case 122:
 		switch(ndx) {
 		case 0:
 			p = "struct timeval *";
 			break;
 		case 1:
 			p = "struct timezone *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fchown */
 	case 123:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fchmod */
 	case 124:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setreuid */
 	case 126:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setregid */
 	case 127:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* rename */
 	case 128:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* flock */
 	case 131:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* mkfifo */
 	case 132:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sendto */
 	case 133:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "caddr_t";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		case 3:
 			p = "int";
 			break;
 		case 4:
 			p = "caddr_t";
 			break;
 		case 5:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* shutdown */
 	case 134:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* socketpair */
 	case 135:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "int *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* mkdir */
 	case 136:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* rmdir */
 	case 137:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* utimes */
 	case 138:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "struct timeval *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* adjtime */
 	case 140:
 		switch(ndx) {
 		case 0:
 			p = "struct timeval *";
 			break;
 		case 1:
 			p = "struct timeval *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setsid */
 	case 147:
 		break;
 	/* quotactl */
 	case 148:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "caddr_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* nlm_syscall */
 	case 154:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "char **";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* nfssvc */
 	case 155:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "caddr_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* lgetfh */
 	case 160:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "struct fhandle *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getfh */
 	case 161:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "struct fhandle *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getdomainname */
 	case 162:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setdomainname */
 	case 163:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* uname */
 	case 164:
 		switch(ndx) {
 		case 0:
 			p = "struct utsname *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sysarch */
 	case 165:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* rtprio */
 	case 166:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "pid_t";
 			break;
 		case 2:
 			p = "struct rtprio *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* semsys */
 	case 169:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "int";
 			break;
 		case 4:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* msgsys */
 	case 170:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "int";
 			break;
 		case 4:
 			p = "int";
 			break;
 		case 5:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* shmsys */
 	case 171:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* freebsd6_pread */
 	case 173:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "void *";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		case 3:
 			p = "int";
 			break;
 		case 4:
 			p = "off_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* freebsd6_pwrite */
 	case 174:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "const void *";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		case 3:
 			p = "int";
 			break;
 		case 4:
 			p = "off_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ntp_adjtime */
 	case 176:
 		switch(ndx) {
 		case 0:
 			p = "struct timex *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setgid */
 	case 181:
 		switch(ndx) {
 		case 0:
 			p = "gid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setegid */
 	case 182:
 		switch(ndx) {
 		case 0:
 			p = "gid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* seteuid */
 	case 183:
 		switch(ndx) {
 		case 0:
 			p = "uid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* stat */
 	case 188:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "struct stat *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fstat */
 	case 189:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct stat *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* lstat */
 	case 190:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "struct stat *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* pathconf */
 	case 191:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fpathconf */
 	case 192:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getrlimit */
 	case 194:
 		switch(ndx) {
 		case 0:
 			p = "u_int";
 			break;
 		case 1:
 			p = "struct rlimit *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setrlimit */
 	case 195:
 		switch(ndx) {
 		case 0:
 			p = "u_int";
 			break;
 		case 1:
 			p = "struct rlimit *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getdirentries */
 	case 196:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "u_int";
 			break;
 		case 3:
 			p = "long *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* freebsd6_mmap */
 	case 197:
 		switch(ndx) {
 		case 0:
 			p = "caddr_t";
 			break;
 		case 1:
 			p = "size_t";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "int";
 			break;
 		case 4:
 			p = "int";
 			break;
 		case 5:
 			p = "int";
 			break;
 		case 6:
 			p = "off_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* nosys */
 	case 198:
 		break;
 	/* freebsd6_lseek */
 	case 199:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "off_t";
 			break;
 		case 3:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* freebsd6_truncate */
 	case 200:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "off_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* freebsd6_ftruncate */
 	case 201:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "off_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __sysctl */
 	case 202:
 		switch(ndx) {
 		case 0:
 			p = "int *";
 			break;
 		case 1:
 			p = "u_int";
 			break;
 		case 2:
 			p = "void *";
 			break;
 		case 3:
 			p = "size_t *";
 			break;
 		case 4:
 			p = "void *";
 			break;
 		case 5:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* mlock */
 	case 203:
 		switch(ndx) {
 		case 0:
 			p = "const void *";
 			break;
 		case 1:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* munlock */
 	case 204:
 		switch(ndx) {
 		case 0:
 			p = "const void *";
 			break;
 		case 1:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* undelete */
 	case 205:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* futimes */
 	case 206:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct timeval *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getpgid */
 	case 207:
 		switch(ndx) {
 		case 0:
 			p = "pid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* poll */
 	case 209:
 		switch(ndx) {
 		case 0:
 			p = "struct pollfd *";
 			break;
 		case 1:
 			p = "u_int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* lkmnosys */
 	case 210:
 		break;
 	/* lkmnosys */
 	case 211:
 		break;
 	/* lkmnosys */
 	case 212:
 		break;
 	/* lkmnosys */
 	case 213:
 		break;
 	/* lkmnosys */
 	case 214:
 		break;
 	/* lkmnosys */
 	case 215:
 		break;
 	/* lkmnosys */
 	case 216:
 		break;
 	/* lkmnosys */
 	case 217:
 		break;
 	/* lkmnosys */
 	case 218:
 		break;
 	/* lkmnosys */
 	case 219:
 		break;
 	/* __semctl */
 	case 220:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "union semun *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* semget */
 	case 221:
 		switch(ndx) {
 		case 0:
 			p = "key_t";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* semop */
 	case 222:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct sembuf *";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* msgctl */
 	case 224:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "struct msqid_ds *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* msgget */
 	case 225:
 		switch(ndx) {
 		case 0:
 			p = "key_t";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* msgsnd */
 	case 226:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "const void *";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		case 3:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* msgrcv */
 	case 227:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "void *";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		case 3:
 			p = "long";
 			break;
 		case 4:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* shmat */
 	case 228:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "const void *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* shmctl */
 	case 229:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "struct shmid_ds *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* shmdt */
 	case 230:
 		switch(ndx) {
 		case 0:
 			p = "const void *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* shmget */
 	case 231:
 		switch(ndx) {
 		case 0:
 			p = "key_t";
 			break;
 		case 1:
 			p = "size_t";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* clock_gettime */
 	case 232:
 		switch(ndx) {
 		case 0:
 			p = "clockid_t";
 			break;
 		case 1:
 			p = "struct timespec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* clock_settime */
 	case 233:
 		switch(ndx) {
 		case 0:
 			p = "clockid_t";
 			break;
 		case 1:
 			p = "const struct timespec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* clock_getres */
 	case 234:
 		switch(ndx) {
 		case 0:
 			p = "clockid_t";
 			break;
 		case 1:
 			p = "struct timespec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ktimer_create */
 	case 235:
 		switch(ndx) {
 		case 0:
 			p = "clockid_t";
 			break;
 		case 1:
 			p = "struct sigevent *";
 			break;
 		case 2:
 			p = "int *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ktimer_delete */
 	case 236:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ktimer_settime */
 	case 237:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "const struct itimerspec *";
 			break;
 		case 3:
 			p = "struct itimerspec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ktimer_gettime */
 	case 238:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct itimerspec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ktimer_getoverrun */
 	case 239:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* nanosleep */
 	case 240:
 		switch(ndx) {
 		case 0:
 			p = "const struct timespec *";
 			break;
 		case 1:
 			p = "struct timespec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ntp_gettime */
 	case 248:
 		switch(ndx) {
 		case 0:
 			p = "struct ntptimeval *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* minherit */
 	case 250:
 		switch(ndx) {
 		case 0:
 			p = "void *";
 			break;
 		case 1:
 			p = "size_t";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* rfork */
 	case 251:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* openbsd_poll */
 	case 252:
 		switch(ndx) {
 		case 0:
 			p = "struct pollfd *";
 			break;
 		case 1:
 			p = "u_int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* issetugid */
 	case 253:
 		break;
 	/* lchown */
 	case 254:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* aio_read */
 	case 255:
 		switch(ndx) {
 		case 0:
 			p = "struct aiocb *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* aio_write */
 	case 256:
 		switch(ndx) {
 		case 0:
 			p = "struct aiocb *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* lio_listio */
 	case 257:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct aiocb *const *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "struct sigevent *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getdents */
 	case 272:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* lchmod */
 	case 274:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "mode_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* lchown */
 	case 275:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "uid_t";
 			break;
 		case 2:
 			p = "gid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* lutimes */
 	case 276:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "struct timeval *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* msync */
 	case 277:
 		switch(ndx) {
 		case 0:
 			p = "void *";
 			break;
 		case 1:
 			p = "size_t";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* nstat */
 	case 278:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "struct nstat *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* nfstat */
 	case 279:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct nstat *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* nlstat */
 	case 280:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "struct nstat *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* preadv */
 	case 289:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct iovec *";
 			break;
 		case 2:
 			p = "u_int";
 			break;
 		case 3:
 			p = "off_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* pwritev */
 	case 290:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct iovec *";
 			break;
 		case 2:
 			p = "u_int";
 			break;
 		case 3:
 			p = "off_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fhopen */
 	case 298:
 		switch(ndx) {
 		case 0:
 			p = "const struct fhandle *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fhstat */
 	case 299:
 		switch(ndx) {
 		case 0:
 			p = "const struct fhandle *";
 			break;
 		case 1:
 			p = "struct stat *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* modnext */
 	case 300:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* modstat */
 	case 301:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct module_stat *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* modfnext */
 	case 302:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* modfind */
 	case 303:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kldload */
 	case 304:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kldunload */
 	case 305:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kldfind */
 	case 306:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kldnext */
 	case 307:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kldstat */
 	case 308:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct kld_file_stat *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kldfirstmod */
 	case 309:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getsid */
 	case 310:
 		switch(ndx) {
 		case 0:
 			p = "pid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setresuid */
 	case 311:
 		switch(ndx) {
 		case 0:
 			p = "uid_t";
 			break;
 		case 1:
 			p = "uid_t";
 			break;
 		case 2:
 			p = "uid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setresgid */
 	case 312:
 		switch(ndx) {
 		case 0:
 			p = "gid_t";
 			break;
 		case 1:
 			p = "gid_t";
 			break;
 		case 2:
 			p = "gid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* aio_return */
 	case 314:
 		switch(ndx) {
 		case 0:
 			p = "struct aiocb *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* aio_suspend */
 	case 315:
 		switch(ndx) {
 		case 0:
 			p = "struct aiocb *const *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "const struct timespec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* aio_cancel */
 	case 316:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct aiocb *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* aio_error */
 	case 317:
 		switch(ndx) {
 		case 0:
 			p = "struct aiocb *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* oaio_read */
 	case 318:
 		switch(ndx) {
 		case 0:
 			p = "struct oaiocb *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* oaio_write */
 	case 319:
 		switch(ndx) {
 		case 0:
 			p = "struct oaiocb *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* olio_listio */
 	case 320:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct oaiocb *const *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "struct osigevent *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* yield */
 	case 321:
 		break;
 	/* mlockall */
 	case 324:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* munlockall */
 	case 325:
 		break;
 	/* __getcwd */
 	case 326:
 		switch(ndx) {
 		case 0:
 			p = "u_char *";
 			break;
 		case 1:
 			p = "u_int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sched_setparam */
 	case 327:
 		switch(ndx) {
 		case 0:
 			p = "pid_t";
 			break;
 		case 1:
 			p = "const struct sched_param *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sched_getparam */
 	case 328:
 		switch(ndx) {
 		case 0:
 			p = "pid_t";
 			break;
 		case 1:
 			p = "struct sched_param *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sched_setscheduler */
 	case 329:
 		switch(ndx) {
 		case 0:
 			p = "pid_t";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "const struct sched_param *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sched_getscheduler */
 	case 330:
 		switch(ndx) {
 		case 0:
 			p = "pid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sched_yield */
 	case 331:
 		break;
 	/* sched_get_priority_max */
 	case 332:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sched_get_priority_min */
 	case 333:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sched_rr_get_interval */
 	case 334:
 		switch(ndx) {
 		case 0:
 			p = "pid_t";
 			break;
 		case 1:
 			p = "struct timespec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* utrace */
 	case 335:
 		switch(ndx) {
 		case 0:
 			p = "const void *";
 			break;
 		case 1:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kldsym */
 	case 337:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "void *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* jail */
 	case 338:
 		switch(ndx) {
 		case 0:
 			p = "struct jail *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sigprocmask */
 	case 340:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "const sigset_t *";
 			break;
 		case 2:
 			p = "sigset_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sigsuspend */
 	case 341:
 		switch(ndx) {
 		case 0:
 			p = "const sigset_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sigpending */
 	case 343:
 		switch(ndx) {
 		case 0:
 			p = "sigset_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sigtimedwait */
 	case 345:
 		switch(ndx) {
 		case 0:
 			p = "const sigset_t *";
 			break;
 		case 1:
 			p = "siginfo_t *";
 			break;
 		case 2:
 			p = "const struct timespec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sigwaitinfo */
 	case 346:
 		switch(ndx) {
 		case 0:
 			p = "const sigset_t *";
 			break;
 		case 1:
 			p = "siginfo_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __acl_get_file */
 	case 347:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "acl_type_t";
 			break;
 		case 2:
 			p = "struct acl *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __acl_set_file */
 	case 348:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "acl_type_t";
 			break;
 		case 2:
 			p = "struct acl *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __acl_get_fd */
 	case 349:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "acl_type_t";
 			break;
 		case 2:
 			p = "struct acl *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __acl_set_fd */
 	case 350:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "acl_type_t";
 			break;
 		case 2:
 			p = "struct acl *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __acl_delete_file */
 	case 351:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "acl_type_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __acl_delete_fd */
 	case 352:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "acl_type_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __acl_aclcheck_file */
 	case 353:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "acl_type_t";
 			break;
 		case 2:
 			p = "struct acl *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __acl_aclcheck_fd */
 	case 354:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "acl_type_t";
 			break;
 		case 2:
 			p = "struct acl *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* extattrctl */
 	case 355:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "const char *";
 			break;
 		case 3:
 			p = "int";
 			break;
 		case 4:
 			p = "const char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* extattr_set_file */
 	case 356:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "const char *";
 			break;
 		case 3:
 			p = "void *";
 			break;
 		case 4:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* extattr_get_file */
 	case 357:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "const char *";
 			break;
 		case 3:
 			p = "void *";
 			break;
 		case 4:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* extattr_delete_file */
 	case 358:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "const char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* aio_waitcomplete */
 	case 359:
 		switch(ndx) {
 		case 0:
 			p = "struct aiocb **";
 			break;
 		case 1:
 			p = "struct timespec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getresuid */
 	case 360:
 		switch(ndx) {
 		case 0:
 			p = "uid_t *";
 			break;
 		case 1:
 			p = "uid_t *";
 			break;
 		case 2:
 			p = "uid_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getresgid */
 	case 361:
 		switch(ndx) {
 		case 0:
 			p = "gid_t *";
 			break;
 		case 1:
 			p = "gid_t *";
 			break;
 		case 2:
 			p = "gid_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kqueue */
 	case 362:
 		break;
 	/* kevent */
 	case 363:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct kevent *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "struct kevent *";
 			break;
 		case 4:
 			p = "int";
 			break;
 		case 5:
 			p = "const struct timespec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* lkmressys */
 	case 370:
 		break;
 	/* extattr_set_fd */
 	case 371:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "const char *";
 			break;
 		case 3:
 			p = "void *";
 			break;
 		case 4:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* extattr_get_fd */
 	case 372:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "const char *";
 			break;
 		case 3:
 			p = "void *";
 			break;
 		case 4:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* extattr_delete_fd */
 	case 373:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "const char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __setugid */
 	case 374:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* nfsclnt */
 	case 375:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "caddr_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* eaccess */
 	case 376:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* nmount */
 	case 378:
 		switch(ndx) {
 		case 0:
 			p = "struct iovec *";
 			break;
 		case 1:
 			p = "unsigned int";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __mac_get_proc */
 	case 384:
 		switch(ndx) {
 		case 0:
 			p = "struct mac *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __mac_set_proc */
 	case 385:
 		switch(ndx) {
 		case 0:
 			p = "struct mac *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __mac_get_fd */
 	case 386:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct mac *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __mac_get_file */
 	case 387:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "struct mac *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __mac_set_fd */
 	case 388:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct mac *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __mac_set_file */
 	case 389:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "struct mac *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kenv */
 	case 390:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "const char *";
 			break;
 		case 2:
 			p = "char *";
 			break;
 		case 3:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* lchflags */
 	case 391:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* uuidgen */
 	case 392:
 		switch(ndx) {
 		case 0:
 			p = "struct uuid *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sendfile */
 	case 393:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "off_t";
 			break;
 		case 3:
 			p = "size_t";
 			break;
 		case 4:
 			p = "struct sf_hdtr *";
 			break;
 		case 5:
 			p = "off_t *";
 			break;
 		case 6:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* mac_syscall */
 	case 394:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "void *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getfsstat */
 	case 395:
 		switch(ndx) {
 		case 0:
 			p = "struct statfs *";
 			break;
 		case 1:
 			p = "long";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* statfs */
 	case 396:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "struct statfs *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fstatfs */
 	case 397:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct statfs *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fhstatfs */
 	case 398:
 		switch(ndx) {
 		case 0:
 			p = "const struct fhandle *";
 			break;
 		case 1:
 			p = "struct statfs *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ksem_close */
 	case 400:
 		switch(ndx) {
 		case 0:
 			p = "semid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ksem_post */
 	case 401:
 		switch(ndx) {
 		case 0:
 			p = "semid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ksem_wait */
 	case 402:
 		switch(ndx) {
 		case 0:
 			p = "semid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ksem_trywait */
 	case 403:
 		switch(ndx) {
 		case 0:
 			p = "semid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ksem_init */
 	case 404:
 		switch(ndx) {
 		case 0:
 			p = "semid_t *";
 			break;
 		case 1:
 			p = "unsigned int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ksem_open */
 	case 405:
 		switch(ndx) {
 		case 0:
 			p = "semid_t *";
 			break;
 		case 1:
 			p = "const char *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "mode_t";
 			break;
 		case 4:
 			p = "unsigned int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ksem_unlink */
 	case 406:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ksem_getvalue */
 	case 407:
 		switch(ndx) {
 		case 0:
 			p = "semid_t";
 			break;
 		case 1:
 			p = "int *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ksem_destroy */
 	case 408:
 		switch(ndx) {
 		case 0:
 			p = "semid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __mac_get_pid */
 	case 409:
 		switch(ndx) {
 		case 0:
 			p = "pid_t";
 			break;
 		case 1:
 			p = "struct mac *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __mac_get_link */
 	case 410:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "struct mac *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __mac_set_link */
 	case 411:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "struct mac *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* extattr_set_link */
 	case 412:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "const char *";
 			break;
 		case 3:
 			p = "void *";
 			break;
 		case 4:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* extattr_get_link */
 	case 413:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "const char *";
 			break;
 		case 3:
 			p = "void *";
 			break;
 		case 4:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* extattr_delete_link */
 	case 414:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "const char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __mac_execve */
 	case 415:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "char **";
 			break;
 		case 2:
 			p = "char **";
 			break;
 		case 3:
 			p = "struct mac *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sigaction */
 	case 416:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "const struct sigaction *";
 			break;
 		case 2:
 			p = "struct sigaction *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sigreturn */
 	case 417:
 		switch(ndx) {
 		case 0:
 			p = "const struct __ucontext *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getcontext */
 	case 421:
 		switch(ndx) {
 		case 0:
 			p = "struct __ucontext *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setcontext */
 	case 422:
 		switch(ndx) {
 		case 0:
 			p = "const struct __ucontext *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* swapcontext */
 	case 423:
 		switch(ndx) {
 		case 0:
 			p = "struct __ucontext *";
 			break;
 		case 1:
 			p = "const struct __ucontext *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* swapoff */
 	case 424:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __acl_get_link */
 	case 425:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "acl_type_t";
 			break;
 		case 2:
 			p = "struct acl *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __acl_set_link */
 	case 426:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "acl_type_t";
 			break;
 		case 2:
 			p = "struct acl *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __acl_delete_link */
 	case 427:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "acl_type_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* __acl_aclcheck_link */
 	case 428:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "acl_type_t";
 			break;
 		case 2:
 			p = "struct acl *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sigwait */
 	case 429:
 		switch(ndx) {
 		case 0:
 			p = "const sigset_t *";
 			break;
 		case 1:
 			p = "int *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* thr_create */
 	case 430:
 		switch(ndx) {
 		case 0:
 			p = "ucontext_t *";
 			break;
 		case 1:
 			p = "long *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* thr_exit */
 	case 431:
 		switch(ndx) {
 		case 0:
 			p = "long *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* thr_self */
 	case 432:
 		switch(ndx) {
 		case 0:
 			p = "long *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* thr_kill */
 	case 433:
 		switch(ndx) {
 		case 0:
 			p = "long";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* _umtx_lock */
 	case 434:
 		switch(ndx) {
 		case 0:
 			p = "struct umtx *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* _umtx_unlock */
 	case 435:
 		switch(ndx) {
 		case 0:
 			p = "struct umtx *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* jail_attach */
 	case 436:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* extattr_list_fd */
 	case 437:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "void *";
 			break;
 		case 3:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* extattr_list_file */
 	case 438:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "void *";
 			break;
 		case 3:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* extattr_list_link */
 	case 439:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "void *";
 			break;
 		case 3:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ksem_timedwait */
 	case 441:
 		switch(ndx) {
 		case 0:
 			p = "semid_t";
 			break;
 		case 1:
 			p = "const struct timespec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* thr_suspend */
 	case 442:
 		switch(ndx) {
 		case 0:
 			p = "const struct timespec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* thr_wake */
 	case 443:
 		switch(ndx) {
 		case 0:
 			p = "long";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kldunloadf */
 	case 444:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* audit */
 	case 445:
 		switch(ndx) {
 		case 0:
 			p = "const void *";
 			break;
 		case 1:
 			p = "u_int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* auditon */
 	case 446:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "void *";
 			break;
 		case 2:
 			p = "u_int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getauid */
 	case 447:
 		switch(ndx) {
 		case 0:
 			p = "uid_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setauid */
 	case 448:
 		switch(ndx) {
 		case 0:
 			p = "uid_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getaudit */
 	case 449:
 		switch(ndx) {
 		case 0:
 			p = "struct auditinfo *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setaudit */
 	case 450:
 		switch(ndx) {
 		case 0:
 			p = "struct auditinfo *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* getaudit_addr */
 	case 451:
 		switch(ndx) {
 		case 0:
 			p = "struct auditinfo_addr *";
 			break;
 		case 1:
 			p = "u_int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* setaudit_addr */
 	case 452:
 		switch(ndx) {
 		case 0:
 			p = "struct auditinfo_addr *";
 			break;
 		case 1:
 			p = "u_int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* auditctl */
 	case 453:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* _umtx_op */
 	case 454:
 		switch(ndx) {
 		case 0:
 			p = "void *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "u_long";
 			break;
 		case 3:
 			p = "void *";
 			break;
 		case 4:
 			p = "void *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* thr_new */
 	case 455:
 		switch(ndx) {
 		case 0:
 			p = "struct thr_param *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sigqueue */
 	case 456:
 		switch(ndx) {
 		case 0:
 			p = "pid_t";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "void *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kmq_open */
 	case 457:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "mode_t";
 			break;
 		case 3:
 			p = "const struct mq_attr *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kmq_setattr */
 	case 458:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "const struct mq_attr *";
 			break;
 		case 2:
 			p = "struct mq_attr *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kmq_timedreceive */
 	case 459:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		case 3:
 			p = "unsigned *";
 			break;
 		case 4:
 			p = "const struct timespec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kmq_timedsend */
 	case 460:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "const char *";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		case 3:
 			p = "unsigned";
 			break;
 		case 4:
 			p = "const struct timespec *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kmq_notify */
 	case 461:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "const struct sigevent *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* kmq_unlink */
 	case 462:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* abort2 */
 	case 463:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "void **";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* thr_set_name */
 	case 464:
 		switch(ndx) {
 		case 0:
 			p = "long";
 			break;
 		case 1:
 			p = "const char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* aio_fsync */
 	case 465:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct aiocb *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* rtprio_thread */
 	case 466:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "lwpid_t";
 			break;
 		case 2:
 			p = "struct rtprio *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sctp_peeloff */
 	case 471:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "uint32_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sctp_generic_sendmsg */
 	case 472:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "caddr_t";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "caddr_t";
 			break;
 		case 4:
 			p = "__socklen_t";
 			break;
 		case 5:
 			p = "struct sctp_sndrcvinfo *";
 			break;
 		case 6:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sctp_generic_sendmsg_iov */
 	case 473:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct iovec *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "caddr_t";
 			break;
 		case 4:
 			p = "__socklen_t";
 			break;
 		case 5:
 			p = "struct sctp_sndrcvinfo *";
 			break;
 		case 6:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* sctp_generic_recvmsg */
 	case 474:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "struct iovec *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "struct sockaddr *";
 			break;
 		case 4:
 			p = "__socklen_t *";
 			break;
 		case 5:
 			p = "struct sctp_sndrcvinfo *";
 			break;
 		case 6:
 			p = "int *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* pread */
 	case 475:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "void *";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		case 3:
 			p = "off_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* pwrite */
 	case 476:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "const void *";
 			break;
 		case 2:
 			p = "size_t";
 			break;
 		case 3:
 			p = "off_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* mmap */
 	case 477:
 		switch(ndx) {
 		case 0:
 			p = "caddr_t";
 			break;
 		case 1:
 			p = "size_t";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "int";
 			break;
 		case 4:
 			p = "int";
 			break;
 		case 5:
 			p = "off_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* lseek */
 	case 478:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "off_t";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* truncate */
 	case 479:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "off_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* ftruncate */
 	case 480:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "off_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* thr_kill2 */
 	case 481:
 		switch(ndx) {
 		case 0:
 			p = "pid_t";
 			break;
 		case 1:
 			p = "long";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* shm_open */
 	case 482:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "mode_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* shm_unlink */
 	case 483:
 		switch(ndx) {
 		case 0:
 			p = "const char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* cpuset */
 	case 484:
 		switch(ndx) {
 		case 0:
 			p = "cpusetid_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* cpuset_setid */
 	case 485:
 		switch(ndx) {
 		case 0:
 			p = "cpuwhich_t";
 			break;
 		case 1:
 			p = "id_t";
 			break;
 		case 2:
 			p = "cpusetid_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* cpuset_getid */
 	case 486:
 		switch(ndx) {
 		case 0:
 			p = "cpulevel_t";
 			break;
 		case 1:
 			p = "cpuwhich_t";
 			break;
 		case 2:
 			p = "id_t";
 			break;
 		case 3:
 			p = "cpusetid_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* cpuset_getaffinity */
 	case 487:
 		switch(ndx) {
 		case 0:
 			p = "cpulevel_t";
 			break;
 		case 1:
 			p = "cpuwhich_t";
 			break;
 		case 2:
 			p = "id_t";
 			break;
 		case 3:
 			p = "size_t";
 			break;
 		case 4:
 			p = "cpuset_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* cpuset_setaffinity */
 	case 488:
 		switch(ndx) {
 		case 0:
 			p = "cpulevel_t";
 			break;
 		case 1:
 			p = "cpuwhich_t";
 			break;
 		case 2:
 			p = "id_t";
 			break;
 		case 3:
 			p = "size_t";
 			break;
 		case 4:
 			p = "const cpuset_t *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* faccessat */
 	case 489:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fchmodat */
 	case 490:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "mode_t";
 			break;
 		case 3:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fchownat */
 	case 491:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "uid_t";
 			break;
 		case 3:
 			p = "gid_t";
 			break;
 		case 4:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fexecve */
 	case 492:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char **";
 			break;
 		case 2:
 			p = "char **";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* fstatat */
 	case 493:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "struct stat *";
 			break;
 		case 3:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* futimesat */
 	case 494:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "struct timeval *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* linkat */
 	case 495:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "char *";
 			break;
 		case 4:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* mkdirat */
 	case 496:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "mode_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* mkfifoat */
 	case 497:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "mode_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* mknodat */
 	case 498:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "mode_t";
 			break;
 		case 3:
 			p = "dev_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* openat */
 	case 499:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "mode_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* readlinkat */
 	case 500:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "char *";
 			break;
 		case 3:
 			p = "size_t";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* renameat */
 	case 501:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		case 3:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* symlinkat */
 	case 502:
 		switch(ndx) {
 		case 0:
 			p = "char *";
 			break;
 		case 1:
 			p = "int";
 			break;
 		case 2:
 			p = "char *";
 			break;
 		default:
 			break;
 		};
 		break;
 	/* unlinkat */
 	case 503:
 		switch(ndx) {
 		case 0:
 			p = "int";
 			break;
 		case 1:
 			p = "char *";
 			break;
 		case 2:
 			p = "int";
 			break;
 		default:
 			break;
 		};
 		break;
 	default:
 		break;
 	};
 	if (p != NULL)
 		strlcpy(desc, p, descsz);
 }
Index: head/sys/kern/uipc_socket.c
===================================================================
--- head/sys/kern/uipc_socket.c	(revision 178887)
+++ head/sys/kern/uipc_socket.c	(revision 178888)
@@ -1,2957 +1,2977 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.
  * Copyright (c) 2004 The FreeBSD Foundation
  * Copyright (c) 2004-2007 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
  */
 
 /*
  * Comments on the socket life cycle:
  *
  * soalloc() sets up socket layer state for a socket, called only by
  * socreate() and sonewconn().  Socket layer private.
  *
  * sodealloc() tears down socket layer state for a socket, called only by
  * sofree(), socreate(), and sonewconn().  Socket layer private.
  *
  * pru_attach() associates protocol layer state with an allocated socket;
  * called only once, may fail, aborting socket allocation.  This is called
  * from socreate() and sonewconn().  Socket layer private.
  *
  * pru_detach() disassociates protocol layer state from an attached socket,
  * and will be called exactly once for sockets in which pru_attach() has
  * been successfully called.  If pru_attach() returned an error,
  * pru_detach() will not be called.  Socket layer private.
  *
  * pru_abort() and pru_close() notify the protocol layer that the last
  * consumer of a socket is starting to tear down the socket, and that the
  * protocol should terminate the connection.  Historically, pru_abort() also
  * detached protocol state from the socket state, but this is no longer the
  * case.
  *
  * socreate() creates a socket and attaches protocol state.  This is a public
  * interface that may be used by socket layer consumers to create new
  * sockets.
  *
  * sonewconn() creates a socket and attaches protocol state.  This is a
  * public interface that may be used by protocols to create new sockets when
  * a new connection is received and will be available for accept() on a
  * listen socket.
  *
  * soclose() destroys a socket after possibly waiting for it to disconnect.
  * This is a public interface that socket consumers should use to close and
  * release a socket when done with it.
  *
  * soabort() destroys a socket without waiting for it to disconnect (used
  * only for incoming connections that are already partially or fully
  * connected).  This is used internally by the socket layer when clearing
  * listen socket queues (due to overflow or close on the listen socket), but
  * is also a public interface protocols may use to abort connections in
  * their incomplete listen queues should they no longer be required.  Sockets
  * placed in completed connection listen queues should not be aborted for
  * reasons described in the comment above the soclose() implementation.  This
  * is not a general purpose close routine, and except in the specific
  * circumstances described here, should not be used.
  *
  * sofree() will free a socket and its protocol state if all references on
  * the socket have been released, and is the public interface to attempt to
  * free a socket when a reference is removed.  This is a socket layer private
  * interface.
  *
  * NOTE: In addition to socreate() and soclose(), which provide a single
  * socket reference to the consumer to be managed as required, there are two
  * calls to explicitly manage socket references, soref(), and sorele().
  * Currently, these are generally required only when transitioning a socket
  * from a listen queue to a file descriptor, in order to prevent garbage
  * collection of the socket at an untimely moment.  For a number of reasons,
  * these interfaces are not preferred, and should be avoided.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_mac.h"
 #include "opt_zero.h"
 #include "opt_compat.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/fcntl.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/domain.h>
 #include <sys/file.h>			/* for struct knote */
 #include <sys/kernel.h>
 #include <sys/event.h>
 #include <sys/eventhandler.h>
 #include <sys/poll.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/resourcevar.h>
+#include <net/route.h>
 #include <sys/signalvar.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 #include <sys/jail.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/uma.h>
 
 #ifdef COMPAT_IA32
 #include <sys/mount.h>
 #include <compat/freebsd32/freebsd32.h>
 
 extern struct sysentvec ia32_freebsd_sysvec;
 #endif
 
 /* Prototypes for file-local (static) helpers. */
 static int	soreceive_rcvoob(struct socket *so, struct uio *uio,
 		    int flags);
 
 /* Knote filter callbacks: detach and event routines for sockets. */
 static void	filt_sordetach(struct knote *kn);
 static int	filt_soread(struct knote *kn, long hint);
 static void	filt_sowdetach(struct knote *kn);
 static int	filt_sowrite(struct knote *kn, long hint);
 static int	filt_solisten(struct knote *kn, long hint);
 
 /*
  * Filter operation tables for listen, read and write filters.  The listen
  * and read tables share filt_sordetach; the write table uses
  * filt_sowdetach.
  *
  * NOTE(review): the initializers are positional; the field order is
  * presumably (isfd, attach, detach, event) -- confirm against the
  * definition of struct filterops.
  */
 static struct filterops solisten_filtops =
 	{ 1, NULL, filt_sordetach, filt_solisten };
 static struct filterops soread_filtops =
 	{ 1, NULL, filt_sordetach, filt_soread };
 static struct filterops sowrite_filtops =
 	{ 1, NULL, filt_sowdetach, filt_sowrite };
 
 /* UMA zone that soalloc() allocates sockets from and sodealloc() frees to. */
 uma_zone_t socket_zone;
 so_gen_t	so_gencnt;	/* generation count for sockets */
 
 /*
  * Global socket limit.  Seeded by init_maxsockets() and raised (never
  * lowered) through sysctl_maxsockets().
  */
 int	maxsockets;
 
 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
 
 /* Ceiling applied to listen backlogs in solisten_proto(). */
 static int somaxconn = SOMAXCONN;
 static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS);
 /* XXX: we don't have SYSCTL_USHORT */
 SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW,
     0, sizeof(int), sysctl_somaxconn, "I", "Maximum pending socket connection "
     "queue size");
 /*
  * Number of sockets currently allocated; incremented in soalloc() and
  * decremented in sodealloc(), both under so_global_mtx.  Exported
  * read-only.
  */
 static int numopensockets;
 SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
     &numopensockets, 0, "Number of open sockets");
 #ifdef ZERO_COPY_SOCKETS
 /* These aren't static because they're used in other files. */
 int so_zero_copy_send = 1;
 int so_zero_copy_receive = 1;
 SYSCTL_NODE(_kern_ipc, OID_AUTO, zero_copy, CTLFLAG_RD, 0,
     "Zero copy controls");
 SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, receive, CTLFLAG_RW,
     &so_zero_copy_receive, 0, "Enable zero copy receive");
 SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW,
     &so_zero_copy_send, 0, "Enable zero copy send");
 #endif /* ZERO_COPY_SOCKETS */
 
 /*
  * accept_mtx locks down per-socket fields relating to accept queues.  See
  * socketvar.h for an annotation of the protected fields of struct socket.
  */
 struct mtx accept_mtx;
 MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF);
 
 /*
  * so_global_mtx protects so_gencnt, numopensockets, and the per-socket
  * so_gencnt field.
  *
  * NOTE(review): the lock name string below reads "so_glabel", which looks
  * like a typo for "so_global".  It is a runtime-visible name, so it is left
  * unchanged here -- confirm before renaming.
  */
 static struct mtx so_global_mtx;
 MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF);
 
 /*
  * General IPC sysctl name space, used by sockets and a variety of other IPC
  * types.
  */
 SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
 
 /*
  * Sysctl to get and set the maximum global sockets limit.  Notify protocols
  * of the change so that they can update their dependent limits as required.
  */
 static int
 sysctl_maxsockets(SYSCTL_HANDLER_ARGS)
 {
 	int error, newmaxsockets;
 
 	newmaxsockets = maxsockets;
 	error = sysctl_handle_int(oidp, &newmaxsockets, 0, req);
 	if (error == 0 && req->newptr) {
 		if (newmaxsockets > maxsockets) {
 			maxsockets = newmaxsockets;
 			if (maxsockets > ((maxfiles / 4) * 3)) {
 				maxfiles = (maxsockets * 5) / 4;
 				maxfilesperproc = (maxfiles * 9) / 10;
 			}
 			EVENTHANDLER_INVOKE(maxsockets_change);
 		} else
 			error = EINVAL;
 	}
 	return (error);
 }
 
 SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW,
     &maxsockets, 0, sysctl_maxsockets, "IU",
     "Maximum number of sockets avaliable");
 
 /*
  * Set the initial value of maxsockets.  Registered through SYSINIT at
  * SI_SUB_TUNABLES: fetch the kern.ipc.maxsockets tunable into maxsockets,
  * then make sure the result is no smaller than either maxfiles or
  * nmbclusters.
  */
 static void
 init_maxsockets(void *ignored)
 {
 	int lower_bound;
 
 	TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
 	lower_bound = imax(maxfiles, nmbclusters);
 	maxsockets = imax(maxsockets, lower_bound);
 }
 SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
 
 /*
  * Socket operation routines.  These routines are called by the routines in
  * sys_socket.c or from a system process, and implement the semantics of
  * socket operations by switching out to the protocol specific routines.
  */
 
 /*
  * Allocate a zero-filled socket from socket_zone without sleeping and set
  * up its socket-layer state: the MAC label (when MAC is compiled in), the
  * send and receive buffer locks, the sb_sx locks and the AIO job queue.
  * The socket is then given a fresh generation number and counted in
  * numopensockets.
  *
  * Returns NULL if the zone allocation or the MAC initialization fails.
  * Because the memory is zeroed, the returned socket has a ref count of 0.
  *
  * It would probably be better to allocate socket and PCB together, but not
  * all protocols can easily be modified to do this.
  */
 static struct socket *
 soalloc(void)
 {
 	struct socket *nso;
 
 	if ((nso = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO)) == NULL)
 		return (NULL);
 #ifdef MAC
 	if (mac_socket_init(nso, M_NOWAIT) != 0) {
 		/* Nothing else has been set up yet; just give the memory back. */
 		uma_zfree(socket_zone, nso);
 		return (NULL);
 	}
 #endif
 	SOCKBUF_LOCK_INIT(&nso->so_snd, "so_snd");
 	SOCKBUF_LOCK_INIT(&nso->so_rcv, "so_rcv");
 	sx_init(&nso->so_snd.sb_sx, "so_snd_sx");
 	sx_init(&nso->so_rcv.sb_sx, "so_rcv_sx");
 	TAILQ_INIT(&nso->so_aiojobq);
 
 	/* Global generation count and open-socket count share one lock. */
 	mtx_lock(&so_global_mtx);
 	so_gencnt++;
 	nso->so_gencnt = so_gencnt;
 	numopensockets++;
 	mtx_unlock(&so_global_mtx);
 
 	return (nso);
 }
 
 /*
  * Free the storage associated with a socket at the socket layer, tear down
  * locks, labels, etc.  All protocol state is assumed already to have been
  * torn down (and possibly never set up) by the caller.
  *
  * The caller must have dropped the reference count to 0 and cleared so_pcb;
  * both conditions are asserted on entry.
  */
 static void
 sodealloc(struct socket *so)
 {
 
 	KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count));
 	KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL"));
 
 	/* Bump the generation count and drop the global open-socket count. */
 	mtx_lock(&so_global_mtx);
 	so->so_gencnt = ++so_gencnt;
 	--numopensockets;	/* Could be below, but faster here. */
 	mtx_unlock(&so_global_mtx);
 	/*
 	 * Reset any non-zero buffer high-water marks to 0 via chgsbsize();
 	 * presumably this returns the space charged to the owning uid --
 	 * confirm against chgsbsize().
 	 */
 	if (so->so_rcv.sb_hiwat)
 		(void)chgsbsize(so->so_cred->cr_uidinfo,
 		    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
 	if (so->so_snd.sb_hiwat)
 		(void)chgsbsize(so->so_cred->cr_uidinfo,
 		    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
 #ifdef INET
 	/* Remove the accept filter if one is present. */
 	if (so->so_accf != NULL)
 		do_setopt_accept_filter(so, NULL);
 #endif
 #ifdef MAC
 	mac_socket_destroy(so);
 #endif
 	/*
 	 * so_cred is read by the chgsbsize() calls above, so the credential
 	 * reference is released only at this point.
 	 */
 	crfree(so->so_cred);
 	/* Destroy the locks set up in soalloc(), then free the memory. */
 	sx_destroy(&so->so_snd.sb_sx);
 	sx_destroy(&so->so_rcv.sb_sx);
 	SOCKBUF_LOCK_DESTROY(&so->so_snd);
 	SOCKBUF_LOCK_DESTROY(&so->so_rcv);
 	uma_zfree(socket_zone, so);
 }
 
 /*
  * socreate returns a socket with a ref count of 1.  The socket should be
  * closed with soclose().
  *
  * The protocol switch entry is looked up from (dom, type, proto), a socket
  * is allocated and its socket-layer fields are filled in, and finally the
  * protocol's pru_attach is called.  On success the socket is stored in *aso
  * and 0 is returned.  On failure *aso is not written and the return value
  * is EPROTONOSUPPORT, EPROTOTYPE, ENOBUFS, or the error from pru_attach.
  */
 int
 socreate(int dom, struct socket **aso, int type, int proto,
     struct ucred *cred, struct thread *td)
 {
 	struct protosw *prp;
 	struct socket *so;
 	int error;
 
 	/* A non-zero proto is looked up explicitly; otherwise by type only. */
 	if (proto)
 		prp = pffindproto(dom, proto, type);
 	else
 		prp = pffindtype(dom, type);
 
 	/* Reject protocols that are missing or cannot attach to a socket. */
 	if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL ||
 	    prp->pr_usrreqs->pru_attach == pru_attach_notsupp)
 		return (EPROTONOSUPPORT);
 
 	/*
 	 * With jail_socket_unixiproute_only set, a jailed credential may
 	 * only create PF_LOCAL, PF_INET and PF_ROUTE sockets.
 	 */
 	if (jailed(cred) && jail_socket_unixiproute_only &&
 	    prp->pr_domain->dom_family != PF_LOCAL &&
 	    prp->pr_domain->dom_family != PF_INET &&
 	    prp->pr_domain->dom_family != PF_ROUTE) {
 		return (EPROTONOSUPPORT);
 	}
 
 	if (prp->pr_type != type)
 		return (EPROTOTYPE);
 	so = soalloc();
 	if (so == NULL)
 		return (ENOBUFS);
 
 	TAILQ_INIT(&so->so_incomp);
 	TAILQ_INIT(&so->so_comp);
 	so->so_type = type;
 	so->so_cred = crhold(cred);
 	/*
 	 * PF_INET and PF_ROUTE sockets take the FIB number of the creating
 	 * process; sockets in every other domain use FIB 0.
 	 *
 	 * NOTE(review): td is dereferenced unconditionally on this path;
 	 * presumably every caller passes a valid thread -- confirm.
 	 */
+	if ((prp->pr_domain->dom_family == PF_INET) ||
+	    (prp->pr_domain->dom_family == PF_ROUTE))
+		so->so_fibnum = td->td_proc->p_fibnum;
+	else
+		so->so_fibnum = 0;
 	so->so_proto = prp;
 #ifdef MAC
 	mac_socket_create(cred, so);
 #endif
 	knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv),
 	    NULL, NULL, NULL);
 	knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd),
 	    NULL, NULL, NULL);
 	/* The caller's reference. */
 	so->so_count = 1;
 	/*
 	 * Auto-sizing of socket buffers is managed by the protocols and
 	 * the appropriate flags must be set in the pru_attach function.
 	 */
 	error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
 	if (error) {
 		/*
 		 * Attach failed: drop the single reference by hand, since
 		 * sodealloc() asserts so_count == 0, and free the socket.
 		 */
 		KASSERT(so->so_count == 1, ("socreate: so_count %d",
 		    so->so_count));
 		so->so_count = 0;
 		sodealloc(so);
 		return (error);
 	}
 	*aso = so;
 	return (0);
 }
 
 #ifdef REGRESSION
 /*
  * Regression-test knob: when cleared, sonewconn() skips its early queue
  * overflow rejection.
  */
 static int regression_sonewconn_earlytest = 1;
 SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW,
     &regression_sonewconn_earlytest, 0, "Perform early sonewconn limit test");
 #endif
 
 /*
  * When an attempt at a new connection is noted on a socket which accepts
  * connections, sonewconn is called.  If the connection is possible (subject
  * to space constraints, etc.) then we allocate a new structure, properly
  * linked into the data structure of the original socket, and return this.
  * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
  *
  * Returns NULL if the listen queue is over its limit, if allocation fails,
  * or if buffer reservation or protocol attach fails.
  *
  * Note: the ref count on the socket is 0 on return.
  */
 struct socket *
 sonewconn(struct socket *head, int connstatus)
 {
 	struct socket *so;
 	int over;
 
 	/*
 	 * Early overflow test: the completed queue may hold at most 1.5
 	 * times so_qlimit.  The accept lock is dropped before the result is
 	 * used, so this is only a snapshot of the queue length.
 	 */
 	ACCEPT_LOCK();
 	over = (head->so_qlen > 3 * head->so_qlimit / 2);
 	ACCEPT_UNLOCK();
 #ifdef REGRESSION
 	if (regression_sonewconn_earlytest && over)
 #else
 	if (over)
 #endif
 		return (NULL);
 	so = soalloc();
 	if (so == NULL)
 		return (NULL);
 	/*
 	 * With an accept filter on the listen socket, connstatus is forced
 	 * to 0 so the new socket always starts on the incomplete queue.
 	 */
 	if ((head->so_options & SO_ACCEPTFILTER) != 0)
 		connstatus = 0;
 	/*
 	 * Inherit socket-layer settings from the listen socket.
 	 *
 	 * NOTE(review): so_fibnum is not copied from head here.  Since
 	 * soalloc() zero-fills the socket it stays 0 unless the protocol
 	 * sets it later -- confirm this is intended.
 	 */
 	so->so_head = head;
 	so->so_type = head->so_type;
 	so->so_options = head->so_options &~ SO_ACCEPTCONN;
 	so->so_linger = head->so_linger;
 	so->so_state = head->so_state | SS_NOFDREF;
 	so->so_proto = head->so_proto;
 	so->so_cred = crhold(head->so_cred);
 #ifdef MAC
 	SOCK_LOCK(head);
 	mac_socket_newconn(head, so);
 	SOCK_UNLOCK(head);
 #endif
 	knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv),
 	    NULL, NULL, NULL);
 	knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd),
 	    NULL, NULL, NULL);
 	/*
 	 * Reserve buffer space matching the listen socket, then attach the
 	 * protocol (proto 0, no thread).  Either failure frees the socket.
 	 */
 	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
 	    (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
 		sodealloc(so);
 		return (NULL);
 	}
 	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
 	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
 	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
 	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
 	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
 	so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
 	so->so_state |= connstatus;
 	ACCEPT_LOCK();
 	if (connstatus) {
 		/* Already connected or confirming: straight to so_comp. */
 		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
 		so->so_qstate |= SQ_COMP;
 		head->so_qlen++;
 	} else {
 		/*
 		 * Keep removing sockets from the head until there's room for
 		 * us to insert on the tail.  In pre-locking revisions, this
 		 * was a simple if(), but as we could be racing with other
 		 * threads and soabort() requires dropping locks, we must
 		 * loop waiting for the condition to be true.
 		 */
 		while (head->so_incqlen > head->so_qlimit) {
 			struct socket *sp;
 			sp = TAILQ_FIRST(&head->so_incomp);
 			TAILQ_REMOVE(&head->so_incomp, sp, so_list);
 			head->so_incqlen--;
 			sp->so_qstate &= ~SQ_INCOMP;
 			sp->so_head = NULL;
 			ACCEPT_UNLOCK();
 			soabort(sp);
 			ACCEPT_LOCK();
 		}
 		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
 		so->so_qstate |= SQ_INCOMP;
 		head->so_incqlen++;
 	}
 	ACCEPT_UNLOCK();
 	/* Wake readers and one sleeper on so_timeo for a completed entry. */
 	if (connstatus) {
 		sorwakeup(head);
 		wakeup_one(&head->so_timeo);
 	}
 	return (so);
 }
 
 int
 sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return ((*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td));
 }
 
 /*
  * solisten() transitions a socket from a non-listening state to a listening
  * state, but can also be used to update the listen queue depth on an
  * existing listen socket.  The protocol will call back into the sockets
  * layer using solisten_proto_check() and solisten_proto() to check and set
  * socket-layer listen state.  Call backs are used so that the protocol can
  * acquire both protocol and socket layer locks in whatever order is required
  * by the protocol.
  *
  * Protocol implementors are advised to hold the socket lock across the
  * socket-layer test and set to avoid races at the socket layer.
  */
 int
 solisten(struct socket *so, int backlog, struct thread *td)
 {
 
 	return ((*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td));
 }
 
 /*
  * Socket-layer check made before a socket is switched to the listening
  * state.  Returns EINVAL if the socket is connected, connecting or
  * disconnecting, and 0 otherwise.  The socket lock must be held.
  */
 int
 solisten_proto_check(struct socket *so)
 {
 	const int busy = SS_ISCONNECTED | SS_ISCONNECTING |
 	    SS_ISDISCONNECTING;
 
 	SOCK_LOCK_ASSERT(so);
 
 	return ((so->so_state & busy) != 0 ? EINVAL : 0);
 }
 
 /*
  * Record socket-layer listen state: store the queue limit and mark the
  * socket as accepting connections.  A backlog that is negative or larger
  * than somaxconn is replaced by somaxconn.  The socket lock must be held.
  */
 void
 solisten_proto(struct socket *so, int backlog)
 {
 	int qlimit;
 
 	SOCK_LOCK_ASSERT(so);
 
 	qlimit = (backlog >= 0 && backlog <= somaxconn) ? backlog : somaxconn;
 	so->so_qlimit = qlimit;
 	so->so_options |= SO_ACCEPTCONN;
 }
 
 /*
  * Attempt to free a socket.  This should really be sotryfree().
  *
  * sofree() will succeed if:
  *
  * - There are no outstanding file descriptor references or related consumers
  *   (so_count == 0).
  *
  * - The socket has been closed by user space, if ever open (SS_NOFDREF).
  *
  * - The protocol does not have an outstanding strong reference on the socket
  *   (SS_PROTOREF).
  *
  * - The socket is not in a completed connection queue, so a process has been
  *   notified that it is present.  If it is removed, the user process may
  *   block in accept() despite select() saying the socket was ready.
  *
  * Otherwise, it will quietly abort so that a future call to sofree(), when
  * conditions are right, can succeed.
  */
 void
 sofree(struct socket *so)
 {
 	struct protosw *pr = so->so_proto;
 	struct socket *head;
 
 	ACCEPT_LOCK_ASSERT();
 	SOCK_LOCK_ASSERT(so);
 
 	if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 ||
 	    (so->so_state & SS_PROTOREF) || (so->so_qstate & SQ_COMP)) {
 		SOCK_UNLOCK(so);
 		ACCEPT_UNLOCK();
 		return;
 	}
 
 	head = so->so_head;
 	if (head != NULL) {
 		KASSERT((so->so_qstate & SQ_COMP) != 0 ||
 		    (so->so_qstate & SQ_INCOMP) != 0,
 		    ("sofree: so_head != NULL, but neither SQ_COMP nor "
 		    "SQ_INCOMP"));
 		KASSERT((so->so_qstate & SQ_COMP) == 0 ||
 		    (so->so_qstate & SQ_INCOMP) == 0,
 		    ("sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP"));
 		TAILQ_REMOVE(&head->so_incomp, so, so_list);
 		head->so_incqlen--;
 		so->so_qstate &= ~SQ_INCOMP;
 		so->so_head = NULL;
 	}
 	KASSERT((so->so_qstate & SQ_COMP) == 0 &&
 	    (so->so_qstate & SQ_INCOMP) == 0,
 	    ("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)",
 	    so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP));
 	if (so->so_options & SO_ACCEPTCONN) {
 		KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated"));
 		KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_comp populated"));
 	}
 	SOCK_UNLOCK(so);
 	ACCEPT_UNLOCK();
 
 	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
 		(*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
 	if (pr->pr_usrreqs->pru_detach != NULL)
 		(*pr->pr_usrreqs->pru_detach)(so);
 
 	/*
 	 * From this point on, we assume that no other references to this
 	 * socket exist anywhere else in the stack.  Therefore, no locks need
 	 * to be acquired or held.
 	 *
 	 * We used to do a lot of socket buffer and socket locking here, as
 	 * well as invoke sorflush() and perform wakeups.  The direct call to
 	 * dom_dispose() and sbrelease_internal() are an inlining of what was
 	 * necessary from sorflush().
 	 *
 	 * Notice that the socket buffer and kqueue state are torn down
 	 * before calling pru_detach.  This means that protocols shold not
 	 * assume they can perform socket wakeups, etc, in their detach code.
 	 */
 	sbdestroy(&so->so_snd, so);
 	sbdestroy(&so->so_rcv, so);
 	knlist_destroy(&so->so_rcv.sb_sel.si_note);
 	knlist_destroy(&so->so_snd.sb_sel.si_note);
 	sodealloc(so);
 }
 
 /*
  * Close a socket on last file table reference removal.  Initiate disconnect
  * if connected.  Free socket when disconnect complete.
  *
  * This function will sorele() the socket.  Note that soclose() may be called
  * prior to the ref count reaching zero.  The actual socket structure will
  * not be freed until the ref count reaches zero.
  */
 int
 soclose(struct socket *so)
 {
 	int error = 0;
 
 	KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter"));
 
 	funsetown(&so->so_sigio);
 	if (so->so_state & SS_ISCONNECTED) {
 		/* Start a disconnect unless one is already in progress. */
 		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
 			error = sodisconnect(so);
 			if (error)
 				goto drop;
 		}
 		if (so->so_options & SO_LINGER) {
 			/* Non-blocking sockets do not wait for the linger. */
 			if ((so->so_state & SS_ISDISCONNECTING) &&
 			    (so->so_state & SS_NBIO))
 				goto drop;
 			/*
 			 * Sleep on so_timeo, for so_linger * hz ticks at a
 			 * time, until the socket is no longer connected or
 			 * tsleep() returns non-zero.
 			 */
 			while (so->so_state & SS_ISCONNECTED) {
 				error = tsleep(&so->so_timeo,
 				    PSOCK | PCATCH, "soclos", so->so_linger * hz);
 				if (error)
 					break;
 			}
 		}
 	}
 
 drop:
 	/* Tell the protocol that the socket is being closed. */
 	if (so->so_proto->pr_usrreqs->pru_close != NULL)
 		(*so->so_proto->pr_usrreqs->pru_close)(so);
 	/*
 	 * For a listen socket, abort everything still queued on it.  Each
 	 * entry is unlinked under the accept lock, which is then dropped
 	 * around soabort() and re-taken, so the queue head is re-read on
 	 * every iteration.
 	 */
 	if (so->so_options & SO_ACCEPTCONN) {
 		struct socket *sp;
 		ACCEPT_LOCK();
 		while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
 			TAILQ_REMOVE(&so->so_incomp, sp, so_list);
 			so->so_incqlen--;
 			sp->so_qstate &= ~SQ_INCOMP;
 			sp->so_head = NULL;
 			ACCEPT_UNLOCK();
 			soabort(sp);
 			ACCEPT_LOCK();
 		}
 		while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
 			TAILQ_REMOVE(&so->so_comp, sp, so_list);
 			so->so_qlen--;
 			sp->so_qstate &= ~SQ_COMP;
 			sp->so_head = NULL;
 			ACCEPT_UNLOCK();
 			soabort(sp);
 			ACCEPT_LOCK();
 		}
 		ACCEPT_UNLOCK();
 	}
 	/*
 	 * Mark the file descriptor reference as gone and drop it.  Both
 	 * locks are held across sorele() and are not released here;
 	 * presumably sorele() releases them (sofree() unlocks both on every
 	 * path) -- confirm against the sorele() definition.
 	 */
 	ACCEPT_LOCK();
 	SOCK_LOCK(so);
 	KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
 	so->so_state |= SS_NOFDREF;
 	sorele(so);
 	return (error);
 }
 
 /*
  * soabort() is used to abruptly tear down a connection, such as when a
  * resource limit is reached (listen queue depth exceeded), or if a listen
  * socket is closed while there are sockets waiting to be accepted.
  *
  * This interface is tricky, because it is called on an unreferenced socket,
  * and must be called only by a thread that has actually removed the socket
  * from the listen queue it was on, or races with other threads are risked.
  *
  * This interface will call into the protocol code, so must not be called
  * with any socket locks held.  Protocols do call it while holding their own
  * recursible protocol mutexes, but this is something that should be subject
  * to review in the future.
  *
  * After the protocol's pru_abort method (if any) has run, the accept lock
  * and the socket lock are acquired and the socket is passed to sofree().
  */
 void
 soabort(struct socket *so)
 {
 
 	/*
 	 * In as much as is possible, assert that no references to this
 	 * socket are held.  This is not quite the same as asserting that the
 	 * current thread is responsible for arranging for no references, but
 	 * is as close as we can get for now.
 	 *
 	 * The listen queue membership flags SQ_COMP and SQ_INCOMP are kept
 	 * in so_qstate, not so_state, so that is the field that has to be
 	 * examined to confirm the socket is off both queues.
 	 */
 	KASSERT(so->so_count == 0, ("soabort: so_count"));
 	KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF"));
 	KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF"));
 	KASSERT((so->so_qstate & SQ_COMP) == 0, ("soabort: SQ_COMP"));
 	KASSERT((so->so_qstate & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP"));
 
 	if (so->so_proto->pr_usrreqs->pru_abort != NULL)
 		(*so->so_proto->pr_usrreqs->pru_abort)(so);
 	/* sofree() is entered with both locks held. */
 	ACCEPT_LOCK();
 	SOCK_LOCK(so);
 	sofree(so);
 }
 
 /*
  * Accept a connection on 'so': under the socket lock, clear SS_NOFDREF
  * (which is asserted to be set on entry), then pass 'nam' to the protocol's
  * pru_accept method and return its result.
  */
 int
 soaccept(struct socket *so, struct sockaddr **nam)
 {
 
 	SOCK_LOCK(so);
 	KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF"));
 	so->so_state &= ~SS_NOFDREF;
 	SOCK_UNLOCK(so);
 
 	return ((*so->so_proto->pr_usrreqs->pru_accept)(so, nam));
 }
 
 /*
  * Initiate a connection from 'so' to the address 'nam'.
  *
  * Returns EOPNOTSUPP for a listening socket, EISCONN when an existing
  * connection cannot be replaced, and otherwise whatever the protocol's
  * pru_connect method returns.
  */
 int
 soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	if (so->so_options & SO_ACCEPTCONN)
 		return (EOPNOTSUPP);
 
 	/*
 	 * A connection-based protocol may only connect once.  For other
 	 * protocols an existing association is dropped first, which lets the
 	 * user disconnect by connecting to, e.g., a null address; if that
 	 * disconnect fails the socket is still reported as connected.
 	 */
 	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) {
 		if (so->so_proto->pr_flags & PR_CONNREQUIRED)
 			return (EISCONN);
 		if (sodisconnect(so) != 0)
 			return (EISCONN);
 	}
 
 	/* Do not let an error left over from an earlier connection bite us. */
 	so->so_error = 0;
 	return ((*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td));
 }
 
 int
 soconnect2(struct socket *so1, struct socket *so2)
 {
 
 	return ((*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2));
 }
 
 /*
  * Ask the protocol to disconnect 'so'.
  *
  * Returns ENOTCONN if the socket is not connected, EALREADY if a disconnect
  * is already in progress, and otherwise the result of the protocol's
  * pru_disconnect method.
  */
 int
 sodisconnect(struct socket *so)
 {
 
 	if (!(so->so_state & SS_ISCONNECTED))
 		return (ENOTCONN);
 	if ((so->so_state & SS_ISDISCONNECTING) != 0)
 		return (EALREADY);
 	return ((*so->so_proto->pr_usrreqs->pru_disconnect)(so));
 }
 
 #ifdef ZERO_COPY_SOCKETS
 /*
  * Counters maintained by the zero-copy send path.
  */
 struct so_zerocopy_stats{
 	int size_ok;	/* bumped by sosend_copyin() before socow_setup() */
 	int align_ok;	/* bumped together with size_ok in sosend_copyin() */
 	int found_ifp;	/* NOTE(review): no user visible in this part of the file */
 };
 /* The single global instance, with all counters starting at zero. */
 struct so_zerocopy_stats so_zerocp_stats = {0,0,0};
 #include <netinet/in.h>
 #include <net/route.h>
 #include <netinet/in_pcb.h>
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 
 /*
  * sosend_copyin() is only used if zero copy sockets are enabled.  Otherwise
  * sosend_dgram() and sosend_generic() use m_uiotombuf().
  *
  * sosend_copyin() accepts a uio and prepares an mbuf chain holding part or
  * all of the data referenced by the uio.  If desired, it uses zero-copy.
  * *space will be updated to reflect data copied in.
  *
  * The chain is handed back through *retmp; the return value is 0 or an
  * errno value (ENOBUFS, or whatever uiomove() reported).  When the uio is
  * fully consumed and MSG_EOR is set in flags, M_EOR is set on the first
  * mbuf of the chain.
  *
  * NB: If atomic I/O is requested, the caller must already have checked that
  * space can hold resid bytes.
  *
  * NB: In the event of an error, the caller may need to free the partial
  * chain pointed to by *retmp.  The contents of both *uio and *space may be
  * modified even in the case of an error.
  */
 static int
 sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
     int flags)
 {
 	struct mbuf *m, **mp, *top;
 	long len, resid;
 	int error;
 #ifdef ZERO_COPY_SOCKETS
 	int cow_send;
 #endif
 
 	*retmp = top = NULL;
 	/* mp always points at the link where the next mbuf is to be stored. */
 	mp = &top;
 	len = 0;
 	resid = uio->uio_resid;
 	error = 0;
 	/* One mbuf is allocated, filled and linked in per iteration. */
 	do {
 #ifdef ZERO_COPY_SOCKETS
 		cow_send = 0;
 #endif /* ZERO_COPY_SOCKETS */
 		if (resid >= MINCLSIZE) {
 #ifdef ZERO_COPY_SOCKETS
 			if (top == NULL) {
 				m = m_gethdr(M_WAITOK, MT_DATA);
 				m->m_pkthdr.len = 0;
 				m->m_pkthdr.rcvif = NULL;
 			} else
 				m = m_get(M_WAITOK, MT_DATA);
 			/*
 			 * Try socow_setup() only when zero-copy send is
 			 * enabled and the remaining data, the available
 			 * space and the current iovec are each at least a
 			 * page long.  A non-zero result is used as the
 			 * length for this mbuf.
 			 */
 			if (so_zero_copy_send &&
 			    resid>=PAGE_SIZE &&
 			    *space>=PAGE_SIZE &&
 			    uio->uio_iov->iov_len>=PAGE_SIZE) {
 				so_zerocp_stats.size_ok++;
 				so_zerocp_stats.align_ok++;
 				cow_send = socow_setup(m, uio);
 				len = cow_send;
 			}
 			/* No zero-copy: attach a cluster and copy as usual. */
 			if (!cow_send) {
 				m_clget(m, M_WAITOK);
 				len = min(min(MCLBYTES, resid), *space);
 			}
 #else /* ZERO_COPY_SOCKETS */
 			if (top == NULL) {
 				m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
 				m->m_pkthdr.len = 0;
 				m->m_pkthdr.rcvif = NULL;
 			} else
 				m = m_getcl(M_WAIT, MT_DATA, 0);
 			len = min(min(MCLBYTES, resid), *space);
 #endif /* ZERO_COPY_SOCKETS */
 		} else {
 			if (top == NULL) {
 				m = m_gethdr(M_WAIT, MT_DATA);
 				m->m_pkthdr.len = 0;
 				m->m_pkthdr.rcvif = NULL;
 
 				len = min(min(MHLEN, resid), *space);
 				/*
 				 * For datagram protocols, leave room
 				 * for protocol headers in first mbuf.
 				 */
 				if (atomic && m && len < MHLEN)
 					MH_ALIGN(m, len);
 			} else {
 				m = m_get(M_WAIT, MT_DATA);
 				len = min(min(MLEN, resid), *space);
 			}
 		}
 		if (m == NULL) {
 			error = ENOBUFS;
 			goto out;
 		}
 
 		*space -= len;
 #ifdef ZERO_COPY_SOCKETS
 		/* In the zero-copy case uiomove() is not called for this mbuf. */
 		if (cow_send)
 			error = 0;
 		else
 #endif /* ZERO_COPY_SOCKETS */
 		error = uiomove(mtod(m, void *), (int)len, uio);
 		resid = uio->uio_resid;
 		m->m_len = len;
 		/*
 		 * The mbuf is linked into the chain before the error check,
 		 * so a failed copy still leaves it reachable through *retmp.
 		 */
 		*mp = m;
 		top->m_pkthdr.len += len;
 		if (error)
 			goto out;
 		mp = &m->m_next;
 		if (resid <= 0) {
 			if (flags & MSG_EOR)
 				top->m_flags |= M_EOR;
 			break;
 		}
 	/* Only atomic sends keep looping, and only while space remains. */
 	} while (*space > 0 && atomic);
 out:
 	*retmp = top;
 	return (error);
 }
 #endif /*ZERO_COPY_SOCKETS*/
 
 /*
  * Derive the wait argument passed to sblock() from a set of MSG_* flags:
  * 0 when MSG_DONTWAIT is set, SBL_WAIT otherwise.
  */
 #define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
 
 /*
  * Send one datagram on a SOCK_DGRAM, PR_ATOMIC socket (both properties are
  * asserted below).  The payload is described by 'uio' or, when 'uio' is
  * NULL, by the mbuf chain 'top'; 'control' optionally carries ancillary
  * data and 'addr' an optional destination.
  *
  * Unlike sosend_generic(), this routine does not take sblock() and never
  * waits for send buffer space: a datagram that does not fit in the space
  * currently available fails with EMSGSIZE.
  *
  * Returns 0 or an errno value.  On every path that reaches 'out' with them
  * still set, 'top' and 'control' are freed here.  Once pru_send() has been
  * called the local pointers are cleared and this function does not touch
  * the chains again (presumably the protocol owns them from then on).
  */
 int
 sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	long space, resid;
 	int clen = 0, error, dontroute;
 #ifdef ZERO_COPY_SOCKETS
 	int atomic = sosendallatonce(so) || top;
 #endif
 
 	KASSERT(so->so_type == SOCK_DGRAM, ("sodgram_send: !SOCK_DGRAM"));
 	KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
 	    ("sodgram_send: !PR_ATOMIC"));
 
 	if (uio != NULL)
 		resid = uio->uio_resid;
 	else
 		resid = top->m_pkthdr.len;
 	/*
 	 * In theory resid should be unsigned.  However, space must be
 	 * signed, as it might be less than 0 if we over-committed, and we
 	 * must use a signed comparison of space and resid.  On the other
 	 * hand, a negative resid causes us to loop sending 0-length
 	 * segments to the protocol.
 	 *
 	 * Unlike sosend_generic(), no MSG_EOR-on-SOCK_STREAM check is made
 	 * here: the socket has already been asserted to be SOCK_DGRAM.
 	 */
 	if (resid < 0) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/*
 	 * SO_DONTROUTE only needs to be forced on around the send when the
 	 * caller asked for MSG_DONTROUTE and the option is not already set.
 	 */
 	dontroute =
 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0;
 	if (td != NULL)
 		td->td_ru.ru_msgsnd++;
 	if (control != NULL)
 		clen = control->m_len;
 
 	/* Validate socket state and sample the free space under the lock. */
 	SOCKBUF_LOCK(&so->so_snd);
 	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 		SOCKBUF_UNLOCK(&so->so_snd);
 		error = EPIPE;
 		goto out;
 	}
 	if (so->so_error) {
 		/* Report the pending socket error once and clear it. */
 		error = so->so_error;
 		so->so_error = 0;
 		SOCKBUF_UNLOCK(&so->so_snd);
 		goto out;
 	}
 	if ((so->so_state & SS_ISCONNECTED) == 0) {
 		/*
 		 * `sendto' and `sendmsg' is allowed on a connection-based
 		 * socket if it supports implied connect.  Return ENOTCONN if
 		 * not connected and no address is supplied.
 		 */
 		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
 		    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
 			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
 			    !(resid == 0 && clen != 0)) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				error = ENOTCONN;
 				goto out;
 			}
 		} else if (addr == NULL) {
 			if (so->so_proto->pr_flags & PR_CONNREQUIRED)
 				error = ENOTCONN;
 			else
 				error = EDESTADDRREQ;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			goto out;
 		}
 	}
 
 	/*
 	 * Do we need MSG_OOB support in SOCK_DGRAM?  Signs here may be a
 	 * problem and need fixing.
 	 */
 	space = sbspace(&so->so_snd);
 	if (flags & MSG_OOB)
 		space += 1024;
 	space -= clen;
 	SOCKBUF_UNLOCK(&so->so_snd);
 	/* The whole datagram must fit in the space that is left. */
 	if (resid > space) {
 		error = EMSGSIZE;
 		goto out;
 	}
 	if (uio == NULL) {
 		/* Caller supplied the mbuf chain; nothing to copy in. */
 		resid = 0;
 		if (flags & MSG_EOR)
 			top->m_flags |= M_EOR;
 	} else {
 #ifdef ZERO_COPY_SOCKETS
 		error = sosend_copyin(uio, &top, atomic, &space, flags);
 		if (error)
 			goto out;
 #else
 		/*
 		 * Copy the data from userland into a mbuf chain.
 		 * If no data is to be copied in, a single empty mbuf
 		 * is returned.
 		 */
 		top = m_uiotombuf(uio, M_WAITOK, space, max_hdr,
 		    (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0)));
 		if (top == NULL) {
 			error = EFAULT;	/* only possible error */
 			goto out;
 		}
 		space -= resid - uio->uio_resid;
 #endif
 		resid = uio->uio_resid;
 	}
 	KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
 	/*
 	 * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock
 	 * than with.
 	 */
 	if (dontroute) {
 		SOCK_LOCK(so);
 		so->so_options |= SO_DONTROUTE;
 		SOCK_UNLOCK(so);
 	}
 	/*
 	 * XXX all the SBS_CANTSENDMORE checks previously done could be out
 	 * of date.  We could have received a reset packet in an interrupt or
 	 * maybe we slept while doing page faults in uiomove() etc.  We could
 	 * probably recheck again inside the locking protection here, but
 	 * there are probably other places that this also happens.  We must
 	 * rethink this.
 	 */
 	error = (*so->so_proto->pr_usrreqs->pru_send)(so,
 	    (flags & MSG_OOB) ? PRUS_OOB :
 	/*
 	 * If the user set MSG_EOF, the protocol understands this flag and
 	 * nothing left to send then use PRU_SEND_EOF instead of PRU_SEND.
 	 */
 	    ((flags & MSG_EOF) &&
 	     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
 	     (resid <= 0)) ?
 		PRUS_EOF :
 		/* If there is more to send set PRUS_MORETOCOME */
 		(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
 		top, addr, control, td);
 	/* Undo the temporary SO_DONTROUTE set above. */
 	if (dontroute) {
 		SOCK_LOCK(so);
 		so->so_options &= ~SO_DONTROUTE;
 		SOCK_UNLOCK(so);
 	}
 	/* Clear the locals so the cleanup below does not free the chains. */
 	clen = 0;
 	control = NULL;
 	top = NULL;
 out:
 	if (top != NULL)
 		m_freem(top);
 	if (control != NULL)
 		m_freem(control);
 	return (error);
 }
 
 /*
  * Send on a socket.  If send must go all at once and message is larger than
  * send buffering, then hard error.  Lock against other senders.  If must go
  * all at once and not enough room now, then inform user that this would
  * block and do nothing.  Otherwise, if nonblocking, send as much as
  * possible.  The data to be sent is described by "uio" if nonzero, otherwise
  * by the mbuf chain "top" (which must be null if uio is not).  Data provided
  * in mbuf chain must be small enough to send all at once.
  *
  * Returns nonzero on error, timeout or signal; callers must check for short
  * counts if EINTR/ERESTART are returned.  Data and control buffers are freed
  * on return.
  */
 int
 sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	long space, resid;
 	int clen = 0, error, dontroute;
 	/* A caller-supplied mbuf chain is always sent in one piece. */
 	int atomic = sosendallatonce(so) || top;
 
 	if (uio != NULL)
 		resid = uio->uio_resid;
 	else
 		resid = top->m_pkthdr.len;
 	/*
 	 * In theory resid should be unsigned.  However, space must be
 	 * signed, as it might be less than 0 if we over-committed, and we
 	 * must use a signed comparison of space and resid.  On the other
 	 * hand, a negative resid causes us to loop sending 0-length
 	 * segments to the protocol.
 	 *
 	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
 	 * type sockets since that's an error.
 	 */
 	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/*
 	 * SO_DONTROUTE is only forced on around the send for PR_ATOMIC
 	 * protocols, when MSG_DONTROUTE was requested and the option is not
 	 * already set.
 	 */
 	dontroute =
 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
 	    (so->so_proto->pr_flags & PR_ATOMIC);
 	if (td != NULL)
 		td->td_ru.ru_msgsnd++;
 	if (control != NULL)
 		clen = control->m_len;
 
 	/* Serialize against other senders; released at 'release' below. */
 	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
 	if (error)
 		goto out;
 
 restart:
 	/*
 	 * Each pass of the outer loop re-checks the socket state and the
 	 * available space under the sockbuf lock, then the inner loop feeds
 	 * data to the protocol until that space is used up.
 	 */
 	do {
 		SOCKBUF_LOCK(&so->so_snd);
 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = EPIPE;
 			goto release;
 		}
 		if (so->so_error) {
 			/* Report the pending socket error once and clear it. */
 			error = so->so_error;
 			so->so_error = 0;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			goto release;
 		}
 		if ((so->so_state & SS_ISCONNECTED) == 0) {
 			/*
 			 * `sendto' and `sendmsg' is allowed on a connection-
 			 * based socket if it supports implied connect.
 			 * Return ENOTCONN if not connected and no address is
 			 * supplied.
 			 */
 			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
 			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
 				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
 				    !(resid == 0 && clen != 0)) {
 					SOCKBUF_UNLOCK(&so->so_snd);
 					error = ENOTCONN;
 					goto release;
 				}
 			} else if (addr == NULL) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				if (so->so_proto->pr_flags & PR_CONNREQUIRED)
 					error = ENOTCONN;
 				else
 					error = EDESTADDRREQ;
 				goto release;
 			}
 		}
 		space = sbspace(&so->so_snd);
 		if (flags & MSG_OOB)
 			space += 1024;
 		/*
 		 * An atomic message, or control data, larger than the high
 		 * water mark can never be sent.
 		 */
 		if ((atomic && resid > so->so_snd.sb_hiwat) ||
 		    clen > so->so_snd.sb_hiwat) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = EMSGSIZE;
 			goto release;
 		}
 		/*
 		 * Not everything fits right now.  Wait (or fail with
 		 * EWOULDBLOCK when non-blocking) if the send is atomic, if
 		 * the space is below the low water mark, or if it cannot
 		 * even hold the control data; otherwise send what fits.
 		 */
 		if (space < resid + clen &&
 		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
 			if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				error = EWOULDBLOCK;
 				goto release;
 			}
 			error = sbwait(&so->so_snd);
 			SOCKBUF_UNLOCK(&so->so_snd);
 			if (error)
 				goto release;
 			goto restart;
 		}
 		SOCKBUF_UNLOCK(&so->so_snd);
 		space -= clen;
 		do {
 			if (uio == NULL) {
 				/* Caller supplied the mbuf chain. */
 				resid = 0;
 				if (flags & MSG_EOR)
 					top->m_flags |= M_EOR;
 			} else {
 #ifdef ZERO_COPY_SOCKETS
 				error = sosend_copyin(uio, &top, atomic,
 				    &space, flags);
 				if (error != 0)
 					goto release;
 #else
 				/*
 				 * Copy the data from userland into a mbuf
 				 * chain.  If no data is to be copied in,
 				 * a single empty mbuf is returned.
 				 */
 				top = m_uiotombuf(uio, M_WAITOK, space,
 				    (atomic ? max_hdr : 0),
 				    (atomic ? M_PKTHDR : 0) |
 				    ((flags & MSG_EOR) ? M_EOR : 0));
 				if (top == NULL) {
 					error = EFAULT; /* only possible error */
 					goto release;
 				}
 				space -= resid - uio->uio_resid;
 #endif
 				resid = uio->uio_resid;
 			}
 			if (dontroute) {
 				SOCK_LOCK(so);
 				so->so_options |= SO_DONTROUTE;
 				SOCK_UNLOCK(so);
 			}
 			/*
 			 * XXX all the SBS_CANTSENDMORE checks previously
 			 * done could be out of date.  We could have received
 			 * a reset packet in an interrupt or maybe we slept
 			 * while doing page faults in uiomove() etc.  We
 			 * could probably recheck again inside the locking
 			 * protection here, but there are probably other
 			 * places that this also happens.  We must rethink
 			 * this.
 			 */
 			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
 			    (flags & MSG_OOB) ? PRUS_OOB :
 			/*
 			 * If the user set MSG_EOF, the protocol understands
 			 * this flag and nothing left to send then use
 			 * PRU_SEND_EOF instead of PRU_SEND.
 			 */
 			    ((flags & MSG_EOF) &&
 			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
 			     (resid <= 0)) ?
 				PRUS_EOF :
 			/* If there is more to send set PRUS_MORETOCOME. */
 			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
 			    top, addr, control, td);
 			/* Undo the temporary SO_DONTROUTE set above. */
 			if (dontroute) {
 				SOCK_LOCK(so);
 				so->so_options &= ~SO_DONTROUTE;
 				SOCK_UNLOCK(so);
 			}
 			/*
 			 * Clear the locals so that neither a later pass nor
 			 * the cleanup code reuses or frees these chains.
 			 */
 			clen = 0;
 			control = NULL;
 			top = NULL;
 			if (error)
 				goto release;
 		} while (resid && space > 0);
 	} while (resid);
 
 release:
 	sbunlock(&so->so_snd);
 out:
 	if (top != NULL)
 		m_freem(top);
 	if (control != NULL)
 		m_freem(control);
 	return (error);
 }
 
 int
 sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 
 	/* XXXRW: Temporary debugging. */
 	KASSERT(so->so_proto->pr_usrreqs->pru_sosend != sosend,
 	    ("sosend: protocol calls sosend"));
 
 	return (so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top,
 	    control, flags, td));
 }
 
 /*
  * The part of soreceive() that implements reading non-inline out-of-band
  * data from a socket.  For more complete comments, see soreceive(), from
  * which this code originated.
  *
  * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
  * unable to return an mbuf chain to the caller.
  */
 static int
 soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
 {
 	struct protosw *pr = so->so_proto;
 	struct mbuf *m;
 	int error;
 
 	KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
 
 	m = m_get(M_WAIT, MT_DATA);
 	error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
 	if (error)
 		goto bad;
 	do {
 #ifdef ZERO_COPY_SOCKETS
 		if (so_zero_copy_receive) {
 			int disposable;
 
 			if ((m->m_flags & M_EXT)
 			 && (m->m_ext.ext_type == EXT_DISPOSABLE))
 				disposable = 1;
 			else
 				disposable = 0;
 
 			error = uiomoveco(mtod(m, void *),
 					  min(uio->uio_resid, m->m_len),
 					  uio, disposable);
 		} else
 #endif /* ZERO_COPY_SOCKETS */
 		error = uiomove(mtod(m, void *),
 		    (int) min(uio->uio_resid, m->m_len), uio);
 		m = m_free(m);
 	} while (uio->uio_resid && error == 0 && m);
 bad:
 	if (m != NULL)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * Following replacement or removal of the first mbuf on the first mbuf chain
  * of a socket buffer, push necessary state changes back into the socket
  * buffer so that other consumers see the values consistently.  'nextrecord'
  * is the callers locally stored value of the original value of
  * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes.
  * NOTE: 'nextrecord' may be NULL.
  */
 static __inline void
 sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	/*
 	 * First, update for the new value of nextrecord.  If necessary, make
 	 * it the first record.
 	 */
 	if (sb->sb_mb != NULL)
 		sb->sb_mb->m_nextpkt = nextrecord;
 	else
 		sb->sb_mb = nextrecord;
 
         /*
          * Now update any dependent socket buffer fields to reflect the new
          * state.  This is an expanded inline of SB_EMPTY_FIXUP(), with the
 	 * addition of a second clause that takes care of the case where
 	 * sb_mb has been updated, but remains the last record.
          */
         if (sb->sb_mb == NULL) {
                 sb->sb_mbtail = NULL;
                 sb->sb_lastrecord = NULL;
         } else if (sb->sb_mb->m_nextpkt == NULL)
                 sb->sb_lastrecord = sb->sb_mb;
 }
 
 
 /*
  * Implement receive operations on a socket.  We depend on the way that
  * records are added to the sockbuf by sbappend.  In particular, each record
  * (mbufs linked through m_next) must begin with an address if the protocol
  * so specifies, followed by an optional mbuf or mbufs containing ancillary
  * data, and then zero or more mbufs of data.  In order to allow parallelism
  * between network receive and copying to user space, as well as avoid
  * sleeping with a mutex held, we release the socket buffer mutex during the
  * user space copy.  Although the sockbuf is locked, new data may still be
  * appended, and thus we must maintain consistency of the sockbuf during that
  * time.
  *
  * The caller may receive the data as a single mbuf chain by supplying an
  * mbuf **mp0 for use in returning the chain.  The uio is then used only for
  * the count in uio_resid.
  */
 int
 soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	struct mbuf *m, **mp;
 	int flags, len, error, offset;
 	struct protosw *pr = so->so_proto;
 	struct mbuf *nextrecord;
 	int moff, type = 0;
 	int orig_resid = uio->uio_resid;
 
 	mp = mp0;
 	if (psa != NULL)
 		*psa = NULL;
 	if (controlp != NULL)
 		*controlp = NULL;
 	if (flagsp != NULL)
 		flags = *flagsp &~ MSG_EOR;
 	else
 		flags = 0;
 	if (flags & MSG_OOB)
 		return (soreceive_rcvoob(so, uio, flags));
 	if (mp != NULL)
 		*mp = NULL;
 	if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING)
 	    && uio->uio_resid)
 		(*pr->pr_usrreqs->pru_rcvd)(so, 0);
 
 	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
 	if (error)
 		return (error);
 
 restart:
 	SOCKBUF_LOCK(&so->so_rcv);
 	m = so->so_rcv.sb_mb;
 	/*
 	 * If we have less data than requested, block awaiting more (subject
 	 * to any timeout) if:
 	 *   1. the current count is less than the low water mark, or
 	 *   2. MSG_WAITALL is set, and it is possible to do the entire
 	 *	receive operation at once if we block (resid <= hiwat).
 	 *   3. MSG_DONTWAIT is not set
 	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
 	 * we have to do the receive in sections, and thus risk returning a
 	 * short count if a timeout or signal occurs after we start.
 	 */
 	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
 	    so->so_rcv.sb_cc < uio->uio_resid) &&
 	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
 	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
 	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
 		KASSERT(m != NULL || !so->so_rcv.sb_cc,
 		    ("receive: m == %p so->so_rcv.sb_cc == %u",
 		    m, so->so_rcv.sb_cc));
 		if (so->so_error) {
 			if (m != NULL)
 				goto dontblock;
 			error = so->so_error;
 			if ((flags & MSG_PEEK) == 0)
 				so->so_error = 0;
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			goto release;
 		}
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 			if (m == NULL) {
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				goto release;
 			} else
 				goto dontblock;
 		}
 		for (; m != NULL; m = m->m_next)
 			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
 				m = so->so_rcv.sb_mb;
 				goto dontblock;
 			}
 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
 		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			error = ENOTCONN;
 			goto release;
 		}
 		if (uio->uio_resid == 0) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			goto release;
 		}
 		if ((so->so_state & SS_NBIO) ||
 		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			error = EWOULDBLOCK;
 			goto release;
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
 		error = sbwait(&so->so_rcv);
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		if (error)
 			goto release;
 		goto restart;
 	}
 dontblock:
 	/*
 	 * From this point onward, we maintain 'nextrecord' as a cache of the
 	 * pointer to the next record in the socket buffer.  We must keep the
 	 * various socket buffer pointers and local stack versions of the
 	 * pointers in sync, pushing out modifications before dropping the
 	 * socket buffer mutex, and re-reading them when picking it up.
 	 *
 	 * Otherwise, we will race with the network stack appending new data
 	 * or records onto the socket buffer by using inconsistent/stale
 	 * versions of the field, possibly resulting in socket buffer
 	 * corruption.
 	 *
 	 * By holding the high-level sblock(), we prevent simultaneous
 	 * readers from pulling off the front of the socket buffer.
 	 */
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (uio->uio_td)
 		uio->uio_td->td_ru.ru_msgrcv++;
 	KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb"));
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 	nextrecord = m->m_nextpkt;
 	if (pr->pr_flags & PR_ADDR) {
 		KASSERT(m->m_type == MT_SONAME,
 		    ("m->m_type == %d", m->m_type));
 		orig_resid = 0;
 		if (psa != NULL)
 			*psa = sodupsockaddr(mtod(m, struct sockaddr *),
 			    M_NOWAIT);
 		if (flags & MSG_PEEK) {
 			m = m->m_next;
 		} else {
 			sbfree(&so->so_rcv, m);
 			so->so_rcv.sb_mb = m_free(m);
 			m = so->so_rcv.sb_mb;
 			sockbuf_pushsync(&so->so_rcv, nextrecord);
 		}
 	}
 
 	/*
 	 * Process one or more MT_CONTROL mbufs present before any data mbufs
 	 * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we
 	 * just copy the data; if !MSG_PEEK, we call into the protocol to
 	 * perform externalization (or freeing if controlp == NULL).
 	 */
 	if (m != NULL && m->m_type == MT_CONTROL) {
 		struct mbuf *cm = NULL, *cmn;
 		struct mbuf **cme = &cm;
 
 		do {
 			if (flags & MSG_PEEK) {
 				if (controlp != NULL) {
 					*controlp = m_copy(m, 0, m->m_len);
 					controlp = &(*controlp)->m_next;
 				}
 				m = m->m_next;
 			} else {
 				sbfree(&so->so_rcv, m);
 				so->so_rcv.sb_mb = m->m_next;
 				m->m_next = NULL;
 				*cme = m;
 				cme = &(*cme)->m_next;
 				m = so->so_rcv.sb_mb;
 			}
 		} while (m != NULL && m->m_type == MT_CONTROL);
 		if ((flags & MSG_PEEK) == 0)
 			sockbuf_pushsync(&so->so_rcv, nextrecord);
 		while (cm != NULL) {
 			cmn = cm->m_next;
 			cm->m_next = NULL;
 			if (pr->pr_domain->dom_externalize != NULL) {
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				error = (*pr->pr_domain->dom_externalize)
 				    (cm, controlp);
 				SOCKBUF_LOCK(&so->so_rcv);
 			} else if (controlp != NULL)
 				*controlp = cm;
 			else
 				m_freem(cm);
 			if (controlp != NULL) {
 				orig_resid = 0;
 				while (*controlp != NULL)
 					controlp = &(*controlp)->m_next;
 			}
 			cm = cmn;
 		}
 		if (m != NULL)
 			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
 		else
 			nextrecord = so->so_rcv.sb_mb;
 		orig_resid = 0;
 	}
 	if (m != NULL) {
 		if ((flags & MSG_PEEK) == 0) {
 			KASSERT(m->m_nextpkt == nextrecord,
 			    ("soreceive: post-control, nextrecord !sync"));
 			if (nextrecord == NULL) {
 				KASSERT(so->so_rcv.sb_mb == m,
 				    ("soreceive: post-control, sb_mb!=m"));
 				KASSERT(so->so_rcv.sb_lastrecord == m,
 				    ("soreceive: post-control, lastrecord!=m"));
 			}
 		}
 		type = m->m_type;
 		if (type == MT_OOBDATA)
 			flags |= MSG_OOB;
 	} else {
 		if ((flags & MSG_PEEK) == 0) {
 			KASSERT(so->so_rcv.sb_mb == nextrecord,
 			    ("soreceive: sb_mb != nextrecord"));
 			if (so->so_rcv.sb_mb == NULL) {
 				KASSERT(so->so_rcv.sb_lastrecord == NULL,
 				    ("soreceive: sb_lastercord != NULL"));
 			}
 		}
 	}
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 
 	/*
 	 * Now continue to read any data mbufs off of the head of the socket
 	 * buffer until the read request is satisfied.  Note that 'type' is
 	 * used to store the type of any mbuf reads that have happened so far
 	 * such that soreceive() can stop reading if the type changes, which
 	 * causes soreceive() to return only one of regular data and inline
 	 * out-of-band data in a single socket receive operation.
 	 */
 	moff = 0;
 	offset = 0;
 	while (m != NULL && uio->uio_resid > 0 && error == 0) {
 		/*
 		 * If the type of mbuf has changed since the last mbuf
 		 * examined ('type'), end the receive operation.
 	 	 */
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (m->m_type == MT_OOBDATA) {
 			if (type != MT_OOBDATA)
 				break;
 		} else if (type == MT_OOBDATA)
 			break;
 		else
 		    KASSERT(m->m_type == MT_DATA,
 			("m->m_type == %d", m->m_type));
 		so->so_rcv.sb_state &= ~SBS_RCVATMARK;
 		len = uio->uio_resid;
 		if (so->so_oobmark && len > so->so_oobmark - offset)
 			len = so->so_oobmark - offset;
 		if (len > m->m_len - moff)
 			len = m->m_len - moff;
 		/*
 		 * If mp is set, just pass back the mbufs.  Otherwise copy
 		 * them out via the uio, then free.  Sockbuf must be
 		 * consistent here (points to current mbuf, it points to next
 		 * record) when we drop priority; we must note any additions
 		 * to the sockbuf when we block interrupts again.
 		 */
 		if (mp == NULL) {
 			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
 			SOCKBUF_UNLOCK(&so->so_rcv);
 #ifdef ZERO_COPY_SOCKETS
 			if (so_zero_copy_receive) {
 				int disposable;
 
 				if ((m->m_flags & M_EXT)
 				 && (m->m_ext.ext_type == EXT_DISPOSABLE))
 					disposable = 1;
 				else
 					disposable = 0;
 
 				error = uiomoveco(mtod(m, char *) + moff,
 						  (int)len, uio,
 						  disposable);
 			} else
 #endif /* ZERO_COPY_SOCKETS */
 			error = uiomove(mtod(m, char *) + moff, (int)len, uio);
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (error) {
 				/*
 				 * The MT_SONAME mbuf has already been removed
 				 * from the record, so it is necessary to
 				 * remove the data mbufs, if any, to preserve
 				 * the invariant in the case of PR_ADDR that
 				 * requires MT_SONAME mbufs at the head of
 				 * each record.
 				 */
 				if (m && pr->pr_flags & PR_ATOMIC &&
 				    ((flags & MSG_PEEK) == 0))
 					(void)sbdroprecord_locked(&so->so_rcv);
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				goto release;
 			}
 		} else
 			uio->uio_resid -= len;
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (len == m->m_len - moff) {
 			if (m->m_flags & M_EOR)
 				flags |= MSG_EOR;
 			if (flags & MSG_PEEK) {
 				m = m->m_next;
 				moff = 0;
 			} else {
 				nextrecord = m->m_nextpkt;
 				sbfree(&so->so_rcv, m);
 				if (mp != NULL) {
 					*mp = m;
 					mp = &m->m_next;
 					so->so_rcv.sb_mb = m = m->m_next;
 					*mp = NULL;
 				} else {
 					so->so_rcv.sb_mb = m_free(m);
 					m = so->so_rcv.sb_mb;
 				}
 				sockbuf_pushsync(&so->so_rcv, nextrecord);
 				SBLASTRECORDCHK(&so->so_rcv);
 				SBLASTMBUFCHK(&so->so_rcv);
 			}
 		} else {
 			if (flags & MSG_PEEK)
 				moff += len;
 			else {
 				if (mp != NULL) {
 					int copy_flag;
 
 					if (flags & MSG_DONTWAIT)
 						copy_flag = M_DONTWAIT;
 					else
 						copy_flag = M_WAIT;
 					if (copy_flag == M_WAIT)
 						SOCKBUF_UNLOCK(&so->so_rcv);
 					*mp = m_copym(m, 0, len, copy_flag);
 					if (copy_flag == M_WAIT)
 						SOCKBUF_LOCK(&so->so_rcv);
  					if (*mp == NULL) {
  						/*
  						 * m_copym() couldn't
 						 * allocate an mbuf.  Adjust
 						 * uio_resid back (it was
 						 * adjusted down by len
 						 * bytes, which we didn't end
 						 * up "copying" over).
  						 */
  						uio->uio_resid += len;
  						break;
  					}
 				}
 				m->m_data += len;
 				m->m_len -= len;
 				so->so_rcv.sb_cc -= len;
 			}
 		}
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (so->so_oobmark) {
 			if ((flags & MSG_PEEK) == 0) {
 				so->so_oobmark -= len;
 				if (so->so_oobmark == 0) {
 					so->so_rcv.sb_state |= SBS_RCVATMARK;
 					break;
 				}
 			} else {
 				offset += len;
 				if (offset == so->so_oobmark)
 					break;
 			}
 		}
 		if (flags & MSG_EOR)
 			break;
 		/*
 		 * If the MSG_WAITALL flag is set (for non-atomic socket), we
 		 * must not quit until "uio->uio_resid == 0" or an error
 		 * termination.  If a signal/timeout occurs, return with a
 		 * short count but without error.  Keep sockbuf locked
 		 * against other readers.
 		 */
 		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
 		    !sosendallatonce(so) && nextrecord == NULL) {
 			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 			if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE)
 				break;
 			/*
 			 * Notify the protocol that some data has been
 			 * drained before blocking.
 			 */
 			if (pr->pr_flags & PR_WANTRCVD) {
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
 				SOCKBUF_LOCK(&so->so_rcv);
 			}
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
 			error = sbwait(&so->so_rcv);
 			if (error) {
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				goto release;
 			}
 			m = so->so_rcv.sb_mb;
 			if (m != NULL)
 				nextrecord = m->m_nextpkt;
 		}
 	}
 
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (m != NULL && pr->pr_flags & PR_ATOMIC) {
 		flags |= MSG_TRUNC;
 		if ((flags & MSG_PEEK) == 0)
 			(void) sbdroprecord_locked(&so->so_rcv);
 	}
 	if ((flags & MSG_PEEK) == 0) {
 		if (m == NULL) {
 			/*
 			 * First part is an inline SB_EMPTY_FIXUP().  Second
 			 * part makes sure sb_lastrecord is up-to-date if
 			 * there is still data in the socket buffer.
 			 */
 			so->so_rcv.sb_mb = nextrecord;
 			if (so->so_rcv.sb_mb == NULL) {
 				so->so_rcv.sb_mbtail = NULL;
 				so->so_rcv.sb_lastrecord = NULL;
 			} else if (nextrecord->m_nextpkt == NULL)
 				so->so_rcv.sb_lastrecord = nextrecord;
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
 		/*
 		 * If soreceive() is being done from the socket callback,
 		 * then don't need to generate ACK to peer to update window,
 		 * since ACK will be generated on return to TCP.
 		 */
 		if (!(flags & MSG_SOCALLBCK) &&
 		    (pr->pr_flags & PR_WANTRCVD)) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
 			SOCKBUF_LOCK(&so->so_rcv);
 		}
 	}
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (orig_resid == uio->uio_resid && orig_resid &&
 	    (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		goto restart;
 	}
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	if (flagsp != NULL)
 		*flagsp |= flags;
 release:
 	sbunlock(&so->so_rcv);
 	return (error);
 }
 
 int
 soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 
 	/* XXXRW: Temporary debugging. */
 	KASSERT(so->so_proto->pr_usrreqs->pru_soreceive != soreceive,
 	    ("soreceive: protocol calls soreceive"));
 
 	return (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0,
 	    controlp, flagsp));
 }
 
 /*
  * Shut down the receive side, the send side, or both sides of a socket as
  * selected by how (SHUT_RD, SHUT_WR or SHUT_RDWR).  Any other value of how
  * yields EINVAL.  For SHUT_WR and SHUT_RDWR the return value is that of the
  * protocol's pru_shutdown method; for SHUT_RD it is 0.
  */
 int
 soshutdown(struct socket *so, int how)
 {
 	struct protosw *pr;
 
 	if (how != SHUT_RD && how != SHUT_WR && how != SHUT_RDWR)
 		return (EINVAL);
 
 	pr = so->so_proto;
 
 	/* Let the protocol flush first when it supplies a hook for it. */
 	if (pr->pr_usrreqs->pru_flush != NULL)
 		(*pr->pr_usrreqs->pru_flush)(so, how);
 
 	/* The receive side is affected unless only writes are shut down. */
 	if (how != SHUT_WR)
 		sorflush(so);
 
 	/* A pure read shutdown never reaches the protocol's shutdown. */
 	if (how == SHUT_RD)
 		return (0);
 	return ((*pr->pr_usrreqs->pru_shutdown)(so));
 }
 
 /*
  * Flush a socket's receive buffer: mark the socket as unable to receive
  * more, move the buffer contents to a private copy while holding the
  * locks, and then dispose of that copy with no locks held.
  */
 void
 sorflush(struct socket *so)
 {
 	struct sockbuf *sb = &so->so_rcv;
 	struct protosw *pr = so->so_proto;
 	struct sockbuf asb;
 
 	/*
 	 * In order to avoid calling dom_dispose with the socket buffer mutex
 	 * held, and in order to generally avoid holding the lock for a long
 	 * time, we make a copy of the socket buffer and clear the original
 	 * (except locks, state).  The new socket buffer copy won't have
 	 * initialized locks so we can only call routines that won't use or
 	 * assert those locks.
 	 *
 	 * Dislodge threads currently blocked in receive and wait to acquire
 	 * a lock against other simultaneous readers before clearing the
 	 * socket buffer.  Don't let our acquire be interrupted by a signal
 	 * despite any existing socket disposition on interruptable waiting.
 	 */
 	socantrcvmore(so);
 	(void) sblock(sb, SBL_WAIT | SBL_NOINTR);
 
 	/*
 	 * Invalidate/clear most of the sockbuf structure, but leave selinfo
 	 * and mutex data unchanged.
 	 */
 	SOCKBUF_LOCK(sb);
 	/* Zero the part of the copy that precedes sb_startzero ... */
 	bzero(&asb, offsetof(struct sockbuf, sb_startzero));
 	/* ... move everything from sb_startzero onwards into the copy ... */
 	bcopy(&sb->sb_startzero, &asb.sb_startzero,
 	    sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
 	/* ... and clear that same region in the live socket buffer. */
 	bzero(&sb->sb_startzero,
 	    sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
 	SOCKBUF_UNLOCK(sb);
 	sbunlock(sb);
 
 	/*
 	 * Dispose of special rights and flush the socket buffer.  Don't call
 	 * any unsafe routines (that rely on locks being initialized) on asb.
 	 */
 	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
 		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
 	sbrelease_internal(&asb, so);
 }
 
 /*
  * Copy a socket option value supplied by the caller into buf.
  *
  * At most len bytes are copied; if fewer than minlen bytes were supplied
  * the call fails with EINVAL.  On success sopt->sopt_valsize holds the
  * number of bytes actually retrieved.  When sopt->sopt_td is set the value
  * is fetched with copyin(), otherwise with bcopy().
  *
  * Perhaps this routine, and sooptcopyout(), below, ought to come in an
  * additional variant to handle the case where the option value needs to be
  * some kind of integer, but not a specific size.  In addition to their use
  * here, these functions are also called by the protocol-level pr_ctloutput()
  * routines.
  */
 int
 sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
 {
 	size_t	nbytes;
 
 	nbytes = sopt->sopt_valsize;
 
 	/* Too little data for the caller's minimum requirement. */
 	if (nbytes < minlen)
 		return (EINVAL);
 
 	/* Excess data is silently ignored; record what we really take. */
 	if (nbytes > len) {
 		nbytes = len;
 		sopt->sopt_valsize = nbytes;
 	}
 
 	if (sopt->sopt_td == NULL) {
 		bcopy(sopt->sopt_val, buf, nbytes);
 		return (0);
 	}
 	return (copyin(sopt->sopt_val, buf, nbytes));
 }
 
 /*
  * Kernel version of setsockopt(2): build a SOPT_SET request with no owning
  * thread (sopt_td is NULL) around the caller's buffer and pass it to
  * sosetopt().
  *
  * XXX: optlen is size_t, not socklen_t
  */
 int
 so_setsockopt(struct socket *so, int level, int optname, void *optval,
     size_t optlen)
 {
 	struct sockopt req;
 
 	req.sopt_dir = SOPT_SET;
 	req.sopt_level = level;
 	req.sopt_name = optname;
 	req.sopt_val = optval;
 	req.sopt_valsize = optlen;
 	req.sopt_td = NULL;
 
 	return (sosetopt(so, &req));
 }
 
 int
 sosetopt(struct socket *so, struct sockopt *sopt)
 {
 	int	error, optval;
 	struct	linger l;
 	struct	timeval tv;
 	u_long  val;
 #ifdef MAC
 	struct mac extmac;
 #endif
 
 	error = 0;
 	if (sopt->sopt_level != SOL_SOCKET) {
 		if (so->so_proto && so->so_proto->pr_ctloutput)
 			return ((*so->so_proto->pr_ctloutput)
 				  (so, sopt));
 		error = ENOPROTOOPT;
 	} else {
 		switch (sopt->sopt_name) {
 #ifdef INET
 		case SO_ACCEPTFILTER:
 			error = do_setopt_accept_filter(so, sopt);
 			if (error)
 				goto bad;
 			break;
 #endif
 		case SO_LINGER:
 			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
 			if (error)
 				goto bad;
 
 			SOCK_LOCK(so);
 			so->so_linger = l.l_linger;
 			if (l.l_onoff)
 				so->so_options |= SO_LINGER;
 			else
 				so->so_options &= ~SO_LINGER;
 			SOCK_UNLOCK(so);
 			break;
 
 		case SO_DEBUG:
 		case SO_KEEPALIVE:
 		case SO_DONTROUTE:
 		case SO_USELOOPBACK:
 		case SO_BROADCAST:
 		case SO_REUSEADDR:
 		case SO_REUSEPORT:
 		case SO_OOBINLINE:
 		case SO_TIMESTAMP:
 		case SO_BINTIME:
 		case SO_NOSIGPIPE:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				goto bad;
 			SOCK_LOCK(so);
 			if (optval)
 				so->so_options |= sopt->sopt_name;
 			else
 				so->so_options &= ~sopt->sopt_name;
 			SOCK_UNLOCK(so);
 			break;
 
+		case SO_SETFIB:
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+					    sizeof optval);
+			if (optval < 1 || optval > rt_numfibs) {
+				error = EINVAL;
+				goto bad;
+			}
+			if ((so->so_proto->pr_domain->dom_family == PF_INET) ||
+			    (so->so_proto->pr_domain->dom_family == PF_ROUTE)) {
+				so->so_fibnum = optval;
+			} else {
+				so->so_fibnum = 0;
+			}
+			break;
 		case SO_SNDBUF:
 		case SO_RCVBUF:
 		case SO_SNDLOWAT:
 		case SO_RCVLOWAT:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				goto bad;
 
 			/*
 			 * Values < 1 make no sense for any of these options,
 			 * so disallow them.
 			 */
 			if (optval < 1) {
 				error = EINVAL;
 				goto bad;
 			}
 
 			switch (sopt->sopt_name) {
 			case SO_SNDBUF:
 			case SO_RCVBUF:
 				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
 				    &so->so_snd : &so->so_rcv, (u_long)optval,
 				    so, curthread) == 0) {
 					error = ENOBUFS;
 					goto bad;
 				}
 				(sopt->sopt_name == SO_SNDBUF ? &so->so_snd :
 				    &so->so_rcv)->sb_flags &= ~SB_AUTOSIZE;
 				break;
 
 			/*
 			 * Make sure the low-water is never greater than the
 			 * high-water.
 			 */
 			case SO_SNDLOWAT:
 				SOCKBUF_LOCK(&so->so_snd);
 				so->so_snd.sb_lowat =
 				    (optval > so->so_snd.sb_hiwat) ?
 				    so->so_snd.sb_hiwat : optval;
 				SOCKBUF_UNLOCK(&so->so_snd);
 				break;
 			case SO_RCVLOWAT:
 				SOCKBUF_LOCK(&so->so_rcv);
 				so->so_rcv.sb_lowat =
 				    (optval > so->so_rcv.sb_hiwat) ?
 				    so->so_rcv.sb_hiwat : optval;
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				break;
 			}
 			break;
 
 		case SO_SNDTIMEO:
 		case SO_RCVTIMEO:
 #ifdef COMPAT_IA32
 			if (curthread->td_proc->p_sysent == &ia32_freebsd_sysvec) {
 				struct timeval32 tv32;
 
 				error = sooptcopyin(sopt, &tv32, sizeof tv32,
 				    sizeof tv32);
 				CP(tv32, tv, tv_sec);
 				CP(tv32, tv, tv_usec);
 			} else
 #endif
 				error = sooptcopyin(sopt, &tv, sizeof tv,
 				    sizeof tv);
 			if (error)
 				goto bad;
 
 			/* assert(hz > 0); */
 			if (tv.tv_sec < 0 || tv.tv_sec > INT_MAX / hz ||
 			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
 				error = EDOM;
 				goto bad;
 			}
 			/* assert(tick > 0); */
 			/* assert(ULONG_MAX - INT_MAX >= 1000000); */
 			val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
 			if (val > INT_MAX) {
 				error = EDOM;
 				goto bad;
 			}
 			if (val == 0 && tv.tv_usec != 0)
 				val = 1;
 
 			switch (sopt->sopt_name) {
 			case SO_SNDTIMEO:
 				so->so_snd.sb_timeo = val;
 				break;
 			case SO_RCVTIMEO:
 				so->so_rcv.sb_timeo = val;
 				break;
 			}
 			break;
 
 		case SO_LABEL:
 #ifdef MAC
 			error = sooptcopyin(sopt, &extmac, sizeof extmac,
 			    sizeof extmac);
 			if (error)
 				goto bad;
 			error = mac_setsockopt_label(sopt->sopt_td->td_ucred,
 			    so, &extmac);
 #else
 			error = EOPNOTSUPP;
 #endif
 			break;
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		if (error == 0 && so->so_proto != NULL &&
 		    so->so_proto->pr_ctloutput != NULL) {
 			(void) ((*so->so_proto->pr_ctloutput)
 				  (so, sopt));
 		}
 	}
 bad:
 	return (error);
 }
 
 /*
  * Helper routine for getsockopt: hand up to len bytes of buf back to the
  * requester, truncated to the size of the requester's buffer, and record
  * the number of bytes in sopt->sopt_valsize.  Nothing is copied when
  * sopt->sopt_val is NULL.  Returns 0 or the error from copyout().
  */
 int
 sooptcopyout(struct sockopt *sopt, const void *buf, size_t len)
 {
 	size_t	ncopy;
 
 	/*
 	 * Documented get behavior is that we always return a value, possibly
 	 * truncated to fit in the user's buffer.  Traditional behavior is
 	 * that we always tell the user precisely how much we copied, rather
 	 * than something useful like the total amount we had available for
 	 * her.  Note that this interface is not idempotent; the entire
 	 * answer must be generated ahead of time.
 	 */
 	ncopy = min(len, sopt->sopt_valsize);
 	sopt->sopt_valsize = ncopy;
 
 	if (sopt->sopt_val == NULL)
 		return (0);
 	if (sopt->sopt_td == NULL) {
 		bcopy(buf, sopt->sopt_val, ncopy);
 		return (0);
 	}
 	return (copyout(buf, sopt->sopt_val, ncopy));
 }
 
 /*
  * Retrieve a socket option.
  *
  * Requests for a level other than SOL_SOCKET are passed to the protocol's
  * pr_ctloutput routine when there is one, and fail with ENOPROTOOPT
  * otherwise.  SOL_SOCKET options are answered here through sooptcopyout().
  * Returns 0 on success or an errno value.
  */
 int
 sogetopt(struct socket *so, struct sockopt *sopt)
 {
 	int	error, optval;
 	struct	linger l;
 	struct	timeval tv;
 #ifdef MAC
 	struct mac extmac;
 #endif
 
 	error = 0;
 	if (sopt->sopt_level != SOL_SOCKET) {
 		if (so->so_proto && so->so_proto->pr_ctloutput) {
 			return ((*so->so_proto->pr_ctloutput)
 				  (so, sopt));
 		} else
 			return (ENOPROTOOPT);
 	} else {
 		switch (sopt->sopt_name) {
 #ifdef INET
 		case SO_ACCEPTFILTER:
 			error = do_getopt_accept_filter(so, sopt);
 			break;
 #endif
 		case SO_LINGER:
 			SOCK_LOCK(so);
 			l.l_onoff = so->so_options & SO_LINGER;
 			l.l_linger = so->so_linger;
 			SOCK_UNLOCK(so);
 			error = sooptcopyout(sopt, &l, sizeof l);
 			break;
 
 		/* Flag options: report the option's bit from so_options. */
 		case SO_USELOOPBACK:
 		case SO_DONTROUTE:
 		case SO_DEBUG:
 		case SO_KEEPALIVE:
 		case SO_REUSEADDR:
 		case SO_REUSEPORT:
 		case SO_BROADCAST:
 		case SO_OOBINLINE:
 		case SO_ACCEPTCONN:
 		case SO_TIMESTAMP:
 		case SO_BINTIME:
 		case SO_NOSIGPIPE:
 			optval = so->so_options & sopt->sopt_name;
 		/* Shared tail: every int-valued option jumps here. */
 integer:
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		case SO_TYPE:
 			optval = so->so_type;
 			goto integer;
 
 		case SO_ERROR:
 			/* Reading the pending error also clears it. */
 			SOCK_LOCK(so);
 			optval = so->so_error;
 			so->so_error = 0;
 			SOCK_UNLOCK(so);
 			goto integer;
 
 		case SO_SNDBUF:
 			optval = so->so_snd.sb_hiwat;
 			goto integer;
 
 		case SO_RCVBUF:
 			optval = so->so_rcv.sb_hiwat;
 			goto integer;
 
 		case SO_SNDLOWAT:
 			optval = so->so_snd.sb_lowat;
 			goto integer;
 
 		case SO_RCVLOWAT:
 			optval = so->so_rcv.sb_lowat;
 			goto integer;
 
 		case SO_SNDTIMEO:
 		case SO_RCVTIMEO:
 			optval = (sopt->sopt_name == SO_SNDTIMEO ?
 				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
 
 			/* Convert the stored tick count into a timeval. */
 			tv.tv_sec = optval / hz;
 			tv.tv_usec = (optval % hz) * tick;
 #ifdef COMPAT_IA32
 			if (curthread->td_proc->p_sysent == &ia32_freebsd_sysvec) {
 				struct timeval32 tv32;
 
 				CP(tv, tv32, tv_sec);
 				CP(tv, tv32, tv_usec);
 				error = sooptcopyout(sopt, &tv32, sizeof tv32);
 			} else
 #endif
 				error = sooptcopyout(sopt, &tv, sizeof tv);
 			break;
 
 		case SO_LABEL:
 #ifdef MAC
 			/* The request structure is read in before being filled. */
 			error = sooptcopyin(sopt, &extmac, sizeof(extmac),
 			    sizeof(extmac));
 			if (error)
 				return (error);
 			error = mac_getsockopt_label(sopt->sopt_td->td_ucred,
 			    so, &extmac);
 			if (error)
 				return (error);
 			error = sooptcopyout(sopt, &extmac, sizeof extmac);
 #else
 			error = EOPNOTSUPP;
 #endif
 			break;
 
 		case SO_PEERLABEL:
 #ifdef MAC
 			error = sooptcopyin(sopt, &extmac, sizeof(extmac),
 			    sizeof(extmac));
 			if (error)
 				return (error);
 			error = mac_getsockopt_peerlabel(
 			    sopt->sopt_td->td_ucred, so, &extmac);
 			if (error)
 				return (error);
 			error = sooptcopyout(sopt, &extmac, sizeof extmac);
 #else
 			error = EOPNOTSUPP;
 #endif
 			break;
 
 		case SO_LISTENQLIMIT:
 			optval = so->so_qlimit;
 			goto integer;
 
 		case SO_LISTENQLEN:
 			optval = so->so_qlen;
 			goto integer;
 
 		case SO_LISTENINCQLEN:
 			optval = so->so_incqlen;
 			goto integer;
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		return (error);
 	}
 }
 
 /*
  * XXX; prepare mbuf for (__FreeBSD__ < 3) routines.
  *
  * Allocate an mbuf chain whose m_len values add up to sopt->sopt_valsize
  * and store its head in *mp.  Allocations use M_WAIT when sopt->sopt_td is
  * set and M_DONTWAIT otherwise.  Returns 0 on success, or ENOBUFS after
  * freeing whatever had been allocated so far.
  */
 int
 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
 {
 	struct mbuf *m, *m_prev;
 	int sopt_size = sopt->sopt_valsize;
 
 	/* First mbuf of the chain; it becomes *mp. */
 	MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
 	if (m == NULL)
 		return ENOBUFS;
 	if (sopt_size > MLEN) {
 		/* More than a plain mbuf holds: attach a cluster. */
 		MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
 		if ((m->m_flags & M_EXT) == 0) {
 			m_free(m);
 			return ENOBUFS;
 		}
 		m->m_len = min(MCLBYTES, sopt_size);
 	} else {
 		m->m_len = min(MLEN, sopt_size);
 	}
 	sopt_size -= m->m_len;
 	*mp = m;
 	m_prev = m;
 
 	/* Keep appending mbufs until the whole size is accounted for. */
 	while (sopt_size) {
 		MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
 		if (m == NULL) {
 			m_freem(*mp);
 			return ENOBUFS;
 		}
 		if (sopt_size > MLEN) {
 			MCLGET(m, sopt->sopt_td != NULL ? M_WAIT :
 			    M_DONTWAIT);
 			if ((m->m_flags & M_EXT) == 0) {
 				/* m is not linked in yet; free it separately. */
 				m_freem(m);
 				m_freem(*mp);
 				return ENOBUFS;
 			}
 			m->m_len = min(MCLBYTES, sopt_size);
 		} else {
 			m->m_len = min(MLEN, sopt_size);
 		}
 		sopt_size -= m->m_len;
 		m_prev->m_next = m;
 		m_prev = m;
 	}
 	return (0);
 }
 
 /*
  * XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines.
  *
  * Fill each mbuf of the chain m, m_len bytes at a time, from the option
  * value described by sopt, advancing sopt->sopt_val and decrementing
  * sopt->sopt_valsize as data is consumed.  The value is fetched with
  * copyin() when sopt->sopt_td is set and with bcopy() otherwise.
  *
  * Returns 0 on success (or when there is no option value at all).  If
  * copyin() fails the whole chain is freed and the error returned.  A chain
  * that cannot be completely filled from the option data is a caller bug
  * and panics.
  */
 int
 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
 {
 	struct mbuf *m0 = m;
 
 	if (sopt->sopt_val == NULL)
 		return (0);
 	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
 		if (sopt->sopt_td != NULL) {
 			int error;
 
 			error = copyin(sopt->sopt_val, mtod(m, char *),
 				       m->m_len);
 			if (error != 0) {
 				m_freem(m0);
 				return(error);
 			}
 		} else
 			bcopy(sopt->sopt_val, mtod(m, char *), m->m_len);
 		sopt->sopt_valsize -= m->m_len;
 		sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
 		m = m->m_next;
 	}
 	/* The chain must have been sized to match the option data. */
 	if (m != NULL)
 		panic("soopt_mcopyin");
 	return (0);
 }
 
 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
 int
 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
 {
 	struct mbuf *m0 = m;
 	size_t valsize = 0;
 
 	if (sopt->sopt_val == NULL)
 		return (0);
 	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
 		if (sopt->sopt_td != NULL) {
 			int error;
 
 			error = copyout(mtod(m, char *), sopt->sopt_val,
 				       m->m_len);
 			if (error != 0) {
 				m_freem(m0);
 				return(error);
 			}
 		} else
 			bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
 	       sopt->sopt_valsize -= m->m_len;
 	       sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
 	       valsize += m->m_len;
 	       m = m->m_next;
 	}
 	if (m != NULL) {
 		/* enough soopt buffer should be given from user-land */
 		m_freem(m0);
 		return(EINVAL);
 	}
 	sopt->sopt_valsize = valsize;
 	return (0);
 }
 
 /*
  * sohasoutofband(): protocol notifies socket layer of the arrival of new
  * out-of-band data, which will then notify socket consumers.
  */
 void
 sohasoutofband(struct socket *so)
 {
 
 	if (so->so_sigio != NULL)
 		pgsigio(&so->so_sigio, SIGURG, 0);
 	selwakeuppri(&so->so_rcv.sb_sel, PSOCK);
 }
 
 int
 sopoll(struct socket *so, int events, struct ucred *active_cred,
     struct thread *td)
 {
 
 	/* XXXRW: Temporary debugging. */
 	KASSERT(so->so_proto->pr_usrreqs->pru_sopoll != sopoll,
 	    ("sopoll: protocol calls sopoll"));
 
 	return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred,
 	    td));
 }
 
 /*
  * Generic poll implementation for sockets.  Returns the subset of the
  * requested events that are currently true.  If none is, the calling
  * thread is recorded on the relevant socket buffer(s) so that it is woken
  * when their state changes.  Both socket buffer locks are held, send
  * before receive, for the duration of the check.
  */
 int
 sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
     struct thread *td)
 {
 	int ready;
 
 	ready = 0;
 	SOCKBUF_LOCK(&so->so_snd);
 	SOCKBUF_LOCK(&so->so_rcv);
 
 	/* Normal readability. */
 	if ((events & (POLLIN | POLLRDNORM)) && soreadable(so))
 		ready |= events & (POLLIN | POLLRDNORM);
 
 	/* Readability that does not treat end-of-file as readable. */
 	if ((events & POLLINIGNEOF) &&
 	    (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
 	    !TAILQ_EMPTY(&so->so_comp) || so->so_error))
 		ready |= POLLINIGNEOF;
 
 	/* Writability. */
 	if ((events & (POLLOUT | POLLWRNORM)) && sowriteable(so))
 		ready |= events & (POLLOUT | POLLWRNORM);
 
 	/* Out-of-band data pending or at the mark. */
 	if ((events & (POLLPRI | POLLRDBAND)) &&
 	    (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK)))
 		ready |= events & (POLLPRI | POLLRDBAND);
 
 	/* Nothing ready: register for a wakeup on the buffers of interest. */
 	if (ready == 0) {
 		if (events & (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM |
 		    POLLRDBAND)) {
 			selrecord(td, &so->so_rcv.sb_sel);
 			so->so_rcv.sb_flags |= SB_SEL;
 		}
 		if (events & (POLLOUT | POLLWRNORM)) {
 			selrecord(td, &so->so_snd.sb_sel);
 			so->so_snd.sb_flags |= SB_SEL;
 		}
 	}
 
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	SOCKBUF_UNLOCK(&so->so_snd);
 	return (ready);
 }
 
 /*
  * Attach a knote to a socket.  EVFILT_READ uses the listen filter on a
  * socket with SO_ACCEPTCONN set and the read filter otherwise, and hangs
  * the knote off the receive buffer; EVFILT_WRITE uses the write filter and
  * the send buffer.  Any other filter is rejected with EINVAL.
  */
 int
 soo_kqfilter(struct file *fp, struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
 	struct sockbuf *sb;
 
 	if (kn->kn_filter == EVFILT_READ) {
 		kn->kn_fop = (so->so_options & SO_ACCEPTCONN) ?
 		    &solisten_filtops : &soread_filtops;
 		sb = &so->so_rcv;
 	} else if (kn->kn_filter == EVFILT_WRITE) {
 		kn->kn_fop = &sowrite_filtops;
 		sb = &so->so_snd;
 	} else {
 		return (EINVAL);
 	}
 
 	SOCKBUF_LOCK(sb);
 	knlist_add(&sb->sb_sel.si_note, kn, 1);
 	sb->sb_flags |= SB_KNOTE;
 	SOCKBUF_UNLOCK(sb);
 	return (0);
 }
 
 /*
  * Some routines that return EOPNOTSUPP for entry points that are not
  * supported by a protocol.  Fill in as needed.
  */
 /* Default pru_accept: the operation is not supported. */
 int
 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_attach: the operation is not supported. */
 int
 pru_attach_notsupp(struct socket *so, int proto, struct thread *td)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_bind: the operation is not supported. */
 int
 pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_connect: the operation is not supported. */
 int
 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_connect2: the operation is not supported. */
 int
 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_control: the operation is not supported. */
 int
 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
     struct ifnet *ifp, struct thread *td)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_disconnect: the operation is not supported. */
 int
 pru_disconnect_notsupp(struct socket *so)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_listen: the operation is not supported. */
 int
 pru_listen_notsupp(struct socket *so, int backlog, struct thread *td)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_peeraddr: the operation is not supported. */
 int
 pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_rcvd: the operation is not supported. */
 int
 pru_rcvd_notsupp(struct socket *so, int flags)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_rcvoob: the operation is not supported. */
 int
 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_send: the operation is not supported. */
 int
 pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
     struct sockaddr *addr, struct mbuf *control, struct thread *td)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * This isn't really a ``null'' operation, but it's the default one and
  * doesn't do anything destructive.
  */
 int
 pru_sense_null(struct socket *so, struct stat *sb)
 {
 
 	sb->st_blksize = so->so_snd.sb_hiwat;
 	return 0;
 }
 
 /* Default pru_shutdown: the operation is not supported. */
 int
 pru_shutdown_notsupp(struct socket *so)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_sockaddr: the operation is not supported. */
 int
 pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_sosend: the operation is not supported. */
 int
 pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_soreceive: the operation is not supported. */
 int
 pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
     struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* Default pru_sopoll: the operation is not supported. */
 int
 pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred,
     struct thread *td)
 {
 
 	return (EOPNOTSUPP);
 }
 
 static void
 filt_sordetach(struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
 
 	SOCKBUF_LOCK(&so->so_rcv);
 	knlist_remove(&so->so_rcv.sb_sel.si_note, kn, 1);
 	if (knlist_empty(&so->so_rcv.sb_sel.si_note))
 		so->so_rcv.sb_flags &= ~SB_KNOTE;
 	SOCKBUF_UNLOCK(&so->so_rcv);
 }
 
 /*ARGSUSED*/
 static int
 filt_soread(struct knote *kn, long hint)
 {
 	struct socket *so;
 
 	so = kn->kn_fp->f_data;
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
 	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
 		return (1);
 	} else if (so->so_error)	/* temporary udp error */
 		return (1);
 	else if (kn->kn_sfflags & NOTE_LOWAT)
 		return (kn->kn_data >= kn->kn_sdata);
 	else
 		return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat);
 }
 
 static void
 filt_sowdetach(struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
 
 	SOCKBUF_LOCK(&so->so_snd);
 	knlist_remove(&so->so_snd.sb_sel.si_note, kn, 1);
 	if (knlist_empty(&so->so_snd.sb_sel.si_note))
 		so->so_snd.sb_flags &= ~SB_KNOTE;
 	SOCKBUF_UNLOCK(&so->so_snd);
 }
 
 /*ARGSUSED*/
 static int
 filt_sowrite(struct knote *kn, long hint)
 {
 	struct socket *so;
 
 	so = kn->kn_fp->f_data;
 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
 	kn->kn_data = sbspace(&so->so_snd);
 	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
 		return (1);
 	} else if (so->so_error)	/* temporary udp error */
 		return (1);
 	else if (((so->so_state & SS_ISCONNECTED) == 0) &&
 	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
 		return (0);
 	else if (kn->kn_sfflags & NOTE_LOWAT)
 		return (kn->kn_data >= kn->kn_sdata);
 	else
 		return (kn->kn_data >= so->so_snd.sb_lowat);
 }
 
 /*
  * Listen filter for sockets.  Stores the completed-connection queue length
  * in kn_data and fires when the completed-connection queue is non-empty.
  */
 /*ARGSUSED*/
 static int
 filt_solisten(struct knote *kn, long hint)
 {
 	struct socket *head;
 
 	head = kn->kn_fp->f_data;
 	kn->kn_data = head->so_qlen;
 	return (!TAILQ_EMPTY(&head->so_comp));
 }
 
 /*
  * Check that a socket belongs to the given uid.  Returns 0 when the uid of
  * the socket's credential matches, and EPERM when it does not or when so
  * is NULL.
  */
 int
 socheckuid(struct socket *so, uid_t uid)
 {
 
 	if (so == NULL || so->so_cred->cr_uid != uid)
 		return (EPERM);
 	return (0);
 }
 
 static int
 sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	int val;
 
 	val = somaxconn;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr )
 		return (error);
 
 	if (val < 1 || val > USHRT_MAX)
 		return (EINVAL);
 
 	somaxconn = val;
 	return (0);
 }
 
 /*
  * These functions are used by protocols to notify the socket layer (and its
  * consumers) of state changes in the sockets driven by protocol-side events.
  */
 
 /*
  * Procedures to manipulate state flags of socket and do appropriate wakeups.
  *
  * Normal sequence from the active (originating) side is that
  * soisconnecting() is called during processing of connect() call, resulting
  * in an eventual call to soisconnected() if/when the connection is
  * established.  When the connection is torn down soisdisconnecting() is
  * called during processing of disconnect() call, and soisdisconnected() is
  * called when the connection to the peer is totally severed.  The semantics
  * of these routines are such that connectionless protocols can call
  * soisconnected() and soisdisconnected() only, bypassing the in-progress
  * calls when setting up a ``connection'' takes no time.
  *
  * From the passive side, a socket is created with two queues of sockets:
  * so_incomp for connections in progress and so_comp for connections already
  * made and awaiting user acceptance.  As a protocol is preparing incoming
  * connections, it creates a socket structure queued on so_incomp by calling
  * sonewconn().  When the connection is established, soisconnected() is
  * called, and transfers the socket structure to so_comp, making it available
  * to accept().
  *
  * If a socket is closed with sockets on either so_incomp or so_comp, these
  * sockets are dropped.
  *
  * If higher-level protocols are implemented in the kernel, the wakeups done
  * here will sometimes cause software-interrupt process scheduling.
  */
 /*
  * Mark a socket as having a connection attempt in progress: under the
  * socket lock, clear SS_ISCONNECTED and SS_ISDISCONNECTING and set
  * SS_ISCONNECTING.
  */
 void
 soisconnecting(struct socket *so)
 {
 
 	SOCK_LOCK(so);
 	so->so_state = (so->so_state &
 	    ~(SS_ISCONNECTED|SS_ISDISCONNECTING)) | SS_ISCONNECTING;
 	SOCK_UNLOCK(so);
 }
 
 /*
  * Mark a socket as connected and wake up anyone waiting on it.
  *
  * For a socket still sitting on a listening socket's incomplete queue
  * there are two cases.  Without an accept filter it is moved to the
  * completed queue and the listener is woken.  With one, the filter's
  * callback is installed as the receive upcall and invoked once, and the
  * socket stays where it is.  Any other socket simply has its sleepers,
  * readers and writers woken.
  */
 void
 soisconnected(struct socket *so)
 {
 	struct socket *head;
 
 	ACCEPT_LOCK();
 	SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
 	so->so_state |= SS_ISCONNECTED;
 	head = so->so_head;
 	if (head != NULL && (so->so_qstate & SQ_INCOMP)) {
 		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
 			/* Queue manipulation below runs under the accept lock. */
 			SOCK_UNLOCK(so);
 			TAILQ_REMOVE(&head->so_incomp, so, so_list);
 			head->so_incqlen--;
 			so->so_qstate &= ~SQ_INCOMP;
 			TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
 			head->so_qlen++;
 			so->so_qstate |= SQ_COMP;
 			ACCEPT_UNLOCK();
 			sorwakeup(head);
 			wakeup_one(&head->so_timeo);
 		} else {
 			ACCEPT_UNLOCK();
 			so->so_upcall =
 			    head->so_accf->so_accept_filter->accf_callback;
 			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
 			so->so_rcv.sb_flags |= SB_UPCALL;
 			/* The filter takes over; clear the flag on this socket. */
 			so->so_options &= ~SO_ACCEPTFILTER;
 			SOCK_UNLOCK(so);
 			so->so_upcall(so, so->so_upcallarg, M_DONTWAIT);
 		}
 		return;
 	}
 	SOCK_UNLOCK(so);
 	ACCEPT_UNLOCK();
 	wakeup(&so->so_timeo);
 	sorwakeup(so);
 	sowwakeup(so);
 }
 
 /*
  * Mark a socket as being in the process of disconnecting: no more data
  * can be received or sent, and all waiters are woken.
  */
 void
 soisdisconnecting(struct socket *so)
 {
 
 	/*
 	 * Note: This code assumes that SOCK_LOCK(so) and
 	 * SOCKBUF_LOCK(&so->so_rcv) are the same.
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	so->so_state &= ~SS_ISCONNECTING;
 	so->so_state |= SS_ISDISCONNECTING;
 	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
 	/*
 	 * NOTE(review): no SOCKBUF_UNLOCK() is paired with either lock
 	 * acquisition here, so the *_locked wakeup routines presumably
 	 * release the socket buffer lock before returning -- confirm against
 	 * their definitions.
 	 */
 	sorwakeup_locked(so);
 	SOCKBUF_LOCK(&so->so_snd);
 	so->so_snd.sb_state |= SBS_CANTSENDMORE;
 	sowwakeup_locked(so);
 	wakeup(&so->so_timeo);
 }
 
 /*
  * Mark a socket as fully disconnected: no more data can be received or
  * sent, anything still queued for sending is dropped, and all waiters are
  * woken.
  */
 void
 soisdisconnected(struct socket *so)
 {
 
 	/*
 	 * Note: This code assumes that SOCK_LOCK(so) and
 	 * SOCKBUF_LOCK(&so->so_rcv) are the same.
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
 	so->so_state |= SS_ISDISCONNECTED;
 	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
 	/*
 	 * NOTE(review): no SOCKBUF_UNLOCK() is paired with either lock
 	 * acquisition here, so the *_locked wakeup routines presumably
 	 * release the socket buffer lock before returning -- confirm against
 	 * their definitions.
 	 */
 	sorwakeup_locked(so);
 	SOCKBUF_LOCK(&so->so_snd);
 	so->so_snd.sb_state |= SBS_CANTSENDMORE;
 	/* Discard everything still waiting in the send buffer. */
 	sbdrop_locked(&so->so_snd, so->so_snd.sb_cc);
 	sowwakeup_locked(so);
 	wakeup(&so->so_timeo);
 }
 
 /*
  * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
  */
 struct sockaddr *
 sodupsockaddr(const struct sockaddr *sa, int mflags)
 {
 	struct sockaddr *sa2;
 
 	sa2 = malloc(sa->sa_len, M_SONAME, mflags);
 	if (sa2)
 		bcopy(sa, sa2, sa->sa_len);
 	return sa2;
 }
 
 /*
  * Create an external-format (``xsocket'') structure using the information in
  * the kernel-format socket structure pointed to by so.  This is done to
  * reduce the spew of irrelevant information over this interface, to isolate
  * user code from changes in the kernel structure, and potentially to provide
  * information-hiding if we decide that some of this information should be
  * hidden from users.
  */
 void
 sotoxsocket(struct socket *so, struct xsocket *xso)
 {
 
 	xso->xso_len = sizeof *xso;
 	xso->xso_so = so;
 	xso->so_type = so->so_type;
 	xso->so_options = so->so_options;
 	xso->so_linger = so->so_linger;
 	xso->so_state = so->so_state;
 	xso->so_pcb = so->so_pcb;
 	xso->xso_protocol = so->so_proto->pr_protocol;
 	xso->xso_family = so->so_proto->pr_domain->dom_family;
 	xso->so_qlen = so->so_qlen;
 	xso->so_incqlen = so->so_incqlen;
 	xso->so_qlimit = so->so_qlimit;
 	xso->so_timeo = so->so_timeo;
 	xso->so_error = so->so_error;
 	xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
 	xso->so_oobmark = so->so_oobmark;
 	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
 	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
 	xso->so_uid = so->so_cred->cr_uid;
 }
Index: head/sys/kern/vfs_export.c
===================================================================
--- head/sys/kern/vfs_export.c	(revision 178887)
+++ head/sys/kern/vfs_export.c	(revision 178888)
@@ -1,440 +1,453 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/dirent.h>
 #include <sys/domain.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/refcount.h>
 #include <sys/socket.h>
 #include <sys/systm.h>
 #include <sys/vnode.h>
 
 #include <net/radix.h>
 
 static MALLOC_DEFINE(M_NETADDR, "export_host", "Export host address structure");
 
 static void	vfs_free_addrlist(struct netexport *nep);
 static int	vfs_free_netcred(struct radix_node *rn, void *w);
 static int	vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
 		    struct export_args *argp);
 static struct netcred *vfs_export_lookup(struct mount *, struct sockaddr *);
 
 /*
  * Network address lookup element.
  *
  * vfs_hang_addrlist() allocates one of these per exported address, with the
  * address (and optional mask) stored directly behind the structure, and
  * embeds one in struct netexport for the default export.
  */
 struct netcred {
 	struct	radix_node netc_rnodes[2];	/* nodes given to rnh_addaddr */
 	int	netc_exflags;			/* copied from ex_flags */
 	struct	ucred netc_anon;		/* filled in from ex_anon */
 };
 
 /*
  * Network export information.
  *
  * Hung off a mount point through mp->mnt_export.  ne_rtable is indexed by
  * address family (sa_family); entries stay NULL until an address of that
  * family is exported.
  */
 struct netexport {
 	struct	netcred ne_defexported;		      /* Default export */
 	struct	radix_node_head *ne_rtable[AF_MAX+1]; /* Individual exports */
 };
 
 /*
  * Build hash lists of net addresses and hang them off the mount point.
  * Called by ufs_mount() to set up the lists of export addresses.
  *
  * With argp->ex_addrlen == 0 the request describes the default export and
  * is recorded in nep->ne_defexported.  Otherwise the address (and optional
  * mask) are copied in from userland behind a freshly allocated netcred and
  * entered into the radix tree for the address family; that tree is attached
  * on first use.
  *
  * Returns 0 on success, otherwise EINVAL, EPERM, ENOBUFS or the error from
  * copyin().  Most failures also record an explanation through
  * vfs_mount_error().
  */
 static int
 vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
     struct export_args *argp)
 {
 	register struct netcred *np;
 	register struct radix_node_head *rnh;
 	register int i;
 	struct radix_node *rn;
 	struct sockaddr *saddr, *smask = 0;
 	struct domain *dom;
 	int error;
 
 	/*
 	 * XXX: This routine converts from a `struct xucred'
 	 * (argp->ex_anon) to a `struct ucred' (np->netc_anon).  This
 	 * operation is questionable; for example, what should be done
 	 * with fields like cr_uidinfo and cr_prison?  Currently, this
 	 * routine does not touch them (leaves them as NULL).
 	 */
 	if (argp->ex_anon.cr_version != XUCRED_VERSION) {
 		vfs_mount_error(mp, "ex_anon.cr_version: %d != %d",
 		    argp->ex_anon.cr_version, XUCRED_VERSION);
 		return (EINVAL);
 	}
 
 	/* No address supplied: this is the default export entry. */
 	if (argp->ex_addrlen == 0) {
 		if (mp->mnt_flag & MNT_DEFEXPORTED) {
 			vfs_mount_error(mp,
 			    "MNT_DEFEXPORTED already set for mount %p", mp);
 			return (EPERM);
 		}
 		np = &nep->ne_defexported;
 		np->netc_exflags = argp->ex_flags;
 		bzero(&np->netc_anon, sizeof(np->netc_anon));
 		np->netc_anon.cr_uid = argp->ex_anon.cr_uid;
 		np->netc_anon.cr_ngroups = argp->ex_anon.cr_ngroups;
 		bcopy(argp->ex_anon.cr_groups, np->netc_anon.cr_groups,
 		    sizeof(np->netc_anon.cr_groups));
 		refcount_init(&np->netc_anon.cr_ref, 1);
 		MNT_ILOCK(mp);
 		mp->mnt_flag |= MNT_DEFEXPORTED;
 		MNT_IUNLOCK(mp);
 		return (0);
 	}
 
 #if MSIZE <= 256
 	if (argp->ex_addrlen > MLEN) {
 		vfs_mount_error(mp, "ex_addrlen %d is greater than %d",
 		    argp->ex_addrlen, MLEN);
 		return (EINVAL);
 	}
 #endif
 
 	/* One allocation holds the netcred, the address and the mask. */
 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
 	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK | M_ZERO);
 	saddr = (struct sockaddr *) (np + 1);
 	if ((error = copyin(argp->ex_addr, saddr, argp->ex_addrlen)))
 		goto out;
 	if (saddr->sa_family == AF_UNSPEC || saddr->sa_family > AF_MAX) {
 		error = EINVAL;
 		/* The format consumes one int; pass the offending family. */
 		vfs_mount_error(mp, "Invalid saddr->sa_family: %d",
 		    saddr->sa_family);
 		goto out;
 	}
 	/* Never trust a userland length larger than what was copied in. */
 	if (saddr->sa_len > argp->ex_addrlen)
 		saddr->sa_len = argp->ex_addrlen;
 	if (argp->ex_masklen) {
 		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
 		error = copyin(argp->ex_mask, smask, argp->ex_masklen);
 		if (error)
 			goto out;
 		if (smask->sa_len > argp->ex_masklen)
 			smask->sa_len = argp->ex_masklen;
 	}
 	i = saddr->sa_family;
 	if ((rnh = nep->ne_rtable[i]) == NULL) {
 		/*
 		 * Seems silly to initialize every AF when most are not used,
 		 * do so on demand here
 		 */
-		for (dom = domains; dom; dom = dom->dom_next)
+		for (dom = domains; dom; dom = dom->dom_next) {
+			KASSERT(((i == AF_INET) || (i == AF_INET6)), 
+			    ("unexpected protocol in vfs_hang_addrlist"));
 			if (dom->dom_family == i && dom->dom_rtattach) {
-				dom->dom_rtattach((void **) &nep->ne_rtable[i],
-				    dom->dom_rtoffset);
+				/*
+				 * XXX MRT 
+				 * The INET and INET6 domains know the
+				 * offset already. We don't need to send it
+				 * So we just use it as a flag to say that
+				 * we are or are not setting up a real routing
+				 * table. Only IP and IPV6 need have this
+				 * be 0 so all other protocols can stay the 
+				 * same (ABI compatible).
+				 */ 
+				dom->dom_rtattach(
+				    (void **) &nep->ne_rtable[i], 0);
 				break;
 			}
+		}
 		if ((rnh = nep->ne_rtable[i]) == NULL) {
 			error = ENOBUFS;
 			vfs_mount_error(mp, "%s %s %d",
 			    "Unable to initialize radix node head ",
 			    "for address family", i);
 			goto out;
 		}
 	}
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rn = (*rnh->rnh_addaddr)(saddr, smask, rnh, np->netc_rnodes);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
 	if (rn == NULL || np != (struct netcred *)rn) {	/* already exists */
 		error = EPERM;
 		vfs_mount_error(mp, "Invalid radix node head, rn: %p %p",
 		    rn, np);
 		goto out;
 	}
 	np->netc_exflags = argp->ex_flags;
 	bzero(&np->netc_anon, sizeof(np->netc_anon));
 	np->netc_anon.cr_uid = argp->ex_anon.cr_uid;
 	np->netc_anon.cr_ngroups = argp->ex_anon.cr_ngroups;
 	bcopy(argp->ex_anon.cr_groups, np->netc_anon.cr_groups,
 	    sizeof(np->netc_anon.cr_groups));
 	refcount_init(&np->netc_anon.cr_ref, 1);
 	return (0);
 out:
 	/* Only the non-default path gets here, so np is always malloc'ed. */
 	free(np, M_NETADDR);
 	return (error);
 }
 
 /*
  * Tree-walk callback used by vfs_free_addrlist(): unlink one entry from
  * the radix tree passed in w and release its storage.  Always returns 0.
  */
 /* ARGSUSED */
 static int
 vfs_free_netcred(struct radix_node *rn, void *w)
 {
 	struct radix_node_head *head;
 
 	head = w;
 	(*head->rnh_deladdr)(rn->rn_key, rn->rn_mask, head);
 	free(rn, M_NETADDR);
 	return (0);
 }
 
 /*
  * Release every per-address-family export tree hanging off nep: each tree
  * is emptied with vfs_free_netcred(), destroyed and freed, and its slot in
  * ne_rtable is cleared.
  */
 static void
 vfs_free_addrlist(struct netexport *nep)
 {
 	struct radix_node_head *head;
 	int af;
 
 	for (af = 0; af <= AF_MAX; af++) {
 		head = nep->ne_rtable[af];
 		if (head == NULL)
 			continue;
 		RADIX_NODE_HEAD_LOCK(head);
 		(*head->rnh_walktree)(head, vfs_free_netcred, head);
 		RADIX_NODE_HEAD_DESTROY(head);
 		free(head, M_RTABLE);
 		nep->ne_rtable[af] = NULL;	/* not SMP safe XXX */
 	}
 }
 
 /*
  * High level function to manipulate export options on a mount point
  * and the passed in netexport.
  * Struct export_args *argp is the variable used to twiddle options,
  * the structure is described in sys/mount.h
  *
  * MNT_DELEXPORT in argp->ex_flags tears down the existing export state;
  * MNT_EXPORTED then (re)creates it.  Both may be set in one call, in which
  * case the deletion is processed first.
  *
  * Returns 0 on success, ENOENT when asked to delete exports that do not
  * exist, or the error from vfs_setpublicfs() / vfs_hang_addrlist().
  */
 int
 vfs_export(struct mount *mp, struct export_args *argp)
 {
 	struct netexport *nep;
 	int error;
 
 	nep = mp->mnt_export;
 	error = 0;
 	if (argp->ex_flags & MNT_DELEXPORT) {
 		if (nep == NULL) {
 			error = ENOENT;
 			goto out;
 		}
 		/* Withdraw the WebNFS public filesystem if this mount is it. */
 		if (mp->mnt_flag & MNT_EXPUBLIC) {
 			vfs_setpublicfs(NULL, NULL, NULL);
 			MNT_ILOCK(mp);
 			mp->mnt_flag &= ~MNT_EXPUBLIC;
 			MNT_IUNLOCK(mp);
 		}
 		vfs_free_addrlist(nep);
 		mp->mnt_export = NULL;
 		free(nep, M_MOUNT);
 		/* Cleared so that a following MNT_EXPORTED allocates afresh. */
 		nep = NULL;
 		MNT_ILOCK(mp);
 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
 		MNT_IUNLOCK(mp);
 	}
 	if (argp->ex_flags & MNT_EXPORTED) {
 		if (nep == NULL) {
 			nep = malloc(sizeof(struct netexport), M_MOUNT, M_WAITOK | M_ZERO);
 			mp->mnt_export = nep;
 		}
 		if (argp->ex_flags & MNT_EXPUBLIC) {
 			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
 				goto out;
 			MNT_ILOCK(mp);
 			mp->mnt_flag |= MNT_EXPUBLIC;
 			MNT_IUNLOCK(mp);
 		}
 		if ((error = vfs_hang_addrlist(mp, nep, argp)))
 			goto out;
 		MNT_ILOCK(mp);
 		mp->mnt_flag |= MNT_EXPORTED;
 		MNT_IUNLOCK(mp);
 	}
 
 out:
 	/*
 	 * Once we have executed the vfs_export() command, we do
 	 * not want to keep the "export" option around in the
 	 * options list, since that will cause subsequent MNT_UPDATE
 	 * calls to fail.  The export information is saved in
 	 * mp->mnt_export, so we can safely delete the "export" mount option
 	 * here.
 	 */
 	vfs_deleteopt(mp->mnt_optnew, "export");
 	vfs_deleteopt(mp->mnt_opt, "export");
 	return (error);
 }
 
 /*
  * Set the publicly exported filesystem (WebNFS). Currently, only
  * one public filesystem is possible in the spec (RFC 2054 and 2055)
  */
 int
 vfs_setpublicfs(struct mount *mp, struct netexport *nep,
     struct export_args *argp)
 {
 	int error;
 	struct vnode *rvp;
 	char *cp;
 
 	/*
 	 * mp == NULL -> invalidate the current info, the FS is
 	 * no longer exported. May be called from either vfs_export
 	 * or unmount, so check if it hasn't already been done.
 	 */
 	if (mp == NULL) {
 		if (nfs_pub.np_valid) {
 			nfs_pub.np_valid = 0;
 			if (nfs_pub.np_index != NULL) {
 				FREE(nfs_pub.np_index, M_TEMP);
 				nfs_pub.np_index = NULL;
 			}
 		}
 		return (0);
 	}
 
 	/*
 	 * Only one allowed at a time.
 	 */
 	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
 		return (EBUSY);
 
 	/*
 	 * Get real filehandle for root of exported FS.
 	 */
 	bzero(&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
 	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
 
 	if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rvp, curthread /* XXX */)))
 		return (error);
 
 	if ((error = VOP_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
 		return (error);
 
 	vput(rvp);
 
 	/*
 	 * If an indexfile was specified, pull it in.
 	 */
 	if (argp->ex_indexfile != NULL) {
 		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
 		    M_WAITOK);
 		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
 		    MAXNAMLEN, (size_t *)0);
 		if (!error) {
 			/*
 			 * Check for illegal filenames.
 			 */
 			for (cp = nfs_pub.np_index; *cp; cp++) {
 				if (*cp == '/') {
 					error = EINVAL;
 					break;
 				}
 			}
 		}
 		if (error) {
 			FREE(nfs_pub.np_index, M_TEMP);
 			return (error);
 		}
 	}
 
 	nfs_pub.np_mount = mp;
 	nfs_pub.np_valid = 1;
 	return (0);
 }
 
 /*
  * Used by the filesystems to determine whether a given network address
  * (passed in 'nam') is present in their exports list.  Returns a pointer
  * to the matching struct netcred so that the filesystem can examine it for
  * access rights (read/write/etc), or NULL when the mount is not exported
  * or nothing matches and there is no default export.
  */
 static struct netcred *
 vfs_export_lookup(struct mount *mp, struct sockaddr *nam)
 {
 	struct netexport *nep;
 	struct radix_node_head *rnh;
 	struct netcred *match;
 
 	nep = mp->mnt_export;
 	if (nep == NULL)
 		return (NULL);
 	if ((mp->mnt_flag & MNT_EXPORTED) == 0)
 		return (NULL);
 
 	/*
 	 * Consult the per-family export tree first; a hit on the tree's
 	 * root node does not count as a match.
 	 */
 	match = NULL;
 	if (nam != NULL && (rnh = nep->ne_rtable[nam->sa_family]) != NULL) {
 		RADIX_NODE_HEAD_LOCK(rnh);
 		match = (struct netcred *)(*rnh->rnh_matchaddr)(nam, rnh);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 		if (match != NULL &&
 		    (match->netc_rnodes->rn_flags & RNF_ROOT) != 0)
 			match = NULL;
 	}
 
 	/* Fall back to the default export when one has been configured. */
 	if (match == NULL && (mp->mnt_flag & MNT_DEFEXPORTED) != 0)
 		match = &nep->ne_defexported;
 	return (match);
 }
 
 /*
  * XXX: This comment comes from the deprecated ufs_check_export()
  * XXX: and may not entirely apply, but lacking something better:
  * This is the generic part of fhtovp called after the underlying
  * filesystem has validated the file handle.
  *
  * Verify that a host should have access to a filesystem.  On success the
  * export flags and anonymous credential of the matching export are handed
  * back through extflagsp and credanonp and 0 is returned; EACCES is
  * returned when no export matches 'nam'.
  */
 
 int
 vfs_stdcheckexp(struct mount *mp, struct sockaddr *nam, int *extflagsp,
     struct ucred **credanonp)
 {
 	struct netcred *cred;
 
 	cred = vfs_export_lookup(mp, nam);
 	if (cred != NULL) {
 		*extflagsp = cred->netc_exflags;
 		*credanonp = &cred->netc_anon;
 		return (0);
 	}
 	return (EACCES);
 }
 
Index: head/sys/net/if.c
===================================================================
--- head/sys/net/if.c	(revision 178887)
+++ head/sys/net/if.c	(revision 178888)
@@ -1,2770 +1,2773 @@
 /*-
  * Copyright (c) 1980, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if.c	8.5 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #include "opt_compat.h"
 #include "opt_inet6.h"
 #include "opt_inet.h"
 #include "opt_mac.h"
 #include "opt_carp.h"
 
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/conf.h>
 #include <sys/malloc.h>
 #include <sys/sbuf.h>
 #include <sys/bus.h>
 #include <sys/mbuf.h>
 #include <sys/systm.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/kernel.h>
 #include <sys/sockio.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/domain.h>
 #include <sys/jail.h>
 #include <machine/stdarg.h>
 
 #include <net/if.h>
 #include <net/if_clone.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/radix.h>
 #include <net/route.h>
 
 #if defined(INET) || defined(INET6)
 /*XXX*/
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #ifdef INET6
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_ifattach.h>
 #endif
 #endif
 #ifdef INET
 #include <netinet/if_ether.h>
 #endif
 #ifdef DEV_CARP
 #include <netinet/ip_carp.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
 
 /* Log link state change events */
 static int log_link_state_change = 1;
 
 SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
 	&log_link_state_change, 0,
 	"log interface link state change events");
 
 void	(*bstp_linkstate_p)(struct ifnet *ifp, int state);
 void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
 void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
 
 struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
 
 /*
  * XXX: Style; these should be sorted alphabetically, and unprototyped
  * static functions should be prototyped. Currently they are sorted by
  * declaration order.
  */
 static void	if_attachdomain(void *);
 static void	if_attachdomain1(struct ifnet *);
 static int	ifconf(u_long, caddr_t);
 static void	if_freemulti(struct ifmultiaddr *);
 static void	if_grow(void);
 static void	if_init(void *);
 static void	if_check(void *);
 static void	if_qflush(struct ifaltq *);
 static void	if_route(struct ifnet *, int flag, int fam);
 static int	if_setflag(struct ifnet *, int, int, int *, int);
 static void	if_slowtimo(void *);
 static void	if_unroute(struct ifnet *, int flag, int fam);
 static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
 static int	if_rtdel(struct radix_node *, void *);
 static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
 static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
 static void	if_start_deferred(void *context, int pending);
 static void	do_link_state_change(void *, int);
 static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
 static int	if_getgroupmembers(struct ifgroupreq *);
 #ifdef INET6
 /*
  * XXX: declare here to avoid to include many inet6 related files..
  * should be more generalized?
  */
 extern void	nd6_setmtu(struct ifnet *);
 #endif
 
 int	if_index = 0;
 struct	ifindex_entry *ifindex_table = NULL;
 int	ifqmaxlen = IFQ_MAXLEN;
 struct	ifnethead ifnet;	/* depend on static init XXX */
 struct	ifgrouphead ifg_head;
 struct	mtx ifnet_lock;
 static	if_com_alloc_t *if_com_alloc[256];
 static	if_com_free_t *if_com_free[256];
 
 static int	if_indexlim = 8;
 static struct	knlist ifklist;
 
 static void	filt_netdetach(struct knote *kn);
 static int	filt_netdev(struct knote *kn, long hint);
 
 static struct filterops netdev_filtops =
     { 1, NULL, filt_netdetach, filt_netdev };
 
 /*
  * System initialization
  */
 SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL);
 SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL);
 
 MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
 
 static d_open_t		netopen;
 static d_close_t	netclose;
 static d_ioctl_t	netioctl;
 static d_kqfilter_t	netkqfilter;
 
 static struct cdevsw net_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_flags =	D_NEEDGIANT,
 	.d_open =	netopen,
 	.d_close =	netclose,
 	.d_ioctl =	netioctl,
 	.d_name =	"net",
 	.d_kqfilter =	netkqfilter,
 };
 
 static int
 netopen(struct cdev *dev, int flag, int mode, struct thread *td)
 {
 	return (0);
 }
 
 static int
 netclose(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
 	return (0);
 }
 
 /*
  * d_ioctl handler for the "net" cdevsw.  Only ioctls of group 'i' are
  * accepted.  Minor 0 is the special network device and answers
  * SIOCGIFCONF only; every other minor is an interface index whose request
  * is passed to ifhwioctl().
  */
 static int
 netioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
 {
 	struct ifnet *ifp;
 	int error, idx;
 
 	/* only support interface specific ioctls */
 	if (IOCGROUP(cmd) != 'i')
 		return (EOPNOTSUPP);
 
 	idx = minor(dev);
 	if (idx != 0) {
 		ifp = ifnet_byindex(idx);
 		if (ifp == NULL)
 			return (ENXIO);
 
 		/* ENOIOCTL is internal; report it as "not supported". */
 		error = ifhwioctl(cmd, ifp, data, td);
 		return (error == ENOIOCTL ? EOPNOTSUPP : error);
 	}
 
 	/*
 	 * special network device, not interface.
 	 */
 	if (cmd == SIOCGIFCONF)
 		return (ifconf(cmd, data));	/* XXX remove cmd */
 #ifdef __amd64__
 	if (cmd == SIOCGIFCONF32)
 		return (ifconf(cmd, data));	/* XXX remove cmd */
 #endif
 	return (EOPNOTSUPP);
 }
 
 static int
 netkqfilter(struct cdev *dev, struct knote *kn)
 {
 	struct knlist *klist;
 	struct ifnet *ifp;
 	int idx;
 
 	switch (kn->kn_filter) {
 	case EVFILT_NETDEV:
 		kn->kn_fop = &netdev_filtops;
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	idx = minor(dev);
 	if (idx == 0) {
 		klist = &ifklist;
 	} else {
 		ifp = ifnet_byindex(idx);
 		if (ifp == NULL)
 			return (1);
 		klist = &ifp->if_klist;
 	}
 
 	kn->kn_hook = (caddr_t)klist;
 
 	knlist_add(klist, kn, 0);
 
 	return (0);
 }
 
 /* Detach a knote from the knlist that netkqfilter() stored in kn_hook. */
 static void
 filt_netdetach(struct knote *kn)
 {
 	struct knlist *list;
 
 	list = (struct knlist *)kn->kn_hook;
 	knlist_remove(list, kn, 0);
 }
 
 /*
  * Event filter for EVFILT_NETDEV knotes.  Returns non-zero when the knote
  * should be reported.
  */
 static int
 filt_netdev(struct knote *kn, long hint)
 {
 	struct knlist *list;
 
 	/*
 	 * Currently NOTE_EXIT is abused to indicate device detach.
 	 */
 	if (hint == NOTE_EXIT) {
 		list = (struct knlist *)kn->kn_hook;
 		kn->kn_data = NOTE_LINKINV;
 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 		knlist_remove_inevent(list, kn);
 		return (1);
 	}
 
 	/* A zero hint carries no status and can match no requested flag. */
 	if (hint != 0) {
 		kn->kn_data = hint;			/* current status */
 		if (kn->kn_sfflags & hint)
 			kn->kn_fflags |= hint;
 	}
 	return (kn->kn_fflags != 0);
 }
 
 /*
  * Network interface utility routines.
  *
  * Routines with ifa_ifwith* names take sockaddr *'s as
  * parameters.
  */
 
 /*
  * One-time initialization of the interface layer, registered through the
  * "interfaces" SYSINIT: sets up the ifnet lock, the interface and group
  * lists, the global knote list and the initial index table, creates the
  * "network" control device for index 0 and initializes interface cloning.
  */
 /* ARGSUSED*/
 static void
 if_init(void *dummy __unused)
 {
 
 	IFNET_LOCK_INIT();
 	TAILQ_INIT(&ifnet);
 	TAILQ_INIT(&ifg_head);
 	knlist_init(&ifklist, NULL, NULL, NULL, NULL);
 	if_grow();				/* create initial table */
 	/* The table must exist before slot 0 can be filled in. */
 	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
 	    UID_ROOT, GID_WHEEL, 0600, "network");
 	if_clone_init();
 }
 
 static void
 if_grow(void)
 {
 	u_int n;
 	struct ifindex_entry *e;
 
 	if_indexlim <<= 1;
 	n = if_indexlim * sizeof(*e);
 	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
 	if (ifindex_table != NULL) {
 		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
 		free((caddr_t)ifindex_table, M_IFNET);
 	}
 	ifindex_table = e;
 }
 
 /*
  * Sanity pass over all attached interfaces, registered through the
  * "interface_check" SYSINIT: repairs send queues whose driver left
  * ifq_maxlen at 0 or never initialized the queue mutex, complaining about
  * each, then starts the if_slowtimo() timer.
  */
 /* ARGSUSED*/
 static void
 if_check(void *dummy __unused)
 {
 	struct ifnet *ifp;
 	int s;
 
 	s = splimp();
 	IFNET_RLOCK();	/* could sleep on rare error; mostly okay XXX */
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		if (ifp->if_snd.ifq_maxlen == 0) {
 			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
 			ifp->if_snd.ifq_maxlen = ifqmaxlen;
 		}
 		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
 			if_printf(ifp,
 			    "XXX: driver didn't initialize queue mtx\n");
 			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
 			    MTX_NETWORK_LOCK, MTX_DEF);
 		}
 	}
 	IFNET_RUNLOCK();
 	splx(s);
 	if_slowtimo(0);
 }
 
 /*
  * Allocate a struct ifnet and an index for an interface.  A layer 2
  * common structure will also be allocated if an allocation routine is
  * registered for the passed type.
  *
  * Returns the new ifnet, or NULL when no index could be assigned or the
  * registered layer 2 allocation routine failed.
  */
 struct ifnet*
 if_alloc(u_char type)
 {
 	struct ifnet *ifp;
 
 	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
 
 	/*
 	 * Try to find an empty slot below if_index.  If we fail, take
 	 * the next slot.
 	 *
 	 * XXX: should be locked!
 	 */
 	for (ifp->if_index = 1; ifp->if_index <= if_index; ifp->if_index++) {
 		if (ifnet_byindex(ifp->if_index) == NULL)
 			break;
 	}
 	/* Catch if_index overflow. */
 	if (ifp->if_index < 1) {
 		free(ifp, M_IFNET);
 		return (NULL);
 	}
 	/* A slot past the current maximum was taken: raise the high mark. */
 	if (ifp->if_index > if_index)
 		if_index = ifp->if_index;
 	if (if_index >= if_indexlim)
 		if_grow();
 
 	ifp->if_type = type;
 
 	if (if_com_alloc[type] != NULL) {
 		ifp->if_l2com = if_com_alloc[type](type, ifp);
 		if (ifp->if_l2com == NULL) {
 			/*
 			 * NOTE(review): if_index may already have been
 			 * raised above; the slot itself is still NULL.
 			 */
 			free(ifp, M_IFNET);
 			return (NULL);
 		}
 	}
 	/* Publish the ifnet in the index table only once fully set up. */
 	ifnet_byindex(ifp->if_index) = ifp;
 	IF_ADDR_LOCK_INIT(ifp);
 
 	return (ifp);
 }
 
 /*
  * Free the struct ifnet, the associated index, and the layer 2 common
  * structure if needed.  All the work is done in if_free_type(), which is
  * called with the type currently recorded in ifp->if_type.
  *
  * Do not add code to this function!  Add it to if_free_type().
  */
 void
 if_free(struct ifnet *ifp)
 {
 
 	if_free_type(ifp, ifp->if_type);
 }
 
 /*
  * Do the actual work of freeing a struct ifnet, associated index, and
  * layer 2 common structure.  This version should only be called by
  * interfaces that switch their type after calling if_alloc().
  *
  * 'type' selects the layer 2 free routine and must be the type that was
  * passed to if_alloc().  An ifp that is not registered in the index table
  * is reported and left untouched.
  */
 void
 if_free_type(struct ifnet *ifp, u_char type)
 {
 
 	if (ifp != ifnet_byindex(ifp->if_index)) {
 		if_printf(ifp, "%s: value was not if_alloced, skipping\n",
 		    __func__);
 		return;
 	}
 
 	IF_ADDR_LOCK_DESTROY(ifp);
 
 	ifnet_byindex(ifp->if_index) = NULL;
 
 	/* XXX: should be locked with if_findindex() */
 	/* Lower the high mark past any trailing empty slots. */
 	while (if_index > 0 && ifnet_byindex(if_index) == NULL)
 		if_index--;
 
 	if (if_com_free[type] != NULL)
 		if_com_free[type](ifp->if_l2com, type);
 
 	free(ifp, M_IFNET);
 }
 
 /*
  * Perform generic interface initialization tasks and attach the interface
  * to the list of "active" interfaces.
  *
  * The ifnet must have been obtained from if_alloc(); anything else causes
  * a panic.
  *
  * XXX:
  *  - The decision to return void and thus require this function to
  *    succeed is questionable.
  *  - We do more initialization here than is probably a good idea.
  *    Some of this should probably move to if_alloc().
  *  - We should probably do more sanity checking.  For instance we don't
  *    do anything to ensure if_xname is unique or non-empty.
  */
 void
 if_attach(struct ifnet *ifp)
 {
 	unsigned socksize, ifasize;
 	int namelen, masklen;
 	struct sockaddr_dl *sdl;
 	struct ifaddr *ifa;
 
 	if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
 		panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
 		    ifp->if_xname);
 
 	TASK_INIT(&ifp->if_starttask, 0, if_start_deferred, ifp);
 	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
 	IF_AFDATA_LOCK_INIT(ifp);
 	ifp->if_afdata_initialized = 0;
 
 	TAILQ_INIT(&ifp->if_addrhead);
 	TAILQ_INIT(&ifp->if_prefixhead);
 	TAILQ_INIT(&ifp->if_multiaddrs);
 	TAILQ_INIT(&ifp->if_groups);
 
 	if_addgroup(ifp, IFG_ALL);
 
 	knlist_init(&ifp->if_klist, NULL, NULL, NULL, NULL);
 	getmicrotime(&ifp->if_lastchange);
 	ifp->if_data.ifi_epoch = time_uptime;
 	ifp->if_data.ifi_datalen = sizeof(struct if_data);
 
 #ifdef MAC
 	mac_ifnet_init(ifp);
 	mac_ifnet_create(ifp);
 #endif
 
 	/* Device node "net/<xname>" plus the alias "net<index>". */
 	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw,
 	    unit2minor(ifp->if_index),
 	    UID_ROOT, GID_WHEEL, 0600, "%s/%s",
 	    net_cdevsw.d_name, ifp->if_xname);
 	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
 	    net_cdevsw.d_name, ifp->if_index);
 
 	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
 
 	/*
 	 * create a Link Level name for this device
 	 */
 	namelen = strlen(ifp->if_xname);
 	/*
 	 * Always save enough space for any possible name so we can do
 	 * a rename in place later.
 	 */
 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
 	socksize = masklen + ifp->if_addrlen;
 	if (socksize < sizeof(*sdl))
 		socksize = sizeof(*sdl);
 	socksize = roundup2(socksize, sizeof(long));
 	/* One allocation: the ifaddr, then the address, then the netmask. */
 	ifasize = sizeof(*ifa) + 2 * socksize;
 	ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
 	IFA_LOCK_INIT(ifa);
 	sdl = (struct sockaddr_dl *)(ifa + 1);
 	sdl->sdl_len = socksize;
 	sdl->sdl_family = AF_LINK;
 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
 	sdl->sdl_nlen = namelen;
 	sdl->sdl_index = ifp->if_index;
 	sdl->sdl_type = ifp->if_type;
 	ifp->if_addr = ifa;
 	ifa->ifa_ifp = ifp;
 	ifa->ifa_rtrequest = link_rtrequest;
 	ifa->ifa_addr = (struct sockaddr *)sdl;
 	/* From here on sdl refers to the netmask stored behind the address. */
 	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
 	ifa->ifa_netmask = (struct sockaddr *)sdl;
 	sdl->sdl_len = masklen;
 	/* The mask covers exactly the bytes of the interface name. */
 	while (namelen != 0)
 		sdl->sdl_data[--namelen] = 0xff;
 	ifa->ifa_refcnt = 1;
 	TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
 	ifp->if_broadcastaddr = NULL; /* reliably crash if used uninitialized */
 	ifp->if_snd.altq_type = 0;
 	ifp->if_snd.altq_disc = NULL;
 	ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
 	ifp->if_snd.altq_tbr  = NULL;
 	ifp->if_snd.altq_ifp  = ifp;
 
 	IFNET_WLOCK();
 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
 	IFNET_WUNLOCK();
 
 	/*
 	 * NOTE(review): presumably a status >= 2 means the domains are
 	 * already initialized and if_attachdomain() will not run for this
 	 * interface -- confirm against the domain code.
 	 */
 	if (domain_init_status >= 2)
 		if_attachdomain1(ifp);
 
 	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
 	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
 
 	/* Announce the interface. */
 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
 
 	if (ifp->if_watchdog != NULL)
 		if_printf(ifp,
 		    "WARNING: using obsoleted if_watchdog interface\n");
 	if (ifp->if_flags & IFF_NEEDSGIANT)
 		if_printf(ifp,
 		    "WARNING: using obsoleted IFF_NEEDSGIANT flag\n");
 }
 
 /*
  * SYSINIT hook: run if_attachdomain1() on every interface currently on the
  * ifnet list.
  */
 static void
 if_attachdomain(void *dummy)
 {
 	struct ifnet *ifp;
 	int s;
 
 	s = splnet();
 	TAILQ_FOREACH(ifp, &ifnet, if_link)
 		if_attachdomain1(ifp);
 	splx(s);
 }
 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
     if_attachdomain, NULL);
 
 /*
  * Give every domain that has a dom_ifattach hook the chance to allocate
  * its per-interface data for ifp; results are stored in ifp->if_afdata[]
  * indexed by address family.  A repeated call for the same
  * domain_init_status is reported and ignored.
  */
 static void
 if_attachdomain1(struct ifnet *ifp)
 {
 	struct domain *dp;
 	int s;
 
 	s = splnet();
 
 	/*
 	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
 	 * cannot lock ifp->if_afdata initialization, entirely.
 	 */
 	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
 		splx(s);
 		return;
 	}
 	if (ifp->if_afdata_initialized >= domain_init_status) {
 		IF_AFDATA_UNLOCK(ifp);
 		splx(s);
 		printf("if_attachdomain called more than once on %s\n",
 		    ifp->if_xname);
 		return;
 	}
 	/* Mark as done while still locked, then drop the lock to allocate. */
 	ifp->if_afdata_initialized = domain_init_status;
 	IF_AFDATA_UNLOCK(ifp);
 
 	/* address family dependent data region */
 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
 	for (dp = domains; dp; dp = dp->dom_next) {
 		if (dp->dom_ifattach)
 			ifp->if_afdata[dp->dom_family] =
 			    (*dp->dom_ifattach)(ifp);
 	}
 
 	splx(s);
 }
 
 /*
  * Remove any unicast or broadcast network addresses from an interface.
  * The AF_LINK address is left in place.  AF_INET and AF_INET6 addresses
  * are removed through their protocol-specific routines; everything else,
  * including an AF_INET address that in_control() failed to delete, is
  * unlinked from if_addrhead and released here.
  */
 void
 if_purgeaddrs(struct ifnet *ifp)
 {
 	struct ifaddr *ifa, *next;
 
 	/* _SAFE variant: entries are removed while the list is walked. */
 	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
 		if (ifa->ifa_addr->sa_family == AF_LINK)
 			continue;
 #ifdef INET
 		/* XXX: Ugly!! ad hoc just for INET */
 		if (ifa->ifa_addr->sa_family == AF_INET) {
 			struct ifaliasreq ifr;
 
 			bzero(&ifr, sizeof(ifr));
 			ifr.ifra_addr = *ifa->ifa_addr;
 			if (ifa->ifa_dstaddr)
 				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
 			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
 			    NULL) == 0)
 				continue;
 		}
 #endif /* INET */
 #ifdef INET6
 		if (ifa->ifa_addr->sa_family == AF_INET6) {
 			in6_purgeaddr(ifa);
 			/* ifp_addrhead is already updated */
 			continue;
 		}
 #endif /* INET6 */
 		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
 		IFAFREE(ifa);
 	}
 }
 
 /*
  * Remove any multicast network addresses from an interface.
  */
 void
 if_purgemaddrs(struct ifnet *ifp)
 {
 	struct ifmultiaddr *ifma;
 	struct ifmultiaddr *next;
 
 	IF_ADDR_LOCK(ifp);
 	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
 		if_delmulti_locked(ifp, ifma, 1);
 	IF_ADDR_UNLOCK(ifp);
 }
 
 /*
  * Detach an interface, removing it from the
  * list of "active" interfaces.
  *
  * If ifp is not found on the global ifnet list the function returns
  * without doing anything else.  Otherwise it drains the pending link-state
  * task, marks the interface down, purges its addresses, deletes the routes
  * that still reference it, announces the departure and finally tears down
  * the per-domain data, the knote list and the locks.
  *
  * XXXRW: There are some significant questions about event ordering, and
  * how to prevent things from starting to use the interface during detach.
  */
 void
 if_detach(struct ifnet *ifp)
 {
 	struct ifaddr *ifa;
 	struct radix_node_head	*rnh;
 	int s;
 	int i;
 	struct domain *dp;
  	struct ifnet *iter;
  	int found = 0;
 
 	/* Unlink ifp from the global list; bail out if it was never there. */
 	IFNET_WLOCK();
 	TAILQ_FOREACH(iter, &ifnet, if_link)
 		if (iter == ifp) {
 			TAILQ_REMOVE(&ifnet, ifp, if_link);
 			found = 1;
 			break;
 		}
 	IFNET_WUNLOCK();
 	if (!found)
 		return;
 
 	/*
 	 * Remove/wait for pending events.
 	 */
 	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
 
 	/*
 	 * Remove routes and flush queues.
 	 */
 	s = splnet();
 	if_down(ifp);
 #ifdef ALTQ
 	if (ALTQ_IS_ENABLED(&ifp->if_snd))
 		altq_disable(&ifp->if_snd);
 	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
 		altq_detach(&ifp->if_snd);
 #endif
 
 	if_purgeaddrs(ifp);
 
 #ifdef INET
 	in_ifdetach(ifp);
 #endif
 
 #ifdef INET6
 	/*
 	 * Remove all IPv6 kernel structs related to ifp.  This should be done
 	 * before removing routing entries below, since IPv6 interface direct
 	 * routes are expected to be removed by the IPv6-specific kernel API.
 	 * Otherwise, the kernel will detect some inconsistency and bark it.
 	 */
 	in6_ifdetach(ifp);
 #endif
 	if_purgemaddrs(ifp);
 
 	/*
 	 * Remove link ifaddr pointer and maybe decrement if_index.
 	 * Clean up all addresses.
 	 */
 	ifp->if_addr = NULL;
 	destroy_dev(ifdev_byindex(ifp->if_index));
 	ifdev_byindex(ifp->if_index) = NULL;
 
 	/* We can now free link ifaddr. */
 	if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
 		ifa = TAILQ_FIRST(&ifp->if_addrhead);
 		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
 		IFAFREE(ifa);
 	}
 
 	/*
 	 * Delete all remaining routes using this interface
 	 * Unfortunately the only way to do this is to slog through
 	 * the entire routing table looking for routes which point
 	 * to this interface...oh well...
 	 */
 	for (i = 1; i <= AF_MAX; i++) {
-		if ((rnh = rt_tables[i]) == NULL)
+	    int j;
+	    for (j = 0; j < rt_numfibs; j++) {
+		if ((rnh = rt_tables[j][i]) == NULL)
 			continue;
 		RADIX_NODE_HEAD_LOCK(rnh);
 		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
+	    }
 	}
 
 	/* Announce that the interface is gone. */
 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
 	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
 	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
 
 	/* Let every domain that attached per-interface data release it. */
 	IF_AFDATA_LOCK(ifp);
 	for (dp = domains; dp; dp = dp->dom_next) {
 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
 			(*dp->dom_ifdetach)(ifp,
 			    ifp->if_afdata[dp->dom_family]);
 	}
 	IF_AFDATA_UNLOCK(ifp);
 
 #ifdef MAC
 	mac_ifnet_destroy(ifp);
 #endif /* MAC */
 	KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
 	knlist_clear(&ifp->if_klist, 0);
 	knlist_destroy(&ifp->if_klist);
 	mtx_destroy(&ifp->if_snd.ifq_mtx);
 	IF_AFDATA_DESTROY(ifp);
 	splx(s);
 }
 
 /*
  * Add a group to an interface
  */
 int
 if_addgroup(struct ifnet *ifp, const char *groupname)
 {
 	struct ifg_list		*ifgl;
 	struct ifg_group	*ifg = NULL;
 	struct ifg_member	*ifgm;
 
 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
 	    groupname[strlen(groupname) - 1] <= '9')
 		return (EINVAL);
 
 	IFNET_WLOCK();
 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
 			IFNET_WUNLOCK();
 			return (EEXIST);
 		}
 
 	if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
 	    M_NOWAIT)) == NULL) {
 	    	IFNET_WUNLOCK();
 		return (ENOMEM);
 	}
 
 	if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
 	    M_TEMP, M_NOWAIT)) == NULL) {
 		free(ifgl, M_TEMP);
 		IFNET_WUNLOCK();
 		return (ENOMEM);
 	}
 
 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
 		if (!strcmp(ifg->ifg_group, groupname))
 			break;
 
 	if (ifg == NULL) {
 		if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
 		    M_TEMP, M_NOWAIT)) == NULL) {
 			free(ifgl, M_TEMP);
 			free(ifgm, M_TEMP);
 			IFNET_WUNLOCK();
 			return (ENOMEM);
 		}
 		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
 		ifg->ifg_refcnt = 0;
 		TAILQ_INIT(&ifg->ifg_members);
 		EVENTHANDLER_INVOKE(group_attach_event, ifg);
 		TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
 	}
 
 	ifg->ifg_refcnt++;
 	ifgl->ifgl_group = ifg;
 	ifgm->ifgm_ifp = ifp;
 
 	IF_ADDR_LOCK(ifp);
 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
 	IF_ADDR_UNLOCK(ifp);
 
 	IFNET_WUNLOCK();
 
 	EVENTHANDLER_INVOKE(group_change_event, groupname);
 
 	return (0);
 }
 
 /*
  * Remove a group from an interface
  */
 int
 if_delgroup(struct ifnet *ifp, const char *groupname)
 {
 	struct ifg_list		*ifgl;
 	struct ifg_member	*ifgm;
 
 	IFNET_WLOCK();
 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
 			break;
 	if (ifgl == NULL) {
 		IFNET_WUNLOCK();
 		return (ENOENT);
 	}
 
 	IF_ADDR_LOCK(ifp);
 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
 	IF_ADDR_UNLOCK(ifp);
 
 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
 		if (ifgm->ifgm_ifp == ifp)
 			break;
 
 	if (ifgm != NULL) {
 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
 		free(ifgm, M_TEMP);
 	}
 
 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
 		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
 		free(ifgl->ifgl_group, M_TEMP);
 	}
 	IFNET_WUNLOCK();
 
 	free(ifgl, M_TEMP);
 
 	EVENTHANDLER_INVOKE(group_change_event, groupname);
 
 	return (0);
 }
 
 /*
  * Stores all groups from an interface in memory pointed
  * to by data
  */
 static int
 if_getgroup(struct ifgroupreq *data, struct ifnet *ifp)
 {
 	int			 len, error;
 	struct ifg_list		*ifgl;
 	struct ifg_req		 ifgrq, *ifgp;
 	struct ifgroupreq	*ifgr = data;
 
 	if (ifgr->ifgr_len == 0) {
 		IF_ADDR_LOCK(ifp);
 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
 			ifgr->ifgr_len += sizeof(struct ifg_req);
 		IF_ADDR_UNLOCK(ifp);
 		return (0);
 	}
 
 	len = ifgr->ifgr_len;
 	ifgp = ifgr->ifgr_groups;
 	/* XXX: wire */
 	IF_ADDR_LOCK(ifp);
 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
 		if (len < sizeof(ifgrq)) {
 			IF_ADDR_UNLOCK(ifp);
 			return (EINVAL);
 		}
 		bzero(&ifgrq, sizeof ifgrq);
 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
 		    sizeof(ifgrq.ifgrq_group));
 		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
 		    	IF_ADDR_UNLOCK(ifp);
 			return (error);
 		}
 		len -= sizeof(ifgrq);
 		ifgp++;
 	}
 	IF_ADDR_UNLOCK(ifp);
 
 	return (0);
 }
 
 /*
  * Stores all members of a group in memory pointed to by data
  */
 static int
 if_getgroupmembers(struct ifgroupreq *data)
 {
 	struct ifgroupreq	*ifgr = data;
 	struct ifg_group	*ifg;
 	struct ifg_member	*ifgm;
 	struct ifg_req		 ifgrq, *ifgp;
 	int			 len, error;
 
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
 			break;
 	if (ifg == NULL) {
 		IFNET_RUNLOCK();
 		return (ENOENT);
 	}
 
 	if (ifgr->ifgr_len == 0) {
 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
 			ifgr->ifgr_len += sizeof(ifgrq);
 		IFNET_RUNLOCK();
 		return (0);
 	}
 
 	len = ifgr->ifgr_len;
 	ifgp = ifgr->ifgr_groups;
 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
 		if (len < sizeof(ifgrq)) {
 			IFNET_RUNLOCK();
 			return (EINVAL);
 		}
 		bzero(&ifgrq, sizeof ifgrq);
 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
 		    sizeof(ifgrq.ifgrq_member));
 		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
 			IFNET_RUNLOCK();
 			return (error);
 		}
 		len -= sizeof(ifgrq);
 		ifgp++;
 	}
 	IFNET_RUNLOCK();
 
 	return (0);
 }
 
 /*
  * Delete Routes for a Network Interface
  *
  * Called for each routing entry via the rnh->rnh_walktree() call above
  * to delete all route entries referencing a detaching network interface.
  *
  * Arguments:
  *	rn	pointer to node in the routing table
  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
  *
  * Returns:
  *	0	successful
  *	errno	failed - reason indicated
  *
  * Note: as written, a failed delete is only logged; the function itself
  * always returns 0, so the tree walk is never aborted from here.
  */
 static int
 if_rtdel(struct radix_node *rn, void *arg)
 {
 	struct rtentry	*rt = (struct rtentry *)rn;
 	struct ifnet	*ifp = arg;
 	int		err;
 
 	/* Only routes that point at the detaching interface are touched. */
 	if (rt->rt_ifp == ifp) {
 
 		/*
 		 * Protect (sorta) against walktree recursion problems
 		 * with cloned routes
 		 */
 		if ((rt->rt_flags & RTF_UP) == 0)
 			return (0);
 
-		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
+		err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway,
 				rt_mask(rt), rt->rt_flags,
-				(struct rtentry **) NULL);
+				(struct rtentry **) NULL, rt->rt_fibnum);
 		if (err) {
 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
 		}
 	}
 
 	return (0);
 }
 
 /*
  * XXX: Because sockaddr_dl has deeper structure than the sockaddr
  * structs used to represent other address families, it is necessary
  * to perform a different comparison.
  */
 
 /*
  * Generic comparison: byte-wise compare of the two sockaddrs over the
  * length recorded in the first one (a1->sa_len).  Both arguments are
  * dereferenced, so neither may be NULL.
  */
 #define	sa_equal(a1, a2)	\
 	(bcmp((a1), (a2), ((a1))->sa_len) == 0)
 
 /*
  * Link-level comparison: the two sockaddr_dl must have the same sdl_len
  * and identical link-level address bytes (LLADDR) over a1's sdl_alen.
  */
 #define	sa_dl_equal(a1, a2)	\
 	((((struct sockaddr_dl *)(a1))->sdl_len ==			\
 	 ((struct sockaddr_dl *)(a2))->sdl_len) &&			\
 	 (bcmp(LLADDR((struct sockaddr_dl *)(a1)),			\
 	       LLADDR((struct sockaddr_dl *)(a2)),			\
 	       ((struct sockaddr_dl *)(a1))->sdl_alen) == 0))
 
 /*
  * Locate an interface based on a complete address.
  */
 /*ARGSUSED*/
 struct ifaddr *
 ifa_ifwithaddr(struct sockaddr *addr)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &ifnet, if_link)
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != addr->sa_family)
 				continue;
 			if (sa_equal(addr, ifa->ifa_addr))
 				goto done;
 			/* IP6 doesn't have broadcast */
 			if ((ifp->if_flags & IFF_BROADCAST) &&
 			    ifa->ifa_broadaddr &&
 			    ifa->ifa_broadaddr->sa_len != 0 &&
 			    sa_equal(ifa->ifa_broadaddr, addr))
 				goto done;
 		}
 	ifa = NULL;
 done:
 	IFNET_RUNLOCK();
 	return (ifa);
 }
 
 /*
  * Locate an interface based on the broadcast address.
  */
 /* ARGSUSED */
 struct ifaddr *
 ifa_ifwithbroadaddr(struct sockaddr *addr)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &ifnet, if_link)
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != addr->sa_family)
 				continue;
 			if ((ifp->if_flags & IFF_BROADCAST) &&
 			    ifa->ifa_broadaddr &&
 			    ifa->ifa_broadaddr->sa_len != 0 &&
 			    sa_equal(ifa->ifa_broadaddr, addr))
 				goto done;
 		}
 	ifa = NULL;
 done:
 	IFNET_RUNLOCK();
 	return (ifa);
 }
 
 /*
  * Locate the point to point interface with a given destination address.
  */
 /*ARGSUSED*/
 struct ifaddr *
 ifa_ifwithdstaddr(struct sockaddr *addr)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
 			continue;
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != addr->sa_family)
 				continue;
 			if (ifa->ifa_dstaddr &&
 			    sa_equal(addr, ifa->ifa_dstaddr))
 				goto done;
 		}
 	}
 	ifa = NULL;
 done:
 	IFNET_RUNLOCK();
 	return (ifa);
 }
 
 /*
  * Find an interface on a specific network.  If many, choice
  * is most specific found.
  *
  * Returns the matching interface address, or NULL when nothing matches.
  * Exact hits (a point-to-point destination, or an address claimed by the
  * ifa_claim_addr handler) are returned immediately; netmask matches are
  * only remembered and the most specific one is returned after the scan.
  */
 struct ifaddr *
 ifa_ifwithnet(struct sockaddr *addr)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
 	u_int af = addr->sa_family;
 	char *addr_data = addr->sa_data, *cplim;
 
 	/*
 	 * AF_LINK addresses can be looked up directly by their index number,
 	 * so do that if we can.
 	 */
 	if (af == AF_LINK) {
 	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
 	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
 		return (ifaddr_byindex(sdl->sdl_index));
 	}
 
 	/*
 	 * Scan through each interface, looking for ones that have
 	 * addresses in this address family.
 	 */
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			char *cp, *cp2, *cp3;
 
 			/*
 			 * The "next" label sits on this continue so that the
 			 * mask comparison further down can jump here to move
 			 * on to the next address.
 			 */
 			if (ifa->ifa_addr->sa_family != af)
 next:				continue;
 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
 				/*
 				 * This is a bit broken as it doesn't
 				 * take into account that the remote end may
 				 * be a single node in the network we are
 				 * looking for.
 				 * The trouble is that we don't know the
 				 * netmask for the remote end.
 				 */
 				if (ifa->ifa_dstaddr != 0 &&
 				    sa_equal(addr, ifa->ifa_dstaddr))
 					goto done;
 			} else {
 				/*
 				 * if we have a special address handler,
 				 * then use it instead of the generic one.
 				 */
 				if (ifa->ifa_claim_addr) {
 					if ((*ifa->ifa_claim_addr)(ifa, addr))
 						goto done;
 					continue;
 				}
 
 				/*
 				 * Scan all the bits in the ifa's address.
 				 * If a bit disagrees with what we are
 				 * looking for, mask it with the netmask
 				 * to see if it really matters.
 				 * (A byte at a time)
 				 */
 				if (ifa->ifa_netmask == 0)
 					continue;
 				cp = addr_data;
 				cp2 = ifa->ifa_addr->sa_data;
 				cp3 = ifa->ifa_netmask->sa_data;
 				/* cplim: one past the last byte of the netmask. */
 				cplim = ifa->ifa_netmask->sa_len
 					+ (char *)ifa->ifa_netmask;
 				while (cp3 < cplim)
 					if ((*cp++ ^ *cp2++) & *cp3++)
 						goto next; /* next address! */
 				/*
 				 * If the netmask of what we just found
 				 * is more specific than what we had before
 				 * (if we had one) then remember the new one
 				 * before continuing to search
 				 * for an even better one.
 				 */
 				if (ifa_maybe == 0 ||
 				    rn_refines((caddr_t)ifa->ifa_netmask,
 				    (caddr_t)ifa_maybe->ifa_netmask))
 					ifa_maybe = ifa;
 			}
 		}
 	}
 	/* No exact hit: fall back to the best netmask match (may be NULL). */
 	ifa = ifa_maybe;
 done:
 	IFNET_RUNLOCK();
 	return (ifa);
 }
 
 /*
  * Find an interface address specific to an interface best matching
  * a given address.
  */
 struct ifaddr *
 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
 {
 	struct ifaddr *ifa;
 	char *cp, *cp2, *cp3;
 	char *cplim;
 	struct ifaddr *ifa_maybe = 0;
 	u_int af = addr->sa_family;
 
 	if (af >= AF_MAX)
 		return (0);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != af)
 			continue;
 		if (ifa_maybe == 0)
 			ifa_maybe = ifa;
 		if (ifa->ifa_netmask == 0) {
 			if (sa_equal(addr, ifa->ifa_addr) ||
 			    (ifa->ifa_dstaddr &&
 			    sa_equal(addr, ifa->ifa_dstaddr)))
 				goto done;
 			continue;
 		}
 		if (ifp->if_flags & IFF_POINTOPOINT) {
 			if (sa_equal(addr, ifa->ifa_dstaddr))
 				goto done;
 		} else {
 			cp = addr->sa_data;
 			cp2 = ifa->ifa_addr->sa_data;
 			cp3 = ifa->ifa_netmask->sa_data;
 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
 			for (; cp3 < cplim; cp3++)
 				if ((*cp++ ^ *cp2++) & *cp3)
 					break;
 			if (cp3 == cplim)
 				goto done;
 		}
 	}
 	ifa = ifa_maybe;
 done:
 	return (ifa);
 }
 
 #include <net/route.h>
 
 /*
  * Default rtrequest handler for routes installed with a link-level
  * gateway: on RTM_ADD, repoint the route at the interface address that
  * best matches the route's key.
  * This should be moved to /sys/net/link.c eventually.
  */
 static void
 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
 {
 	struct ifaddr *newifa, *oldifa;
 	struct sockaddr *dst;
 	struct ifnet *ifp;
 
 	RT_LOCK_ASSERT(rt);
 
 	/* Nothing to do unless this is an add with everything filled in. */
 	if (cmd != RTM_ADD)
 		return;
 	oldifa = rt->rt_ifa;
 	if (oldifa == 0)
 		return;
 	ifp = oldifa->ifa_ifp;
 	if (ifp == 0)
 		return;
 	dst = rt_key(rt);
 	if (dst == 0)
 		return;
 
 	newifa = ifaof_ifpforaddr(dst, ifp);
 	if (newifa == 0)
 		return;
 
 	/* Swap the route's ifa: take the new reference, then drop the old. */
 	IFAREF(newifa);		/* XXX */
 	oldifa = rt->rt_ifa;
 	rt->rt_ifa = newifa;
 	IFAFREE(oldifa);
 
 	/* Chain to the new ifa's own handler, but never back into ourselves. */
 	if (newifa->ifa_rtrequest && newifa->ifa_rtrequest != link_rtrequest)
 		newifa->ifa_rtrequest(cmd, rt, info);
 }
 
 /*
  * Mark an interface down and notify protocols of
  * the transition.
  * NOTE: must be called at splnet or eqivalent.
  */
 static void
 if_unroute(struct ifnet *ifp, int flag, int fam)
 {
 	struct ifaddr *ifa;
 
 	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
 
 	ifp->if_flags &= ~flag;
 	getmicrotime(&ifp->if_lastchange);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
 			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
 	if_qflush(&ifp->if_snd);
 #ifdef DEV_CARP
 	if (ifp->if_carp)
 		carp_carpdev_state(ifp->if_carp);
 #endif
 	rt_ifmsg(ifp);
 }
 
 /*
  * Mark an interface up and notify protocols of
  * the transition.
  * NOTE: must be called at splnet or eqivalent.
  */
 static void
 if_route(struct ifnet *ifp, int flag, int fam)
 {
 	struct ifaddr *ifa;
 
 	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
 
 	ifp->if_flags |= flag;
 	getmicrotime(&ifp->if_lastchange);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
 			pfctlinput(PRC_IFUP, ifa->ifa_addr);
 #ifdef DEV_CARP
 	if (ifp->if_carp)
 		carp_carpdev_state(ifp->if_carp);
 #endif
 	rt_ifmsg(ifp);
 #ifdef INET6
 	in6_if_up(ifp);
 #endif
 }
 
 /*
  * Function-pointer hooks; per the XXX notes they belong to if_vlan.
  * vlan_link_state_p is called from do_link_state_change() whenever the
  * interface has a vlan trunk attached (if_vlantrunk != NULL).
  */
 void	(*vlan_link_state_p)(struct ifnet *, int);	/* XXX: private from if_vlan */
 void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */
 
 /*
  * Record a new link state for an interface.  The actual notification
  * work is deferred: to avoid LORs between the driver lock and upper
  * layer locks, as well as possible recursion, only a task is queued
  * here and do_link_state_change() does the rest.
  */
 void
 if_link_state_change(struct ifnet *ifp, int link_state)
 {
 	/* Only act on a real transition. */
 	if (ifp->if_link_state != link_state) {
 		ifp->if_link_state = link_state;
 		taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
 	}
 }
 
 /*
  * Taskqueue handler behind if_link_state_change(): propagate the
  * interface's current link state to the routing socket, kqueue
  * listeners, vlan/netgraph/carp/bridge/lagg consumers and devctl.
  * "pending" is the number of times the task was queued before running.
  */
 static void
 do_link_state_change(void *arg, int pending)
 {
 	struct ifnet *ifp = arg;
 	const int state = ifp->if_link_state;
 	const int is_up = (state == LINK_STATE_UP);
 	int note;
 
 	/* Notify that the link state has changed. */
 	rt_ifmsg(ifp);
 
 	if (is_up)
 		note = NOTE_LINKUP;
 	else
 		note = (state == LINK_STATE_DOWN) ? NOTE_LINKDOWN :
 		    NOTE_LINKINV;
 	KNOTE_UNLOCKED(&ifp->if_klist, note);
 
 	if (ifp->if_vlantrunk != NULL) {
 		(*vlan_link_state_p)(ifp, note);
 	}
 
 	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
 	    IFP2AC(ifp)->ac_netgraph != NULL) {
 		(*ng_ether_link_state_p)(ifp, state);
 	}
 #ifdef DEV_CARP
 	if (ifp->if_carp)
 		carp_carpdev_state(ifp->if_carp);
 #endif
 	if (ifp->if_bridge) {
 		KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
 		(*bstp_linkstate_p)(ifp, state);
 	}
 	if (ifp->if_lagg) {
 		KASSERT(lagg_linkstate_p != NULL,("if_lagg not loaded!"));
 		(*lagg_linkstate_p)(ifp, state);
 	}
 
 	devctl_notify("IFNET", ifp->if_xname,
 	    is_up ? "LINK_UP" : "LINK_DOWN", NULL);
 	if (pending > 1) {
 		if_printf(ifp, "%d link states coalesced\n", pending);
 	}
 	if (log_link_state_change) {
 		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
 		    is_up ? "UP" : "DOWN" );
 	}
 }
 
 /*
  * Mark an interface down and notify protocols of
  * the transition.
  * Thin wrapper: clears IFF_UP via if_unroute() for all address families.
  * NOTE: must be called at splnet or equivalent.
  */
 void
 if_down(struct ifnet *ifp)
 {
 
 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
 }
 
 /*
  * Mark an interface up and notify protocols of
  * the transition.
  * Thin wrapper: sets IFF_UP via if_route() for all address families.
  * NOTE: must be called at splnet or equivalent.
  */
 void
 if_up(struct ifnet *ifp)
 {
 
 	if_route(ifp, IFF_UP, AF_UNSPEC);
 }
 
 /*
  * Empty an interface queue: every queued packet chain is freed and the
  * queue's head, tail and length are reset, all under the queue lock.
  */
 static void
 if_qflush(struct ifaltq *ifq)
 {
 	struct mbuf *pkt, *following;
 
 	IFQ_LOCK(ifq);
 #ifdef ALTQ
 	if (ALTQ_IS_ENABLED(ifq))
 		ALTQ_PURGE(ifq);
 #endif
 	for (pkt = ifq->ifq_head; pkt != 0; pkt = following) {
 		/* Pick up the successor before the packet is freed. */
 		following = pkt->m_act;
 		m_freem(pkt);
 	}
 	ifq->ifq_head = 0;
 	ifq->ifq_tail = 0;
 	ifq->ifq_len = 0;
 	IFQ_UNLOCK(ifq);
 }
 
 /*
  * Periodic interface watchdog tick.  For every interface with a running
  * timer (if_timer != 0) the timer is decremented, and when it reaches
  * zero the interface's if_watchdog routine, if any, is called.  The
  * function then re-arms itself through timeout().
  *
  * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
  * holding Giant.  If we switch to an MPSAFE callout, we likely need to grab
  * Giant before entering if_watchdog() on an IFF_NEEDSGIANT interface.
  */
 static void
 if_slowtimo(void *arg)
 {
 	struct ifnet *ifp;
 	int s;
 
 	s = splimp();
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		/* Timer not armed. */
 		if (ifp->if_timer == 0)
 			continue;
 		/* Armed but not yet expired. */
 		ifp->if_timer--;
 		if (ifp->if_timer != 0)
 			continue;
 		if (ifp->if_watchdog)
 			(*ifp->if_watchdog)(ifp);
 	}
 	IFNET_RUNLOCK();
 	splx(s);
 	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
 }
 
 /*
  * Map interface name to
  * interface structure pointer.
  */
 struct ifnet *
 ifunit(const char *name)
 {
 	struct ifnet *ifp;
 
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
 			break;
 	}
 	IFNET_RUNLOCK();
 	return (ifp);
 }
 
 /*
  * Hardware specific interface ioctls.
  *
  * Handles the interface-level ioctl commands for ifp.  "data" is the
  * ioctl argument (treated as a struct ifreq for most commands) and td is
  * the calling thread, used for privilege checks on the set-type commands.
  *
  * Returns 0 or an errno value.  Commands that are not recognised here
  * yield ENOIOCTL; ifioctl() uses that value to decide to pass the command
  * on to the protocol.
  */
 static int
 ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
 {
 	struct ifreq *ifr;
 	struct ifstat *ifs;
 	int error = 0;
 	int new_flags, temp_flags;
 	size_t namelen, onamelen;
 	char new_name[IFNAMSIZ];
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 
 	ifr = (struct ifreq *)data;
 	switch (cmd) {
 	case SIOCGIFINDEX:
 		ifr->ifr_index = ifp->if_index;
 		break;
 
 	case SIOCGIFFLAGS:
 		/* Stack and driver flags are reported as one 32-bit value. */
 		temp_flags = ifp->if_flags | ifp->if_drv_flags;
 		ifr->ifr_flags = temp_flags & 0xffff;
 		ifr->ifr_flagshigh = temp_flags >> 16;
 		break;
 
 	case SIOCGIFCAP:
 		ifr->ifr_reqcap = ifp->if_capabilities;
 		ifr->ifr_curcap = ifp->if_capenable;
 		break;
 
 #ifdef MAC
 	case SIOCGIFMAC:
 		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
 		break;
 #endif
 
 	case SIOCGIFMETRIC:
 		ifr->ifr_metric = ifp->if_metric;
 		break;
 
 	case SIOCGIFMTU:
 		ifr->ifr_mtu = ifp->if_mtu;
 		break;
 
 	case SIOCGIFPHYS:
 		ifr->ifr_phys = ifp->if_physical;
 		break;
 
 	case SIOCSIFFLAGS:
 		error = priv_check(td, PRIV_NET_SETIFFLAGS);
 		if (error)
 			return (error);
 		/*
 		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
 		 * check, so we don't need special handling here yet.
 		 */
 		new_flags = (ifr->ifr_flags & 0xffff) |
 		    (ifr->ifr_flagshigh << 16);
 		if (ifp->if_flags & IFF_SMART) {
 			/* Smart drivers twiddle their own routes */
 		} else if (ifp->if_flags & IFF_UP &&
 		    (new_flags & IFF_UP) == 0) {
 			int s = splimp();
 			if_down(ifp);
 			splx(s);
 		} else if (new_flags & IFF_UP &&
 		    (ifp->if_flags & IFF_UP) == 0) {
 			int s = splimp();
 			if_up(ifp);
 			splx(s);
 		}
 		/* See if permanently promiscuous mode bit is about to flip */
 		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
 			if (new_flags & IFF_PPROMISC)
 				ifp->if_flags |= IFF_PROMISC;
 			else if (ifp->if_pcount == 0)
 				ifp->if_flags &= ~IFF_PROMISC;
 			log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
 			    ifp->if_xname,
 			    (new_flags & IFF_PPROMISC) ? "enabled" : "disabled");
 		}
 		/* Keep the IFF_CANTCHANGE bits, take the rest from the request. */
 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
 			(new_flags &~ IFF_CANTCHANGE);
 		/* The driver's return value is deliberately ignored here. */
 		if (ifp->if_ioctl) {
 			IFF_LOCKGIANT(ifp);
 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
 			IFF_UNLOCKGIANT(ifp);
 		}
 		getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCSIFCAP:
 		error = priv_check(td, PRIV_NET_SETIFCAP);
 		if (error)
 			return (error);
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		/* Refuse requests for capabilities the interface lacks. */
 		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
 			return (EINVAL);
 		IFF_LOCKGIANT(ifp);
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		IFF_UNLOCKGIANT(ifp);
 		if (error == 0)
 			getmicrotime(&ifp->if_lastchange);
 		break;
 
 #ifdef MAC
 	case SIOCSIFMAC:
 		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
 		break;
 #endif
 
 	case SIOCSIFNAME:
 		error = priv_check(td, PRIV_NET_SETIFNAME);
 		if (error)
 			return (error);
 		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
 		if (error != 0)
 			return (error);
 		if (new_name[0] == '\0')
 			return (EINVAL);
 		if (ifunit(new_name) != NULL)
 			return (EEXIST);
 		
 		/* Announce the departure of the interface. */
 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
 		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
 
 		log(LOG_INFO, "%s: changing name to '%s'\n",
 		    ifp->if_xname, new_name);
 
 		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
 		/* Rewrite the name stored in the link-level sockaddr_dl too. */
 		ifa = ifp->if_addr;
 		IFA_LOCK(ifa);
 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 		namelen = strlen(new_name);
 		onamelen = sdl->sdl_nlen;
 		/*
 		 * Move the address if needed.  This is safe because we
 		 * allocate space for a name of length IFNAMSIZ when we
 		 * create this in if_attach().
 		 */
 		if (namelen != onamelen) {
 			bcopy(sdl->sdl_data + onamelen,
 			    sdl->sdl_data + namelen, sdl->sdl_alen);
 		}
 		bcopy(new_name, sdl->sdl_data, namelen);
 		sdl->sdl_nlen = namelen;
 		/* Rebuild the netmask: 0xff over the new name's bytes. */
 		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
 		bzero(sdl->sdl_data, onamelen);
 		while (namelen != 0)
 			sdl->sdl_data[--namelen] = 0xff;
 		IFA_UNLOCK(ifa);
 
 		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
 		/* Announce the return of the interface. */
 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
 		break;
 
 	case SIOCSIFMETRIC:
 		error = priv_check(td, PRIV_NET_SETIFMETRIC);
 		if (error)
 			return (error);
 		ifp->if_metric = ifr->ifr_metric;
 		getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCSIFPHYS:
 		error = priv_check(td, PRIV_NET_SETIFPHYS);
 		if (error)
 			return (error);
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		IFF_LOCKGIANT(ifp);
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		IFF_UNLOCKGIANT(ifp);
 		if (error == 0)
 			getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCSIFMTU:
 	{
 		u_long oldmtu = ifp->if_mtu;
 
 		error = priv_check(td, PRIV_NET_SETIFMTU);
 		if (error)
 			return (error);
 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
 			return (EINVAL);
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		IFF_LOCKGIANT(ifp);
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		IFF_UNLOCKGIANT(ifp);
 		if (error == 0) {
 			getmicrotime(&ifp->if_lastchange);
 			rt_ifmsg(ifp);
 		}
 		/*
 		 * If the link MTU changed, do network layer specific procedure.
 		 */
 		if (ifp->if_mtu != oldmtu) {
 #ifdef INET6
 			nd6_setmtu(ifp);
 #endif
 		}
 		break;
 	}
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		if (cmd == SIOCADDMULTI)
 			error = priv_check(td, PRIV_NET_ADDMULTI);
 		else
 			error = priv_check(td, PRIV_NET_DELMULTI);
 		if (error)
 			return (error);
 
 		/* Don't allow group membership on non-multicast interfaces. */
 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
 			return (EOPNOTSUPP);
 
 		/* Don't let users screw up protocols' entries. */
 		if (ifr->ifr_addr.sa_family != AF_LINK)
 			return (EINVAL);
 
 		if (cmd == SIOCADDMULTI) {
 			struct ifmultiaddr *ifma;
 
 			/*
 			 * Userland is only permitted to join groups once
 			 * via the if_addmulti() KPI, because it cannot hold
 			 * struct ifmultiaddr * between calls. It may also
 			 * lose a race while we check if the membership
 			 * already exists.
 			 */
 			IF_ADDR_LOCK(ifp);
 			ifma = if_findmulti(ifp, &ifr->ifr_addr);
 			IF_ADDR_UNLOCK(ifp);
 			if (ifma != NULL)
 				error = EADDRINUSE;
 			else
 				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
 		} else {
 			error = if_delmulti(ifp, &ifr->ifr_addr);
 		}
 		if (error == 0)
 			getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCSIFPHYADDR:
 	case SIOCDIFPHYADDR:
 #ifdef INET6
 	case SIOCSIFPHYADDR_IN6:
 #endif
 	case SIOCSLIFPHYADDR:
 	case SIOCSIFMEDIA:
 	case SIOCSIFGENERIC:
 		/* Privileged set-type requests that go straight to the driver. */
 		error = priv_check(td, PRIV_NET_HWIOCTL);
 		if (error)
 			return (error);
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		IFF_LOCKGIANT(ifp);
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		IFF_UNLOCKGIANT(ifp);
 		if (error == 0)
 			getmicrotime(&ifp->if_lastchange);
 		break;
 
 	case SIOCGIFSTATUS:
 		/* Start from an empty status string, then ask the driver. */
 		ifs = (struct ifstat *)data;
 		ifs->ascii[0] = '\0';
 		/* FALLTHROUGH */
 
 	case SIOCGIFPSRCADDR:
 	case SIOCGIFPDSTADDR:
 	case SIOCGLIFPHYADDR:
 	case SIOCGIFMEDIA:
 	case SIOCGIFGENERIC:
 		/* Get-type requests go to the driver without a privilege check. */
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		IFF_LOCKGIANT(ifp);
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		IFF_UNLOCKGIANT(ifp);
 		break;
 
 	case SIOCSIFLLADDR:
 		error = priv_check(td, PRIV_NET_SETLLADDR);
 		if (error)
 			return (error);
 		error = if_setlladdr(ifp,
 		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
 		break;
 
 	case SIOCAIFGROUP:
 	{
 		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
 
 		error = priv_check(td, PRIV_NET_ADDIFGROUP);
 		if (error)
 			return (error);
 		if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
 			return (error);
 		break;
 	}
 
 	case SIOCGIFGROUP:
 		if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp)))
 			return (error);
 		break;
 
 	case SIOCDIFGROUP:
 	{
 		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
 
 		error = priv_check(td, PRIV_NET_DELIFGROUP);
 		if (error)
 			return (error);
 		if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
 			return (error);
 		break;
 	}
 
 	default:
 		/* Not ours: let the caller try the protocol's handler. */
 		error = ENOIOCTL;
 		break;
 	}
 	return (error);
 }
 
 /*
  * Interface ioctls.
  *
  * Dispatches an interface-related ioctl issued on socket so:
  *  - the SIOCGIFCONF variants are answered by ifconf();
  *  - clone create/destroy/list and group-member queries are handled
  *    without looking up an interface;
  *  - everything else is resolved to an interface by name (ENXIO if there
  *    is none), offered to ifhwioctl() and, if that returns ENOIOCTL,
  *    passed to the socket protocol's pru_control routine.
  * Under COMPAT_43 the old-style address commands are translated before
  * and after the protocol call.
  */
 int
 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
 {
 	struct ifnet *ifp;
 	struct ifreq *ifr;
 	int error;
 	int oif_flags;
 
 	switch (cmd) {
 	case SIOCGIFCONF:
 	case OSIOCGIFCONF:
 #ifdef __amd64__
 	case SIOCGIFCONF32:
 #endif
 		return (ifconf(cmd, data));
 	}
 	ifr = (struct ifreq *)data;
 
 	/* Commands that do not operate on an existing, named interface. */
 	switch (cmd) {
 	case SIOCIFCREATE:
 	case SIOCIFCREATE2:
 		error = priv_check(td, PRIV_NET_IFCREATE);
 		if (error)
 			return (error);
 		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
 			cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
 	case SIOCIFDESTROY:
 		error = priv_check(td, PRIV_NET_IFDESTROY);
 		if (error)
 			return (error);
 		return if_clone_destroy(ifr->ifr_name);
 
 	case SIOCIFGCLONERS:
 		return (if_clone_list((struct if_clonereq *)data));
 	case SIOCGIFGMEMB:
 		return (if_getgroupmembers((struct ifgroupreq *)data));
 	}
 
 	ifp = ifunit(ifr->ifr_name);
 	if (ifp == 0)
 		return (ENXIO);
 
 	/* ENOIOCTL means "not an interface-level command": keep going. */
 	error = ifhwioctl(cmd, ifp, data, td);
 	if (error != ENOIOCTL)
 		return (error);
 
 	/* Remember the flags so an IFF_UP change can be detected below. */
 	oif_flags = ifp->if_flags;
 	if (so->so_proto == 0)
 		return (EOPNOTSUPP);
 #ifndef COMPAT_43
 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
 								 data,
 								 ifp, td));
 #else
 	{
 		int ocmd = cmd;
 
 		switch (cmd) {
 
 		case SIOCSIFDSTADDR:
 		case SIOCSIFADDR:
 		case SIOCSIFBRDADDR:
 		case SIOCSIFNETMASK:
 			/*
 			 * Fix up a sockaddr that arrived without a length
 			 * byte: fill in sa_len (and, on non-big-endian
 			 * machines, move the family into place first).
 			 */
 #if BYTE_ORDER != BIG_ENDIAN
 			if (ifr->ifr_addr.sa_family == 0 &&
 			    ifr->ifr_addr.sa_len < 16) {
 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
 				ifr->ifr_addr.sa_len = 16;
 			}
 #else
 			if (ifr->ifr_addr.sa_len == 0)
 				ifr->ifr_addr.sa_len = 16;
 #endif
 			break;
 
 		/* Map the old "get" commands onto their current equivalents. */
 		case OSIOCGIFADDR:
 			cmd = SIOCGIFADDR;
 			break;
 
 		case OSIOCGIFDSTADDR:
 			cmd = SIOCGIFDSTADDR;
 			break;
 
 		case OSIOCGIFBRDADDR:
 			cmd = SIOCGIFBRDADDR;
 			break;
 
 		case OSIOCGIFNETMASK:
 			cmd = SIOCGIFNETMASK;
 		}
 		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
 								   cmd,
 								   data,
 								   ifp, td));
 		switch (ocmd) {
 
 		case OSIOCGIFADDR:
 		case OSIOCGIFDSTADDR:
 		case OSIOCGIFBRDADDR:
 		case OSIOCGIFNETMASK:
 			/* Old callers expect the family in the first u_short. */
 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
 
 		}
 	}
 #endif /* COMPAT_43 */
 
 	/* Did the protocol call flip IFF_UP? */
 	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
 #ifdef INET6
 		DELAY(100);/* XXX: temporary workaround for fxp issue*/
 		if (ifp->if_flags & IFF_UP) {
 			int s = splimp();
 			in6_if_up(ifp);
 			splx(s);
 		}
 #endif
 	}
 	return (error);
 }
 
 /*
  * Common machinery for reference-counted interface flags, as used by
  * ifpromisc() and if_allmulti().
  *
  *	flag		the interface flag being counted
  *	pflag		a "permanent mode" flag to honour (for example
  *			IFF_PPROMISC), or 0 if there is none
  *	refcount	the counter that belongs to the flag
  *	onswitch	non-zero to take a reference, zero to drop one
  *
  * The flag itself is only toggled, and the driver only called, on the
  * first "on" and the last "off".  If the driver call fails (or there is
  * no if_ioctl routine) the counter and flags are restored and the error
  * is returned; otherwise the result is 0.
  *
  * Only to be used on stack-owned flags, not driver-owned flags.
  */
 static int
 if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
 {
 	struct ifreq ifr;
 	const int delta = onswitch ? 1 : -1;
 	int saved_flags, saved_count;
 	int error;
 
 	/* Sanity checks to catch programming errors */
 	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
 	    ("%s: setting driver-owned flag %d", __func__, flag));
 
 	if (onswitch) {
 		KASSERT(*refcount >= 0,
 		    ("%s: increment negative refcount %d for flag %d",
 		    __func__, *refcount, flag));
 	} else {
 		KASSERT(*refcount > 0,
 		    ("%s: decrement non-positive refcount %d for flag %d",
 		    __func__, *refcount, flag));
 	}
 
 	/* With the permanent mode set, only the counter moves. */
 	if (ifp->if_flags & pflag) {
 		*refcount += delta;
 		return (0);
 	}
 
 	/* Keep the old state around in case the driver call fails. */
 	saved_count = *refcount;
 	saved_flags = ifp->if_flags;
 
 	/*
 	 * Adjust the counter.  The flag is only touched when this is the
 	 * first reference taken or the last one dropped.
 	 */
 	*refcount += delta;
 	if (onswitch) {
 		if (saved_count != 0)
 			return (0);
 		ifp->if_flags |= flag;
 	} else {
 		if (*refcount != 0)
 			return (0);
 		ifp->if_flags &= ~flag;
 	}
 
 	/* Call down the driver since we've changed interface flags */
 	if (ifp->if_ioctl != NULL) {
 		ifr.ifr_flags = ifp->if_flags & 0xffff;
 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
 		IFF_LOCKGIANT(ifp);
 		error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 		IFF_UNLOCKGIANT(ifp);
 	} else {
 		error = EOPNOTSUPP;
 	}
 
 	if (error == 0) {
 		/* Notify userland that interface flags have changed */
 		rt_ifmsg(ifp);
 		return (0);
 	}
 
 	/* Recover after driver error */
 	*refcount = saved_count;
 	ifp->if_flags = saved_flags;
 	return (error);
 }
 
 /*
  * Set/clear promiscuous mode on interface ifp based on the truth value
  * of pswitch.  The calls are reference counted so that only the first
  * "on" request actually has an effect, as does the final "off" request.
  * Results are undefined if the "off" and "on" requests are not matched.
  */
 int
 ifpromisc(struct ifnet *ifp, int pswitch)
 {
 	int error;
 	int oldflags = ifp->if_flags;
 
 	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
 			   &ifp->if_pcount, pswitch);
 	/* If promiscuous mode status has changed, log a message */
 	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC))
 		log(LOG_INFO, "%s: promiscuous mode %s\n",
 		    ifp->if_xname,
 		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
 	return (error);
 }
 
 /*
  * Return interface configuration of system.  List may be used in later
  * ioctl's (above) to get other information.
  *
  * "cmd" selects the record layout (OSIOCGIFCONF under COMPAT_43,
  * SIOCGIFCONF32 on amd64, native otherwise) and "data" points at the
  * caller's struct ifconf (or struct ifconf32).  One record is produced
  * per interface address that is visible to the calling thread, or a
  * single record with a zeroed address for an interface without any.
  *
  * Returns 0 on success with ifc_len updated to the number of bytes
  * copied out, EINVAL for a non-positive ifc_len, ENAMETOOLONG if an
  * interface name does not fit into ifr_name, or the copyout() error.
  */
 /*ARGSUSED*/
 static int
 ifconf(u_long cmd, caddr_t data)
 {
 	struct ifconf *ifc = (struct ifconf *)data;
 #ifdef __amd64__
 	struct ifconf32 *ifc32 = (struct ifconf32 *)data;
 	struct ifconf ifc_swab;
 #endif
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct ifreq ifr;
 	struct sbuf *sb;
 	int error, full = 0, valid_len, max_len;
 
 #ifdef __amd64__
 	if (cmd == SIOCGIFCONF32) {
 		ifc_swab.ifc_len = ifc32->ifc_len;
 		ifc_swab.ifc_buf = (caddr_t)(uintptr_t)ifc32->ifc_buf;
 		ifc = &ifc_swab;
 	}
 #endif
 	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
 	max_len = MAXPHYS - 1;
 
 	/* Prevent hostile input from being able to crash the system */
 	if (ifc->ifc_len <= 0)
 		return (EINVAL);
 
 again:
 	if (ifc->ifc_len <= max_len) {
 		max_len = ifc->ifc_len;
 		full = 1;
 	}
 	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
 	/* From here on max_len counts the bytes a complete list needs. */
 	max_len = 0;
 	valid_len = 0;
 
 	IFNET_RLOCK();		/* could sleep XXX */
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		int addrs;
 
 		/*
 		 * Zero the ifr_name buffer to make sure we don't
 		 * disclose the contents of the stack.
 		 */
 		memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name));
 
 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
 		    >= sizeof(ifr.ifr_name)) {
 			sbuf_delete(sb);
 			IFNET_RUNLOCK();
 			return (ENAMETOOLONG);
 		}
 
 		addrs = 0;
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			struct sockaddr *sa = ifa->ifa_addr;
 
 			if (jailed(curthread->td_ucred) &&
 			    prison_if(curthread->td_ucred, sa))
 				continue;
 			addrs++;
 
 			/*
 			 * Stage the address in ifr_addr.  An address may be
 			 * shorter than struct sockaddr; in that case copy
 			 * only sa_len bytes and zero the remainder, instead
 			 * of reading past the end of the source and handing
 			 * whatever follows it to userland.
 			 */
 			if (sa->sa_len < sizeof(*sa)) {
 				memset(&ifr.ifr_addr, 0, sizeof(ifr.ifr_addr));
 				bcopy(sa, &ifr.ifr_addr, sa->sa_len);
 			} else
 				ifr.ifr_addr = *sa;
 #ifdef COMPAT_43
 			if (cmd == OSIOCGIFCONF) {
 				struct osockaddr *osa =
 					 (struct osockaddr *)&ifr.ifr_addr;
 				osa->sa_family = sa->sa_family;
 				sbuf_bcat(sb, &ifr, sizeof(ifr));
 				max_len += sizeof(ifr);
 			} else
 #endif
 			if (sa->sa_len <= sizeof(*sa)) {
 				sbuf_bcat(sb, &ifr, sizeof(ifr));
 				max_len += sizeof(ifr);
 			} else {
 				/*
 				 * Oversized address: emit the name part of
 				 * the record followed by the full sockaddr.
 				 */
 				sbuf_bcat(sb, &ifr,
 				    offsetof(struct ifreq, ifr_addr));
 				max_len += offsetof(struct ifreq, ifr_addr);
 				sbuf_bcat(sb, sa, sa->sa_len);
 				max_len += sa->sa_len;
 			}
 
 			/* Only count records that fit completely. */
 			if (!sbuf_overflowed(sb))
 				valid_len = sbuf_len(sb);
 		}
 		if (addrs == 0) {
 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
 			sbuf_bcat(sb, &ifr, sizeof(ifr));
 			max_len += sizeof(ifr);
 
 			if (!sbuf_overflowed(sb))
 				valid_len = sbuf_len(sb);
 		}
 	}
 	IFNET_RUNLOCK();
 
 	/*
 	 * If we didn't allocate enough space (uncommon), try again.  If
 	 * we have already allocated as much space as we are allowed,
 	 * return what we've got.
 	 */
 	if (valid_len != max_len && !full) {
 		sbuf_delete(sb);
 		goto again;
 	}
 
 	ifc->ifc_len = valid_len;
 #ifdef __amd64__
 	if (cmd == SIOCGIFCONF32)
 		ifc32->ifc_len = valid_len;
 #endif
 	sbuf_finish(sb);
 	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
 	sbuf_delete(sb);
 	return (error);
 }
 
 /*
  * Just like ifpromisc(), but for all-multicast-reception mode.
  */
 int
 if_allmulti(struct ifnet *ifp, int onswitch)
 {
 
 	return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
 }
 
 /*
  * Look up 'sa' on the multicast address list of 'ifp'.  AF_LINK
  * addresses are compared with sa_dl_equal(), everything else with
  * sa_equal().  Returns the matching entry or NULL.  The interface
  * address lock must be held.
  */
 struct ifmultiaddr *
 if_findmulti(struct ifnet *ifp, struct sockaddr *sa)
 {
 	struct ifmultiaddr *ifma;
 	int match;
 
 	IF_ADDR_LOCK_ASSERT(ifp);
 
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (sa->sa_family == AF_LINK)
 			match = sa_dl_equal(ifma->ifma_addr, sa);
 		else
 			match = sa_equal(ifma->ifma_addr, sa);
 		if (match)
 			return (ifma);
 	}
 
 	return (NULL);
 }
 
 /*
  * Create and initialize a new ifmultiaddr from the passed arguments.
  * Private copies are made of 'sa' and, if it is not NULL, of 'llsa'.
  * The new entry starts out with a reference count of 1 and is NOT put
  * on the ifnet multicast address list; that and any other setup work
  * (such as notifying the device driver) is left to the caller.
  * 'mflags' is handed to the allocator.  Returns NULL if any of the
  * allocations fails.
  */
 static struct ifmultiaddr *
 if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
     int mflags)
 {
 	struct ifmultiaddr *ifma;
 	struct sockaddr *sa_copy, *llsa_copy;
 
 	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, mflags |
 	    M_ZERO);
 	if (ifma == NULL)
 		return (NULL);
 
 	MALLOC(sa_copy, struct sockaddr *, sa->sa_len, M_IFMADDR, mflags);
 	if (sa_copy == NULL)
 		goto fail_ifma;
 	bcopy(sa, sa_copy, sa->sa_len);
 
 	/* The link layer address is optional. */
 	llsa_copy = NULL;
 	if (llsa != NULL) {
 		MALLOC(llsa_copy, struct sockaddr *, llsa->sa_len, M_IFMADDR,
 		    mflags);
 		if (llsa_copy == NULL)
 			goto fail_sa;
 		bcopy(llsa, llsa_copy, llsa->sa_len);
 	}
 
 	ifma->ifma_addr = sa_copy;
 	ifma->ifma_lladdr = llsa_copy;
 	ifma->ifma_ifp = ifp;
 	ifma->ifma_refcount = 1;
 	ifma->ifma_protospec = NULL;
 
 	return (ifma);
 
 fail_sa:
 	FREE(sa_copy, M_IFMADDR);
 fail_ifma:
 	FREE(ifma, M_IFMADDR);
 	return (NULL);
 }
 
 /*
  * if_freemulti: release an ifmultiaddr together with its copied
  * addresses.  Reference counting, driver notification, routing messages
  * and the release of dependent link layer state are all the caller's
  * business; the entry must arrive here with a zero reference count and
  * no protocol-specific data attached.
  */
 static void
 if_freemulti(struct ifmultiaddr *ifma)
 {
 	struct sockaddr *lladdr = ifma->ifma_lladdr;
 
 	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
 	    ifma->ifma_refcount));
 	KASSERT(ifma->ifma_protospec == NULL,
 	    ("if_freemulti: protospec not NULL"));
 
 	/* The link layer copy only exists for some entries. */
 	if (lladdr != NULL) {
 		FREE(lladdr, M_IFMADDR);
 	}
 	FREE(ifma->ifma_addr, M_IFMADDR);
 	FREE(ifma, M_IFMADDR);
 }
 
 /*
  * Register an additional multicast address with a network interface.
  *
  * - If the address is already present, bump the reference count on the
  *   address and return.
  * - If the address is not link-layer, look up a link layer address.
  * - Allocate address structures for one or both addresses, and attach to the
  *   multicast address list on the interface.  If automatically adding a link
  *   layer address, the protocol address will own a reference to the link
  *   layer address, to be freed when it is freed.
  * - Notify the network device driver of an addition to the multicast address
  *   list.
  *
  * 'sa' points to caller-owned memory with the desired multicast address.
  *
  * 'retifma' will be used to return a pointer to the resulting multicast
  * address reference, if desired.
  *
  * Returns 0 on success, the error reported by the interface's
  * if_resolvemulti method, or ENOMEM if an address structure could not be
  * allocated.  '*retifma' is only written on success.
  */
 int
 if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
     struct ifmultiaddr **retifma)
 {
 	struct ifmultiaddr *ifma, *ll_ifma;
 	struct sockaddr *llsa;
 	int error;
 
 	/*
 	 * If the address is already present, return a new reference to it;
 	 * otherwise, allocate storage and set up a new address.
 	 */
 	IF_ADDR_LOCK(ifp);
 	ifma = if_findmulti(ifp, sa);
 	if (ifma != NULL) {
 		ifma->ifma_refcount++;
 		if (retifma != NULL)
 			*retifma = ifma;
 		IF_ADDR_UNLOCK(ifp);
 		return (0);
 	}
 
 	/*
 	 * The address isn't already present; resolve the protocol address
 	 * into a link layer address, and then look that up, bump its
 	 * refcount or allocate an ifma for that also.  If 'llsa' was
 	 * returned, we will need to free it later.
 	 */
 	llsa = NULL;
 	ll_ifma = NULL;
 	if (ifp->if_resolvemulti != NULL) {
 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
 		if (error)
 			goto unlock_out;
 	}
 
 	/*
 	 * Allocate the new address.  Don't hook it up yet, as we may also
 	 * need to allocate a link layer multicast address.
 	 */
 	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
 	if (ifma == NULL) {
 		error = ENOMEM;
 		goto free_llsa_out;
 	}
 
 	/*
 	 * If a link layer address is found, we'll need to see if it's
 	 * already present in the address list, or allocate it as well.
 	 * When this block finishes, the link layer address will be on the
 	 * list.
 	 */
 	if (llsa != NULL) {
 		ll_ifma = if_findmulti(ifp, llsa);
 		if (ll_ifma == NULL) {
 			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
 			if (ll_ifma == NULL) {
 				/*
 				 * Drop the initial reference first;
 				 * if_freemulti() asserts a zero refcount.
 				 */
 				--ifma->ifma_refcount;
 				if_freemulti(ifma);
 				error = ENOMEM;
 				goto free_llsa_out;
 			}
 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
 			    ifma_link);
 		} else
 			ll_ifma->ifma_refcount++;
 		ifma->ifma_llifma = ll_ifma;
 	}
 
 	/*
 	 * We now have a new multicast address, ifma, and possibly a new or
 	 * referenced link layer address.  Add the primary address to the
 	 * ifnet address list.
 	 */
 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
 
 	if (retifma != NULL)
 		*retifma = ifma;
 
 	/*
 	 * Must generate the message while holding the lock so that 'ifma'
 	 * pointer is still valid.
 	 */
 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
 	IF_ADDR_UNLOCK(ifp);
 
 	/*
 	 * We are certain we have added something, so call down to the
 	 * interface to let them know about it.  The result of the ioctl
 	 * is deliberately ignored.
 	 */
 	if (ifp->if_ioctl != NULL) {
 		IFF_LOCKGIANT(ifp);
 		(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
 		IFF_UNLOCKGIANT(ifp);
 	}
 
 	/* if_allocmulti() made its own copies, so 'llsa' can go now. */
 	if (llsa != NULL)
 		FREE(llsa, M_IFMADDR);
 
 	return (0);
 
 	/* Error exits; both are entered with the address lock held. */
 free_llsa_out:
 	if (llsa != NULL)
 		FREE(llsa, M_IFMADDR);
 
 unlock_out:
 	IF_ADDR_UNLOCK(ifp);
 	return (error);
 }
 
 /*
  * Remove a multicast group membership, identified by its network-layer
  * group address.
  *
  * Returns ENOENT if no matching entry exists and 0 otherwise.  The
  * results are undefined if ifp no longer exists, so this entry point is
  * only for subsystems which do appropriate locking to hold ifp for the
  * duration of the call.
  * Network-layer protocol domains must use if_delmulti_ifma().
  */
 int
 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
 {
 	struct ifmultiaddr *ifma;
 	int lastref;
 #ifdef INVARIANTS
 	struct ifnet *oifp;
 
 	/* Make sure ifp is still on the list of interfaces. */
 	IFNET_RLOCK();
 	TAILQ_FOREACH(oifp, &ifnet, if_link) {
 		if (oifp == ifp)
 			break;
 	}
 	if (oifp != ifp)
 		ifp = NULL;
 	IFNET_RUNLOCK();
 
 	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
 #endif
 	if (ifp == NULL)
 		return (ENOENT);
 
 	IF_ADDR_LOCK(ifp);
 	ifma = if_findmulti(ifp, sa);
 	if (ifma == NULL) {
 		IF_ADDR_UNLOCK(ifp);
 		return (ENOENT);
 	}
 	lastref = if_delmulti_locked(ifp, ifma, 0);
 	IF_ADDR_UNLOCK(ifp);
 
 	/* Tell the driver only when the last reference went away. */
 	if (lastref != 0 && ifp->if_ioctl != NULL) {
 		IFF_LOCKGIANT(ifp);
 		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
 		IFF_UNLOCKGIANT(ifp);
 	}
 
 	return (0);
 }
 
 /*
  * Remove a multicast group membership, identified by its group
  * membership pointer.  Network-layer protocol domains must use this
  * routine.
  *
  * Calling this routine is safe even if the ifp disappeared. Callers
  * should hold IFF_LOCKGIANT() to avoid a LOR in case the hardware needs
  * to be reconfigured.
  */
 void
 if_delmulti_ifma(struct ifmultiaddr *ifma)
 {
 	struct ifnet *ifp;
 	int lastref;
 
 	ifp = ifma->ifma_ifp;
 #ifdef DIAGNOSTIC
 	if (ifp != NULL) {
 		struct ifnet *oifp;
 
 		IFNET_RLOCK();
 		TAILQ_FOREACH(oifp, &ifnet, if_link) {
 			if (oifp == ifp)
 				break;
 		}
 		if (oifp != ifp) {
 			printf("%s: ifnet %p disappeared\n", __func__, ifp);
 			ifp = NULL;
 		}
 		IFNET_RUNLOCK();
 	} else {
 		printf("%s: ifma_ifp seems to be detached\n", __func__);
 	}
 #endif
 	/*
 	 * Without an ifnet instance there is no address lock to take
 	 * and no hardware to update; just drop the reference.
 	 */
 	if (ifp == NULL) {
 		(void)if_delmulti_locked(NULL, ifma, 0);
 		return;
 	}
 
 	IF_ADDR_LOCK(ifp);
 	lastref = if_delmulti_locked(ifp, ifma, 0);
 	IF_ADDR_UNLOCK(ifp);
 
 	/* If the group was left: update the hardware hash filter. */
 	if (lastref != 0 && ifp->if_ioctl != NULL) {
 		IFF_LOCKGIANT(ifp);
 		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
 		IFF_UNLOCKGIANT(ifp);
 	}
 }
 
 /*
  * Perform deletion of network-layer and/or link-layer multicast address.
  *
  * 'ifp' may be NULL; whenever both it and ifma->ifma_ifp are set they
  * must agree and the address lock of 'ifp' must be held.  A non-zero
  * 'detaching' announces the deletion on the routing socket and clears
  * the ifnet back pointers of the affected entries.
  *
  * Return 0 if the reference count was decremented.
  * Return 1 if the final reference was released, indicating that the
  * hardware hash filter should be reprogrammed.
  */
 static int
 if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
 {
 	struct ifmultiaddr *ll_ifma;
 
 	if (ifp != NULL && ifma->ifma_ifp != NULL) {
 		KASSERT(ifma->ifma_ifp == ifp,
 		    ("%s: inconsistent ifp %p", __func__, ifp));
 		IF_ADDR_LOCK_ASSERT(ifp);
 	}
 
 	/* From here on work with the ifnet recorded in the entry itself. */
 	ifp = ifma->ifma_ifp;
 
 	/*
 	 * If the ifnet is detaching, null out references to ifnet,
 	 * so that upper protocol layers will notice, and not attempt
 	 * to obtain locks for an ifnet which no longer exists. The
 	 * routing socket announcement must happen before the ifnet
 	 * instance is detached from the system.
 	 */
 	if (detaching) {
 #ifdef DIAGNOSTIC
 		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
 #endif
 		/*
 		 * ifp may already be nulled out if we are being reentered
 		 * to delete the ll_ifma.
 		 */
 		if (ifp != NULL) {
 			rt_newmaddrmsg(RTM_DELMADDR, ifma);
 			ifma->ifma_ifp = NULL;
 		}
 	}
 
 	/* Other references remain: nothing is freed. */
 	if (--ifma->ifma_refcount > 0)
 		return 0;
 
 	/*
 	 * If this ifma is a network-layer ifma, a link-layer ifma may
 	 * have been associated with it. Release it first if so.
 	 */
 	ll_ifma = ifma->ifma_llifma;
 	if (ll_ifma != NULL) {
 		KASSERT(ifma->ifma_lladdr != NULL,
 		    ("%s: llifma w/o lladdr", __func__));
 		if (detaching)
 			ll_ifma->ifma_ifp = NULL;	/* XXX */
 		if (--ll_ifma->ifma_refcount == 0) {
 			/* Unlink only while the entry still has an ifnet. */
 			if (ifp != NULL) {
 				TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
 				    ifma_link);
 			}
 			if_freemulti(ll_ifma);
 		}
 	}
 
 	if (ifp != NULL)
 		TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
 
 	if_freemulti(ifma);
 
 	/*
 	 * The last reference to this instance of struct ifmultiaddr
 	 * was released; the hardware should be notified of this change.
 	 */
 	return 1;
 }
 
 /*
  * Change the link layer address of an interface.
  *
  * Only a fixed set of interface types is supported (ENODEV otherwise),
  * and the new address must have the same length as the current one
  * (EINVAL otherwise, as well as for an interface without a link layer
  * address).  Returns 0 on success.
  */
 int
 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
 {
 	struct sockaddr_dl *sdl;
 	struct ifaddr *ifa;
 	struct ifreq ifr;
 
 	ifa = ifp->if_addr;
 	if (ifa == NULL)
 		return (EINVAL);
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	/* The address must exist and its length may not change. */
 	if (sdl == NULL || len != sdl->sdl_alen)
 		return (EINVAL);
 
 	switch (ifp->if_type) {
 	case IFT_ETHER:
 	case IFT_FDDI:
 	case IFT_XETHER:
 	case IFT_ISO88025:
 	case IFT_L2VLAN:
 	case IFT_BRIDGE:
 	case IFT_ARCNET:
 	case IFT_IEEE8023ADLAG:
 		bcopy(lladdr, LLADDR(sdl), len);
 		break;
 	default:
 		return (ENODEV);
 	}
 
 	/* An interface that is down picks the address up later. */
 	if ((ifp->if_flags & IFF_UP) == 0)
 		return (0);
 
 	/*
 	 * The interface is already up: bounce it (down, then up again)
 	 * so that the driver reprograms its address filter.
 	 */
 	if (ifp->if_ioctl != NULL) {
 		IFF_LOCKGIANT(ifp);
 
 		ifp->if_flags &= ~IFF_UP;
 		ifr.ifr_flags = ifp->if_flags & 0xffff;
 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 
 		ifp->if_flags |= IFF_UP;
 		ifr.ifr_flags = ifp->if_flags & 0xffff;
 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 
 		IFF_UNLOCKGIANT(ifp);
 	}
 #ifdef INET
 	/*
 	 * Also send gratuitous ARPs to notify other nodes about
 	 * the address change.
 	 */
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 		arp_ifinit(ifp, ifa);
 	}
 #endif
 	return (0);
 }
 
 /*
  * Set the driver name, unit and external name of an interface.
  *
  * The name pointer is stored as is, so it must refer to storage which
  * will last as long as the interface does.  For physical devices, the
  * result of device_get_name(dev) is a good choice and for pseudo-devices
  * a static string works well.  With IF_DUNIT_NONE the external name is
  * the bare name, otherwise the unit number is appended.
  */
 void
 if_initname(struct ifnet *ifp, const char *name, int unit)
 {
 	ifp->if_dname = name;
 	ifp->if_dunit = unit;
 
 	if (unit == IF_DUNIT_NONE) {
 		strlcpy(ifp->if_xname, name, IFNAMSIZ);
 		return;
 	}
 	snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
 }
 
 /*
  * printf() variant that prefixes the message with the interface name.
  * Returns the sum of the values returned for prefix and message.
  */
 int
 if_printf(struct ifnet *ifp, const char * fmt, ...)
 {
 	va_list args;
 	int written;
 
 	written = printf("%s: ", ifp->if_xname);
 
 	va_start(args, fmt);
 	written += vprintf(fmt, args);
 	va_end(args);
 
 	return (written);
 }
 
 /*
  * Start output on an interface.
  *
  * The if_start() routine of an interface marked IFF_NEEDSGIANT cannot
  * be called without Giant.  However, we often can't acquire the Giant
  * lock at those points; if the caller does not own Giant already, the
  * call is handed to a task queue that holds Giant and runs it via
  * if_start_deferred.
  *
  * XXXRW: We need to make sure that the ifnet isn't fully detached while
  * if_start_deferred() tasks are still outstanding.  This probably means
  * waiting in if_detach().
  */
 void
 if_start(struct ifnet *ifp)
 {
 
 	if ((ifp->if_flags & IFF_NEEDSGIANT) != 0 && !mtx_owned(&Giant)) {
 		taskqueue_enqueue(taskqueue_swi_giant, &ifp->if_starttask);
 		return;
 	}
 	(*(ifp)->if_start)(ifp);
 }
 
 /*
  * Task handler that calls the if_start routine of the interface passed
  * as 'context'.  Giant must be held; 'pending' is not used.
  */
 static void
 if_start_deferred(void *context, int pending)
 {
 	struct ifnet *ifp = context;
 
 	GIANT_REQUIRED;
 
 	(ifp->if_start)(ifp);
 }
 
 /*
  * Put mbuf 'm' on queue 'ifq'.  A full queue counts a drop, frees the
  * mbuf and makes the function return 0.  Otherwise 1 is returned; if
  * 'ifp' is given, its output statistics are updated ('adjust' is added
  * to the packet length for the byte count) and output is started
  * unless the driver was marked IFF_DRV_OACTIVE.
  */
 int
 if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
 {
 	int oactive;
 
 	IF_LOCK(ifq);
 	if (_IF_QFULL(ifq)) {
 		_IF_DROP(ifq);
 		IF_UNLOCK(ifq);
 		m_freem(m);
 		return (0);
 	}
 
 	oactive = 0;
 	if (ifp != NULL) {
 		ifp->if_obytes += m->m_pkthdr.len + adjust;
 		if ((m->m_flags & (M_BCAST|M_MCAST)) != 0)
 			ifp->if_omcasts++;
 		/* Sampled before the mbuf is handed to the queue. */
 		oactive = ifp->if_drv_flags & IFF_DRV_OACTIVE;
 	}
 	_IF_ENQUEUE(ifq, m);
 	IF_UNLOCK(ifq);
 
 	if (ifp == NULL || oactive)
 		return (1);
 	if_start(ifp);
 	return (1);
 }
 
 /*
  * Install the allocation routine 'a' and the release routine 'f' for
  * interface type 'type'.  Neither slot may be occupied already.
  */
 void
 if_register_com_alloc(u_char type,
     if_com_alloc_t *a, if_com_free_t *f)
 {
 
 	KASSERT(if_com_alloc[type] == NULL,
 	    ("if_register_com_alloc: %d already registered", type));
 	if_com_alloc[type] = a;
 
 	KASSERT(if_com_free[type] == NULL,
 	    ("if_register_com_alloc: %d free already registered", type));
 	if_com_free[type] = f;
 }
 
 /*
  * Remove the allocation and release routines for interface type 'type'.
  * Both must have been registered before.
  */
 void
 if_deregister_com_alloc(u_char type)
 {
 
 	KASSERT(if_com_alloc[type] != NULL,
 	    ("if_deregister_com_alloc: %d not registered", type));
 	if_com_alloc[type] = NULL;
 
 	KASSERT(if_com_free[type] != NULL,
 	    ("if_deregister_com_alloc: %d free not registered", type));
 	if_com_free[type] = NULL;
 }
Index: head/sys/net/if_atmsubr.c
===================================================================
--- head/sys/net/if_atmsubr.c	(revision 178887)
+++ head/sys/net/if_atmsubr.c	(revision 178888)
@@ -1,514 +1,515 @@
 /*      $NetBSD: if_atmsubr.c,v 1.10 1997/03/11 23:19:51 chuck Exp $       */
 
 /*-
  *
  * Copyright (c) 1996 Charles D. Cranor and Washington University.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Charles D. Cranor and 
  *	Washington University.
  * 4. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * if_atmsubr.c
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_mac.h"
 #include "opt_natm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/errno.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 
 #include <net/if.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_atm.h>
 
 #include <netinet/in.h>
 #include <netinet/if_atm.h>
 #include <netinet/if_ether.h> /* XXX: for ETHERTYPE_* */
 #if defined(INET) || defined(INET6)
 #include <netinet/in_var.h>
 #endif
 #ifdef NATM
 #include <netnatm/natm.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Netgraph interface functions.
  * These need not be protected by a lock, because ng_atm nodes are persistent.
  * The ng_atm module can be unloaded only if all ATM interfaces have been
  * unloaded, so nobody should be in the code paths accessing these function
  * pointers.
  */
 void	(*ng_atm_attach_p)(struct ifnet *);
 void	(*ng_atm_detach_p)(struct ifnet *);
 int	(*ng_atm_output_p)(struct ifnet *, struct mbuf **);
 void	(*ng_atm_input_p)(struct ifnet *, struct mbuf **,
 	    struct atm_pseudohdr *, void *);
 void	(*ng_atm_input_orphan_p)(struct ifnet *, struct mbuf *,
 	    struct atm_pseudohdr *, void *);
 void	(*ng_atm_event_p)(struct ifnet *, uint32_t, void *);
 
 /*
  * Harp pseudo interface hooks
  */
 void	(*atm_harp_input_p)(struct ifnet *ifp, struct mbuf **m,
 	    struct atm_pseudohdr *ah, void *rxhand);
 void	(*atm_harp_attach_p)(struct ifnet *);
 void	(*atm_harp_detach_p)(struct ifnet *);
 void	(*atm_harp_event_p)(struct ifnet *, uint32_t, void *);
 
 SYSCTL_NODE(_hw, OID_AUTO, atm, CTLFLAG_RW, 0, "ATM hardware");
 
 MALLOC_DEFINE(M_IFATM, "ifatm", "atm interface internals");
 
 #ifndef ETHERTYPE_IPV6
 #define	ETHERTYPE_IPV6	0x86dd
 #endif
 
 #define	senderr(e) do { error = (e); goto bad; } while (0)
 
 /*
  * atm_output: ATM output routine
  *   inputs:
  *     "ifp" = ATM interface to output to
  *     "m0" = the packet to output
  *     "dst" = the sockaddr to send to (either IP addr, or raw VPI/VCI)
  *     "rt0" = the route to use
  *   returns: error code   [0 == ok]
  *     ENETDOWN if the interface is not both up and running,
  *     EHOSTUNREACH if atmresolve() fails, EAFNOSUPPORT for an address
  *     family that is not handled, ENOBUFS if no header space could be
  *     prepended or the packet could not be queued, or the error from the
  *     MAC check, the route check or the netgraph output hook.
  *
  *   note: special semantic: if (dst == NULL) then we assume "m" already
  *		has an atm_pseudohdr on it and just send it directly.
  *		[for native mode ATM output]   if dst is null, then
  *		rt0 must also be NULL.
  */
 int
 atm_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
     struct rtentry *rt0)
 {
 	u_int16_t etype = 0;			/* if using LLC/SNAP */
 	int error = 0, sz;
 	struct atm_pseudohdr atmdst, *ad;
 	struct mbuf *m = m0;
 	struct atmllc *atmllc;
 	struct atmllc *llc_hdr = NULL;
 	u_int32_t atm_flags;
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		senderr(error);
 #endif
 
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
 		senderr(ENETDOWN);
 
 	/*
 	 * check for non-native ATM traffic   (dst != NULL)
 	 */
 	if (dst) {
 		switch (dst->sa_family) {
 
 #if defined(INET) || defined(INET6)
 		case AF_INET:
 		case AF_INET6:
 		{
 			struct rtentry *rt = NULL;
 			/*
 			 * check route
 			 */
 			if (rt0 != NULL) {
-				error = rt_check(&rt, &rt0, dst);
+				error = rt_check_fib(&rt, &rt0,
+				    dst, rt0->rt_fibnum);
 				if (error)
 					goto bad;
 				RT_UNLOCK(rt);
 			}
 
 			if (dst->sa_family == AF_INET6)
 			        etype = ETHERTYPE_IPV6;
 			else
 			        etype = ETHERTYPE_IP;
 			if (!atmresolve(rt, m, dst, &atmdst)) {
 				m = NULL; 
 				/* XXX: atmresolve already free'd it */
 				senderr(EHOSTUNREACH);
 				/* XXX: put ATMARP stuff here */
 				/* XXX: watch who frees m on failure */
 			}
 		}
 			break;
 #endif /* INET || INET6 */
 
 		case AF_UNSPEC:
 			/*
 			 * XXX: bpfwrite. assuming dst contains 12 bytes
 			 * (atm pseudo header (4) + LLC/SNAP (8))
 			 */
 			bcopy(dst->sa_data, &atmdst, sizeof(atmdst));
 			llc_hdr = (struct atmllc *)(dst->sa_data +
 			    sizeof(atmdst));
 			break;
 			
 		default:
 			printf("%s: can't handle af%d\n", ifp->if_xname, 
 			    dst->sa_family);
 			senderr(EAFNOSUPPORT);
 		}
 
 		/*
 		 * must add atm_pseudohdr to data
 		 */
 		sz = sizeof(atmdst);
 		atm_flags = ATM_PH_FLAGS(&atmdst);
 		if (atm_flags & ATM_PH_LLCSNAP)
 			sz += 8;	/* sizeof snap == 8 */
 		M_PREPEND(m, sz, M_DONTWAIT);
 		if (m == 0)
 			senderr(ENOBUFS);
 		ad = mtod(m, struct atm_pseudohdr *);
 		*ad = atmdst;
 		if (atm_flags & ATM_PH_LLCSNAP) {
 			/* the LLC/SNAP header follows the pseudo header */
 			atmllc = (struct atmllc *)(ad + 1);
 			if (llc_hdr == NULL) {
 			        bcopy(ATMLLC_HDR, atmllc->llchdr, 
 				      sizeof(atmllc->llchdr));
 				/* note: in host order */
 				ATM_LLC_SETTYPE(atmllc, etype); 
 			}
 			else
 			        bcopy(llc_hdr, atmllc, sizeof(struct atmllc));
 		}
 	}
 
 	/*
 	 * Give netgraph the first shot at the packet.  The hook gets a
 	 * pointer to "m" and may set it to NULL, in which case we are done.
 	 */
 	if (ng_atm_output_p != NULL) {
 		if ((error = (*ng_atm_output_p)(ifp, &m)) != 0) {
 			if (m != NULL)
 				m_freem(m);
 			return (error);
 		}
 		if (m == NULL)
 			return (0);
 	}
 
 	/*
 	 * Queue message on interface, and start output if interface
 	 * not yet active.  The negative adjustment is the size of the
 	 * pseudo header; presumably it keeps that header out of the
 	 * interface byte count -- confirm against IF_HANDOFF_ADJ.
 	 */
 	if (!IF_HANDOFF_ADJ(&ifp->if_snd, m, ifp,
 	    -(int)sizeof(struct atm_pseudohdr)))
 		return (ENOBUFS);
 	return (error);
 
 bad:
 	if (m)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * Process a received ATM packet;
  * the packet is in the mbuf chain m.
  *
  * "ifp" is the receiving interface and "ah" the ATM pseudo header of the
  * packet.  A non-NULL "rxhand" marks the packet as native ATM traffic
  * and is used as the natmpcb it belongs to; otherwise the packet is
  * dispatched by the ethertype found in its LLC/SNAP header (IP if there
  * is none).  The netgraph and harp input hooks see the packet first and
  * may consume it.
  */
 void
 atm_input(struct ifnet *ifp, struct atm_pseudohdr *ah, struct mbuf *m,
     void *rxhand)
 {
 	int isr;
 	u_int16_t etype = ETHERTYPE_IP;		/* default */
 
 	/* Packets arriving on an interface that is down are dropped. */
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return;
 	}
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 	ifp->if_ibytes += m->m_pkthdr.len;
 
 	/* A hook that sets m to NULL has taken the packet. */
 	if (ng_atm_input_p != NULL) {
 		(*ng_atm_input_p)(ifp, &m, ah, rxhand);
 		if (m == NULL)
 			return;
 	}
 
 	/* not eaten by ng_atm. Maybe it's a pseudo-harp PDU? */
 	if (atm_harp_input_p != NULL) {
 		(*atm_harp_input_p)(ifp, &m, ah, rxhand);
 		if (m == NULL)
 			return;
 	}
 
 	if (rxhand) {
 #ifdef NATM
 		struct natmpcb *npcb;
 
 		/*
 		 * XXXRW: this use of 'rxhand' is not a very good idea, and
 		 * was subject to races even before SMPng due to the release
 		 * of spl here.
 		 */
 		NATM_LOCK();
 		npcb = rxhand;
 		npcb->npcb_inq++;	/* count # in queue */
 		isr = NETISR_NATM;
 		m->m_pkthdr.rcvif = rxhand; /* XXX: overload */
 		NATM_UNLOCK();
 #else
 		printf("atm_input: NATM detected but not "
 		    "configured in kernel\n");
 		goto dropit;
 #endif
 	} else {
 		/*
 		 * handle LLC/SNAP header, if present
 		 */
 		if (ATM_PH_FLAGS(ah) & ATM_PH_LLCSNAP) {
 			struct atmllc *alc;
 
 			/* make sure the whole header is contiguous */
 			if (m->m_len < sizeof(*alc) &&
 			    (m = m_pullup(m, sizeof(*alc))) == 0)
 				return; /* failed */
 			alc = mtod(m, struct atmllc *);
 			if (bcmp(alc, ATMLLC_HDR, 6)) {
 				printf("%s: recv'd invalid LLC/SNAP frame "
 				    "[vp=%d,vc=%d]\n", ifp->if_xname,
 				    ATM_PH_VPI(ah), ATM_PH_VCI(ah));
 				m_freem(m);
 				return;
 			}
 			etype = ATM_LLC_TYPE(alc);
 			m_adj(m, sizeof(*alc));
 		}
 
 		switch (etype) {
 
 #ifdef INET
 		case ETHERTYPE_IP:
 			isr = NETISR_IP;
 			break;
 #endif
 
 #ifdef INET6
 		case ETHERTYPE_IPV6:
 			isr = NETISR_IPV6;
 			break;
 #endif
 		default:
 			/*
 			 * Unhandled protocol.  Without NATM this is also
 			 * where packets carrying an rxhand end up.
 			 */
 #ifndef NATM
   dropit:
 #endif
 			if (ng_atm_input_orphan_p != NULL)
 				(*ng_atm_input_orphan_p)(ifp, m, ah, rxhand);
 			else
 				m_freem(m);
 			return;
 		}
 	}
 	netisr_dispatch(isr, m);
 }
 
 /*
  * Perform common duties while attaching to interface list.
  */
 void
 atm_ifattach(struct ifnet *ifp)
 {
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 	struct ifatm *ifatm = ifp->if_l2com;
 
 	ifp->if_addrlen = 0;
 	ifp->if_hdrlen = 0;
 	if_attach(ifp);
 	ifp->if_mtu = ATMMTU;
 	ifp->if_output = atm_output;
 #if 0
 	ifp->if_input = atm_input;
 #endif
 	ifp->if_snd.ifq_maxlen = 50;	/* dummy */
 
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (ifa->ifa_addr->sa_family == AF_LINK) {
 			sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 			sdl->sdl_type = IFT_ATM;
 			sdl->sdl_alen = ifp->if_addrlen;
 #ifdef notyet /* if using ATMARP, store hardware address using the next line */
 			bcopy(ifp->hw_addr, LLADDR(sdl), ifp->if_addrlen);
 #endif
 			break;
 		}
 
 	ifp->if_linkmib = &ifatm->mib;
 	ifp->if_linkmiblen = sizeof(ifatm->mib);
 
 	if(ng_atm_attach_p)
 		(*ng_atm_attach_p)(ifp);
 	if (atm_harp_attach_p)
 		(*atm_harp_attach_p)(ifp);
 }
 
 /*
  * Common stuff for detaching an ATM interface
  */
 void
 atm_ifdetach(struct ifnet *ifp)
 {
 	if (atm_harp_detach_p)
 		(*atm_harp_detach_p)(ifp);
 	if(ng_atm_detach_p)
 		(*ng_atm_detach_p)(ifp);
 	if_detach(ifp);
 }
 
 /*
  * Support routine for the SIOCATMGVCCS ioctl().
  *
  * This routine assumes that the private VCC structures used by the
  * driver begin with a struct atmio_vcc.
  *
  * Return a table of VCCs in a freshly allocated memory area, or NULL if
  * the allocation fails.  Here we have a problem: we first have to know
  * how many vccs we need to return.  Then we allocate the memory and
  * finally fill it in.  Because we cannot lock while calling malloc, the
  * number of active vccs may change while we're in malloc.  So we start
  * with room for 'start' + 10 vccs and, whenever that turns out not to
  * be enough, double the size and re-iterate.
  *
  * We could use an sx lock for the vcc tables.
  */
 struct atmio_vcctable *
 atm_getvccs(struct atmio_vcc **table, u_int size, u_int start,
     struct mtx *lock, int waitok)
 {
 	struct atmio_vcctable *tbl;
 	struct atmio_vcc *out;
 	size_t nbytes;
 	u_int capacity, idx;
 
 	tbl = NULL;
 	for (capacity = start + 10; ; capacity *= 2) {
 		nbytes = sizeof(*tbl) + capacity * sizeof(tbl->vccs[0]);
 		tbl = reallocf(tbl, nbytes, M_TEMP,
 		    waitok ? M_WAITOK : M_NOWAIT);
 		if (tbl == NULL)
 			return (NULL);
 		bzero(tbl, nbytes);
 
 		tbl->count = 0;
 		out = tbl->vccs;
 
 		mtx_lock(lock);
 		for (idx = 0; idx < size; idx++) {
 			if (table[idx] == NULL)
 				continue;
 			/* too many - try again */
 			if (++tbl->count == capacity)
 				break;
 			*out++ = *table[idx];
 		}
 		mtx_unlock(lock);
 
 		/* The whole table was walked: we are done. */
 		if (idx == size)
 			break;
 	}
 	return (tbl);
 }
 
 /*
  * Report a change of driver or channel state to the netgraph and harp
  * event hooks, if they are installed.
  */
 void
 atm_event(struct ifnet *ifp, u_int event, void *arg)
 {
 
 	if (ng_atm_event_p != NULL) {
 		(*ng_atm_event_p)(ifp, event, arg);
 	}
 	if (atm_harp_event_p != NULL) {
 		(*atm_harp_event_p)(ifp, event, arg);
 	}
 }
 
 /*
  * Allocate the zeroed, ATM-specific part of an interface and point it
  * back at 'ifp'.  Registered for IFT_ATM in atm_modevent().
  */
 static void *
 atm_alloc(u_char type, struct ifnet *ifp)
 {
 	struct ifatm *ifatm;
 
 	ifatm = malloc(sizeof(*ifatm), M_IFATM, M_WAITOK | M_ZERO);
 	ifatm->ifp = ifp;
 	return (ifatm);
 }
 
 /*
  * Release the memory obtained from atm_alloc().
  */
 static void
 atm_free(void *com, u_char type)
 {
 	free(com, M_IFATM);
 }
 
 /*
  * Module event handler: hook the IFT_ATM allocation routines up on load
  * and take them away again on unload.  All other events are answered
  * with EOPNOTSUPP.
  */
 static int
 atm_modevent(module_t mod, int type, void *data)
 {
 
 	if (type == MOD_LOAD) {
 		if_register_com_alloc(IFT_ATM, atm_alloc, atm_free);
 		return (0);
 	}
 	if (type == MOD_UNLOAD) {
 		if_deregister_com_alloc(IFT_ATM);
 		return (0);
 	}
 	return (EOPNOTSUPP);
 }
 
 /*
  * Module description for "atm": atm_modevent() is the event handler and
  * no extra argument is passed to it.
  */
 static moduledata_t atm_mod = {
         "atm",
         atm_modevent,
         0
 };
                 
 DECLARE_MODULE(atm, atm_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
 MODULE_VERSION(atm, 1);
Index: head/sys/net/if_fwsubr.c
===================================================================
--- head/sys/net/if_fwsubr.c	(revision 178887)
+++ head/sys/net/if_fwsubr.c	(revision 178888)
@@ -1,856 +1,856 @@
 /*-
  * Copyright (c) 2004 Doug Rabson
  * Copyright (c) 1982, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 
 #include <net/if.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/if_llc.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/bpf.h>
 #include <net/firewire.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #endif
 #ifdef INET6
 #include <netinet6/nd6.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 /* Allocation type used for the per-interface struct fw_com. */
 MALLOC_DEFINE(M_FWCOM, "fw_com", "firewire interface internals");
 
 /*
  * Link-level broadcast address: every field of the hardware address is
  * set to all ones.  Used as if_broadcastaddr and as the destination in
  * the BPF pseudo-header for non-unicast output.
  */
 struct fw_hwaddr firewire_broadcastaddr = {
 	0xffffffff,
 	0xffffffff,
 	0xff,
 	0xff,
 	0xffff,
 	0xffffffff
 };
 
 /*
  * if_output routine for IP-over-FireWire interfaces.
  *
  * Resolves the link-level destination (stored in an mbuf tag for unicast
  * traffic), taps BPF, then prepends the encapsulation header and hands the
  * packet to the interface queue.  Datagrams that do not fit into a single
  * link packet are split into fragments that share one datagram label.
  *
  * The mbuf chain is always consumed.  Returns 0 on success or an errno
  * value; ENOBUFS is returned when a fragment cannot be built or queued.
  */
 static int
 firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
     struct rtentry *rt0)
 {
 	struct fw_com *fc = IFP2FWC(ifp);
 	int error, type;
 	struct rtentry *rt = NULL;
 	struct m_tag *mtag;
 	union fw_encap *enc;
 	struct fw_hwaddr *destfw;
 	uint8_t speed;
 	uint16_t psize, fsize, dsize;
 	struct mbuf *mtail;
 	int unicast, dgl, foff;
 	static int next_dgl;
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		goto bad;
 #endif
 
 	if (!((ifp->if_flags & IFF_UP) &&
 	   (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
 		error = ENETDOWN;
 		goto bad;
 	}
 
 	if (rt0 != NULL) {
-		error = rt_check(&rt, &rt0, dst);
+		error = rt_check_fib(&rt, &rt0, dst, rt0->rt_fibnum);
 		if (error)
 			goto bad;
 		RT_UNLOCK(rt);
 	}
 
 	/*
 	 * For unicast, we make a tag to store the lladdr of the
 	 * destination. This might not be the first time we have seen
 	 * the packet (for instance, the arp code might be trying to
 	 * re-send it after receiving an arp reply) so we only
 	 * allocate a tag if there isn't one there already. For
 	 * multicast, we will eventually use a different tag to store
 	 * the channel number.
 	 */
 	unicast = !(m->m_flags & (M_BCAST | M_MCAST));
 	if (unicast) {
 		mtag = m_tag_locate(m, MTAG_FIREWIRE, MTAG_FIREWIRE_HWADDR, NULL);
 		if (!mtag) {
 			mtag = m_tag_alloc(MTAG_FIREWIRE, MTAG_FIREWIRE_HWADDR,
 			    sizeof (struct fw_hwaddr), M_NOWAIT);
 			if (!mtag) {
 				error = ENOMEM;
 				goto bad;
 			}
 			m_tag_prepend(m, mtag);
 		}
 		destfw = (struct fw_hwaddr *)(mtag + 1);
 	} else {
 		destfw = 0;
 	}
 
 	switch (dst->sa_family) {
 	/*
 	 * Guard on the kernel option INET, as firewire_input() does;
 	 * AF_INET is always defined and so never excluded this code.
 	 */
 #ifdef INET
 	case AF_INET:
 		/*
 		 * Only bother with arp for unicast. Allocation of
 		 * channels etc. for firewire is quite different and
 		 * doesn't fit into the arp model.
 		 */
 		if (unicast) {
 			error = arpresolve(ifp, rt, m, dst, (u_char *) destfw);
 			if (error)
 				return (error == EWOULDBLOCK ? 0 : error);
 		}
 		type = ETHERTYPE_IP;
 		break;
 
 	case AF_ARP:
 	{
 		struct arphdr *ah;
 		ah = mtod(m, struct arphdr *);
 		ah->ar_hrd = htons(ARPHRD_IEEE1394);
 		type = ETHERTYPE_ARP;
 		if (unicast)
 			*destfw = *(struct fw_hwaddr *) ar_tha(ah);
 
 		/*
 		 * The standard arp code leaves a hole for the target
 		 * hardware address which we need to close up.
 		 */
 		bcopy(ar_tpa(ah), ar_tha(ah), ah->ar_pln);
 		m_adj(m, -ah->ar_hln);
 		break;
 	}
 #endif
 
 #ifdef INET6
 	case AF_INET6:
 		if (unicast) {
 			error = nd6_storelladdr(fc->fc_ifp, rt, m, dst,
 			    (u_char *) destfw);
 			if (error)
 				return (error);
 		}
 		type = ETHERTYPE_IPV6;
 		break;
 #endif
 
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		error = EAFNOSUPPORT;
 		goto bad;
 	}
 
 	/*
 	 * Let BPF tap off a copy before we encapsulate.
 	 */
 	if (bpf_peers_present(ifp->if_bpf)) {
 		struct fw_bpfhdr h;
 		if (unicast)
 			bcopy(destfw, h.firewire_dhost, 8);
 		else
 			bcopy(&firewire_broadcastaddr, h.firewire_dhost, 8);
 		bcopy(&fc->fc_hwaddr, h.firewire_shost, 8);
 		h.firewire_type = htons(type);
 		bpf_mtap2(ifp->if_bpf, &h, sizeof(h), m);
 	}
 
 	/*
 	 * Punt on MCAP for now and send all multicast packets on the
 	 * broadcast channel.
 	 */
 	if (m->m_flags & M_MCAST)
 		m->m_flags |= M_BCAST;
 
 	/*
 	 * Figure out what speed to use and what the largest supported
 	 * packet size is. For unicast, this is the minimum of what we
 	 * can speak and what they can hear. For broadcast, let's be
 	 * conservative and use S100. We could possibly improve that
 	 * by examining the bus manager's speed map or similar. We
 	 * also reduce the packet size for broadcast to account for
 	 * the GASP header.
 	 */
 	if (unicast) {
 		speed = min(fc->fc_speed, destfw->sspd);
 		psize = min(512 << speed, 2 << destfw->sender_max_rec);
 	} else {
 		speed = 0;
 		psize = 512 - 2*sizeof(uint32_t);
 	}
 
 	/*
 	 * Next, we encapsulate, possibly fragmenting the original
 	 * datagram if it won't fit into a single packet.
 	 */
 	if (m->m_pkthdr.len <= psize - sizeof(uint32_t)) {
 		/*
 		 * No fragmentation is necessary.
 		 */
 		M_PREPEND(m, sizeof(uint32_t), M_DONTWAIT);
 		if (!m) {
 			error = ENOBUFS;
 			goto bad;
 		}
 		enc = mtod(m, union fw_encap *);
 		enc->unfrag.ether_type = type;
 		enc->unfrag.lf = FW_ENCAP_UNFRAG;
 		enc->unfrag.reserved = 0;
 
 		/*
 		 * Byte swap the encapsulation header manually.
 		 */
 		enc->ul[0] = htonl(enc->ul[0]);
 
 		IFQ_HANDOFF(ifp, m, error);
 		return (error);
 	} else {
 		/*
 		 * Fragment the datagram, making sure to leave enough
 		 * space for the encapsulation header in each packet.
 		 */
 		fsize = psize - 2*sizeof(uint32_t);
 		dgl = next_dgl++;
 		dsize = m->m_pkthdr.len;
 		foff = 0;
 		while (m) {
 			if (m->m_pkthdr.len > fsize) {
 				/*
 				 * Split off the tail segment from the
 				 * datagram, copying our tags over.
 				 */
 				mtail = m_split(m, fsize, M_DONTWAIT);
 				if (mtail == NULL) {
 					/*
 					 * The split failed.  Without this
 					 * check the NULL tail would be
 					 * dereferenced by the tag copy and
 					 * the truncated head sent as if it
 					 * were the last fragment.  Drop what
 					 * is left of the datagram instead.
 					 */
 					error = ENOBUFS;
 					goto bad;
 				}
 				m_tag_copy_chain(mtail, m, M_NOWAIT);
 			} else {
 				mtail = 0;
 			}
 
 			/*
 			 * Add our encapsulation header to this
 			 * fragment and hand it off to the link.
 			 */
 			M_PREPEND(m, 2*sizeof(uint32_t), M_DONTWAIT);
 			if (!m) {
 				error = ENOBUFS;
 				goto bad;
 			}
 			enc = mtod(m, union fw_encap *);
 			if (foff == 0) {
 				enc->firstfrag.lf = FW_ENCAP_FIRST;
 				enc->firstfrag.reserved1 = 0;
 				enc->firstfrag.reserved2 = 0;
 				enc->firstfrag.datagram_size = dsize - 1;
 				enc->firstfrag.ether_type = type;
 				enc->firstfrag.dgl = dgl;
 			} else {
 				if (mtail)
 					enc->nextfrag.lf = FW_ENCAP_NEXT;
 				else
 					enc->nextfrag.lf = FW_ENCAP_LAST;
 				enc->nextfrag.reserved1 = 0;
 				enc->nextfrag.reserved2 = 0;
 				enc->nextfrag.reserved3 = 0;
 				enc->nextfrag.datagram_size = dsize - 1;
 				enc->nextfrag.fragment_offset = foff;
 				enc->nextfrag.dgl = dgl;
 			}
 			foff += m->m_pkthdr.len - 2*sizeof(uint32_t);
 
 			/*
 			 * Byte swap the encapsulation header manually.
 			 */
 			enc->ul[0] = htonl(enc->ul[0]);
 			enc->ul[1] = htonl(enc->ul[1]);
 
 			IFQ_HANDOFF(ifp, m, error);
 			if (error) {
 				if (mtail)
 					m_freem(mtail);
 				return (ENOBUFS);
 			}
 
 			m = mtail;
 		}
 
 		return (0);
 	}
 
 bad:
 	if (m)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * Link-level reassembly of one received fragment.
  *
  * Fragments are grouped in a per-interface list of reassembly buffers
  * keyed by the datagram label combined with the sender id (src).  Each
  * buffer keeps its fragments, encapsulation header still attached, on an
  * m_nextpkt chain.
  *
  * Returns the reassembled packet (still carrying the first fragment's
  * encapsulation header) once it is complete, or NULL when more fragments
  * are needed or the fragment had to be dropped.  m is consumed in every
  * case.  An inconsistent or overlapping fragment discards whatever was
  * collected so far and restarts the buffer with m alone.
  */
 static struct mbuf *
 firewire_input_fragment(struct fw_com *fc, struct mbuf *m, int src)
 {
 	union fw_encap *enc;
 	struct fw_reass *r;
 	struct mbuf *mf, *mprev;
 	int dsize;
 	int fstart, fend, start, end, islast;
 	uint32_t id;
 
 	/*
 	 * Find an existing reassembly buffer or create a new one.
 	 */
 	enc = mtod(m, union fw_encap *);
 	id = enc->firstfrag.dgl | (src << 16);
 	STAILQ_FOREACH(r, &fc->fc_frags, fr_link)
 		if (r->fr_id == id)
 			break;
 	if (!r) {
 		r = malloc(sizeof(struct fw_reass), M_TEMP, M_NOWAIT);
 		if (!r) {
 			m_freem(m);
 			return 0;
 		}
 		r->fr_id = id;
 		r->fr_frags = 0;
 		STAILQ_INSERT_HEAD(&fc->fc_frags, r, fr_link);
 	}
 
 	/*
 	 * If this fragment overlaps any other fragment, we must discard
 	 * the partial reassembly and start again.
 	 */
 	if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 		fstart = 0;
 	else
 		fstart = enc->nextfrag.fragment_offset;
 	/* [fstart, fend) is the payload range this fragment covers. */
 	fend = fstart + m->m_pkthdr.len - 2*sizeof(uint32_t);
 	dsize = enc->nextfrag.datagram_size;
 	islast = (enc->nextfrag.lf == FW_ENCAP_LAST);
 
 	for (mf = r->fr_frags; mf; mf = mf->m_nextpkt) {
 		enc = mtod(mf, union fw_encap *);
 		if (enc->nextfrag.datagram_size != dsize) {
 			/*
 			 * This fragment must be from a different
 			 * packet.
 			 */
 			goto bad;
 		}
 		if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 			start = 0;
 		else
 			start = enc->nextfrag.fragment_offset;
 		end = start + mf->m_pkthdr.len - 2*sizeof(uint32_t);
 		if ((fstart < end && fend > start) ||
 		    (islast && enc->nextfrag.lf == FW_ENCAP_LAST)) {
 			/*
 			 * Overlap - discard reassembly buffer and start
 			 * again with this fragment.
 			 */
 			goto bad;
 		}
 	}
 
 	/*
 	 * Find where to put this fragment in the list.
 	 */
 	for (mf = r->fr_frags, mprev = NULL; mf;
 	    mprev = mf, mf = mf->m_nextpkt) {
 		enc = mtod(mf, union fw_encap *);
 		if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 			start = 0;
 		else
 			start = enc->nextfrag.fragment_offset;
 		if (start >= fend)
 			break;
 	}
 
 	/*
 	 * If this is a last fragment and we are not adding at the end
 	 * of the list, discard the buffer.
 	 */
 	if (islast && mprev && mprev->m_nextpkt)
 		goto bad;
 
 	if (mprev) {
 		m->m_nextpkt = mprev->m_nextpkt;
 		mprev->m_nextpkt = m;
 
 		/*
 		 * Coalesce forwards and see if we can make a whole
 		 * datagram.
 		 */
 		enc = mtod(mprev, union fw_encap *);
 		if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 			start = 0;
 		else
 			start = enc->nextfrag.fragment_offset;
 		end = start + mprev->m_pkthdr.len - 2*sizeof(uint32_t);
 		while (end == fstart) {
 			/*
 			 * Strip off the encap header from m and
 			 * append it to mprev, freeing m.
 			 */
 			m_adj(m, 2*sizeof(uint32_t));
 			mprev->m_nextpkt = m->m_nextpkt;
 			mprev->m_pkthdr.len += m->m_pkthdr.len;
 			m_cat(mprev, m);
 
 			/*
 			 * The header carries the datagram size minus one,
 			 * hence the "+ 1"; mprev still has its own
 			 * encapsulation header in front of the payload.
 			 */
 			if (mprev->m_pkthdr.len == dsize + 1 + 2*sizeof(uint32_t)) {
 				/*
 				 * We have assembled a complete packet, so
 				 * we must be finished. Make sure we have
 				 * merged the whole chain.
 				 */
 				STAILQ_REMOVE(&fc->fc_frags, r, fw_reass, fr_link);
 				free(r, M_TEMP);
 				m = mprev->m_nextpkt;
 				while (m) {
 					mf = m->m_nextpkt;
 					m_freem(m);
 					m = mf;
 				}
 				mprev->m_nextpkt = NULL;
 
 				return (mprev);
 			}
 
 			/*
 			 * See if we can continue merging forwards.
 			 */
 			end = fend;
 			m = mprev->m_nextpkt;
 			if (m) {
 				enc = mtod(m, union fw_encap *);
 				if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 					fstart = 0;
 				else
 					fstart = enc->nextfrag.fragment_offset;
 				fend = fstart + m->m_pkthdr.len
 				    - 2*sizeof(uint32_t);
 			} else {
 				break;
 			}
 		}
 	} else {
 		/*
 		 * No predecessor was found: m becomes the only entry of
 		 * the buffer.
 		 * NOTE(review): if the list was non-empty here, its old
 		 * entries are no longer referenced -- confirm whether that
 		 * can happen.
 		 */
 		m->m_nextpkt = 0;
 		r->fr_frags = m;
 	}
 
 	return (0);
 
 bad:
 	/* Drop everything collected so far and restart with m alone. */
 	while (r->fr_frags) {
 		mf = r->fr_frags;
 		r->fr_frags = mf->m_nextpkt;
 		m_freem(mf);
 	}
 	m->m_nextpkt = 0;
 	r->fr_frags = m;
 
 	return (0);
 }
 
 /*
  * Input routine for IP-over-FireWire interfaces.
  *
  * Strips the encapsulation header (reassembling link-level fragments
  * first where needed), taps BPF, and dispatches the payload to the
  * netisr selected by the encapsulated ether type.  src is the sender id
  * used to key fragment reassembly.  The mbuf is always consumed.
  */
 void
 firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src)
 {
 	struct fw_com *fc = IFP2FWC(ifp);
 	union fw_encap *enc;
 	int type, isr;
 
 	/*
 	 * The caller has already stripped off the packet header
 	 * (stream or wreqb) and marked the mbuf's M_BCAST flag
 	 * appropriately. We de-encapsulate the IP packet and pass it
 	 * up the line after handling link-level fragmentation.
 	 */
 	if (m->m_pkthdr.len < sizeof(uint32_t)) {
 		if_printf(ifp, "discarding frame without "
 		    "encapsulation header (len %u pkt len %u)\n",
 		    m->m_len, m->m_pkthdr.len);
 	}
 
 	/*
 	 * A frame that is too short is actually dropped here: m_pullup()
 	 * frees the chain and returns NULL when it cannot supply the
 	 * requested bytes.
 	 */
 	m = m_pullup(m, sizeof(uint32_t));
 	if (m == NULL)
 		return;
 	enc = mtod(m, union fw_encap *);
 
 	/*
 	 * Byte swap the encapsulation header manually.
 	 */
 	enc->ul[0] = ntohl(enc->ul[0]);
 
 	if (enc->unfrag.lf != 0) {
 		m = m_pullup(m, 2*sizeof(uint32_t));
 		if (!m)
 			return;
 		enc = mtod(m, union fw_encap *);
 		enc->ul[1] = ntohl(enc->ul[1]);
 		m = firewire_input_fragment(fc, m, src);
 		if (!m)
 			return;
 		enc = mtod(m, union fw_encap *);
 		type = enc->firstfrag.ether_type;
 		m_adj(m, 2*sizeof(uint32_t));
 	} else {
 		type = enc->unfrag.ether_type;
 		m_adj(m, sizeof(uint32_t));
 	}
 
 	if (m->m_pkthdr.rcvif == NULL) {
 		if_printf(ifp, "discard frame w/o interface pointer\n");
 		ifp->if_ierrors++;
 		m_freem(m);
 		return;
 	}
 #ifdef DIAGNOSTIC
 	if (m->m_pkthdr.rcvif != ifp) {
 		if_printf(ifp, "Warning, frame marked as received on %s\n",
 			m->m_pkthdr.rcvif->if_xname);
 	}
 #endif
 
 #ifdef MAC
 	/*
 	 * Tag the mbuf with an appropriate MAC label before any other
 	 * consumers can get to it.
 	 */
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * Give bpf a chance at the packet. The link-level driver
 	 * should have left us a tag with the EUID of the sender.
 	 */
 	if (bpf_peers_present(ifp->if_bpf)) {
 		struct fw_bpfhdr h;
 		struct m_tag *mtag;
 
 		mtag = m_tag_locate(m, MTAG_FIREWIRE, MTAG_FIREWIRE_SENDER_EUID, 0);
 		if (mtag)
 			bcopy(mtag + 1, h.firewire_shost, 8);
 		else
 			/*
 			 * No sender tag: fill the source field with the
 			 * broadcast address so that it is never handed to
 			 * bpf as uninitialised stack data.
 			 */
 			bcopy(&firewire_broadcastaddr, h.firewire_shost, 8);
 		bcopy(&fc->fc_hwaddr, h.firewire_dhost, 8);
 		h.firewire_type = htons(type);
 		bpf_mtap2(ifp->if_bpf, &h, sizeof(h), m);
 	}
 
 	if (ifp->if_flags & IFF_MONITOR) {
 		/*
 		 * Interface marked for monitoring; discard packet.
 		 */
 		m_freem(m);
 		return;
 	}
 
 	ifp->if_ibytes += m->m_pkthdr.len;
 
 	/* Discard packet if interface is not up */
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return;
 	}
 
 	if (m->m_flags & (M_BCAST|M_MCAST))
 		ifp->if_imcasts++;
 
 	switch (type) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		if ((m = ip_fastforward(m)) == NULL)
 			return;
 		isr = NETISR_IP;
 		break;
 
 	case ETHERTYPE_ARP:
 	{
 		struct arphdr *ah;
 		ah = mtod(m, struct arphdr *);
 
 		/*
 		 * Adjust the arp packet to insert an empty tha slot.
 		 */
 		m->m_len += ah->ar_hln;
 		m->m_pkthdr.len += ah->ar_hln;
 		bcopy(ar_tha(ah), ar_tpa(ah), ah->ar_pln);
 		isr = NETISR_ARP;
 		break;
 	}
 #endif
 
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 
 	default:
 		m_freem(m);
 		return;
 	}
 
 	netisr_dispatch(isr, m);
 }
 
 /*
  * Generic ioctl handling shared by FireWire network drivers.
  *
  * SIOCSIFADDR marks the interface up and initialises it (plus ARP setup
  * for AF_INET addresses), SIOCGIFADDR copies the hardware address out,
  * and SIOCSIFMTU accepts any MTU up to 1500.  Every other request, and
  * an oversized MTU, yields EINVAL.
  */
 int
 firewire_ioctl(struct ifnet *ifp, int command, caddr_t data)
 {
 	struct ifreq *req = (struct ifreq *) data;
 	struct ifaddr *addr = (struct ifaddr *) data;
 	struct sockaddr *out;
 
 	switch (command) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 
 		switch (addr->ifa_addr->sa_family) {
 #ifdef INET
 		case AF_INET:
 			/* Initialise first so the ARP setup can transmit. */
 			ifp->if_init(ifp->if_softc);
 			arp_ifinit(ifp, addr);
 			break;
 #endif
 		default:
 			ifp->if_init(ifp->if_softc);
 			break;
 		}
 		return (0);
 
 	case SIOCGIFADDR:
 		out = (struct sockaddr *) & req->ifr_data;
 		bcopy(&IFP2FWC(ifp)->fc_hwaddr,
 		    (caddr_t) out->sa_data, sizeof(struct fw_hwaddr));
 		return (0);
 
 	case SIOCSIFMTU:
 		/* Reject anything above the 1500 byte ceiling. */
 		if (req->ifr_mtu > 1500)
 			return (EINVAL);
 		ifp->if_mtu = req->ifr_mtu;
 		return (0);
 
 	default:
 		return (EINVAL);		/* XXX netbsd has ENOTTY??? */
 	}
 }
 
 /*
  * if_resolvemulti hook.  No link-level multicast address is ever
  * produced: on success *llsa is cleared and 0 is returned.
  *
  * AF_LINK addresses are accepted as they are.  AF_INET and AF_INET6
  * addresses must be multicast, otherwise EADDRNOTAVAIL is returned; the
  * unspecified IPv6 address additionally sets IFF_ALLMULTI on the
  * interface.  Any other family yields EAFNOSUPPORT.
  */
 static int
 firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
     struct sockaddr *sa)
 {
 #ifdef INET
 	struct sockaddr_in *in4;
 #endif
 #ifdef INET6
 	struct in6_addr *in6;
 #endif
 
 	switch (sa->sa_family) {
 	case AF_LINK:
 		/* Already a link-level address; nothing to check. */
 		break;
 
 #ifdef INET
 	case AF_INET:
 		in4 = (struct sockaddr_in *)sa;
 		if (!IN_MULTICAST(ntohl(in4->sin_addr.s_addr)))
 			return EADDRNOTAVAIL;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		in6 = &((struct sockaddr_in6 *)sa)->sin6_addr;
 		if (IN6_IS_ADDR_UNSPECIFIED(in6)) {
 			/*
 			 * The unspecified address asks for every IPv6
 			 * multicast group (used by multicast routers).
 			 */
 			ifp->if_flags |= IFF_ALLMULTI;
 		} else if (!IN6_IS_ADDR_MULTICAST(in6)) {
 			return EADDRNOTAVAIL;
 		}
 		break;
 #endif
 
 	default:
 		/*
 		 * The error text does not quite fit, but the name is
 		 * what counts.
 		 */
 		return EAFNOSUPPORT;
 	}
 
 	*llsa = 0;
 	return 0;
 }
 
 /*
  * Attach a FireWire network interface.
  *
  * Records the link speed from llc, initialises the reassembly list,
  * attaches the ifnet and BPF, installs the output and multicast hooks
  * and copies llc into the interface's link-level address.  A one-line
  * summary of the hardware address is printed at the end.
  */
 void
 firewire_ifattach(struct ifnet *ifp, struct fw_hwaddr *llc)
 {
 	struct fw_com *fc = IFP2FWC(ifp);
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 	const char *speedname;
 	static const char* speeds[] = {
 		"S100", "S200", "S400", "S800",
 		"S1600", "S3200"
 	};
 
 	fc->fc_speed = llc->sspd;
 	STAILQ_INIT(&fc->fc_frags);
 
 	ifp->if_addrlen = sizeof(struct fw_hwaddr);
 	ifp->if_hdrlen = 0;
 	if_attach(ifp);
 	ifp->if_mtu = 1500;	/* XXX */
 	ifp->if_output = firewire_output;
 	ifp->if_resolvemulti = firewire_resolvemulti;
 	ifp->if_broadcastaddr = (u_char *) &firewire_broadcastaddr;
 
 	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_IEEE1394;
 	sdl->sdl_alen = ifp->if_addrlen;
 	bcopy(llc, LLADDR(sdl), ifp->if_addrlen);
 
 	bpfattach(ifp, DLT_APPLE_IP_OVER_IEEE1394,
 	    sizeof(struct fw_hwaddr));
 
 	/*
 	 * sspd is supplied by the caller; never index past the end of the
 	 * name table when it holds a speed code we do not know.
 	 */
 	if (llc->sspd < sizeof(speeds) / sizeof(speeds[0]))
 		speedname = speeds[llc->sspd];
 	else
 		speedname = "unknown speed";
 
 	if_printf(ifp, "Firewire address: %8D @ 0x%04x%08x, %s, maxrec %d\n",
 	    (uint8_t *) &llc->sender_unique_ID_hi, ":",
 	    ntohs(llc->sender_unicast_FIFO_hi),
 	    ntohl(llc->sender_unicast_FIFO_lo),
 	    speedname,
 	    (2 << llc->sender_max_rec));
 }
 
 /*
  * Detach a FireWire network interface: BPF first, then the ifnet.
  */
 void
 firewire_ifdetach(struct ifnet *ifp)
 {
 	bpfdetach(ifp);
 	if_detach(ifp);
 }
 
 void
 firewire_busreset(struct ifnet *ifp)
 {
 	struct fw_com *fc = IFP2FWC(ifp);
 	struct fw_reass *r;
 	struct mbuf *m;
 
 	/*
 	 * Discard any partial datagrams since the host ids may have changed.
 	 */
 	while ((r = STAILQ_FIRST(&fc->fc_frags))) {
 		STAILQ_REMOVE_HEAD(&fc->fc_frags, fr_link);
 		while (r->fr_frags) {
 			m = r->fr_frags;
 			r->fr_frags = m->m_nextpkt;
 			m_freem(m);
 		}
 		free(r, M_TEMP);
 	}
 }
 
 /*
  * Allocate the zero-filled struct fw_com that backs an IFT_IEEE1394
  * ifnet and record the owning ifnet in it.  The allocation sleeps
  * (M_WAITOK), so the result is dereferenced without a NULL check.
  */
 static void *
 firewire_alloc(u_char type, struct ifnet *ifp)
 {
 	struct fw_com *com;
 
 	com = malloc(sizeof(*com), M_FWCOM, M_WAITOK | M_ZERO);
 	com->fc_ifp = ifp;
 	return (com);
 }
 
 /*
  * Release a structure obtained from firewire_alloc().  The type argument
  * is unused.
  */
 static void
 firewire_free(void *com, u_char type)
 {
 
 	free(com, M_FWCOM);
 }
 
 /*
  * Module event handler: register the IFT_IEEE1394 allocator pair on
  * load and remove it on unload.  Any other event yields EOPNOTSUPP.
  */
 static int
 firewire_modevent(module_t mod, int type, void *data)
 {
 
 	if (type == MOD_LOAD) {
 		if_register_com_alloc(IFT_IEEE1394,
 		    firewire_alloc, firewire_free);
 		return (0);
 	}
 	if (type == MOD_UNLOAD) {
 		if_deregister_com_alloc(IFT_IEEE1394);
 		return (0);
 	}
 	return (EOPNOTSUPP);
 }
 
 /*
  * Module description for the FireWire interface support code: module
  * name, event handler and an unused extra argument.
  */
 static moduledata_t firewire_mod = {
 	"if_firewire",
 	firewire_modevent,
 	0
 };
 
 /* Hook the module into the interface-initialisation stage of boot. */
 DECLARE_MODULE(if_firewire, firewire_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
 MODULE_VERSION(if_firewire, 1);
Index: head/sys/net/if_gif.c
===================================================================
--- head/sys/net/if_gif.c	(revision 178887)
+++ head/sys/net/if_gif.c	(revision 178888)
@@ -1,978 +1,981 @@
 /*	$FreeBSD$	*/
 /*	$KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $	*/
 
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
+#include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/conf.h>
 #include <machine/cpu.h>
 
 #include <net/if.h>
 #include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/bpf.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #ifdef	INET
 #include <netinet/in_var.h>
 #include <netinet/in_gif.h>
 #include <netinet/ip_var.h>
 #endif	/* INET */
 
 #ifdef INET6
 #ifndef INET
 #include <netinet/in.h>
 #endif
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/in6_gif.h>
 #include <netinet6/ip6protosw.h>
 #endif /* INET6 */
 
 #include <netinet/ip_encap.h>
 #include <net/ethernet.h>
 #include <net/if_bridgevar.h>
 #include <net/if_gif.h>
 
 #include <security/mac/mac_framework.h>
 
 #define GIFNAME		"gif"
 
 /*
  * gif_mtx protects the global gif_softc_list.
  */
 static struct mtx gif_mtx;
 static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
 static LIST_HEAD(, gif_softc) gif_softc_list;
 
 /*
  * Optional hooks; each is called only while its pointer is non-NULL.
  * The input hook may consume the mbuf by setting *mp to NULL; the attach
  * and detach hooks are called from interface creation and destruction.
  * NOTE(review): presumably installed by the netgraph gif node, and the
  * orphan hook is presumably for packets no protocol claims -- confirm.
  */
 void	(*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
 void	(*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
 void	(*ng_gif_attach_p)(struct ifnet *ifp);
 void	(*ng_gif_detach_p)(struct ifnet *ifp);
 
 static void	gif_start(struct ifnet *);
 static int	gif_clone_create(struct if_clone *, int, caddr_t);
 static void	gif_clone_destroy(struct ifnet *);
 
 /* NOTE(review): presumably defines gif_cloner, used by gifmodevent(). */
 IFC_SIMPLE_DECLARE(gif, 0);
 
 static int gifmodevent(module_t, int, void *);
 
 /* Sysctl subtree for the gif tunables declared below. */
 SYSCTL_DECL(_net_link);
 SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
     "Generic Tunnel Interface");
 #ifndef MAX_GIF_NEST
 /*
  * This macro controls the default upper limit on the nesting of gif
  * tunnels.  Since setting this macro to a large value with a careless
  * configuration may crash the system, we don't allow any nesting by
  * default.  If you need to configure nested gif tunnels, you can define
  * this macro in your kernel configuration file.  However, if you do so,
  * please be careful to configure the tunnels so that they do not form a
  * loop.
  */
 #define MAX_GIF_NEST 1
 #endif
 static int max_gif_nesting = MAX_GIF_NEST;
 SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_RW,
     &max_gif_nesting, 0, "Max nested tunnels");
 
 /*
  * By default, we disallow creation of multiple tunnels between the same
  * pair of addresses.  Some applications require this functionality so
  * we allow control over this check here.
  */
 #ifdef XBONEHACK
 static int parallel_tunnels = 1;
 #else
 static int parallel_tunnels = 0;
 #endif
 SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels, CTLFLAG_RW,
     &parallel_tunnels, 0, "Allow parallel tunnels?");
 
 /* copy from src/sys/net/if_ethersubr.c */
 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 /* Fallback definition: true when addr equals the broadcast address. */
 #ifndef ETHER_IS_BROADCAST
 #define ETHER_IS_BROADCAST(addr) \
 	(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
 #endif
 
 /*
  * Create a gif interface for the given clone unit.
  *
  * Allocates the softc, records the creating process's FIB number in it,
  * allocates and initialises the ifnet, attaches it (including BPF and
  * the optional netgraph hook) and finally links the softc onto the
  * global list.  Returns 0 on success, or ENOSPC when no ifnet can be
  * allocated.  The params argument is unused.
  */
 static int
 gif_clone_create(ifc, unit, params)
 	struct if_clone *ifc;
 	int unit;
 	caddr_t params;
 {
 	struct gif_softc *sc;
 
 	sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
+	sc->gif_fibnum = curthread->td_proc->p_fibnum;
 	GIF2IFP(sc) = if_alloc(IFT_GIF);
 	if (GIF2IFP(sc) == NULL) {
 		free(sc, M_GIF);
 		return (ENOSPC);
 	}
 
 	GIF_LOCK_INIT(sc);
 
 	GIF2IFP(sc)->if_softc = sc;
 	if_initname(GIF2IFP(sc), ifc->ifc_name, unit);
 
 	sc->encap_cookie4 = sc->encap_cookie6 = NULL;
 
 	GIF2IFP(sc)->if_addrlen = 0;
 	GIF2IFP(sc)->if_mtu    = GIF_MTU;
 	GIF2IFP(sc)->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
 #if 0
 	/* turn off ingress filter */
 	GIF2IFP(sc)->if_flags  |= IFF_LINK2;
 #endif
 	GIF2IFP(sc)->if_ioctl  = gif_ioctl;
 	GIF2IFP(sc)->if_start  = gif_start;
 	GIF2IFP(sc)->if_output = gif_output;
 	GIF2IFP(sc)->if_snd.ifq_maxlen = IFQ_MAXLEN;
 	if_attach(GIF2IFP(sc));
 	bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
 	if (ng_gif_attach_p != NULL)
 		(*ng_gif_attach_p)(GIF2IFP(sc));
 
 	/* Publish the new softc on the global list only once it is set up. */
 	mtx_lock(&gif_mtx);
 	LIST_INSERT_HEAD(&gif_softc_list, sc, gif_list);
 	mtx_unlock(&gif_mtx);
 
 	return (0);
 }
 
 /*
  * Destroy a gif interface.
  *
  * The softc is first unlinked from the global list, then the tunnel
  * configuration and any encapsulation registrations are removed, and
  * finally the ifnet is detached and freed together with the softc.
  */
 static void
 gif_clone_destroy(ifp)
 	struct ifnet *ifp;
 {
 	int err;
 	struct gif_softc *sc = ifp->if_softc;
 
 	mtx_lock(&gif_mtx);
 	LIST_REMOVE(sc, gif_list);
 	mtx_unlock(&gif_mtx);
 
 	gif_delete_tunnel(ifp);
 #ifdef INET6
 	if (sc->encap_cookie6 != NULL) {
 		err = encap_detach(sc->encap_cookie6);
 		KASSERT(err == 0, ("Unexpected error detaching encap_cookie6"));
 	}
 #endif
 #ifdef INET
 	if (sc->encap_cookie4 != NULL) {
 		err = encap_detach(sc->encap_cookie4);
 		KASSERT(err == 0, ("Unexpected error detaching encap_cookie4"));
 	}
 #endif
 
 	if (ng_gif_detach_p != NULL)
 		(*ng_gif_detach_p)(ifp);
 	bpfdetach(ifp);
 	if_detach(ifp);
 	if_free(ifp);
 
 	GIF_LOCK_DESTROY(sc);
 
 	free(sc, M_GIF);
 }
 
 /*
  * Module event handler for if_gif.
  *
  * Load: set up the list lock and the softc list, then attach the
  * interface cloner.  Unload: detach the cloner and destroy the lock.
  * With INET6 the default hop limit ip6_gif_hlim is set on load and
  * cleared on unload.  Any other event yields EOPNOTSUPP.
  */
 static int
 gifmodevent(mod, type, data)
 	module_t mod;
 	int type;
 	void *data;
 {
 
 	if (type == MOD_LOAD) {
 		mtx_init(&gif_mtx, "gif_mtx", NULL, MTX_DEF);
 		LIST_INIT(&gif_softc_list);
 		if_clone_attach(&gif_cloner);
 #ifdef INET6
 		ip6_gif_hlim = GIF_HLIM;
 #endif
 		return 0;
 	}
 
 	if (type == MOD_UNLOAD) {
 		if_clone_detach(&gif_cloner);
 		mtx_destroy(&gif_mtx);
 #ifdef INET6
 		ip6_gif_hlim = 0;
 #endif
 		return 0;
 	}
 
 	return EOPNOTSUPP;
 }
 
 /*
  * Module description for if_gif: module name, event handler and an
  * unused extra argument.
  */
 static moduledata_t gif_mod = {
 	"if_gif",
 	gifmodevent,
 	0
 };
 
 /* Hook the module into the pseudo-device stage of boot. */
 DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_gif, 1);
 
 int
 gif_encapcheck(m, off, proto, arg)
 	const struct mbuf *m;
 	int off;
 	int proto;
 	void *arg;
 {
 	struct ip ip;
 	struct gif_softc *sc;
 
 	sc = (struct gif_softc *)arg;
 	if (sc == NULL)
 		return 0;
 
 	if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
 		return 0;
 
 	/* no physical address */
 	if (!sc->gif_psrc || !sc->gif_pdst)
 		return 0;
 
 	switch (proto) {
 #ifdef INET
 	case IPPROTO_IPV4:
 		break;
 #endif
 #ifdef INET6
 	case IPPROTO_IPV6:
 		break;
 #endif
 	case IPPROTO_ETHERIP:
 		break;
 
 	default:
 		return 0;
 	}
 
 	/* Bail on short packets */
 	if (m->m_pkthdr.len < sizeof(ip))
 		return 0;
 
 	m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
 
 	switch (ip.ip_v) {
 #ifdef INET
 	case 4:
 		if (sc->gif_psrc->sa_family != AF_INET ||
 		    sc->gif_pdst->sa_family != AF_INET)
 			return 0;
 		return gif_encapcheck4(m, off, proto, arg);
 #endif
 #ifdef INET6
 	case 6:
 		if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
 			return 0;
 		if (sc->gif_psrc->sa_family != AF_INET6 ||
 		    sc->gif_pdst->sa_family != AF_INET6)
 			return 0;
 		return gif_encapcheck6(m, off, proto, arg);
 #endif
 	default:
 		return 0;
 	}
 }
 
 /*
  * if_start routine: drain the interface send queue, passing each packet
  * to gif_output() with the tunnel's outer destination as dst.  The
  * interface is flagged IFF_DRV_OACTIVE while the queue is being drained.
  */
 static void
 gif_start(struct ifnet *ifp)
 {
 	struct gif_softc *sc = ifp->if_softc;
 	struct mbuf *pkt;
 
 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 
 	IFQ_DEQUEUE(&ifp->if_snd, pkt);
 	while (pkt != 0) {
 		gif_output(ifp, pkt, sc->gif_pdst, NULL);
 		IFQ_DEQUEUE(&ifp->if_snd, pkt);
 	}
 
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 }
 
 /*
  * if_output routine for gif tunnels.
  *
  * Guards against tunnel loops and excessive nesting by tagging the mbuf
  * with every gif interface it passes through, taps BPF, stamps the mbuf
  * with the tunnel's FIB number and hands it to the IPv4 or IPv6
  * encapsulation code selected by the outer source address family.
  *
  * The mbuf is consumed on every path.  Returns 0 or an errno value; any
  * error also increments if_oerrors.  The rt argument is unused.
  */
 int
 gif_output(ifp, m, dst, rt)
 	struct ifnet *ifp;
 	struct mbuf *m;
 	struct sockaddr *dst;
 	struct rtentry *rt;	/* added in net2 */
 {
 	struct gif_softc *sc = ifp->if_softc;
 	struct m_tag *mtag;
 	int error = 0;
 	int gif_called;
 	u_int32_t af;
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error) {
 		m_freem(m);
 		goto end;
 	}
 #endif
 
 	/*
 	 * gif may cause infinite recursion calls when misconfigured.
 	 * We'll prevent this by detecting loops.
 	 *
 	 * High nesting level may cause stack exhaustion.
 	 * We'll prevent this by introducing upper limit.
 	 */
 	gif_called = 1;
 	mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
 	while (mtag != NULL) {
 		/* Each tag holds the ifnet of a gif already traversed. */
 		if (*(struct ifnet **)(mtag + 1) == ifp) {
 			log(LOG_NOTICE,
 			    "gif_output: loop detected on %s\n",
 			    (*(struct ifnet **)(mtag + 1))->if_xname);
 			m_freem(m);
 			error = EIO;	/* is there better errno? */
 			goto end;
 		}
 		mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
 		gif_called++;
 	}
 	if (gif_called > max_gif_nesting) {
 		log(LOG_NOTICE,
 		    "gif_output: recursively called too many times(%d)\n",
 		    gif_called);
 		m_freem(m);
 		error = EIO;	/* is there better errno? */
 		goto end;
 	}
 	/* Record this interface so that a later pass can detect a loop. */
 	mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
 	    M_NOWAIT);
 	if (mtag == NULL) {
 		m_freem(m);
 		error = ENOMEM;
 		goto end;
 	}
 	*(struct ifnet **)(mtag + 1) = ifp;
 	m_tag_prepend(m, mtag);
 
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 
 	GIF_LOCK(sc);
 
 	if (!(ifp->if_flags & IFF_UP) ||
 	    sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
 		GIF_UNLOCK(sc);
 		m_freem(m);
 		error = ENETDOWN;
 		goto end;
 	}
 
 	/* BPF writes need to be handled specially. */
 	if (dst->sa_family == AF_UNSPEC) {
 		/* The real address family is carried in sa_data. */
 		bcopy(dst->sa_data, &af, sizeof(af));
 		dst->sa_family = af;
 	}
 
 	af = dst->sa_family;
 	BPF_MTAP2(ifp, &af, sizeof(af), m);
 	ifp->if_opackets++;	
 	ifp->if_obytes += m->m_pkthdr.len;
 
 	/* override to IPPROTO_ETHERIP for bridged traffic */
 	if (ifp->if_bridge)
 		af = AF_LINK;
 
+	M_SETFIB(m, sc->gif_fibnum);
 	/* inner AF-specific encapsulation */
 
 	/* XXX should we check if our outer source is legal? */
 
 	/* dispatch to output logic based on outer AF */
 	switch (sc->gif_psrc->sa_family) {
 #ifdef INET
 	case AF_INET:
 		error = in_gif_output(ifp, af, m);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		error = in6_gif_output(ifp, af, m);
 		break;
 #endif
 	default:
 		m_freem(m);		
 		error = ENETDOWN;
 	}
 
 	GIF_UNLOCK(sc);
   end:
 	if (error)
 		ifp->if_oerrors++;
 	return (error);
 }
 
 /*
  * Deliver packet m, received on gif interface ifp, to the consumer selected
  * by the inner address family af.
  *
  * m   - the packet.  Every path below either hands it on (netgraph, netisr,
  *       bridge, ether_demux) or frees it; nothing is returned to the caller.
  * af  - AF_INET / AF_INET6 are dispatched to the matching netisr; AF_LINK is
  *       treated as EtherIP and offered to the bridge; any other value goes
  *       to the netgraph orphan hook if one is installed, else is dropped.
  * ifp - receiving gif interface; the packet is dropped when it is NULL.
  *
  * if_ipackets/if_ibytes are only updated on the netisr paths: every AF_LINK
  * and default path returns before those counters are reached.
  */
 void
 gif_input(m, af, ifp)
 	struct mbuf *m;
 	int af;
 	struct ifnet *ifp;
 {
 	int isr, n;
 	struct etherip_header *eip;
 	struct ether_header *eh;
 	struct ifnet *oldifp;
 
 	if (ifp == NULL) {
 		/* just in case */
 		m_freem(m);
 		return;
 	}
 
 	/* From here on the packet is accounted to the gif interface. */
 	m->m_pkthdr.rcvif = ifp;
 
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/* Tap the packet with a 4-byte address-family pseudo header. */
 	if (bpf_peers_present(ifp->if_bpf)) {
 		u_int32_t af1 = af;
 		bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
 	}
 
 	/*
 	 * Optional netgraph hook.  It receives &m and signals that it has
 	 * taken the packet by leaving m == NULL.
 	 */
 	if (ng_gif_input_p != NULL) {
 		(*ng_gif_input_p)(ifp, &m, af);
 		if (m == NULL)
 			return;
 	}
 
 	/*
 	 * Put the packet to the network layer input queue according to the
 	 * specified address family.
 	 * Note: older versions of gif_input directly called network layer
 	 * input functions, e.g. ip6_input, here.  We changed the policy to
 	 * prevent too many recursive calls of such input functions, which
 	 * might cause kernel panic.  But the change may introduce another
 	 * problem; if the input queue is full, packets are discarded.
 	 * The kernel stack overflow really happened, and we believed
 	 * queue-full rarely occurs, so we changed the policy.
 	 */
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		isr = NETISR_IP;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 	case AF_LINK:
 		/*
 		 * EtherIP: make sure the EtherIP header and the Ethernet
 		 * header that follows it are contiguous in the first mbuf.
 		 * On m_pullup() failure m is NULL and is not freed here
 		 * (presumably m_pullup already disposed of the chain --
 		 * see mbuf(9)).
 		 */
 		n = sizeof(struct etherip_header) + sizeof(struct ether_header);
 		if (n > m->m_len) {
 			m = m_pullup(m, n);
 			if (m == NULL) {
 				ifp->if_ierrors++;
 				return;
 			}
 		}
 
 		eip = mtod(m, struct etherip_header *);
  		if (eip->eip_ver !=
 		    (ETHERIP_VERSION & ETHERIP_VER_VERS_MASK)) {
 			/* discard unknown versions */
 			m_freem(m);
 			return;
 		}
 		/* Strip the EtherIP header; m now starts at the Ethernet header. */
 		m_adj(m, sizeof(struct etherip_header));
 
 		m->m_flags &= ~(M_BCAST|M_MCAST);
 		m->m_pkthdr.rcvif = ifp;
 
 		/* The frame is only used when this gif is a bridge member. */
 		if (ifp->if_bridge) {
 			oldifp = ifp;
 			eh = mtod(m, struct ether_header *);
 			if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
 				if (ETHER_IS_BROADCAST(eh->ether_dhost))
 					m->m_flags |= M_BCAST;
 				else
 					m->m_flags |= M_MCAST;
 				ifp->if_imcasts++;
 			}
 			/*
 			 * NOTE(review): BRIDGE_INPUT is a macro defined
 			 * elsewhere; the checks below rely on it being able
 			 * to update both ifp and m.
 			 */
 			BRIDGE_INPUT(ifp, m);
 
 			if (m != NULL && ifp != oldifp) {
 				/*
 				 * The bridge gave us back itself or one of the
 				 * members for which the frame is addressed.
 				 */
 				ether_demux(ifp, m);
 				return;
 			}
 		}
 		/* Not bridged, or the bridge did not redirect it: drop. */
 		if (m != NULL)
 			m_freem(m);
 		return;
 
 	default:
 		/* Unknown family: give netgraph a chance, otherwise drop. */
 		if (ng_gif_input_orphan_p != NULL)
 			(*ng_gif_input_orphan_p)(ifp, m, af);
 		else
 			m_freem(m);
 		return;
 	}
 
 	/* Only the AF_INET / AF_INET6 cases reach this point. */
 	ifp->if_ipackets++;
 	ifp->if_ibytes += m->m_pkthdr.len;
 	netisr_dispatch(isr, m);
 }
 
 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
 /*
  * ioctl handler for gif interfaces.
  *
  * ifp  - the gif interface; its softc is taken from ifp->if_softc.
  * cmd  - ioctl command.
  * data - command argument; interpreted as struct ifreq, in_aliasreq,
  *        in6_aliasreq, in6_ifreq or if_laddrreq depending on cmd.
  *
  * Returns 0 on success or an errno value.  Commands handled here:
  *   SIOCSIFADDR                      mark the interface IFF_UP
  *   SIOCSIFDSTADDR, SIOC{ADD,DEL}MULTI,
  *   SIOCGIFMTU, SIOCSIFFLAGS         accepted, nothing done here
  *   SIOCSIFMTU                       set MTU within [GIF_MTU_MIN, GIF_MTU_MAX]
  *   SIOCSIFPHYADDR[_IN6],
  *   SIOCSLIFPHYADDR                  validate and set the tunnel endpoints
  *   SIOCDIFPHYADDR                   delete the tunnel endpoints
  *   SIOCGIFPSRCADDR[_IN6],
  *   SIOCGIFPDSTADDR[_IN6],
  *   SIOCGLIFPHYADDR                  copy the tunnel endpoints out to data
  * Any other command yields EINVAL.
  *
  * Some failure paths return directly while others jump to the "bad" label;
  * both simply return the error, no cleanup is performed at the label.
  */
 int
 gif_ioctl(ifp, cmd, data)
 	struct ifnet *ifp;
 	u_long cmd;
 	caddr_t data;
 {
 	struct gif_softc *sc  = ifp->if_softc;
 	struct ifreq     *ifr = (struct ifreq*)data;
 	int error = 0, size;
 	struct sockaddr *dst, *src;
 #ifdef	SIOCSIFMTU /* xxx */
 	u_long mtu;
 #endif
 
 	switch (cmd) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 		break;
 		
 	case SIOCSIFDSTADDR:
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		break;
 
 #ifdef	SIOCSIFMTU /* xxx */
 	case SIOCGIFMTU:
 		break;
 
 	case SIOCSIFMTU:
 		mtu = ifr->ifr_mtu;
 		if (mtu < GIF_MTU_MIN || mtu > GIF_MTU_MAX)
 			return (EINVAL);
 		ifp->if_mtu = mtu;
 		break;
 #endif /* SIOCSIFMTU */
 
 	/*
 	 * Set the outer (physical) tunnel endpoints.  The three commands
 	 * differ only in the layout of the request structure.
 	 */
 #ifdef INET
 	case SIOCSIFPHYADDR:
 #endif
 #ifdef INET6
 	case SIOCSIFPHYADDR_IN6:
 #endif /* INET6 */
 	case SIOCSLIFPHYADDR:
 		/* Locate src/dst inside the command-specific request. */
 		switch (cmd) {
 #ifdef INET
 		case SIOCSIFPHYADDR:
 			src = (struct sockaddr *)
 				&(((struct in_aliasreq *)data)->ifra_addr);
 			dst = (struct sockaddr *)
 				&(((struct in_aliasreq *)data)->ifra_dstaddr);
 			break;
 #endif
 #ifdef INET6
 		case SIOCSIFPHYADDR_IN6:
 			src = (struct sockaddr *)
 				&(((struct in6_aliasreq *)data)->ifra_addr);
 			dst = (struct sockaddr *)
 				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
 			break;
 #endif
 		case SIOCSLIFPHYADDR:
 			src = (struct sockaddr *)
 				&(((struct if_laddrreq *)data)->addr);
 			dst = (struct sockaddr *)
 				&(((struct if_laddrreq *)data)->dstaddr);
 			break;
 		default:
 			return EINVAL;
 		}
 
 		/* sa_family must be equal */
 		if (src->sa_family != dst->sa_family)
 			return EINVAL;
 
 		/* validate sa_len */
 		switch (src->sa_family) {
 #ifdef INET
 		case AF_INET:
 			if (src->sa_len != sizeof(struct sockaddr_in))
 				return EINVAL;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			if (src->sa_len != sizeof(struct sockaddr_in6))
 				return EINVAL;
 			break;
 #endif
 		default:
 			return EAFNOSUPPORT;
 		}
 		switch (dst->sa_family) {
 #ifdef INET
 		case AF_INET:
 			if (dst->sa_len != sizeof(struct sockaddr_in))
 				return EINVAL;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			if (dst->sa_len != sizeof(struct sockaddr_in6))
 				return EINVAL;
 			break;
 #endif
 		default:
 			return EAFNOSUPPORT;
 		}
 
 		/* check sa_family looks sane for the cmd */
 		/*
 		 * NOTE(review): unlike the labels above, the SIOCSIFPHYADDR
 		 * label here is not wrapped in #ifdef INET.
 		 */
 		switch (cmd) {
 		case SIOCSIFPHYADDR:
 			if (src->sa_family == AF_INET)
 				break;
 			return EAFNOSUPPORT;
 #ifdef INET6
 		case SIOCSIFPHYADDR_IN6:
 			if (src->sa_family == AF_INET6)
 				break;
 			return EAFNOSUPPORT;
 #endif /* INET6 */
 		case SIOCSLIFPHYADDR:
 			/* checks done in the above */
 			break;
 		}
 
 		error = gif_set_tunnel(GIF2IFP(sc), src, dst);
 		break;
 
 #ifdef SIOCDIFPHYADDR
 	case SIOCDIFPHYADDR:
 		gif_delete_tunnel(GIF2IFP(sc));
 		break;
 #endif
 			
 	/* Report the outer source address. */
 	case SIOCGIFPSRCADDR:
 #ifdef INET6
 	case SIOCGIFPSRCADDR_IN6:
 #endif /* INET6 */
 		if (sc->gif_psrc == NULL) {
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 		src = sc->gif_psrc;
 		/* Pick the output slot and its capacity for this request. */
 		switch (cmd) {
 #ifdef INET
 		case SIOCGIFPSRCADDR:
 			dst = &ifr->ifr_addr;
 			size = sizeof(ifr->ifr_addr);
 			break;
 #endif /* INET */
 #ifdef INET6
 		case SIOCGIFPSRCADDR_IN6:
 			dst = (struct sockaddr *)
 				&(((struct in6_ifreq *)data)->ifr_addr);
 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
 			break;
 #endif /* INET6 */
 		default:
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 		/* Refuse to overflow the caller's address slot. */
 		if (src->sa_len > size)
 			return EINVAL;
 		bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
 #ifdef INET6
 		/* Convert the stored IPv6 scope back for the caller. */
 		if (dst->sa_family == AF_INET6) {
 			error = sa6_recoverscope((struct sockaddr_in6 *)dst);
 			if (error != 0)
 				return (error);
 		}
 #endif
 		break;
 			
 	/* Report the outer destination address; mirrors the case above. */
 	case SIOCGIFPDSTADDR:
 #ifdef INET6
 	case SIOCGIFPDSTADDR_IN6:
 #endif /* INET6 */
 		if (sc->gif_pdst == NULL) {
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 		src = sc->gif_pdst;
 		switch (cmd) {
 #ifdef INET
 		case SIOCGIFPDSTADDR:
 			dst = &ifr->ifr_addr;
 			size = sizeof(ifr->ifr_addr);
 			break;
 #endif /* INET */
 #ifdef INET6
 		case SIOCGIFPDSTADDR_IN6:
 			dst = (struct sockaddr *)
 				&(((struct in6_ifreq *)data)->ifr_addr);
 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
 			break;
 #endif /* INET6 */
 		default:
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 		if (src->sa_len > size)
 			return EINVAL;
 		bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
 #ifdef INET6
 		if (dst->sa_family == AF_INET6) {
 			error = sa6_recoverscope((struct sockaddr_in6 *)dst);
 			if (error != 0)
 				return (error);
 		}
 #endif
 		break;
 
 	/*
 	 * Report both endpoints at once.  Unlike the two cases above, no
 	 * sa6_recoverscope() call is made on the copies here.
 	 */
 	case SIOCGLIFPHYADDR:
 		if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) {
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 
 		/* copy src */
 		src = sc->gif_psrc;
 		dst = (struct sockaddr *)
 			&(((struct if_laddrreq *)data)->addr);
 		size = sizeof(((struct if_laddrreq *)data)->addr);
 		if (src->sa_len > size)
 			return EINVAL;
 		bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
 
 		/* copy dst */
 		src = sc->gif_pdst;
 		dst = (struct sockaddr *)
 			&(((struct if_laddrreq *)data)->dstaddr);
 		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
 		if (src->sa_len > size)
 			return EINVAL;
 		bcopy((caddr_t)src, (caddr_t)dst, src->sa_len);
 		break;
 
 	case SIOCSIFFLAGS:
 		/* if_ioctl() takes care of it */
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
  bad:
 	return error;
 }
 
 /*
  * Set the outer (physical) source and destination address of a gif tunnel.
  *
  * ifp - the gif interface to configure.
  * src - new outer source address; copied, the caller keeps ownership.
  * dst - new outer destination address; copied likewise.
  *
  * Returns 0 on success, EADDRNOTAVAIL when another gif already uses the
  * same src/dst pair and parallel_tunnels is off, or the error reported by
  * sa6_embedscope() / in_gif_attach() / in6_gif_attach().  On an attach
  * error the previous address pointers are restored.  On every exit path
  * IFF_DRV_RUNNING is set iff both addresses are present afterwards.
  *
  * The allocations use M_WAITOK, so this may sleep.
  *
  * XXXRW: There's a general event-ordering issue here: the code to check
  * if a given tunnel is already present happens before we perform a
  * potentially blocking setup of the tunnel.  This code needs to be
  * re-ordered so that the check and replacement can be atomic using
  * a mutex.
  */
 int
 gif_set_tunnel(ifp, src, dst)
 	struct ifnet *ifp;
 	struct sockaddr *src;
 	struct sockaddr *dst;
 {
 	struct gif_softc *sc = ifp->if_softc;
 	struct gif_softc *sc2;
 	struct sockaddr *osrc, *odst, *sa;
 	int error = 0; 
 
 	/*
 	 * Scan all other gif instances for an identical endpoint pair.
 	 * gif_mtx here is the file-level mutex, not the per-softc
 	 * sc->gif_mtx; it is held only for the duration of the scan.
 	 */
 	mtx_lock(&gif_mtx);
 	LIST_FOREACH(sc2, &gif_softc_list, gif_list) {
 		if (sc2 == sc)
 			continue;
 		if (!sc2->gif_pdst || !sc2->gif_psrc)
 			continue;
 		/* Different family or length: cannot be the same tunnel. */
 		if (sc2->gif_pdst->sa_family != dst->sa_family ||
 		    sc2->gif_pdst->sa_len != dst->sa_len ||
 		    sc2->gif_psrc->sa_family != src->sa_family ||
 		    sc2->gif_psrc->sa_len != src->sa_len)
 			continue;
 
 		/*
 		 * Disallow parallel tunnels unless instructed
 		 * otherwise.
 		 */
 		if (!parallel_tunnels &&
 		    bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 &&
 		    bcmp(sc2->gif_psrc, src, src->sa_len) == 0) {
 			error = EADDRNOTAVAIL;
 			mtx_unlock(&gif_mtx);
 			goto bad;
 		}
 
 		/* XXX both end must be valid? (I mean, not 0.0.0.0) */
 	}
 	mtx_unlock(&gif_mtx);
 
 	/* XXX we can detach from both, but be polite just in case */
 	if (sc->gif_psrc)
 		switch (sc->gif_psrc->sa_family) {
 #ifdef INET
 		case AF_INET:
 			(void)in_gif_detach(sc);
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			(void)in6_gif_detach(sc);
 			break;
 #endif
 		}
 
 	/*
 	 * Install private copies of the new addresses, remembering the old
 	 * pointers so they can be restored on failure or freed on success.
 	 */
 	osrc = sc->gif_psrc;
 	sa = (struct sockaddr *)malloc(src->sa_len, M_IFADDR, M_WAITOK);
 	bcopy((caddr_t)src, (caddr_t)sa, src->sa_len);
 	sc->gif_psrc = sa;
 
 	odst = sc->gif_pdst;
 	sa = (struct sockaddr *)malloc(dst->sa_len, M_IFADDR, M_WAITOK);
 	bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len);
 	sc->gif_pdst = sa;
 
 	/* Attach for the new outer family; other families leave error at 0. */
 	switch (sc->gif_psrc->sa_family) {
 #ifdef INET
 	case AF_INET:
 		error = in_gif_attach(sc);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		/*
 		 * Check validity of the scope zone ID of the addresses, and
 		 * convert it into the kernel internal form if necessary.
 		 */
 		error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_psrc, 0);
 		if (error != 0)
 			break;
 		error = sa6_embedscope((struct sockaddr_in6 *)sc->gif_pdst, 0);
 		if (error != 0)
 			break;
 		error = in6_gif_attach(sc);
 		break;
 #endif
 	}
 	if (error) {
 		/* rollback */
 		/*
 		 * NOTE(review): only the address pointers are restored; the
 		 * detach performed above is not undone by a matching attach
 		 * call here -- confirm whether that is intended.
 		 */
 		free((caddr_t)sc->gif_psrc, M_IFADDR);
 		free((caddr_t)sc->gif_pdst, M_IFADDR);
 		sc->gif_psrc = osrc;
 		sc->gif_pdst = odst;
 		goto bad;
 	}
 
 	/* Success: the previous addresses are no longer referenced. */
 	if (osrc)
 		free((caddr_t)osrc, M_IFADDR);
 	if (odst)
 		free((caddr_t)odst, M_IFADDR);
 
  bad:
 	/* Reached on success as well: recompute the RUNNING flag. */
 	if (sc->gif_psrc && sc->gif_pdst)
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	else
 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 
 	return error;
 }
 
 /*
  * Remove the outer endpoints of a gif tunnel: free and clear both stored
  * addresses, detach from the INET and INET6 encapsulation code, and clear
  * IFF_DRV_RUNNING on the interface.
  *
  * ifp - the gif interface whose tunnel configuration is removed.
  */
 void
 gif_delete_tunnel(ifp)
 	struct ifnet *ifp;
 {
 	struct gif_softc *sc = ifp->if_softc;
 
 	if (sc->gif_psrc) {
 		free((caddr_t)sc->gif_psrc, M_IFADDR);
 		sc->gif_psrc = NULL;
 	}
 	if (sc->gif_pdst) {
 		free((caddr_t)sc->gif_pdst, M_IFADDR);
 		sc->gif_pdst = NULL;
 	}
 	/* it is safe to detach from both */
 #ifdef INET
 	(void)in_gif_detach(sc);
 #endif
 #ifdef INET6
 	(void)in6_gif_detach(sc);
 #endif
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 }
Index: head/sys/net/if_gif.h
===================================================================
--- head/sys/net/if_gif.h	(revision 178887)
+++ head/sys/net/if_gif.h	(revision 178888)
@@ -1,114 +1,115 @@
 /*	$FreeBSD$	*/
 /*	$KAME: if_gif.h,v 1.17 2000/09/11 11:36:41 sumikawa Exp $	*/
 
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * if_gif.h
  */
 
 #ifndef _NET_IF_GIF_H_
 #define _NET_IF_GIF_H_
 
 
 #ifdef _KERNEL
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <netinet/in.h>
 /* xxx sigh, why route have struct route instead of pointer? */
 
 struct encaptab;
 
 extern	void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp,
 		int af);
 extern	void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m,
 		int af);
 extern	int  (*ng_gif_output_p)(struct ifnet *ifp, struct mbuf **mp);
 extern	void (*ng_gif_attach_p)(struct ifnet *ifp);
 extern	void (*ng_gif_detach_p)(struct ifnet *ifp);
 
 /*
  * Per-interface state of a gif tunnel.
  */
 struct gif_softc {
 	struct ifnet	*gif_ifp;	/* owning ifnet, see GIF2IFP() */
 	struct mtx	gif_mtx;	/* per-softc mutex, see GIF_LOCK() */
 	struct sockaddr	*gif_psrc; /* Physical src addr */
 	struct sockaddr	*gif_pdst; /* Physical dst addr */
 	/* Route storage, accessed through the gif_ro / gif_ro6 macros. */
 	union {
 		struct route  gifscr_ro;    /* xxx */
 #ifdef INET6
 		struct route_in6 gifscr_ro6; /* xxx */
 #endif
 	} gifsc_gifscr;
 	int		gif_flags;
+	u_int		gif_fibnum;	/* FIB number stamped on outgoing mbufs via M_SETFIB() */
 	/* presumably the encap attachments for the IPv4 / IPv6 outer side -- TODO confirm */
 	const struct encaptab *encap_cookie4;
 	const struct encaptab *encap_cookie6;
 	void		*gif_netgraph;	/* ng_gif(4) netgraph node info */
 	LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */
 };
 /* Map a gif softc to its ifnet. */
 #define	GIF2IFP(sc)	((sc)->gif_ifp)
 /*
  * Helpers for the per-softc mutex (sc->gif_mtx): initialise, destroy,
  * acquire, release, and assert that the mutex is owned.
  */
 #define	GIF_LOCK_INIT(sc)	mtx_init(&(sc)->gif_mtx, "gif softc",	\
 				     NULL, MTX_DEF)
 #define	GIF_LOCK_DESTROY(sc)	mtx_destroy(&(sc)->gif_mtx)
 #define	GIF_LOCK(sc)		mtx_lock(&(sc)->gif_mtx)
 #define	GIF_UNLOCK(sc)		mtx_unlock(&(sc)->gif_mtx)
 #define	GIF_LOCK_ASSERT(sc)	mtx_assert(&(sc)->gif_mtx, MA_OWNED)
 
 #define gif_ro gifsc_gifscr.gifscr_ro
 #ifdef INET6
 #define gif_ro6 gifsc_gifscr.gifscr_ro6
 #endif
 
 #define GIF_MTU		(1280)	/* Default MTU */
 #define	GIF_MTU_MIN	(1280)	/* Minimum MTU */
 #define	GIF_MTU_MAX	(8192)	/* Maximum MTU */
 
 #define	MTAG_GIF	1080679712
 #define	MTAG_GIF_CALLED	0
 
 /*
  * Two-byte EtherIP header that precedes the encapsulated Ethernet frame.
  * The masks below split eip_ver into its version and reserved nibbles.
  */
 struct etherip_header {
 	u_int8_t eip_ver;	/* version/reserved */
 	u_int8_t eip_pad;	/* required padding byte */
 };
 #define ETHERIP_VER_VERS_MASK   0x0f	/* version bits of eip_ver */
 #define ETHERIP_VER_RSVD_MASK   0xf0	/* reserved bits of eip_ver */
 #define ETHERIP_VERSION         0x03	/* version value expected on input */
 
 /* Prototypes */
 void gif_input(struct mbuf *, int, struct ifnet *);
 int gif_output(struct ifnet *, struct mbuf *, struct sockaddr *,
 	       struct rtentry *);
 int gif_ioctl(struct ifnet *, u_long, caddr_t);
 int gif_set_tunnel(struct ifnet *, struct sockaddr *, struct sockaddr *);
 void gif_delete_tunnel(struct ifnet *);
 int gif_encapcheck(const struct mbuf *, int, int, void *);
 
 #endif /* _KERNEL */
 
 #endif /* _NET_IF_GIF_H_ */
Index: head/sys/net/if_gre.c
===================================================================
--- head/sys/net/if_gre.c	(revision 178887)
+++ head/sys/net/if_gre.c	(revision 178888)
@@ -1,854 +1,859 @@
 /*	$NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */
 /*	 $FreeBSD$ */
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Heiko W.Rupp <hwr@pilhuhn.de>
  *
  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *        This product includes software developed by the NetBSD
  *        Foundation, Inc. and its contributors.
  * 4. Neither the name of The NetBSD Foundation nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * Encapsulate L3 protocols into IP
  * See RFC 2784 (successor of RFC 1701 and 1702) for more details.
  * If_gre is compatible with Cisco GRE tunnels, so you can
  * have a NetBSD box as the other end of a tunnel interface of a Cisco
  * router. See gre(4) for more details.
  * Also supported:  IP in IP encaps (proto 55) as of RFC 2004
  */
 
 #include "opt_atalk.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
+#include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/route.h>
 
 #ifdef INET
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_gre.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_encap.h>
 #else
 #error "Huh? if_gre without inet?"
 #endif
 
 #include <net/bpf.h>
 
 #include <net/if_gre.h>
 
 /*
  * It is not easy to calculate the right value for a GRE MTU.
  * We leave this task to the admin and use the same default that
  * other vendors use.
  */
 #define GREMTU	1476
 
 #define GRENAME	"gre"
 
 /*
  * gre_mtx protects all global variables in if_gre.c.
  * XXX: gre_softc data not protected yet.
  */
 struct mtx gre_mtx;
 static MALLOC_DEFINE(M_GRE, GRENAME, "Generic Routing Encapsulation");
 
 struct gre_softc_head gre_softc_list;
 
 static int	gre_clone_create(struct if_clone *, int, caddr_t);
 static void	gre_clone_destroy(struct ifnet *);
 static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
 static int	gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
 		    struct rtentry *rt);
 
 IFC_SIMPLE_DECLARE(gre, 0);
 
 static int gre_compute_route(struct gre_softc *sc);
 
 static void	greattach(void);
 
 #ifdef INET
 extern struct domain inetdomain;
 /*
  * Protocol switch entry for IPPROTO_GRE: input goes to gre_input(), the
  * remaining handlers are the raw-IP (rip_*) ones.
  */
 static const struct protosw in_gre_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_GRE,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		gre_input,
 	.pr_output =		(pr_output_t *)rip_output,
 	.pr_ctlinput =		rip_ctlinput,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 };
 /*
  * Protocol switch entry for IPPROTO_MOBILE: identical to the GRE entry
  * except that input goes to gre_mobile_input().
  */
 static const struct protosw in_mobile_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_MOBILE,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		gre_mobile_input,
 	.pr_output =		(pr_output_t *)rip_output,
 	.pr_ctlinput =		rip_ctlinput,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 };
 #endif
 
 SYSCTL_DECL(_net_link);
 SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
     "Generic Routing Encapsulation");
 #ifndef MAX_GRE_NEST
 /*
  * This macro controls the default upper limit on the nesting of gre tunnels.
  * Because a large value combined with a careless configuration may crash
  * the system, no nesting is allowed by default.
  * If you need to configure nested gre tunnels, you can define this macro
  * in your kernel configuration file.  However, if you do so, please be
  * careful to configure the tunnels so that they do not form a loop.
  */
 #define MAX_GRE_NEST 1
 #endif
 /* Run-time nesting limit checked by gre_output(); read-write sysctl. */
 static int max_gre_nesting = MAX_GRE_NEST;
 SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW,
     &max_gre_nesting, 0, "Max nested tunnels");
 
 /*
  * One-time driver setup: initialise the global gre mutex and the softc
  * list, then register the gre interface cloner.
  */
 /* ARGSUSED */
 static void
 greattach(void)
 {
 
 	mtx_init(&gre_mtx, "gre_mtx", NULL, MTX_DEF);
 	LIST_INIT(&gre_softc_list);
 	if_clone_attach(&gre_cloner);
 }
 
 /*
  * Cloner callback: create gre interface number `unit'.
  *
  * ifc    - the cloner; supplies the interface name prefix.
  * unit   - unit number for the new interface.
  * params - not used by this function.
  *
  * Returns 0 on success, or ENOSPC when if_alloc() fails (the softc is
  * freed again in that case).  The softc allocation uses M_WAITOK.
  */
 static int
 gre_clone_create(ifc, unit, params)
 	struct if_clone *ifc;
 	int unit;
 	caddr_t params;
 {
 	struct gre_softc *sc;
 
 	sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
 
 	GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
 	if (GRE2IFP(sc) == NULL) {
 		free(sc, M_GRE);
 		return (ENOSPC);
 	}
 
 	GRE2IFP(sc)->if_softc = sc;
 	if_initname(GRE2IFP(sc), ifc->ifc_name, unit);
 
 	GRE2IFP(sc)->if_snd.ifq_maxlen = IFQ_MAXLEN;
 	GRE2IFP(sc)->if_addrlen = 0;
 	GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */
 	GRE2IFP(sc)->if_mtu = GREMTU;
 	GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
 	GRE2IFP(sc)->if_output = gre_output;
 	GRE2IFP(sc)->if_ioctl = gre_ioctl;
 	/* No endpoints yet; default to GRE encapsulation (IFF_LINK0 set). */
 	sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
 	sc->g_proto = IPPROTO_GRE;
 	GRE2IFP(sc)->if_flags |= IFF_LINK0;
 	sc->encap = NULL;
 	sc->called = 0;
 	/* The tunnel inherits the FIB of the process that creates it. */
+	sc->gre_fibnum = curthread->td_proc->p_fibnum;
 	sc->wccp_ver = WCCP_V1;
 	if_attach(GRE2IFP(sc));
 	bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
 	/* Publish the new softc on the global list. */
 	mtx_lock(&gre_mtx);
 	LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
 	mtx_unlock(&gre_mtx);
 	return (0);
 }
 
 /*
  * Cloner callback: destroy a gre interface.  The softc is unlinked from
  * the global list first, then the encap attachment (if any), the BPF tap
  * and the ifnet are torn down, and finally the softc itself is freed.
  *
  * ifp - the gre interface to destroy.
  */
 static void
 gre_clone_destroy(ifp)
 	struct ifnet *ifp;
 {
 	struct gre_softc *sc = ifp->if_softc;
 
 	mtx_lock(&gre_mtx);
 	LIST_REMOVE(sc, sc_list);
 	mtx_unlock(&gre_mtx);
 
 #ifdef INET
 	if (sc->encap != NULL)
 		encap_detach(sc->encap);
 #endif
 	bpfdetach(ifp);
 	if_detach(ifp);
 	if_free(ifp);
 	free(sc, M_GRE);
 }
 
 /*
  * The output routine. Takes a packet and encapsulates it in the protocol
  * given by sc->g_proto. See also RFC 1701 and RFC 2004
  *
  * ifp - the gre interface; tunnel endpoints and g_proto come from its softc.
  * m   - packet to send.  It is consumed on every path: freed on error,
  *       otherwise handed to ip_output().
  * dst - destination of the inner packet; its sa_family selects the payload
  *       type.  For AF_UNSPEC (BPF writes) the real family is read from
  *       sa_data and written back into dst->sa_family.
  * rt  - not referenced by this function.
  *
  * Returns 0 or an errno value; if_oerrors is incremented for any non-zero
  * result.  sc->called, the recursion counter, is reset to 0 on every exit.
  */
 static int
 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
 	   struct rtentry *rt)
 {
 	int error = 0;
 	struct gre_softc *sc = ifp->if_softc;
 	struct greip *gh;
 	struct ip *ip;
 	u_short ip_id = 0;
 	uint8_t ip_tos = 0;
 	u_int16_t etype = 0;
 	struct mobile_h mob_h;
 	u_int32_t af;
 
 	/*
 	 * gre may cause infinite recursion calls when misconfigured.
 	 * We'll prevent this by introducing upper limit.
 	 */
 	if (++(sc->called) > max_gre_nesting) {
 		printf("%s: gre_output: recursively called too many "
 		       "times(%d)\n", if_name(GRE2IFP(sc)), sc->called);
 		m_freem(m);
 		error = EIO;    /* is there better errno? */
 		goto end;
 	}
 
 	/* The interface must be up, running, and have both endpoints set. */
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) ||
 	    sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
 		m_freem(m);
 		error = ENETDOWN;
 		goto end;
 	}
 
 	gh = NULL;
 	ip = NULL;
 
 	/* BPF writes need to be handled specially. */
 	if (dst->sa_family == AF_UNSPEC) {
 		bcopy(dst->sa_data, &af, sizeof(af));
 		dst->sa_family = af;
 	}
 
 	if (bpf_peers_present(ifp->if_bpf)) {
 		af = dst->sa_family;
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
 	}
 
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 
 	if (sc->g_proto == IPPROTO_MOBILE) {
 		/*
 		 * Mobile encapsulation: the original IP header is reused as
 		 * the outer header and a mobile_h, which carries the original
 		 * protocol and address(es), is inserted right after it.
 		 */
 		if (dst->sa_family == AF_INET) {
 			struct mbuf *m0;
 			int msiz;
 
 			ip = mtod(m, struct ip *);
 
 			/*
 			 * RFC2004 specifies that fragmented datagrams shouldn't
 			 * be encapsulated.
 			 */
 			if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
 				_IF_DROP(&ifp->if_snd);
 				m_freem(m);
 				error = EINVAL;    /* is there better errno? */
 				goto end;
 			}
 			memset(&mob_h, 0, MOB_H_SIZ_L);
 			mob_h.proto = (ip->ip_p) << 8;
 			mob_h.odst = ip->ip_dst.s_addr;
 			ip->ip_dst.s_addr = sc->g_dst.s_addr;
 
 			/*
 			 * If the packet comes from our host, we only change
 			 * the destination address in the IP header.
 			 * Else we also need to save and change the source
 			 */
 			if (in_hosteq(ip->ip_src, sc->g_src)) {
 				msiz = MOB_H_SIZ_S;
 			} else {
 				mob_h.proto |= MOB_H_SBIT;
 				mob_h.osrc = ip->ip_src.s_addr;
 				ip->ip_src.s_addr = sc->g_src.s_addr;
 				msiz = MOB_H_SIZ_L;
 			}
 			mob_h.proto = htons(mob_h.proto);
 			mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
 
 			/* Is there room in front of the data for msiz bytes? */
 			if ((m->m_data - msiz) < m->m_pktdat) {
 				/* need new mbuf */
 				MGETHDR(m0, M_DONTWAIT, MT_DATA);
 				if (m0 == NULL) {
 					_IF_DROP(&ifp->if_snd);
 					m_freem(m);
 					error = ENOBUFS;
 					goto end;
 				}
 				/*
 				 * The new head mbuf gets a copy of the IP
 				 * header plus room for mob_h; the old mbuf
 				 * is advanced past its IP header.
 				 */
 				m0->m_next = m;
 				m->m_data += sizeof(struct ip);
 				m->m_len -= sizeof(struct ip);
 				m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
 				m0->m_len = msiz + sizeof(struct ip);
 				m0->m_data += max_linkhdr;
 				memcpy(mtod(m0, caddr_t), (caddr_t)ip,
 				       sizeof(struct ip));
 				m = m0;
 			} else {  /* we have some space left in the old one */
 				/* Slide the IP header back by msiz bytes. */
 				m->m_data -= msiz;
 				m->m_len += msiz;
 				m->m_pkthdr.len += msiz;
 				bcopy(ip, mtod(m, caddr_t),
 					sizeof(struct ip));
 			}
 			ip = mtod(m, struct ip *);
 			memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
 			/*
 			 * NOTE(review): ip_len is converted with ntohs() and
 			 * stored without converting back -- confirm the byte
 			 * order ip_output() expects here.
 			 */
 			ip->ip_len = ntohs(ip->ip_len) + msiz;
 		} else {  /* not AF_INET */
 			_IF_DROP(&ifp->if_snd);
 			m_freem(m);
 			error = EINVAL;
 			goto end;
 		}
 	} else if (sc->g_proto == IPPROTO_GRE) {
 		/* Choose the GRE protocol type from the inner family. */
 		switch (dst->sa_family) {
 		case AF_INET:
 			/* Carry the inner TOS and IP id over to the outer header. */
 			ip = mtod(m, struct ip *);
 			ip_tos = ip->ip_tos;
 			ip_id = ip->ip_id;
 			etype = ETHERTYPE_IP;
 			break;
 #ifdef INET6
 		case AF_INET6:
 			ip_id = ip_newid();
 			etype = ETHERTYPE_IPV6;
 			break;
 #endif
 #ifdef NETATALK
 		case AF_APPLETALK:
 			etype = ETHERTYPE_ATALK;
 			break;
 #endif
 		default:
 			_IF_DROP(&ifp->if_snd);
 			m_freem(m);
 			error = EAFNOSUPPORT;
 			goto end;
 		}
 		/* Make room for the outer IP + GRE header; m is NULL on failure. */
 		M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
 	} else {
 		_IF_DROP(&ifp->if_snd);
 		m_freem(m);
 		error = EINVAL;
 		goto end;
 	}
 
 	if (m == NULL) {	/* mbuf allocation failed */
 		_IF_DROP(&ifp->if_snd);
 		error = ENOBUFS;
 		goto end;
 	}
 
+	M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */
+
 	gh = mtod(m, struct greip *);
 	if (sc->g_proto == IPPROTO_GRE) {
 		/* we don't have any GRE flags for now */
 		memset((void *)gh, 0, sizeof(struct greip));
 		gh->gi_ptype = htons(etype);
 	}
 
 	gh->gi_pr = sc->g_proto;
 	if (sc->g_proto != IPPROTO_MOBILE) {
 		/* Fill in the outer IP header fields for the GRE case. */
 		gh->gi_src = sc->g_src;
 		gh->gi_dst = sc->g_dst;
 		/*
 		 * NOTE(review): the IP version field is set from the
 		 * protocol-number constant IPPROTO_IPV4; presumably both
 		 * are 4 -- confirm.
 		 */
 		((struct ip*)gh)->ip_v = IPPROTO_IPV4;
 		((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
 		((struct ip*)gh)->ip_ttl = GRE_TTL;
 		((struct ip*)gh)->ip_tos = ip_tos;
 		((struct ip*)gh)->ip_id = ip_id;
 		gh->gi_len = m->m_pkthdr.len;
 	}
 
 	ifp->if_opackets++;
 	ifp->if_obytes += m->m_pkthdr.len;
 	/*
 	 * Send it off and with IP_FORWARD flag to prevent it from
 	 * overwriting the ip_id again.  ip_id is already set to the
 	 * ip_id of the encapsulated packet.
 	 */
 	error = ip_output(m, NULL, &sc->route, IP_FORWARDING,
 	    (struct ip_moptions *)NULL, (struct inpcb *)NULL);
   end:
 	/* Common exit: clear the recursion counter and count errors. */
 	sc->called = 0;
 	if (error)
 		ifp->if_oerrors++;
 	return (error);
 }
 
 static int
 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct if_laddrreq *lifr = (struct if_laddrreq *)data;
 	struct in_aliasreq *aifr = (struct in_aliasreq *)data;
 	struct gre_softc *sc = ifp->if_softc;
 	int s;
 	struct sockaddr_in si;
 	struct sockaddr *sa = NULL;
 	int error;
 	struct sockaddr_in sp, sm, dp, dm;
 
 	error = 0;
 
 	s = splnet();
 	switch (cmd) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 		break;
 	case SIOCSIFDSTADDR:
 		break;
 	case SIOCSIFFLAGS:
 		/*
 		 * XXXRW: Isn't this priv_check() redundant to the ifnet
 		 * layer check?
 		 */
 		if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0)
 			break;
 		if ((ifr->ifr_flags & IFF_LINK0) != 0)
 			sc->g_proto = IPPROTO_GRE;
 		else
 			sc->g_proto = IPPROTO_MOBILE;
 		if ((ifr->ifr_flags & IFF_LINK2) != 0)
 			sc->wccp_ver = WCCP_V2;
 		else
 			sc->wccp_ver = WCCP_V1;
 		goto recompute;
 	case SIOCSIFMTU:
 		/*
 		 * XXXRW: Isn't this priv_check() redundant to the ifnet
 		 * layer check?
 		 */
 		if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0)
 			break;
 		if (ifr->ifr_mtu < 576) {
 			error = EINVAL;
 			break;
 		}
 		ifp->if_mtu = ifr->ifr_mtu;
 		break;
 	case SIOCGIFMTU:
 		ifr->ifr_mtu = GRE2IFP(sc)->if_mtu;
 		break;
 	case SIOCADDMULTI:
 		/*
 		 * XXXRW: Isn't this priv_check() redundant to the ifnet
 		 * layer check?
 		 */
 		if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0)
 			break;
 		if (ifr == 0) {
 			error = EAFNOSUPPORT;
 			break;
 		}
 		switch (ifr->ifr_addr.sa_family) {
 #ifdef INET
 		case AF_INET:
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			break;
 #endif
 		default:
 			error = EAFNOSUPPORT;
 			break;
 		}
 		break;
 	case SIOCDELMULTI:
 		/*
 		 * XXXRW: Isn't this priv_check() redundant to the ifnet
 		 * layer check?
 		 */
 		if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0)
 			break;
 		if (ifr == 0) {
 			error = EAFNOSUPPORT;
 			break;
 		}
 		switch (ifr->ifr_addr.sa_family) {
 #ifdef INET
 		case AF_INET:
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			break;
 #endif
 		default:
 			error = EAFNOSUPPORT;
 			break;
 		}
 		break;
 	case GRESPROTO:
 		/*
 		 * XXXRW: Isn't this priv_check() redundant to the ifnet
 		 * layer check?
 		 */
 		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
 			break;
 		sc->g_proto = ifr->ifr_flags;
 		switch (sc->g_proto) {
 		case IPPROTO_GRE:
 			ifp->if_flags |= IFF_LINK0;
 			break;
 		case IPPROTO_MOBILE:
 			ifp->if_flags &= ~IFF_LINK0;
 			break;
 		default:
 			error = EPROTONOSUPPORT;
 			break;
 		}
 		goto recompute;
 	case GREGPROTO:
 		ifr->ifr_flags = sc->g_proto;
 		break;
 	case GRESADDRS:
 	case GRESADDRD:
 		error = priv_check(curthread, PRIV_NET_GRE);
 		if (error)
 			return (error);
 		/*
 		 * set tunnel endpoints, compute a less specific route
 		 * to the remote end and mark if as up
 		 */
 		sa = &ifr->ifr_addr;
 		if (cmd == GRESADDRS)
 			sc->g_src = (satosin(sa))->sin_addr;
 		if (cmd == GRESADDRD)
 			sc->g_dst = (satosin(sa))->sin_addr;
 	recompute:
 #ifdef INET
 		if (sc->encap != NULL) {
 			encap_detach(sc->encap);
 			sc->encap = NULL;
 		}
 #endif
 		if ((sc->g_src.s_addr != INADDR_ANY) &&
 		    (sc->g_dst.s_addr != INADDR_ANY)) {
 			bzero(&sp, sizeof(sp));
 			bzero(&sm, sizeof(sm));
 			bzero(&dp, sizeof(dp));
 			bzero(&dm, sizeof(dm));
 			sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len =
 			    sizeof(struct sockaddr_in);
 			sp.sin_family = sm.sin_family = dp.sin_family =
 			    dm.sin_family = AF_INET;
 			sp.sin_addr = sc->g_src;
 			dp.sin_addr = sc->g_dst;
 			sm.sin_addr.s_addr = dm.sin_addr.s_addr =
 			    INADDR_BROADCAST;
 #ifdef INET
 			sc->encap = encap_attach(AF_INET, sc->g_proto,
 			    sintosa(&sp), sintosa(&sm), sintosa(&dp),
 			    sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ?
 				&in_gre_protosw : &in_mobile_protosw, sc);
 			if (sc->encap == NULL)
 				printf("%s: unable to attach encap\n",
 				    if_name(GRE2IFP(sc)));
 #endif
 			if (sc->route.ro_rt != 0) /* free old route */
 				RTFREE(sc->route.ro_rt);
 			if (gre_compute_route(sc) == 0)
 				ifp->if_drv_flags |= IFF_DRV_RUNNING;
 			else
 				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 		}
 		break;
 	case GREGADDRS:
 		memset(&si, 0, sizeof(si));
 		si.sin_family = AF_INET;
 		si.sin_len = sizeof(struct sockaddr_in);
 		si.sin_addr.s_addr = sc->g_src.s_addr;
 		sa = sintosa(&si);
 		ifr->ifr_addr = *sa;
 		break;
 	case GREGADDRD:
 		memset(&si, 0, sizeof(si));
 		si.sin_family = AF_INET;
 		si.sin_len = sizeof(struct sockaddr_in);
 		si.sin_addr.s_addr = sc->g_dst.s_addr;
 		sa = sintosa(&si);
 		ifr->ifr_addr = *sa;
 		break;
 	case SIOCSIFPHYADDR:
 		/*
 		 * XXXRW: Isn't this priv_check() redundant to the ifnet
 		 * layer check?
 		 */
 		if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
 			break;
 		if (aifr->ifra_addr.sin_family != AF_INET ||
 		    aifr->ifra_dstaddr.sin_family != AF_INET) {
 			error = EAFNOSUPPORT;
 			break;
 		}
 		if (aifr->ifra_addr.sin_len != sizeof(si) ||
 		    aifr->ifra_dstaddr.sin_len != sizeof(si)) {
 			error = EINVAL;
 			break;
 		}
 		sc->g_src = aifr->ifra_addr.sin_addr;
 		sc->g_dst = aifr->ifra_dstaddr.sin_addr;
 		goto recompute;
 	case SIOCSLIFPHYADDR:
 		/*
 		 * XXXRW: Isn't this priv_check() redundant to the ifnet
 		 * layer check?
 		 */
 		if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
 			break;
 		if (lifr->addr.ss_family != AF_INET ||
 		    lifr->dstaddr.ss_family != AF_INET) {
 			error = EAFNOSUPPORT;
 			break;
 		}
 		if (lifr->addr.ss_len != sizeof(si) ||
 		    lifr->dstaddr.ss_len != sizeof(si)) {
 			error = EINVAL;
 			break;
 		}
 		sc->g_src = (satosin(&lifr->addr))->sin_addr;
 		sc->g_dst =
 		    (satosin(&lifr->dstaddr))->sin_addr;
 		goto recompute;
 	case SIOCDIFPHYADDR:
 		/*
 		 * XXXRW: Isn't this priv_check() redundant to the ifnet
 		 * layer check?
 		 */
 		if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0)
 			break;
 		sc->g_src.s_addr = INADDR_ANY;
 		sc->g_dst.s_addr = INADDR_ANY;
 		goto recompute;
 	case SIOCGLIFPHYADDR:
 		if (sc->g_src.s_addr == INADDR_ANY ||
 		    sc->g_dst.s_addr == INADDR_ANY) {
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		memset(&si, 0, sizeof(si));
 		si.sin_family = AF_INET;
 		si.sin_len = sizeof(struct sockaddr_in);
 		si.sin_addr.s_addr = sc->g_src.s_addr;
 		memcpy(&lifr->addr, &si, sizeof(si));
 		si.sin_addr.s_addr = sc->g_dst.s_addr;
 		memcpy(&lifr->dstaddr, &si, sizeof(si));
 		break;
 	case SIOCGIFPSRCADDR:
 #ifdef INET6
 	case SIOCGIFPSRCADDR_IN6:
 #endif
 		if (sc->g_src.s_addr == INADDR_ANY) {
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		memset(&si, 0, sizeof(si));
 		si.sin_family = AF_INET;
 		si.sin_len = sizeof(struct sockaddr_in);
 		si.sin_addr.s_addr = sc->g_src.s_addr;
 		bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
 		break;
 	case SIOCGIFPDSTADDR:
 #ifdef INET6
 	case SIOCGIFPDSTADDR_IN6:
 #endif
 		if (sc->g_dst.s_addr == INADDR_ANY) {
 			error = EADDRNOTAVAIL;
 			break;
 		}
 		memset(&si, 0, sizeof(si));
 		si.sin_family = AF_INET;
 		si.sin_len = sizeof(struct sockaddr_in);
 		si.sin_addr.s_addr = sc->g_dst.s_addr;
 		bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr));
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	splx(s);
 	return (error);
 }
 
 /*
  * computes a route to our destination that is not the one
  * which would be taken by ip_output(), as this one will loop back to
  * us. If the interface is p2p as  a--->b, then a routing entry exists
  * If we now send a packet to b (e.g. ping b), this will come down here
  * gets src=a, dst=b tacked on and would from ip_output() sent back to
  * if_gre.
  * Goal here is to compute a route to b that is less specific than
  * a-->b. We know that this one exists as in normal operation we have
  * at least a default route which matches.
  */
 static int
 gre_compute_route(struct gre_softc *sc)
 {
 	struct route *ro;
 
 	ro = &sc->route;
 
 	memset(ro, 0, sizeof(struct route));
 	((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
 	ro->ro_dst.sa_family = AF_INET;
 	ro->ro_dst.sa_len = sizeof(ro->ro_dst);
 
 	/*
 	 * toggle last bit, so our interface is not found, but a less
 	 * specific route. I'd rather like to specify a shorter mask,
 	 * but this is not possible. Should work though. XXX
+	 * XXX MRT Use a different FIB for the tunnel to solve this problem.
 	 */
 	if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) {
 		((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^=
 		    htonl(0x01);
 	}
 
 #ifdef DIAGNOSTIC
 	printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)),
 	    inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
 #endif
 
-	rtalloc(ro);
+	rtalloc_fib(ro, sc->gre_fibnum);
 
 	/*
 	 * check if this returned a route at all and this route is no
 	 * recursion to ourself
 	 */
 	if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
 #ifdef DIAGNOSTIC
 		if (ro->ro_rt == NULL)
 			printf(" - no route found!\n");
 		else
 			printf(" - route loops back to ourself!\n");
 #endif
 		return EADDRNOTAVAIL;
 	}
 
 	/*
 	 * now change it back - else ip_output will just drop
 	 * the route and search one to this interface ...
 	 */
 	if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0)
 		((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
 
 #ifdef DIAGNOSTIC
 	printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp),
 	    inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
 	printf("\n");
 #endif
 
 	return 0;
 }
 
 /*
  * do a checksum of a buffer - much like in_cksum, which operates on
  * mbufs.
  */
 u_int16_t
 gre_in_cksum(u_int16_t *p, u_int len)
 {
 	u_int32_t sum = 0;
 	int nwords = len >> 1;
 
 	while (nwords-- != 0)
 		sum += *p++;
 
 	if (len & 1) {
 		union {
 			u_short w;
 			u_char c[2];
 		} u;
 		u.c[0] = *(u_char *)p;
 		u.c[1] = 0;
 		sum += u.w;
 	}
 
 	/* end-around-carry */
 	sum = (sum >> 16) + (sum & 0xffff);
 	sum += (sum >> 16);
 	return (~sum);
 }
 
 static int
 gremodevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 		greattach();
 		break;
 	case MOD_UNLOAD:
 		if_clone_detach(&gre_cloner);
 		mtx_destroy(&gre_mtx);
 		break;
 	default:
 		return EOPNOTSUPP;
 	}
 	return 0;
 }
 
 static moduledata_t gre_mod = {
 	"if_gre",
 	gremodevent,
 	0
 };
 
 DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_gre, 1);
Index: head/sys/net/if_gre.h
===================================================================
--- head/sys/net/if_gre.h	(revision 178887)
+++ head/sys/net/if_gre.h	(revision 178888)
@@ -1,186 +1,187 @@
 /*	$NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ */
 /*	 $FreeBSD$ */
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
  * All rights reserved
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Heiko W.Rupp <hwr@pilhuhn.de>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *        This product includes software developed by the NetBSD
  *        Foundation, Inc. and its contributors.
  * 4. Neither the name of The NetBSD Foundation nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #ifndef _NET_IF_GRE_H
 #define _NET_IF_GRE_H
 
 #include <sys/ioccom.h>
 #ifdef _KERNEL
 #include <sys/queue.h>
 
 /*
  * Version of the WCCP, need to be configured manually since
  * header for version 2 is the same but IP payload is prepended
  * with additional 4-bytes field.
  */
 typedef enum {
 	WCCP_V1 = 0,
 	WCCP_V2
 } wccp_ver_t;
 
 struct gre_softc {
 	struct ifnet *sc_ifp;
 	LIST_ENTRY(gre_softc) sc_list;
 	int gre_unit;
 	int gre_flags;
+	u_int	gre_fibnum;	/* use this fib for envelopes */
 	struct in_addr g_src;	/* source address of gre packets */
 	struct in_addr g_dst;	/* destination address of gre packets */
 	struct route route;	/* routing entry that determines, where a
 				   encapsulated packet should go */
 	u_char g_proto;		/* protocol of encapsulator */
 
 	const struct encaptab *encap;	/* encapsulation cookie */
 
 	int called;		/* infinite recursion preventer */
 
 	wccp_ver_t wccp_ver;	/* version of the WCCP */
 };
 #define	GRE2IFP(sc)	((sc)->sc_ifp)
 
 
 struct gre_h {
 	u_int16_t flags;	/* GRE flags */
 	u_int16_t ptype;	/* protocol type of payload typically
 				   Ether protocol type*/
 /*
  *  from here on: fields are optional, presence indicated by flags
  *
 	u_int_16 checksum	checksum (one-complements of GRE header
 				and payload
 				Present if (ck_pres | rt_pres == 1).
 				Valid if (ck_pres == 1).
 	u_int_16 offset		offset from start of routing filed to
 				first octet of active SRE (see below).
 				Present if (ck_pres | rt_pres == 1).
 				Valid if (rt_pres == 1).
 	u_int_32 key		inserted by encapsulator e.g. for
 				authentication
 				Present if (key_pres ==1 ).
 	u_int_32 seq_num	Sequence number to allow for packet order
 				Present if (seq_pres ==1 ).
 	struct gre_sre[] routing Routing fileds (see below)
 				Present if (rt_pres == 1)
  */
 } __packed;
 
 struct greip {
 	struct ip gi_i;
 	struct gre_h  gi_g;
 } __packed;
 
 #define gi_pr		gi_i.ip_p
 #define gi_len		gi_i.ip_len
 #define gi_src		gi_i.ip_src
 #define gi_dst		gi_i.ip_dst
 #define gi_ptype	gi_g.ptype
 #define gi_flags	gi_g.flags
 
 #define GRE_CP		0x8000  /* Checksum Present */
 #define GRE_RP		0x4000  /* Routing Present */
 #define GRE_KP		0x2000  /* Key Present */
 #define GRE_SP		0x1000  /* Sequence Present */
 #define GRE_SS		0x0800	/* Strict Source Route */
 
 /*
  * CISCO uses special type for GRE tunnel created as part of WCCP
  * connection, while in fact those packets are just IPv4 encapsulated
  * into GRE.
  */
 #define WCCP_PROTOCOL_TYPE	0x883E
 
 /*
  * gre_sre defines a Source route Entry. These are needed if packets
  * should be routed over more than one tunnel hop by hop
  */
 struct gre_sre {
 	u_int16_t sre_family;	/* address family */
 	u_char	sre_offset;	/* offset to first octet of active entry */
 	u_char	sre_length;	/* number of octets in the SRE.
 				   sre_lengthl==0 -> last entry. */
 	u_char	*sre_rtinfo;	/* the routing information */
 };
 
 struct greioctl {
 	int unit;
 	struct in_addr addr;
 };
 
 /* for mobile encaps */
 
 struct mobile_h {
 	u_int16_t proto;		/* protocol and S-bit */
 	u_int16_t hcrc;			/* header checksum */
 	u_int32_t odst;			/* original destination address */
 	u_int32_t osrc;			/* original source addr, if S-bit set */
 } __packed;
 
 struct mobip_h {
 	struct ip	mi;
 	struct mobile_h	mh;
 } __packed;
 
 
 #define MOB_H_SIZ_S		(sizeof(struct mobile_h) - sizeof(u_int32_t))
 #define MOB_H_SIZ_L		(sizeof(struct mobile_h))
 #define MOB_H_SBIT	0x0080
 
 #define	GRE_TTL	30
 
 #endif /* _KERNEL */
 
 /*
  * ioctls needed to manipulate the interface
  */
 
 #define GRESADDRS	_IOW('i', 101, struct ifreq)
 #define GRESADDRD	_IOW('i', 102, struct ifreq)
 #define GREGADDRS	_IOWR('i', 103, struct ifreq)
 #define GREGADDRD	_IOWR('i', 104, struct ifreq)
 #define GRESPROTO	_IOW('i' , 105, struct ifreq)
 #define GREGPROTO	_IOWR('i', 106, struct ifreq)
 
 #ifdef _KERNEL
 LIST_HEAD(gre_softc_head, gre_softc);
 extern struct mtx gre_mtx;
 extern struct gre_softc_head gre_softc_list;
 
 u_int16_t	gre_in_cksum(u_int16_t *, u_int);
 #endif /* _KERNEL */
 
 #endif
Index: head/sys/net/if_iso88025subr.c
===================================================================
--- head/sys/net/if_iso88025subr.c	(revision 178887)
+++ head/sys/net/if_iso88025subr.c	(revision 178888)
@@ -1,825 +1,826 @@
 /*-
  * Copyright (c) 1998, Larry Lile
  * All rights reserved.
  *
  * For latest sources and information on this driver, please
  * go to http://anarchy.stdio.com.
  *
  * Questions, comments or suggestions should be directed to
  * Larry Lile <lile@stdio.com>.
  * 
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  *
  */
 
 /*
  *
  * General ISO 802.5 (Token Ring) support routines
  * 
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipx.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/sockio.h> 
 
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/if_llc.h>
 #include <net/if_types.h>
 
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/bpf.h>
 #include <net/iso88025.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #endif
 #ifdef INET6
 #include <netinet6/nd6.h>
 #endif
 
 #ifdef IPX
 #include <netipx/ipx.h>
 #include <netipx/ipx_if.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 static const u_char iso88025_broadcastaddr[ISO88025_ADDR_LEN] =
 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
 static int iso88025_resolvemulti (struct ifnet *, struct sockaddr **,
 				  struct sockaddr *);
 
 #define	senderr(e)	do { error = (e); goto bad; } while (0)
 
 /*
  * Perform common duties while attaching to interface list
  */
 void
 iso88025_ifattach(struct ifnet *ifp, const u_int8_t *lla, int bpf)
 {
     struct ifaddr *ifa;
     struct sockaddr_dl *sdl;
 
     ifa = NULL;
 
     ifp->if_type = IFT_ISO88025;
     ifp->if_addrlen = ISO88025_ADDR_LEN;
     ifp->if_hdrlen = ISO88025_HDR_LEN;
 
     if_attach(ifp);	/* Must be called before additional assignments */
 
     ifp->if_output = iso88025_output;
     ifp->if_input = iso88025_input;
     ifp->if_resolvemulti = iso88025_resolvemulti;
     ifp->if_broadcastaddr = iso88025_broadcastaddr;
 
     if (ifp->if_baudrate == 0)
         ifp->if_baudrate = TR_16MBPS; /* 16Mbit should be a safe default */
     if (ifp->if_mtu == 0)
         ifp->if_mtu = ISO88025_DEFAULT_MTU;
 
     ifa = ifp->if_addr;
     KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 
     sdl = (struct sockaddr_dl *)ifa->ifa_addr;
     sdl->sdl_type = IFT_ISO88025;
     sdl->sdl_alen = ifp->if_addrlen;
     bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
 
     if (bpf)
         bpfattach(ifp, DLT_IEEE802, ISO88025_HDR_LEN);
 
     return;
 }
 
 /*
  * Perform common duties while detaching a Token Ring interface
  */
 void
 iso88025_ifdetach(ifp, bpf)
         struct ifnet *ifp;
         int bpf;
 {
 
 	if (bpf)
                 bpfdetach(ifp);
 
 	if_detach(ifp);
 
 	return;
 }
 
 int
 iso88025_ioctl(struct ifnet *ifp, int command, caddr_t data)
 {
         struct ifaddr *ifa;
         struct ifreq *ifr;
         int error;
 
 	ifa = (struct ifaddr *) data;
 	ifr = (struct ifreq *) data;
 	error = 0;
 
         switch (command) {
         case SIOCSIFADDR:
                 ifp->if_flags |= IFF_UP;
 
                 switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
                 case AF_INET:
                         ifp->if_init(ifp->if_softc);    /* before arpwhohas */
                         arp_ifinit(ifp, ifa);
                         break;
 #endif	/* INET */
 #ifdef IPX
                 /*
                  * XXX - This code is probably wrong
                  */
                 case AF_IPX: {
 				struct ipx_addr *ina;
 
 				ina = &(IA_SIPX(ifa)->sipx_addr);
 
 				if (ipx_nullhost(*ina))
 					ina->x_host = *(union ipx_host *)
 							IF_LLADDR(ifp);
 				else
 					bcopy((caddr_t) ina->x_host.c_host,
 					      (caddr_t) IF_LLADDR(ifp),
 					      ISO88025_ADDR_LEN);
 
 				/*
 				 * Set new address
 				 */
 				ifp->if_init(ifp->if_softc);
 			}
 			break;
 #endif	/* IPX */
                 default:
                         ifp->if_init(ifp->if_softc);
                         break;
                 }
                 break;
 
         case SIOCGIFADDR: {
                         struct sockaddr *sa;
 
                         sa = (struct sockaddr *) & ifr->ifr_data;
                         bcopy(IF_LLADDR(ifp),
                               (caddr_t) sa->sa_data, ISO88025_ADDR_LEN);
                 }
                 break;
 
         case SIOCSIFMTU:
                 /*
                  * Set the interface MTU.
                  */
                 if (ifr->ifr_mtu > ISO88025_MAX_MTU) {
                         error = EINVAL;
                 } else {
                         ifp->if_mtu = ifr->ifr_mtu;
                 }
                 break;
 	default:
 		error = EINVAL;			/* XXX netbsd has ENOTTY??? */
 		break;
         }
 
         return (error);
 }
 
 /*
  * ISO88025 encapsulation
  */
 int
 iso88025_output(ifp, m, dst, rt0)
 	struct ifnet *ifp;
 	struct mbuf *m;
 	struct sockaddr *dst;
 	struct rtentry *rt0;
 {
 	u_int16_t snap_type = 0;
 	int loop_copy = 0, error = 0, rif_len = 0;
 	u_char edst[ISO88025_ADDR_LEN];
 	struct iso88025_header *th;
 	struct iso88025_header gen_th;
 	struct sockaddr_dl *sdl = NULL;
 	struct rtentry *rt = NULL;
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		senderr(error);
 #endif
 
 	if (ifp->if_flags & IFF_MONITOR)
 		senderr(ENETDOWN);
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
 		senderr(ENETDOWN);
 	getmicrotime(&ifp->if_lastchange);
 
 	/* Calculate routing info length based on arp table entry */
 	/* XXX any better way to do this ? */
 	if (rt0 != NULL) {
-		error = rt_check(&rt, &rt0, dst);
+		/* XXX MRT: using rt0->rt_fibnum as the FIB is only a guess. */
+		error = rt_check_fib(&rt, &rt0, dst, rt0->rt_fibnum);
 		if (error)
 			goto bad;
 		RT_UNLOCK(rt);
 	}
 
 	if (rt && (sdl = (struct sockaddr_dl *)rt->rt_gateway))
 		if (SDL_ISO88025(sdl)->trld_rcf != 0)
 			rif_len = TR_RCF_RIFLEN(SDL_ISO88025(sdl)->trld_rcf);
 
 	/* Generate a generic 802.5 header for the packet */
 	gen_th.ac = TR_AC;
 	gen_th.fc = TR_LLC_FRAME;
 	(void)memcpy((caddr_t)gen_th.iso88025_shost, IF_LLADDR(ifp),
 		     ISO88025_ADDR_LEN);
 	if (rif_len) {
 		gen_th.iso88025_shost[0] |= TR_RII;
 		if (rif_len > 2) {
 			gen_th.rcf = SDL_ISO88025(sdl)->trld_rcf;
 			(void)memcpy((caddr_t)gen_th.rd,
 				(caddr_t)SDL_ISO88025(sdl)->trld_route,
 				rif_len - 2);
 		}
 	}
 	
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		error = arpresolve(ifp, rt0, m, dst, edst);
 		if (error)
 			return (error == EWOULDBLOCK ? 0 : error);
 		snap_type = ETHERTYPE_IP;
 		break;
 	case AF_ARP:
 	{
 		struct arphdr *ah;
 		ah = mtod(m, struct arphdr *);
 		ah->ar_hrd = htons(ARPHRD_IEEE802);
 
 		loop_copy = -1; /* if this is for us, don't do it */
 
 		switch(ntohs(ah->ar_op)) {
 		case ARPOP_REVREQUEST:
 		case ARPOP_REVREPLY:
 			snap_type = ETHERTYPE_REVARP;
 			break;
 		case ARPOP_REQUEST:
 		case ARPOP_REPLY:
 		default:
 			snap_type = ETHERTYPE_ARP;
 			break;
 		}
 
 		if (m->m_flags & M_BCAST)
 			bcopy(ifp->if_broadcastaddr, edst, ISO88025_ADDR_LEN);
 		else
 			bcopy(ar_tha(ah), edst, ISO88025_ADDR_LEN);
 
 	}
 	break;
 #endif	/* INET */
 #ifdef INET6
 	case AF_INET6:
 		error = nd6_storelladdr(ifp, rt0, m, dst, (u_char *)edst);
 		if (error)
 			return (error);
 		snap_type = ETHERTYPE_IPV6;
 		break;
 #endif	/* INET6 */
 #ifdef IPX
 	case AF_IPX:
 	{
 		u_int8_t	*cp;
 
 		bcopy((caddr_t)&(satoipx_addr(dst).x_host), (caddr_t)edst,
 		      ISO88025_ADDR_LEN);
 
 		M_PREPEND(m, 3, M_WAIT);
 		m = m_pullup(m, 3);
 		if (m == 0)
 			senderr(ENOBUFS);
 		cp = mtod(m, u_int8_t *);
 		*cp++ = ETHERTYPE_IPX_8022;
 		*cp++ = ETHERTYPE_IPX_8022;
 		*cp++ = LLC_UI;
 	}
 	break;
 #endif	/* IPX */
 	case AF_UNSPEC:
 	{
 		struct iso88025_sockaddr_data *sd;
 		/*
 		 * For AF_UNSPEC sockaddr.sa_data must contain all of the
 		 * mac information needed to send the packet.  This allows
 		 * full mac, llc, and source routing function to be controlled.
 		 * llc and source routing information must already be in the
 		 * mbuf provided, ac/fc are set in sa_data.  sockaddr.sa_data
 		 * should be an iso88025_sockaddr_data structure see iso88025.h
 		 */
                 loop_copy = -1;
 		sd = (struct iso88025_sockaddr_data *)dst->sa_data;
 		gen_th.ac = sd->ac;
 		gen_th.fc = sd->fc;
 		(void)memcpy((caddr_t)edst, (caddr_t)sd->ether_dhost,
 			     ISO88025_ADDR_LEN);
 		(void)memcpy((caddr_t)gen_th.iso88025_shost,
 			     (caddr_t)sd->ether_shost, ISO88025_ADDR_LEN);
 		rif_len = 0;
 		break;
 	}
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		senderr(EAFNOSUPPORT);
 		break;
 	}
 
 	/*
 	 * Add LLC header.
 	 */
 	if (snap_type != 0) {
         	struct llc *l;
 		M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
 		if (m == 0)
 			senderr(ENOBUFS);
 		l = mtod(m, struct llc *);
 		l->llc_control = LLC_UI;
 		l->llc_dsap = l->llc_ssap = LLC_SNAP_LSAP;
 		l->llc_snap.org_code[0] =
 			l->llc_snap.org_code[1] =
 			l->llc_snap.org_code[2] = 0;
 		l->llc_snap.ether_type = htons(snap_type);
 	}
 
 	/*
 	 * Add local net header.  If no space in first mbuf,
 	 * allocate another.
 	 */
 	M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_DONTWAIT);
 	if (m == 0)
 		senderr(ENOBUFS);
 	th = mtod(m, struct iso88025_header *);
 	bcopy((caddr_t)edst, (caddr_t)&gen_th.iso88025_dhost, ISO88025_ADDR_LEN);
 
 	/* Copy as much of the generic header as is needed into the mbuf */
 	memcpy(th, &gen_th, ISO88025_HDR_LEN + rif_len);
 
         /*
          * If a simplex interface, and the packet is being sent to our
          * Ethernet address or a broadcast address, loopback a copy.
          * XXX To make a simplex device behave exactly like a duplex
          * device, we should copy in the case of sending to our own
          * ethernet address (thus letting the original actually appear
          * on the wire). However, we don't do that here for security
          * reasons and compatibility with the original behavior.
          */     
         if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) {
                 if ((m->m_flags & M_BCAST) || (loop_copy > 0)) { 
                         struct mbuf *n;
 			n = m_copy(m, 0, (int)M_COPYALL);
                         (void) if_simloop(ifp, n, dst->sa_family,
 					  ISO88025_HDR_LEN);
                 } else if (bcmp(th->iso88025_dhost, th->iso88025_shost,
 				 ETHER_ADDR_LEN) == 0) {
 			(void) if_simloop(ifp, m, dst->sa_family,
 					  ISO88025_HDR_LEN);
                        	return(0);      /* XXX */
 		}       
         }      
 
 	IFQ_HANDOFF_ADJ(ifp, m, ISO88025_HDR_LEN + LLC_SNAPFRAMELEN, error);
 	if (error) {
 		printf("iso88025_output: packet dropped QFULL.\n");
 		ifp->if_oerrors++;
 	}
 	return (error);
 
 bad:
 	ifp->if_oerrors++;
 	if (m)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * ISO 88025 de-encapsulation
  */
 void
 iso88025_input(ifp, m)
 	struct ifnet *ifp;
 	struct mbuf *m;
 {
 	struct iso88025_header *th;
 	struct llc *l;
 	int isr;
 	int mac_hdr_len;
 
 	/*
 	 * Do consistency checks to verify assumptions
 	 * made by code past this point.
 	 */
 	if ((m->m_flags & M_PKTHDR) == 0) {
 		if_printf(ifp, "discard frame w/o packet header\n");
 		ifp->if_ierrors++;
 		m_freem(m);
 		return;
 	}
 	if (m->m_pkthdr.rcvif == NULL) {
 		if_printf(ifp, "discard frame w/o interface pointer\n");
 		ifp->if_ierrors++;
  		m_freem(m);
 		return;
 	}
 
 	m = m_pullup(m, ISO88025_HDR_LEN);
 	if (m == NULL) {
 		ifp->if_ierrors++;
 		goto dropanyway;
 	}
 	th = mtod(m, struct iso88025_header *);
 	m->m_pkthdr.header = (void *)th;
 
 	/*
 	 * Discard packet if interface is not up.
 	 */
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
 		goto dropanyway;
 
 	/*
 	 * Give bpf a chance at the packet.
 	 */
 	BPF_MTAP(ifp, m);
 
 	/*
 	 * Interface marked for monitoring; discard packet.
 	 */
 	if (ifp->if_flags & IFF_MONITOR) {
 		m_freem(m);
 		return;
 	}
 
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * Update interface statistics.
 	 */
 	ifp->if_ibytes += m->m_pkthdr.len;
 	getmicrotime(&ifp->if_lastchange);
 
 	/*
 	 * Discard non local unicast packets when interface
 	 * is in promiscuous mode.
 	 */
 	if ((ifp->if_flags & IFF_PROMISC) &&
 	    ((th->iso88025_dhost[0] & 1) == 0) &&
 	     (bcmp(IF_LLADDR(ifp), (caddr_t) th->iso88025_dhost,
 	     ISO88025_ADDR_LEN) != 0))
 		goto dropanyway;
 
 	/*
 	 * Set mbuf flags for bcast/mcast.
 	 */
 	if (th->iso88025_dhost[0] & 1) {
 		if (bcmp(iso88025_broadcastaddr, th->iso88025_dhost,
 		    ISO88025_ADDR_LEN) == 0)
 			m->m_flags |= M_BCAST;
 		else
 			m->m_flags |= M_MCAST;
 		ifp->if_imcasts++;
 	}
 
 	mac_hdr_len = ISO88025_HDR_LEN;
 	/* Check for source routing info */
 	if (th->iso88025_shost[0] & TR_RII)
 		mac_hdr_len += TR_RCF_RIFLEN(th->rcf);
 
 	/* Strip off ISO88025 header. */
 	m_adj(m, mac_hdr_len);
 
 	m = m_pullup(m, LLC_SNAPFRAMELEN);
 	if (m == 0) {
 		ifp->if_ierrors++;
 		goto dropanyway;
 	}
 	l = mtod(m, struct llc *);
 
 	switch (l->llc_dsap) {
 #ifdef IPX
 	case ETHERTYPE_IPX_8022:	/* Thanks a bunch Novell */
 		if ((l->llc_control != LLC_UI) ||
 		    (l->llc_ssap != ETHERTYPE_IPX_8022)) {
 			ifp->if_noproto++;
 			goto dropanyway;
 		}
 
 		th->iso88025_shost[0] &= ~(TR_RII); 
 		m_adj(m, 3);
 		isr = NETISR_IPX;
 		break;
 #endif	/* IPX */
 	case LLC_SNAP_LSAP: {
 		u_int16_t type;
 		if ((l->llc_control != LLC_UI) ||
 		    (l->llc_ssap != LLC_SNAP_LSAP)) {
 			ifp->if_noproto++;
 			goto dropanyway;
 		}
 
 		if (l->llc_snap.org_code[0] != 0 ||
 		    l->llc_snap.org_code[1] != 0 ||
 		    l->llc_snap.org_code[2] != 0) {
 			ifp->if_noproto++;
 			goto dropanyway;
 		}
 
 		type = ntohs(l->llc_snap.ether_type);
 		m_adj(m, LLC_SNAPFRAMELEN);
 		switch (type) {
 #ifdef INET
 		case ETHERTYPE_IP:
 			th->iso88025_shost[0] &= ~(TR_RII); 
 			if ((m = ip_fastforward(m)) == NULL)
 				return;
 			isr = NETISR_IP;
 			break;
 
 		case ETHERTYPE_ARP:
 			if (ifp->if_flags & IFF_NOARP)
 				goto dropanyway;
 			isr = NETISR_ARP;
 			break;
 #endif	/* INET */
 #ifdef IPX_SNAP	/* XXX: Not supported! */
 		case ETHERTYPE_IPX:
 			th->iso88025_shost[0] &= ~(TR_RII); 
 			isr = NETISR_IPX;
 			break;
 #endif	/* IPX_SNAP */
 #ifdef INET6
 		case ETHERTYPE_IPV6:
 			th->iso88025_shost[0] &= ~(TR_RII); 
 			isr = NETISR_IPV6;
 			break;
 #endif	/* INET6 */
 		default:
 			printf("iso88025_input: unexpected llc_snap ether_type  0x%02x\n", type);
 			ifp->if_noproto++;
 			goto dropanyway;
 		}
 		break;
 	}
 #ifdef ISO
 	case LLC_ISO_LSAP:
 		switch (l->llc_control) {
 		case LLC_UI:
 			ifp->if_noproto++;
 			goto dropanyway;
 			break;
                 case LLC_XID:
                 case LLC_XID_P:
 			if(m->m_len < ISO88025_ADDR_LEN)
 				goto dropanyway;
 			l->llc_window = 0;
 			l->llc_fid = 9;  
 			l->llc_class = 1;
 			l->llc_dsap = l->llc_ssap = 0;
 			/* Fall through to */  
 		case LLC_TEST:
 		case LLC_TEST_P:
 		{
 			struct sockaddr sa;
 			struct arpcom *ac;
 			struct iso88025_sockaddr_data *th2;
 			int i;
 			u_char c;
 
 			c = l->llc_dsap;
 
 			if (th->iso88025_shost[0] & TR_RII) { /* XXX */
 				printf("iso88025_input: dropping source routed LLC_TEST\n");
 				goto dropanyway;
 			}
 			l->llc_dsap = l->llc_ssap;
 			l->llc_ssap = c;
 			if (m->m_flags & (M_BCAST | M_MCAST))
 				bcopy((caddr_t)IF_LLADDR(ifp),
 				      (caddr_t)th->iso88025_dhost,
 					ISO88025_ADDR_LEN);
 			sa.sa_family = AF_UNSPEC;
 			sa.sa_len = sizeof(sa);
 			th2 = (struct iso88025_sockaddr_data *)sa.sa_data;
 			for (i = 0; i < ISO88025_ADDR_LEN; i++) {
 				th2->ether_shost[i] = c = th->iso88025_dhost[i];
 				th2->ether_dhost[i] = th->iso88025_dhost[i] =
 					th->iso88025_shost[i];
 				th->iso88025_shost[i] = c;
 			}
 			th2->ac = TR_AC;
 			th2->fc = TR_LLC_FRAME;
 			ifp->if_output(ifp, m, &sa, NULL);
 			return;
 		}
 		default:
 			printf("iso88025_input: unexpected llc control 0x%02x\n", l->llc_control);
 			ifp->if_noproto++;
 			goto dropanyway;
 			break;
 		}
 		break;
 #endif	/* ISO */
 	default:
 		printf("iso88025_input: unknown dsap 0x%x\n", l->llc_dsap);
 		ifp->if_noproto++;
 		goto dropanyway;
 		break;
 	}
 
 	netisr_dispatch(isr, m);
 	return;
 
 dropanyway:
 	ifp->if_iqdrops++;
 	if (m)
 		m_freem(m);
 	return;
 }
 
 static int
 iso88025_resolvemulti (ifp, llsa, sa)
 	struct ifnet *ifp;
 	struct sockaddr **llsa;
 	struct sockaddr *sa;
 {
 	struct sockaddr_dl *sdl;
 	struct sockaddr_in *sin;
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 #endif
 	u_char *e_addr;
 
 	switch(sa->sa_family) {
 	case AF_LINK:
 		/*
 		 * No mapping needed. Just check that it's a valid MC address.
 		 */
 		sdl = (struct sockaddr_dl *)sa;
 		e_addr = LLADDR(sdl);
 		if ((e_addr[0] & 1) != 1) {
 			return (EADDRNOTAVAIL);
 		}
 		*llsa = 0;
 		return (0);
 
 #ifdef INET
 	case AF_INET:
 		sin = (struct sockaddr_in *)sa;
 		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
 			return (EADDRNOTAVAIL);
 		}
 		MALLOC(sdl, struct sockaddr_dl *, sizeof *sdl, M_IFMADDR,
 		       M_NOWAIT|M_ZERO);
 		if (sdl == NULL)
 			return (ENOMEM);
 		sdl->sdl_len = sizeof *sdl;
 		sdl->sdl_family = AF_LINK;
 		sdl->sdl_index = ifp->if_index;
 		sdl->sdl_type = IFT_ISO88025;
 		sdl->sdl_alen = ISO88025_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return (0);
 #endif
 #ifdef INET6
 	case AF_INET6:
 		sin6 = (struct sockaddr_in6 *)sa;
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 			/*
 			 * An IP6 address of 0 means listen to all
 			 * of the Ethernet multicast address used for IP6.
 			 * (This is used for multicast routers.)
 			 */
 			ifp->if_flags |= IFF_ALLMULTI;
 			*llsa = 0;
 			return (0);
 		}
 		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
 			return (EADDRNOTAVAIL);
 		}
 		MALLOC(sdl, struct sockaddr_dl *, sizeof *sdl, M_IFMADDR,
 		       M_NOWAIT|M_ZERO);
 		if (sdl == NULL)
 			return (ENOMEM);
 		sdl->sdl_len = sizeof *sdl;
 		sdl->sdl_family = AF_LINK;
 		sdl->sdl_index = ifp->if_index;
 		sdl->sdl_type = IFT_ISO88025;
 		sdl->sdl_alen = ISO88025_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return (0);
 #endif
 
 	default:
 		/*
 		 * Well, the text isn't quite right, but it's the name
 		 * that counts...
 		 */
 		return (EAFNOSUPPORT);
 	}
 
 	return (0);
 }
 
 MALLOC_DEFINE(M_ISO88025, "arpcom", "802.5 interface internals");
 
 static void*
 iso88025_alloc(u_char type, struct ifnet *ifp)
 {
 	struct arpcom	*ac;
  
         ac = malloc(sizeof(struct arpcom), M_ISO88025, M_WAITOK | M_ZERO);
 	ac->ac_ifp = ifp;
 
 	return (ac);
 } 
 
 static void
 iso88025_free(void *com, u_char type)
 {
  
         free(com, M_ISO88025);
 }
  
 static int
 iso88025_modevent(module_t mod, int type, void *data)
 {
   
         switch (type) {
         case MOD_LOAD:
                 if_register_com_alloc(IFT_ISO88025, iso88025_alloc,
                     iso88025_free);
                 break;
         case MOD_UNLOAD:
                 if_deregister_com_alloc(IFT_ISO88025);
                 break;
         default:
                 return EOPNOTSUPP;
         }
 
         return (0);
 }
 
 static moduledata_t iso88025_mod = {
 	"iso88025",
 	iso88025_modevent,
 	0
 };
 
 DECLARE_MODULE(iso88025, iso88025_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(iso88025, 1);
Index: head/sys/net/if_stf.c
===================================================================
--- head/sys/net/if_stf.c	(revision 178887)
+++ head/sys/net/if_stf.c	(revision 178888)
@@ -1,804 +1,809 @@
 /*	$FreeBSD$	*/
 /*	$KAME: if_stf.c,v 1.73 2001/12/03 11:08:30 keiichi Exp $	*/
 
 /*-
  * Copyright (C) 2000 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * 6to4 interface, based on RFC3056.
  *
  * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting.
  * There is no address mapping defined from IPv6 multicast address to IPv4
  * address.  Therefore, we do not have IFF_MULTICAST on the interface.
  *
  * Due to the lack of address mapping for link-local addresses, we cannot
  * throw packets toward link-local addresses (fe80::x).  Also, we cannot throw
  * packets to link-local multicast addresses (ff02::x).
  *
  * Here are interesting symptoms due to the lack of link-local address:
  *
  * Unicast routing exchange:
  * - RIPng: Impossible.  Uses link-local multicast packet toward ff02::9,
  *   and link-local addresses as nexthop.
  * - OSPFv6: Impossible.  OSPFv6 assumes that there's link-local address
  *   assigned to the link, and makes use of them.  Also, HELLO packets use
  *   link-local multicast addresses (ff02::5 and ff02::6).
  * - BGP4+: Maybe.  You can only use global address as nexthop, and global
  *   address as TCP endpoint address.
  *
  * Multicast routing protocols:
  * - PIM: Hello packet cannot be used to discover adjacent PIM routers.
  *   Adjacent PIM routers must be configured manually (is it really spec-wise
  *   correct thing to do?).
  *
  * ICMPv6:
  * - Redirects cannot be used due to the lack of link-local address.
  *
  * stf interface does not have, and will not need, a link-local address.  
  * It seems to have no real benefit and does not help the above symptoms much.
  * Even if we assign link-locals to interface, we cannot really
  * use link-local unicast/multicast on top of 6to4 cloud (since there's no
  * encapsulation defined for link-local address), and the above analysis does
  * not change.  RFC3056 does not mandate the assignment of link-local address
  * either.
  *
  * 6to4 interface has security issues.  Refer to
  * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt
  * for details.  The code tries to filter out some of malicious packets.
  * Note that there is no way to be 100% secure.
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/protosw.h>
+#include <sys/proc.h>
 #include <sys/queue.h>
 #include <machine/cpu.h>
 
 #include <sys/malloc.h>
 
 #include <net/if.h>
 #include <net/if_clone.h>
 #include <net/route.h>
 #include <net/netisr.h>
 #include <net/if_types.h>
 #include <net/if_stf.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/in_var.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip_ecn.h>
 
 #include <netinet/ip_encap.h>
 
 #include <machine/stdarg.h>
 
 #include <net/bpf.h>
 
 #include <security/mac/mac_framework.h>
 
 /* Driver name; also used as the malloc type name and the cloner name. */
 #define STFNAME		"stf"
 /* The only unit number ever handed out (see stf_clone_create()). */
 #define STFUNIT		0
 
 /* True when the first 16 bits of the IPv6 address x are 0x2002. */
 #define IN6_IS_ADDR_6TO4(x)	(ntohs((x)->s6_addr16[0]) == 0x2002)
 
 /*
  * Yields a pointer to the bytes that follow the leading 16-bit word of
  * the IPv6 address x; for a 6to4 address the callers in this file treat
  * those 4 bytes as the embedded IPv4 address.
  *
  * XXX: The returned pointer is only 16-bit aligned.  Don't cast it to
  * struct in_addr *; use bcopy() instead.
  */
 #define GET_V4(x)	((caddr_t)(&(x)->s6_addr16[1]))
 
 /*
  * Per-interface driver state.  Allocated in stf_clone_create() and freed
  * in stf_clone_destroy().
  */
 struct stf_softc {
 	/* Back pointer to the ifnet; use STF2IFP() to read it. */
 	struct ifnet	*sc_ifp;
 	/* Cached IPv4 route used by stf_output(), accessed through sc_ro. */
 	union {
 		struct route  __sc_ro4;
 		struct route_in6 __sc_ro6; /* just for safety */
 	} __sc_ro46;
 #define sc_ro	__sc_ro46.__sc_ro4
 	/*
 	 * FIB number copied from the creating process in stf_clone_create();
 	 * used for the route lookups in stf_output() and stf_checkaddr4().
 	 */
+	u_int	sc_fibnum;
 	/* Handle returned by encap_attach_func(), needed for encap_detach(). */
 	const struct encaptab *encap_cookie;
 };
 /* Map a softc pointer to its ifnet pointer. */
 #define STF2IFP(sc)	((sc)->sc_ifp)
 
 /*
  * XXXRW: Note that mutable fields in the softc are not currently locked:
  * in particular, sc_ro needs to be protected from concurrent entrance
  * of stf_output().
  */
 /* Malloc type for struct stf_softc. */
 static MALLOC_DEFINE(M_STF, STFNAME, "6to4 Tunnel Interface");
 /* TTL written into the outer IPv4 header by stf_output(). */
 static const int ip_stf_ttl = 40;
 
 extern  struct domain inetdomain;
 /*
  * Protocol switch entry handed to encap_attach_func() in
  * stf_clone_create(); matching packets are delivered to in_stf_input().
  */
 struct protosw in_stf_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_IPV6,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		in_stf_input,
 	.pr_output =		(pr_output_t *)rip_output,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 };
 
 /* NULL-terminated list of interface names accepted by stf_clone_match(). */
 static char *stfnames[] = {"stf0", "stf", "6to4", NULL};
 
 static int stfmodevent(module_t, int, void *);
 static int stf_encapcheck(const struct mbuf *, int, int, void *);
 static struct in6_ifaddr *stf_getsrcifa6(struct ifnet *);
 static int stf_output(struct ifnet *, struct mbuf *, struct sockaddr *,
 	struct rtentry *);
 static int isrfc1918addr(struct in_addr *);
 static int stf_checkaddr4(struct stf_softc *, struct in_addr *,
 	struct ifnet *);
 static int stf_checkaddr6(struct stf_softc *, struct in6_addr *,
 	struct ifnet *);
 static void stf_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
 static int stf_ioctl(struct ifnet *, u_long, caddr_t);
 
 static int stf_clone_match(struct if_clone *, const char *);
 static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t);
 static int stf_clone_destroy(struct if_clone *, struct ifnet *);
 /*
  * Interface cloner for stf, wired to the match/create/destroy hooks
  * above.  It is attached and detached by stfmodevent().
  */
 struct if_clone stf_cloner = IFC_CLONE_INITIALIZER(STFNAME, NULL, 0,
     NULL, stf_clone_match, stf_clone_create, stf_clone_destroy);
 
 /*
  * Cloner match hook: return 1 when name is exactly one of the entries
  * of stfnames[], 0 otherwise.  The ifc argument is unused.
  */
 static int
 stf_clone_match(struct if_clone *ifc, const char *name)
 {
 	char **alias;
 
 	for (alias = stfnames; *alias != NULL; alias++)
 		if (strcmp(*alias, name) == 0)
 			return (1);
 
 	return (0);
 }
 
 /*
  * Cloner create hook.  Claims the single stf unit, allocates the softc
  * and the ifnet, registers the IPPROTO_IPV6-over-IPv4 decapsulation hook
  * and attaches the interface (including bpf).
  *
  * Returns 0 on success, the error from ifc_alloc_unit(), ENOSPC when no
  * ifnet could be allocated, or ENOMEM when the encap hook could not be
  * attached.  On every failure path the resources acquired so far are
  * released.  The len and params arguments are unused.
  */
 static int
 stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 {
 	int err, unit;
 	struct stf_softc *sc;
 	struct ifnet *ifp;
 
 	/*
 	 * We can only have one unit, but since unit allocation is
 	 * already locked, we use it to keep from allocating extra
 	 * interfaces.
 	 */
 	unit = STFUNIT;
 	err = ifc_alloc_unit(ifc, &unit);
 	if (err != 0)
 		return (err);
 
 	sc = malloc(sizeof(struct stf_softc), M_STF, M_WAITOK | M_ZERO);
 	ifp = STF2IFP(sc) = if_alloc(IFT_STF);
 	if (ifp == NULL) {
 		free(sc, M_STF);
 		ifc_free_unit(ifc, unit);
 		return (ENOSPC);
 	}
 	ifp->if_softc = sc;
 	/* Remember the creating process's FIB for later route lookups. */
+	sc->sc_fibnum = curthread->td_proc->p_fibnum;
 
 	/*
 	 * Set the name manually rather than using if_initname because
 	 * we don't conform to the default naming convention for interfaces.
 	 */
 	strlcpy(ifp->if_xname, name, IFNAMSIZ);
 	ifp->if_dname = ifc->ifc_name;
 	ifp->if_dunit = IF_DUNIT_NONE;
 
 	sc->encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV6,
 	    stf_encapcheck, &in_stf_protosw, sc);
 	if (sc->encap_cookie == NULL) {
 		if_printf(ifp, "attach failed\n");
 		/*
 		 * The ifnet was allocated above but never attached; give it
 		 * back as well, otherwise it is leaked on this path.
 		 */
 		if_free(ifp);
 		free(sc, M_STF);
 		ifc_free_unit(ifc, unit);
 		return (ENOMEM);
 	}
 
 	ifp->if_mtu    = IPV6_MMTU;
 	ifp->if_ioctl  = stf_ioctl;
 	ifp->if_output = stf_output;
 	ifp->if_snd.ifq_maxlen = IFQ_MAXLEN;
 	if_attach(ifp);
 	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
 	return (0);
 }
 
 static int
 stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
 {
 	struct stf_softc *sc = ifp->if_softc;
 	int err;
 
 	err = encap_detach(sc->encap_cookie);
 	KASSERT(err == 0, ("Unexpected error detaching encap_cookie"));
 	bpfdetach(ifp);
 	if_detach(ifp);
 	if_free(ifp);
 
 	free(sc, M_STF);
 	ifc_free_unit(ifc, STFUNIT);
 
 	return (0);
 }
 
 /*
  * Module event handler: attach the stf cloner on load and detach it on
  * unload.  Any other event type yields EOPNOTSUPP.  The mod and data
  * arguments are unused.
  */
 static int
 stfmodevent(module_t mod, int type, void *data)
 {
 
 	if (type == MOD_LOAD) {
 		if_clone_attach(&stf_cloner);
 		return (0);
 	}
 	if (type == MOD_UNLOAD) {
 		if_clone_detach(&stf_cloner);
 		return (0);
 	}
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Module descriptor: module name, event handler, and 0 for the extra
  * argument handed to the handler (stfmodevent() ignores it).
  */
 static moduledata_t stf_mod = {
 	"if_stf",
 	stfmodevent,
 	0
 };
 
 DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 
 static int
 stf_encapcheck(m, off, proto, arg)
 	const struct mbuf *m;
 	int off;
 	int proto;
 	void *arg;
 {
 	struct ip ip;
 	struct in6_ifaddr *ia6;
 	struct stf_softc *sc;
 	struct in_addr a, b, mask;
 
 	sc = (struct stf_softc *)arg;
 	if (sc == NULL)
 		return 0;
 
 	if ((STF2IFP(sc)->if_flags & IFF_UP) == 0)
 		return 0;
 
 	/* IFF_LINK0 means "no decapsulation" */
 	if ((STF2IFP(sc)->if_flags & IFF_LINK0) != 0)
 		return 0;
 
 	if (proto != IPPROTO_IPV6)
 		return 0;
 
 	/* LINTED const cast */
 	m_copydata((struct mbuf *)(uintptr_t)m, 0, sizeof(ip), (caddr_t)&ip);
 
 	if (ip.ip_v != 4)
 		return 0;
 
 	ia6 = stf_getsrcifa6(STF2IFP(sc));
 	if (ia6 == NULL)
 		return 0;
 
 	/*
 	 * check if IPv4 dst matches the IPv4 address derived from the
 	 * local 6to4 address.
 	 * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:...
 	 */
 	if (bcmp(GET_V4(&ia6->ia_addr.sin6_addr), &ip.ip_dst,
 	    sizeof(ip.ip_dst)) != 0)
 		return 0;
 
 	/*
 	 * check if IPv4 src matches the IPv4 address derived from the
 	 * local 6to4 address masked by prefixmask.
 	 * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24
 	 * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24
 	 */
 	bzero(&a, sizeof(a));
 	bcopy(GET_V4(&ia6->ia_addr.sin6_addr), &a, sizeof(a));
 	bcopy(GET_V4(&ia6->ia_prefixmask.sin6_addr), &mask, sizeof(mask));
 	a.s_addr &= mask.s_addr;
 	b = ip.ip_src;
 	b.s_addr &= mask.s_addr;
 	if (a.s_addr != b.s_addr)
 		return 0;
 
 	/* stf interface makes single side match only */
 	return 32;
 }
 
 static struct in6_ifaddr *
 stf_getsrcifa6(ifp)
 	struct ifnet *ifp;
 {
 	struct ifaddr *ia;
 	struct in_ifaddr *ia4;
 	struct sockaddr_in6 *sin6;
 	struct in_addr in;
 
 	TAILQ_FOREACH(ia, &ifp->if_addrlist, ifa_list) {
 		if (ia->ifa_addr->sa_family != AF_INET6)
 			continue;
 		sin6 = (struct sockaddr_in6 *)ia->ifa_addr;
 		if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr))
 			continue;
 
 		bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in));
 		LIST_FOREACH(ia4, INADDR_HASH(in.s_addr), ia_hash)
 			if (ia4->ia_addr.sin_addr.s_addr == in.s_addr)
 				break;
 		if (ia4 == NULL)
 			continue;
 
 		return (struct in6_ifaddr *)ia;
 	}
 
 	return NULL;
 }
 
 /*
  * Output routine installed as ifp->if_output by stf_clone_create().
  *
  * Prepends an IPv4 header (protocol IPPROTO_IPV6) to the IPv6 packet in m
  * and hands the result to ip_output(), using the route cached in the
  * softc and the FIB recorded in sc_fibnum.
  *
  * Returns the MAC check error (when MAC is compiled in), ENETDOWN when the
  * interface is down or has no usable 6to4 source address, ENOBUFS when an
  * mbuf operation fails, ENETUNREACH when no 6to4 destination or no IPv4
  * route can be found, and otherwise whatever ip_output() returns.
  * The rt argument is unused.
  */
 static int
 stf_output(ifp, m, dst, rt)
 	struct ifnet *ifp;
 	struct mbuf *m;
 	struct sockaddr *dst;
 	struct rtentry *rt;
 {
 	struct stf_softc *sc;
 	struct sockaddr_in6 *dst6;
 	struct in_addr in4;
 	caddr_t ptr;
 	struct sockaddr_in *dst4;
 	u_int8_t tos;
 	struct ip *ip;
 	struct ip6_hdr *ip6;
 	struct in6_ifaddr *ia6;
 	u_int32_t af;
 #ifdef MAC
 	int error;
 
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error) {
 		m_freem(m);
 		return (error);
 	}
 #endif
 
 	sc = ifp->if_softc;
 	dst6 = (struct sockaddr_in6 *)dst;
 
 	/* just in case */
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		ifp->if_oerrors++;
 		return ENETDOWN;
 	}
 
 	/*
 	 * If we don't have an ip4 address that matches my inner ip6 address,
 	 * we shouldn't generate output.  Without this check, we'll end up
 	 * using wrong IPv4 source.
 	 */
 	ia6 = stf_getsrcifa6(ifp);
 	if (ia6 == NULL) {
 		m_freem(m);
 		ifp->if_oerrors++;
 		return ENETDOWN;
 	}
 
 	/* Make the IPv6 header contiguous before reading it. */
 	if (m->m_len < sizeof(*ip6)) {
 		m = m_pullup(m, sizeof(*ip6));
 		if (!m) {
 			ifp->if_oerrors++;
 			return ENOBUFS;
 		}
 	}
 	ip6 = mtod(m, struct ip6_hdr *);
 	/* Extract the 8 bits at bit offset 20 of the host-order flow word. */
 	tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 
 	/*
 	 * BPF writes need to be handled specially.
 	 * This is a null operation, nothing here checks dst->sa_family.
 	 */
 	if (dst->sa_family == AF_UNSPEC) {
 		bcopy(dst->sa_data, &af, sizeof(af));
 		dst->sa_family = af;
 	}
 
 	/*
 	 * Pickup the right outer dst addr from the list of candidates.
 	 * ip6_dst has priority as it may be able to give us shorter IPv4 hops.
 	 */
 	ptr = NULL;
 	if (IN6_IS_ADDR_6TO4(&ip6->ip6_dst))
 		ptr = GET_V4(&ip6->ip6_dst);
 	else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr))
 		ptr = GET_V4(&dst6->sin6_addr);
 	else {
 		m_freem(m);
 		ifp->if_oerrors++;
 		return ENETUNREACH;
 	}
 	/* GET_V4() is only 16-bit aligned, hence the byte copy. */
 	bcopy(ptr, &in4, sizeof(in4));
 
 	if (bpf_peers_present(ifp->if_bpf)) {
 		/*
 		 * We need to prepend the address family as
 		 * a four byte field.  Cons up a dummy header
 		 * to pacify bpf.  This is safe because bpf
 		 * will only read from the mbuf (i.e., it won't
 		 * try to free it or keep a pointer to it).
 		 */
 		af = AF_INET6;
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
 	}
 
 	/* Make room for the outer IPv4 header; m may be NULL afterwards. */
 	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
 	if (m && m->m_len < sizeof(struct ip))
 		m = m_pullup(m, sizeof(struct ip));
 	if (m == NULL) {
 		ifp->if_oerrors++;
 		return ENOBUFS;
 	}
 	ip = mtod(m, struct ip *);
 
 	bzero(ip, sizeof(*ip));
 
 	/* Source is the IPv4 address embedded in our own 6to4 address. */
 	bcopy(GET_V4(&((struct sockaddr_in6 *)&ia6->ia_addr)->sin6_addr),
 	    &ip->ip_src, sizeof(ip->ip_src));
 	bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst));
 	ip->ip_p = IPPROTO_IPV6;
 	ip->ip_ttl = ip_stf_ttl;
 	ip->ip_len = m->m_pkthdr.len;	/*host order*/
 	/* IFF_LINK1 selects ECN_ALLOWED instead of ECN_NOCARE. */
 	if (ifp->if_flags & IFF_LINK1)
 		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
 	else
 		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
 
 	/*
 	 * XXXRW: Locking of sc_ro required.
 	 */
 	dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst;
 	if (dst4->sin_family != AF_INET ||
 	    bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) {
 		/* cache route doesn't match */
 		dst4->sin_family = AF_INET;
 		dst4->sin_len = sizeof(struct sockaddr_in);
 		bcopy(&ip->ip_dst, &dst4->sin_addr, sizeof(dst4->sin_addr));
 		if (sc->sc_ro.ro_rt) {
 			RTFREE(sc->sc_ro.ro_rt);
 			sc->sc_ro.ro_rt = NULL;
 		}
 	}
 
 	/* (Re)resolve the cached route in the interface's FIB if needed. */
 	if (sc->sc_ro.ro_rt == NULL) {
-		rtalloc(&sc->sc_ro);
+		rtalloc_fib(&sc->sc_ro, sc->sc_fibnum);
 		if (sc->sc_ro.ro_rt == NULL) {
 			m_freem(m);
 			ifp->if_oerrors++;
 			return ENETUNREACH;
 		}
 	}
 
 	/* Tag the packet with the same FIB that the route came from. */
+	M_SETFIB(m, sc->sc_fibnum);
 	ifp->if_opackets++;
 	return ip_output(m, NULL, &sc->sc_ro, 0, NULL, NULL);
 }
 
 /*
  * Return 1 if the network-byte-order IPv4 address *in lies in one of the
  * private ranges 10.0.0.0/8, 172.16.0.0/12 or 192.168.0.0/16, and 0
  * otherwise.
  */
 static int
 isrfc1918addr(struct in_addr *in)
 {
 	const uint32_t haddr = ntohl(in->s_addr);
 
 	/* 10.0.0.0/8: compare the top 8 bits. */
 	if ((haddr >> 24) == 10)
 		return 1;
 	/* 172.16.0.0/12: compare the top 12 bits. */
 	if ((haddr >> 20) == ((172 << 4) | (16 >> 4)))
 		return 1;
 	/* 192.168.0.0/16: compare the top 16 bits. */
 	if ((haddr >> 16) == ((192 << 8) | 168))
 		return 1;
 
 	return 0;
 }
 
 /*
  * Sanity-check an IPv4 address taken from a 6to4 packet.
  *
  * sc:    softc of the stf interface; supplies the interface flags and the
  *        FIB number for the ingress filter (the filter is skipped when
  *        sc is NULL).
  * in:    address to check, in network byte order.
  * inifp: interface the packet arrived on, or NULL to skip the ingress
  *        filter.
  *
  * Returns 0 when the address is acceptable and -1 when the packet should
  * be rejected.
  */
 static int
 stf_checkaddr4(sc, in, inifp)
 	struct stf_softc *sc;
 	struct in_addr *in;
 	struct ifnet *inifp;	/* incoming interface */
 {
 	struct in_ifaddr *ia4;
 
 	/*
 	 * reject packets with the following address:
 	 * 224.0.0.0/4 0.0.0.0/8 127.0.0.0/8 255.0.0.0/8
 	 */
 	if (IN_MULTICAST(ntohl(in->s_addr)))
 		return -1;
 	switch ((ntohl(in->s_addr) & 0xff000000) >> 24) {
 	case 0: case 127: case 255:
 		return -1;
 	}
 
 	/*
 	 * reject packets with private address range.
 	 * (requirement from RFC3056 section 2 1st paragraph)
 	 */
 	if (isrfc1918addr(in))
 		return -1;
 
 	/*
 	 * reject packets with broadcast
 	 * (the broadcast address of any configured IFF_BROADCAST interface)
 	 */
 	for (ia4 = TAILQ_FIRST(&in_ifaddrhead);
 	     ia4;
 	     ia4 = TAILQ_NEXT(ia4, ia_link))
 	{
 		if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
 			continue;
 		if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr)
 			return -1;
 	}
 
 	/*
 	 * perform ingress filter: unless IFF_LINK2 is set on the stf
 	 * interface, the route back to the address (looked up in the
 	 * interface's FIB) must point at the interface the packet
 	 * arrived on.
 	 */
 	if (sc && (STF2IFP(sc)->if_flags & IFF_LINK2) == 0 && inifp) {
 		struct sockaddr_in sin;
 		struct rtentry *rt;
 
 		bzero(&sin, sizeof(sin));
 		sin.sin_family = AF_INET;
 		sin.sin_len = sizeof(struct sockaddr_in);
 		sin.sin_addr = *in;
-		rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
+		rt = rtalloc1_fib((struct sockaddr *)&sin, 0,
+		    0UL, sc->sc_fibnum);
 		if (!rt || rt->rt_ifp != inifp) {
 #if 0
 			log(LOG_WARNING, "%s: packet from 0x%x dropped "
 			    "due to ingress filter\n", if_name(STF2IFP(sc)),
 			    (u_int32_t)ntohl(sin.sin_addr.s_addr));
 #endif
 			/* Drop our reference on the mismatching route. */
 			if (rt)
 				RTFREE_LOCKED(rt);
 			return -1;
 		}
 		RTFREE_LOCKED(rt);
 	}
 
 	return 0;
 }
 
 /*
  * Sanity-check an IPv6 address from the inner header of a 6to4 packet.
  * A 6to4 address is judged by running stf_checkaddr4() on its embedded
  * IPv4 address; IPv4-compatible and IPv4-mapped addresses are rejected;
  * everything else is accepted.  Returns 0 to accept and -1 to reject.
  */
 static int
 stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6,
     struct ifnet *inifp /* incoming interface */)
 {
 	struct in_addr in4;
 
 	if (!IN6_IS_ADDR_6TO4(in6)) {
 		/*
 		 * reject anything that look suspicious.  the test is
 		 * implemented in ip6_input too, but we check here as well to
 		 * (1) reject bad packets earlier, and
 		 * (2) to be safe against future ip6_input change.
 		 */
 		if (IN6_IS_ADDR_V4COMPAT(in6) || IN6_IS_ADDR_V4MAPPED(in6))
 			return -1;
 		return 0;
 	}
 
 	/*
 	 * check 6to4 addresses
 	 */
 	bcopy(GET_V4(in6), &in4, sizeof(in4));
 	return stf_checkaddr4(sc, &in4, inifp);
 }
 
 /*
  * Input routine referenced by in_stf_protosw.pr_input.
  *
  * m:   received IPv4 packet carrying an IPv6 packet.
  * off: number of bytes trimmed from the front of m to reach the inner
  *      IPv6 header.
  *
  * Validates the outer and inner addresses, propagates ECN bits into the
  * inner header, taps bpf, updates the interface counters and queues the
  * inner packet for IPv6 input.  The mbuf is freed on every rejection path.
  */
 void
 in_stf_input(m, off)
 	struct mbuf *m;
 	int off;
 {
 	int proto;
 	struct stf_softc *sc;
 	struct ip *ip;
 	struct ip6_hdr *ip6;
 	u_int8_t otos, itos;
 	struct ifnet *ifp;
 
 	proto = mtod(m, struct ip *)->ip_p;
 
 	if (proto != IPPROTO_IPV6) {
 		m_freem(m);
 		return;
 	}
 
 	ip = mtod(m, struct ip *);
 
 	/* The softc was registered as the encap argument at create time. */
 	sc = (struct stf_softc *)encap_getarg(m);
 
 	if (sc == NULL || (STF2IFP(sc)->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return;
 	}
 
 	ifp = STF2IFP(sc);
 
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * perform sanity check against outer src/dst.
 	 * for source, perform ingress filter as well.
 	 */
 	if (stf_checkaddr4(sc, &ip->ip_dst, NULL) < 0 ||
 	    stf_checkaddr4(sc, &ip->ip_src, m->m_pkthdr.rcvif) < 0) {
 		m_freem(m);
 		return;
 	}
 
 	/* Save the outer TOS now; ip is not used after the m_adj() below. */
 	otos = ip->ip_tos;
 	m_adj(m, off);
 
 	if (m->m_len < sizeof(*ip6)) {
 		m = m_pullup(m, sizeof(*ip6));
 		if (!m)
 			return;
 	}
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/*
 	 * perform sanity check against inner src/dst.
 	 * for source, perform ingress filter as well.
 	 */
 	if (stf_checkaddr6(sc, &ip6->ip6_dst, NULL) < 0 ||
 	    stf_checkaddr6(sc, &ip6->ip6_src, m->m_pkthdr.rcvif) < 0) {
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Merge the outer TOS into the inner 8-bit field at bit offset 20
 	 * of the flow word; IFF_LINK1 selects ECN_ALLOWED over ECN_NOCARE.
 	 */
 	itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 	if ((ifp->if_flags & IFF_LINK1) != 0)
 		ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
 	else
 		ip_ecn_egress(ECN_NOCARE, &otos, &itos);
 	ip6->ip6_flow &= ~htonl(0xff << 20);
 	ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
 
 	/* From here on the packet counts as received on the stf interface. */
 	m->m_pkthdr.rcvif = ifp;
 	
 	if (bpf_peers_present(ifp->if_bpf)) {
 		/*
 		 * We need to prepend the address family as
 		 * a four byte field.  Cons up a dummy header
 		 * to pacify bpf.  This is safe because bpf
 		 * will only read from the mbuf (i.e., it won't
 		 * try to free it or keep a pointer to it).
 		 */
 		u_int32_t af = AF_INET6;
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
 	}
 
 	/*
 	 * Put the packet to the network layer input queue according to the
 	 * specified address family.
 	 * See net/if_gif.c for possible issues with packet processing
 	 * reorder due to extra queueing.
 	 */
 	ifp->if_ipackets++;
 	ifp->if_ibytes += m->m_pkthdr.len;
 	netisr_dispatch(NETISR_IPV6, m);
 }
 
 /*
  * Route request hook installed on interface addresses by stf_ioctl().
  * Asserts that the route entry is locked and sets its MTU metric to
  * IPV6_MMTU.  The cmd and info arguments are unused.
  */
 /* ARGSUSED */
 static void
 stf_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
 {
 
 	RT_LOCK_ASSERT(rt);
 	rt->rt_rmx.rmx_mtu = IPV6_MMTU;
 }
 
 /*
  * Interface ioctl handler installed as ifp->if_ioctl.
  *
  * SIOCSIFADDR accepts only a 6to4 IPv6 address whose embedded IPv4
  * address is not an RFC 1918 private address; on success it installs
  * stf_rtrequest() on the address and marks the interface up.
  * SIOCADDMULTI and SIOCDELMULTI succeed for AF_INET6 requests and do
  * nothing else.  Returns 0, EAFNOSUPPORT for a missing or non-IPv6
  * address, and EINVAL for an unacceptable address or unknown command.
  */
 static int
 stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct sockaddr_in6 *sin6;
 	struct ifaddr *ifa;
 	struct ifreq *ifr;
 	struct in_addr v4;
 
 	switch (cmd) {
 	case SIOCSIFADDR:
 		ifa = (struct ifaddr *)data;
 		if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6)
 			return EAFNOSUPPORT;
 
 		sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
 		if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr))
 			return EINVAL;
 
 		bcopy(GET_V4(&sin6->sin6_addr), &v4, sizeof(v4));
 		if (isrfc1918addr(&v4))
 			return EINVAL;
 
 		ifa->ifa_rtrequest = stf_rtrequest;
 		ifp->if_flags |= IFF_UP;
 		return 0;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		ifr = (struct ifreq *)data;
 		if (ifr == NULL || ifr->ifr_addr.sa_family != AF_INET6)
 			return EAFNOSUPPORT;
 		return 0;
 
 	default:
 		return EINVAL;
 	}
 }
Index: head/sys/net/if_var.h
===================================================================
--- head/sys/net/if_var.h	(revision 178887)
+++ head/sys/net/if_var.h	(revision 178888)
@@ -1,715 +1,717 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	From: @(#)if.h	8.1 (Berkeley) 6/10/93
  * $FreeBSD$
  */
 
 #ifndef	_NET_IF_VAR_H_
 #define	_NET_IF_VAR_H_
 
 /*
  * Structures defining a network interface, providing a packet
  * transport mechanism (ala level 0 of the PUP protocols).
  *
  * Each interface accepts output datagrams of a specified maximum
  * length, and provides higher level routines with input datagrams
  * received from its medium.
  *
  * Output occurs when the routine if_output is called, with three parameters:
  *	(*ifp->if_output)(ifp, m, dst, rt)
  * Here m is the mbuf chain to be sent and dst is the destination address.
  * The output routine encapsulates the supplied datagram if necessary,
  * and then transmits it on its medium.
  *
  * On input, each interface unwraps the data received by it, and either
  * places it on the input queue of an internetwork datagram routine
  * and posts the associated software interrupt, or passes the datagram to a raw
  * packet input routine.
  *
  * Routines exist for locating interfaces by their addresses
  * or for locating an interface on a certain network, as well as more general
  * routing and gateway routines maintaining information used to locate
  * interfaces.  These routines live in the files if.c and route.c
  */
 
 #ifdef __STDC__
 /*
  * Forward structure declarations for function prototypes [sic].
  */
 struct	mbuf;
 struct	thread;
 struct	rtentry;
 struct	rt_addrinfo;
 struct	socket;
 struct	ether_header;
 struct	carp_if;
 struct  ifvlantrunk;
 #endif
 
 #include <sys/queue.h>		/* get TAILQ macros */
 
 #ifdef _KERNEL
 #include <sys/mbuf.h>
 #include <sys/eventhandler.h>
 #endif /* _KERNEL */
 #include <sys/lock.h>		/* XXX */
 #include <sys/mutex.h>		/* XXX */
 #include <sys/event.h>		/* XXX */
 #include <sys/_task.h>
 
 #define	IF_DUNIT_NONE	-1
 
 #include <altq/if_altq.h>
 
 TAILQ_HEAD(ifnethead, ifnet);	/* we use TAILQs so that the order of */
 TAILQ_HEAD(ifaddrhead, ifaddr);	/* instantiation is preserved in the list */
 TAILQ_HEAD(ifprefixhead, ifprefix);
 TAILQ_HEAD(ifmultihead, ifmultiaddr);
 TAILQ_HEAD(ifgrouphead, ifg_group);
 
 /*
  * Structure defining a queue for a network interface.
  */
 /*
  * Singly linked packet queue; packets are chained through m_nextpkt by
  * the _IF_ENQUEUE/_IF_PREPEND/_IF_DEQUEUE macros.
  */
 struct	ifqueue {
 	struct	mbuf *ifq_head;		/* first packet, NULL when empty */
 	struct	mbuf *ifq_tail;		/* last packet, NULL when empty */
 	int	ifq_len;		/* number of packets queued */
 	int	ifq_maxlen;		/* limit tested by _IF_QFULL() */
 	int	ifq_drops;		/* counter bumped by _IF_DROP() */
 	struct	mtx ifq_mtx;		/* taken by IF_LOCK()/IF_UNLOCK() */
 };
 
 /*
  * Structure defining a network interface.
  *
  * (Would like to call this struct ``if'', but C isn't PL/1.)
  */
 
 /*
  * One instance per network interface.  Besides identification and
  * state, it carries the per-interface procedure handles (if_output,
  * if_ioctl, ...) and the output queue if_snd.
  */
 struct ifnet {
 	void	*if_softc;		/* pointer to driver state */
 	void	*if_l2com;		/* pointer to protocol bits */
 	TAILQ_ENTRY(ifnet) if_link; 	/* all struct ifnets are chained */
 	char	if_xname[IFNAMSIZ];	/* external name (name + unit) */
 	const char *if_dname;		/* driver name */
 	int	if_dunit;		/* unit or IF_DUNIT_NONE */
 	struct	ifaddrhead if_addrhead;	/* linked list of addresses per if */
 		/*
 		 * if_addrhead is the list of all addresses associated to
 		 * an interface.
 		 * Some code in the kernel assumes that first element
 		 * of the list has type AF_LINK, and contains sockaddr_dl
 		 * addresses which store the link-level address and the name
 		 * of the interface.
 		 * However, access to the AF_LINK address through this
 		 * field is deprecated. Use if_addr or ifaddr_byindex() instead.
 		 */
 	struct	knlist if_klist;	/* events attached to this if */
 	int	if_pcount;		/* number of promiscuous listeners */
 	struct	carp_if *if_carp;	/* carp interface structure */
 	struct	bpf_if *if_bpf;		/* packet filter structure */
 	u_short	if_index;		/* numeric abbreviation for this if  */
 	short	if_timer;		/* time 'til if_watchdog called */
 	struct  ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */
 	int	if_flags;		/* up/down, broadcast, etc. */
 	int	if_capabilities;	/* interface features & capabilities */
 	int	if_capenable;		/* enabled features & capabilities */
 	void	*if_linkmib;		/* link-type-specific MIB data */
 	size_t	if_linkmiblen;		/* length of above data */
 	/*
 	 * Generic parameters and counters; the if_mtu, if_ipackets, ...
 	 * macros defined after this structure alias into this member.
 	 */
 	struct	if_data if_data;
 	struct	ifmultihead if_multiaddrs; /* multicast addresses configured */
 	int	if_amcount;		/* number of all-multicast requests */
 /* procedure handles */
 	int	(*if_output)		/* output routine (enqueue) */
 		(struct ifnet *, struct mbuf *, struct sockaddr *,
 		     struct rtentry *);
 	void	(*if_input)		/* input routine (from h/w driver) */
 		(struct ifnet *, struct mbuf *);
 	void	(*if_start)		/* initiate output routine */
 		(struct ifnet *);
 	int	(*if_ioctl)		/* ioctl routine */
 		(struct ifnet *, u_long, caddr_t);
 	void	(*if_watchdog)		/* timer routine */
 		(struct ifnet *);
 	void	(*if_init)		/* Init routine */
 		(void *);
 	int	(*if_resolvemulti)	/* validate/resolve multicast */
 		(struct ifnet *, struct sockaddr **, struct sockaddr *);
 	struct	ifaddr	*if_addr;	/* pointer to link-level address */
 	void	*if_llsoftc;		/* link layer softc */
 	int	if_drv_flags;		/* driver-managed status flags */
 	u_int	if_spare_flags2;	/* spare flags 2 */
 	struct  ifaltq if_snd;		/* output queue (includes altq) */
 	const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */
 
 	void	*if_bridge;		/* bridge glue */
 
 	struct	lltable *lltables;	/* list of L3-L2 resolution tables */
 
 	struct	label *if_label;	/* interface MAC label */
 
 	/* these are only used by IPv6 */
 	struct	ifprefixhead if_prefixhead; /* list of prefixes per if */
 	void	*if_afdata[AF_MAX];
 	int	if_afdata_initialized;
 	struct	mtx if_afdata_mtx;
 	struct	task if_starttask;	/* task for IFF_NEEDSGIANT */
 	struct	task if_linktask;	/* task for link change events */
 	struct	mtx if_addr_mtx;	/* mutex to protect address lists */
 	LIST_ENTRY(ifnet) if_clones;	/* interfaces of a cloner */
 	TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
 					/* protected by if_addr_mtx */
 	void	*if_pf_kif;
 	void	*if_lagg;		/* lagg glue */
 	void	*if_pspare[10];		/* multiq/TOE 3; vimage 3; general use 4 */
 	int	if_ispare[2];		/* general use 2 */
 };
 
 typedef void if_init_f_t(void *);
 
 /*
  * XXX These aliases are terribly dangerous because they could apply
  * to anything.
  */
 #define	if_mtu		if_data.ifi_mtu
 #define	if_type		if_data.ifi_type
 #define if_physical	if_data.ifi_physical
 #define	if_addrlen	if_data.ifi_addrlen
 #define	if_hdrlen	if_data.ifi_hdrlen
 #define	if_metric	if_data.ifi_metric
 #define	if_link_state	if_data.ifi_link_state
 #define	if_baudrate	if_data.ifi_baudrate
 #define	if_hwassist	if_data.ifi_hwassist
 #define	if_ipackets	if_data.ifi_ipackets
 #define	if_ierrors	if_data.ifi_ierrors
 #define	if_opackets	if_data.ifi_opackets
 #define	if_oerrors	if_data.ifi_oerrors
 #define	if_collisions	if_data.ifi_collisions
 #define	if_ibytes	if_data.ifi_ibytes
 #define	if_obytes	if_data.ifi_obytes
 #define	if_imcasts	if_data.ifi_imcasts
 #define	if_omcasts	if_data.ifi_omcasts
 #define	if_iqdrops	if_data.ifi_iqdrops
 #define	if_noproto	if_data.ifi_noproto
 #define	if_lastchange	if_data.ifi_lastchange
 #define if_rawoutput(if, m, sa) if_output(if, m, sa, (struct rtentry *)NULL)
 
 /* for compatibility with other BSDs */
 #define	if_addrlist	if_addrhead
 #define	if_list		if_link
 #define	if_name(ifp)	((ifp)->if_xname)
 
 /*
  * Locks for address lists on the network interface.
  */
 #define	IF_ADDR_LOCK_INIT(if)	mtx_init(&(if)->if_addr_mtx,		\
 				    "if_addr_mtx", NULL, MTX_DEF)
 #define	IF_ADDR_LOCK_DESTROY(if)	mtx_destroy(&(if)->if_addr_mtx)
 #define	IF_ADDR_LOCK(if)	mtx_lock(&(if)->if_addr_mtx)
 #define	IF_ADDR_UNLOCK(if)	mtx_unlock(&(if)->if_addr_mtx)
 #define	IF_ADDR_LOCK_ASSERT(if)	mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
 
 /*
  * Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq)
  * are queues of messages stored on ifqueue structures
  * (defined above).  Entries are added to and deleted from these structures
  * by these macros, which should be called with ipl raised to splimp().
  */
 #define IF_LOCK(ifq)		mtx_lock(&(ifq)->ifq_mtx)
 #define IF_UNLOCK(ifq)		mtx_unlock(&(ifq)->ifq_mtx)
 #define	IF_LOCK_ASSERT(ifq)	mtx_assert(&(ifq)->ifq_mtx, MA_OWNED)
 #define	_IF_QFULL(ifq)		((ifq)->ifq_len >= (ifq)->ifq_maxlen)
 #define	_IF_DROP(ifq)		((ifq)->ifq_drops++)
 #define	_IF_QLEN(ifq)		((ifq)->ifq_len)
 
 #define	_IF_ENQUEUE(ifq, m) do { 				\
 	(m)->m_nextpkt = NULL;					\
 	if ((ifq)->ifq_tail == NULL) 				\
 		(ifq)->ifq_head = m; 				\
 	else 							\
 		(ifq)->ifq_tail->m_nextpkt = m; 		\
 	(ifq)->ifq_tail = m; 					\
 	(ifq)->ifq_len++; 					\
 } while (0)
 
 #define IF_ENQUEUE(ifq, m) do {					\
 	IF_LOCK(ifq); 						\
 	_IF_ENQUEUE(ifq, m); 					\
 	IF_UNLOCK(ifq); 					\
 } while (0)
 
 #define	_IF_PREPEND(ifq, m) do {				\
 	(m)->m_nextpkt = (ifq)->ifq_head; 			\
 	if ((ifq)->ifq_tail == NULL) 				\
 		(ifq)->ifq_tail = (m); 				\
 	(ifq)->ifq_head = (m); 					\
 	(ifq)->ifq_len++; 					\
 } while (0)
 
 #define IF_PREPEND(ifq, m) do {		 			\
 	IF_LOCK(ifq); 						\
 	_IF_PREPEND(ifq, m); 					\
 	IF_UNLOCK(ifq); 					\
 } while (0)
 
 /*
  * Remove the packet at the head of ifq and store it in m, without taking
  * the queue lock.  m is set to NULL when the queue is empty.  When a
  * packet is removed its m_nextpkt is cleared, ifq_len is decremented and
  * ifq_tail is reset to NULL if the queue became empty.  m must be an
  * lvalue since it is assigned to.
  */
 #define	_IF_DEQUEUE(ifq, m) do { 				\
 	(m) = (ifq)->ifq_head; 					\
 	if (m) { 						\
 		if (((ifq)->ifq_head = (m)->m_nextpkt) == NULL)	\
 			(ifq)->ifq_tail = NULL; 		\
 		(m)->m_nextpkt = NULL; 				\
 		(ifq)->ifq_len--; 				\
 	} 							\
 } while (0)
 
 /* Locked form: _IF_DEQUEUE() bracketed by IF_LOCK()/IF_UNLOCK(). */
 #define IF_DEQUEUE(ifq, m) do { 				\
 	IF_LOCK(ifq); 						\
 	_IF_DEQUEUE(ifq, m); 					\
 	IF_UNLOCK(ifq); 					\
 } while (0)
 
 /*
  * Peek at the head of ifq: m is set to ifq_head (NULL-ness is not
  * checked here) and the queue is left unchanged.  Note that IF_POLL is
  * a plain alias of _IF_POLL and does not take the queue lock.
  */
 #define	_IF_POLL(ifq, m)	((m) = (ifq)->ifq_head)
 #define	IF_POLL(ifq, m)		_IF_POLL(ifq, m)
 
 /*
  * Empty ifq without taking the queue lock: repeatedly dequeue the head
  * packet and release it with m_freem() until _IF_DEQUEUE() yields NULL.
  * The macro declares its own local "m" inside the do-block.
  */
 #define _IF_DRAIN(ifq) do { 					\
 	struct mbuf *m; 					\
 	for (;;) { 						\
 		_IF_DEQUEUE(ifq, m); 				\
 		if (m == NULL) 					\
 			break; 					\
 		m_freem(m); 					\
 	} 							\
 } while (0)
 
 /* Locked form: _IF_DRAIN() bracketed by IF_LOCK()/IF_UNLOCK(). */
 #define IF_DRAIN(ifq) do {					\
 	IF_LOCK(ifq);						\
 	_IF_DRAIN(ifq);						\
 	IF_UNLOCK(ifq);						\
 } while(0)
 
 #ifdef _KERNEL
 /* interface address change event */
 typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *);
 EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t);
 /* new interface arrival event */
 typedef void (*ifnet_arrival_event_handler_t)(void *, struct ifnet *);
 EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t);
 /* interface departure event */
 typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *);
 EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t);
 
 /*
  * interface groups
  */
 /* One interface group. */
 struct ifg_group {
 	char				 ifg_group[IFNAMSIZ];	/* group name buffer */
 	u_int				 ifg_refcnt;	/* reference count */
 	void				*ifg_pf_kif;	/* opaque pointer; presumably owned by pf -- confirm */
 	TAILQ_HEAD(, ifg_member)	 ifg_members;	/* list of ifg_member entries */
 	TAILQ_ENTRY(ifg_group)		 ifg_next;	/* linkage in a list of groups */
 };
 
 /* Entry on a group's ifg_members list, referring to one interface. */
 struct ifg_member {
 	TAILQ_ENTRY(ifg_member)	 ifgm_next;	/* linkage on ifg_members */
 	struct ifnet		*ifgm_ifp;	/* the member interface */
 };
 
 /* List entry that refers to one ifg_group. */
 struct ifg_list {
 	struct ifg_group	*ifgl_group;	/* the group referred to */
 	TAILQ_ENTRY(ifg_list)	 ifgl_next;	/* linkage in the list */
 };
 
 /* group attach event */
 typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *);
 EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t);
 /* group detach event */
 typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *);
 EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t);
 /* group change event */
 typedef void (*group_change_event_handler_t)(void *, const char *);
 EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
 
 /*
  * Operations on the interface's if_afdata_mtx mutex: INIT creates it as
  * an MTX_DEF mutex named "if_afdata", LOCK/UNLOCK acquire and release
  * it, TRYLOCK uses mtx_trylock() instead of mtx_lock(), and DESTROY
  * tears it down.
  */
 #define	IF_AFDATA_LOCK_INIT(ifp)	\
     mtx_init(&(ifp)->if_afdata_mtx, "if_afdata", NULL, MTX_DEF)
 #define	IF_AFDATA_LOCK(ifp)	mtx_lock(&(ifp)->if_afdata_mtx)
 #define	IF_AFDATA_TRYLOCK(ifp)	mtx_trylock(&(ifp)->if_afdata_mtx)
 #define	IF_AFDATA_UNLOCK(ifp)	mtx_unlock(&(ifp)->if_afdata_mtx)
 #define	IF_AFDATA_DESTROY(ifp)	mtx_destroy(&(ifp)->if_afdata_mtx)
 
 /*
  * Acquire the Giant mutex, but only when the interface has
  * IFF_NEEDSGIANT set in if_flags; otherwise do nothing.
  */
 #define	IFF_LOCKGIANT(ifp) do {						\
 	if ((ifp)->if_flags & IFF_NEEDSGIANT)				\
 		mtx_lock(&Giant);					\
 } while (0)
 
 /*
  * Counterpart of IFF_LOCKGIANT(): release Giant under the same
  * IFF_NEEDSGIANT condition.  The flag is re-read here, so it must not
  * change between the two calls for them to stay balanced.
  */
 #define	IFF_UNLOCKGIANT(ifp) do {					\
 	if ((ifp)->if_flags & IFF_NEEDSGIANT)				\
 		mtx_unlock(&Giant);					\
 } while (0)
 
 int	if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp,
 	    int adjust);
 #define	IF_HANDOFF(ifq, m, ifp)			\
 	if_handoff((struct ifqueue *)ifq, m, ifp, 0)
 #define	IF_HANDOFF_ADJ(ifq, m, ifp, adj)	\
 	if_handoff((struct ifqueue *)ifq, m, ifp, adj)
 
 void	if_start(struct ifnet *);
 
 /*
  * Enqueue packet m on ifq under the queue lock and report the result
  * through err (which must be an lvalue).
  *
  * When ALTQ is enabled on the queue the work is delegated to
  * ALTQ_ENQUEUE().  Otherwise, if the queue is full the packet is
  * released with m_freem() and err is set to ENOBUFS; if not, the packet
  * is appended and err is set to 0.  Any non-zero err also bumps
  * ifq_drops.  Callers must not touch m after a failure on the non-ALTQ
  * path, since it has been freed.
  */
 #define	IFQ_ENQUEUE(ifq, m, err)					\
 do {									\
 	IF_LOCK(ifq);							\
 	if (ALTQ_IS_ENABLED(ifq))					\
 		ALTQ_ENQUEUE(ifq, m, NULL, err);			\
 	else {								\
 		if (_IF_QFULL(ifq)) {					\
 			m_freem(m);					\
 			(err) = ENOBUFS;				\
 		} else {						\
 			_IF_ENQUEUE(ifq, m);				\
 			(err) = 0;					\
 		}							\
 	}								\
 	if (err)							\
 		(ifq)->ifq_drops++;					\
 	IF_UNLOCK(ifq);							\
 } while (0)
 
 /*
  * Take the next packet from ifq into m without locking.  The source is
  * chosen in priority order: tbr_dequeue_ptr(..., ALTDQ_REMOVE) when
  * TBR is enabled, ALTQ_DEQUEUE() when ALTQ is enabled, and the plain
  * _IF_DEQUEUE() otherwise.
  */
 #define	IFQ_DEQUEUE_NOLOCK(ifq, m)					\
 do {									\
 	if (TBR_IS_ENABLED(ifq))					\
 		(m) = tbr_dequeue_ptr(ifq, ALTDQ_REMOVE);		\
 	else if (ALTQ_IS_ENABLED(ifq))					\
 		ALTQ_DEQUEUE(ifq, m);					\
 	else								\
 		_IF_DEQUEUE(ifq, m);					\
 } while (0)
 
 /* Locked form: IFQ_DEQUEUE_NOLOCK() under IF_LOCK()/IF_UNLOCK(). */
 #define	IFQ_DEQUEUE(ifq, m)						\
 do {									\
 	IF_LOCK(ifq);							\
 	IFQ_DEQUEUE_NOLOCK(ifq, m);					\
 	IF_UNLOCK(ifq);							\
 } while (0)
 
 /*
  * Poll ifq for its next packet, storing it in m, without locking.
  * Mirrors IFQ_DEQUEUE_NOLOCK() but uses the polling operations:
  * tbr_dequeue_ptr(..., ALTDQ_POLL), ALTQ_POLL() or _IF_POLL().
  */
 #define	IFQ_POLL_NOLOCK(ifq, m)						\
 do {									\
 	if (TBR_IS_ENABLED(ifq))					\
 		(m) = tbr_dequeue_ptr(ifq, ALTDQ_POLL);			\
 	else if (ALTQ_IS_ENABLED(ifq))					\
 		ALTQ_POLL(ifq, m);					\
 	else								\
 		_IF_POLL(ifq, m);					\
 } while (0)
 
 /* Locked form: IFQ_POLL_NOLOCK() under IF_LOCK()/IF_UNLOCK(). */
 #define	IFQ_POLL(ifq, m)						\
 do {									\
 	IF_LOCK(ifq);							\
 	IFQ_POLL_NOLOCK(ifq, m);					\
 	IF_UNLOCK(ifq);							\
 } while (0)
 
 /*
  * Discard everything queued on ifq without locking: ALTQ_PURGE() when
  * ALTQ is enabled, otherwise _IF_DRAIN().
  */
 #define	IFQ_PURGE_NOLOCK(ifq)						\
 do {									\
 	if (ALTQ_IS_ENABLED(ifq)) {					\
 		ALTQ_PURGE(ifq);					\
 	} else								\
 		_IF_DRAIN(ifq);						\
 } while (0)
 
 /* Locked form: IFQ_PURGE_NOLOCK() under IF_LOCK()/IF_UNLOCK(). */
 #define	IFQ_PURGE(ifq)							\
 do {									\
 	IF_LOCK(ifq);							\
 	IFQ_PURGE_NOLOCK(ifq);						\
 	IF_UNLOCK(ifq);							\
 } while (0)
 
 #define	IFQ_SET_READY(ifq)						\
 	do { ((ifq)->altq_flags |= ALTQF_READY); } while (0)
 
 #define	IFQ_LOCK(ifq)			IF_LOCK(ifq)
 #define	IFQ_UNLOCK(ifq)			IF_UNLOCK(ifq)
 #define	IFQ_LOCK_ASSERT(ifq)		IF_LOCK_ASSERT(ifq)
 #define	IFQ_IS_EMPTY(ifq)		((ifq)->ifq_len == 0)
 #define	IFQ_INC_LEN(ifq)		((ifq)->ifq_len++)
 #define	IFQ_DEC_LEN(ifq)		(--(ifq)->ifq_len)
 #define	IFQ_INC_DROPS(ifq)		((ifq)->ifq_drops++)
 #define	IFQ_SET_MAXLEN(ifq, len)	((ifq)->ifq_maxlen = (len))
 
 /*
  * The IFF_DRV_OACTIVE test should really occur in the device driver, not in
  * the handoff logic, as that flag is locked by the device driver.
  */
 /*
  * Queue packet m on ifp's send queue (if_snd) and, on success, update
  * the output statistics and kick the driver.
  *
  * The packet length and flags are copied into locals before the
  * enqueue and only those copies are used afterwards, so m is not
  * dereferenced once it has been handed to IFQ_ENQUEUE().  On success
  * (err == 0) if_obytes grows by len + adj, if_omcasts is incremented
  * for M_MCAST packets, and if_start() is called unless IFF_DRV_OACTIVE
  * is set in if_drv_flags.  The macro declares locals named "len" and
  * "mflags", so arguments that use those names would be shadowed.
  */
 #define	IFQ_HANDOFF_ADJ(ifp, m, adj, err)				\
 do {									\
 	int len;							\
 	short mflags;							\
 									\
 	len = (m)->m_pkthdr.len;					\
 	mflags = (m)->m_flags;						\
 	IFQ_ENQUEUE(&(ifp)->if_snd, m, err);				\
 	if ((err) == 0) {						\
 		(ifp)->if_obytes += len + (adj);			\
 		if (mflags & M_MCAST)					\
 			(ifp)->if_omcasts++;				\
 		if (((ifp)->if_drv_flags & IFF_DRV_OACTIVE) == 0)	\
 			if_start(ifp);					\
 	}								\
 } while (0)
 
 /* IFQ_HANDOFF_ADJ() with a byte-count adjustment of 0. */
 #define	IFQ_HANDOFF(ifp, m, err)					\
 	IFQ_HANDOFF_ADJ(ifp, m, 0, err)
 
 /*
  * Dequeue the next packet for a driver into m, using the driver-side
  * staging list (ifq_drv_head/ifq_drv_tail/ifq_drv_len).
  *
  * Fast path: if the staging list is non-empty its head is removed and
  * returned; the queue lock is not taken on this path.
  * Slow path: with the queue lock held, one packet is dequeued from the
  * main queue into m, and then further packets are moved from the main
  * queue onto the staging list until ifq_drv_len reaches ifq_drv_maxlen
  * or the main queue yields NULL.  m is NULL if nothing was available.
  */
 #define	IFQ_DRV_DEQUEUE(ifq, m)						\
 do {									\
 	(m) = (ifq)->ifq_drv_head;					\
 	if (m) {							\
 		if (((ifq)->ifq_drv_head = (m)->m_nextpkt) == NULL)	\
 			(ifq)->ifq_drv_tail = NULL;			\
 		(m)->m_nextpkt = NULL;					\
 		(ifq)->ifq_drv_len--;					\
 	} else {							\
 		IFQ_LOCK(ifq);						\
 		IFQ_DEQUEUE_NOLOCK(ifq, m);				\
 		while ((ifq)->ifq_drv_len < (ifq)->ifq_drv_maxlen) {	\
 			struct mbuf *m0;				\
 			IFQ_DEQUEUE_NOLOCK(ifq, m0);			\
 			if (m0 == NULL)					\
 				break;					\
 			m0->m_nextpkt = NULL;				\
 			if ((ifq)->ifq_drv_tail == NULL)		\
 				(ifq)->ifq_drv_head = m0;		\
 			else						\
 				(ifq)->ifq_drv_tail->m_nextpkt = m0;	\
 			(ifq)->ifq_drv_tail = m0;			\
 			(ifq)->ifq_drv_len++;				\
 		}							\
 		IFQ_UNLOCK(ifq);					\
 	}								\
 } while (0)
 
 /*
  * Put packet m back at the head of the driver-side staging list and
  * increment ifq_drv_len.  No lock is taken and ifq_drv_maxlen is not
  * checked.
  */
 #define	IFQ_DRV_PREPEND(ifq, m)						\
 do {									\
 	(m)->m_nextpkt = (ifq)->ifq_drv_head;				\
 	if ((ifq)->ifq_drv_tail == NULL)				\
 		(ifq)->ifq_drv_tail = (m);				\
 	(ifq)->ifq_drv_head = (m);					\
 	(ifq)->ifq_drv_len++;						\
 } while (0)
 
 /* True only when both the staging list and the main queue have length 0. */
 #define	IFQ_DRV_IS_EMPTY(ifq)						\
 	(((ifq)->ifq_drv_len == 0) && ((ifq)->ifq_len == 0))
 
 /*
  * Discard all packets held for a driver: every mbuf on the staging list
  * is released with m_freem() (the next pointer is saved first, since
  * the current packet is gone after the free), the staging list is reset
  * to empty, and then the main queue is emptied with IFQ_PURGE().  The
  * staging list is walked without taking the queue lock; IFQ_PURGE()
  * takes it itself.
  */
 #define	IFQ_DRV_PURGE(ifq)						\
 do {									\
 	struct mbuf *m, *n = (ifq)->ifq_drv_head;			\
 	while((m = n) != NULL) {					\
 		n = m->m_nextpkt;					\
 		m_freem(m);						\
 	}								\
 	(ifq)->ifq_drv_head = (ifq)->ifq_drv_tail = NULL;		\
 	(ifq)->ifq_drv_len = 0;						\
 	IFQ_PURGE(ifq);							\
 } while (0)
 
 /*
  * 72 was chosen below because it is the size of a TCP/IP
  * header (40) + the minimum mss (32).
  */
 #define	IF_MINMTU	72
 #define	IF_MAXMTU	65535
 
 #endif /* _KERNEL */
 
 /*
  * The ifaddr structure contains information about one address
  * of an interface.  They are maintained by the different address families,
  * are allocated and attached when an address is set, and are linked
  * together so all addresses for an interface can be located.
  *
  * NOTE: a 'struct ifaddr' is always at the beginning of a larger
  * chunk of malloc'ed memory, where we store the three addresses
  * (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here.
  */
 struct ifaddr {
 	struct	sockaddr *ifa_addr;	/* address of interface */
 	struct	sockaddr *ifa_dstaddr;	/* other end of p-to-p link */
 #define	ifa_broadaddr	ifa_dstaddr	/* broadcast address interface */
 	struct	sockaddr *ifa_netmask;	/* used to determine subnet */
 	struct	if_data if_data;	/* not all members are meaningful */
 	struct	ifnet *ifa_ifp;		/* back-pointer to interface */
 	TAILQ_ENTRY(ifaddr) ifa_link;	/* queue macro glue */
 	void	(*ifa_rtrequest)	/* check or clean routes (+ or -)'d */
 		(int, struct rtentry *, struct rt_addrinfo *);
 	u_short	ifa_flags;		/* mostly rt_flags for cloning */
 	u_int	ifa_refcnt;		/* references to this structure */
 	int	ifa_metric;		/* cost of going out this interface */
 	int (*ifa_claim_addr)		/* check if an addr goes to this if */
 		(struct ifaddr *, struct sockaddr *);
 	struct mtx ifa_mtx;
 };
 #define	IFA_ROUTE	RTF_UP		/* route installed */
 
 /* for compatibility with other BSDs */
 #define	ifa_list	ifa_link
 
 /*
  * Operations on an ifaddr's ifa_mtx mutex: INIT creates it as an MTX_DEF
  * mutex named "ifaddr", LOCK/UNLOCK acquire and release it, DESTROY
  * tears it down.
  */
 #define	IFA_LOCK_INIT(ifa)	\
     mtx_init(&(ifa)->ifa_mtx, "ifaddr", NULL, MTX_DEF)
 #define	IFA_LOCK(ifa)		mtx_lock(&(ifa)->ifa_mtx)
 #define	IFA_UNLOCK(ifa)		mtx_unlock(&(ifa)->ifa_mtx)
 #define	IFA_DESTROY(ifa)	mtx_destroy(&(ifa)->ifa_mtx)
 
 /*
  * The prefix structure contains information about one prefix
  * of an interface.  They are maintained by the different address families,
  * are allocated and attached when a prefix or an address is set,
  * and are linked together so all prefixes for an interface can be located.
  */
 struct ifprefix {
 	struct	sockaddr *ifpr_prefix;	/* prefix of interface */
 	struct	ifnet *ifpr_ifp;	/* back-pointer to interface */
 	TAILQ_ENTRY(ifprefix) ifpr_list; /* queue macro glue */
 	u_char	ifpr_plen;		/* prefix length in bits */
 	u_char	ifpr_type;		/* protocol dependent prefix type */
 };
 
 /*
  * Multicast address structure.  This is analogous to the ifaddr
  * structure except that it keeps track of multicast addresses.
  */
 struct ifmultiaddr {
 	TAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */
 	struct	sockaddr *ifma_addr; 	/* address this membership is for */
 	struct	sockaddr *ifma_lladdr;	/* link-layer translation, if any */
 	struct	ifnet *ifma_ifp;	/* back-pointer to interface */
 	u_int	ifma_refcount;		/* reference count */
 	void	*ifma_protospec;	/* protocol-specific state, if any */
 	struct	ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */
 };
 
 #ifdef _KERNEL
 /*
  * Drop one reference on ifa.  With the ifaddr mutex held, the macro
  * asserts that ifa_refcnt is positive and decrements it.  When the
  * count reaches zero the mutex is destroyed (it is not unlocked first)
  * and the structure is released with free(ifa, M_IFADDR); otherwise the
  * mutex is simply unlocked.  ifa must not be used after the last
  * reference has been dropped.
  */
 #define	IFAFREE(ifa)					\
 	do {						\
 		IFA_LOCK(ifa);				\
 		KASSERT((ifa)->ifa_refcnt > 0,		\
 		    ("ifa %p !(ifa_refcnt > 0)", ifa));	\
 		if (--(ifa)->ifa_refcnt == 0) {		\
 			IFA_DESTROY(ifa);		\
 			free(ifa, M_IFADDR);		\
 		} else 					\
 			IFA_UNLOCK(ifa);		\
 	} while (0)
 
 /* Take one additional reference on ifa, under the ifaddr mutex. */
 #define IFAREF(ifa)					\
 	do {						\
 		IFA_LOCK(ifa);				\
 		++(ifa)->ifa_refcnt;			\
 		IFA_UNLOCK(ifa);			\
 	} while (0)
 
 /*
  * Global ifnet_lock mutex and its wrappers.  It is initialised as a
  * recursive default mutex (MTX_DEF | MTX_RECURSE) named "ifnet".  The
  * read-lock macros are plain aliases of the write-lock ones, so readers
  * currently exclude each other just as writers do.
  */
 extern	struct mtx ifnet_lock;
 #define	IFNET_LOCK_INIT() \
     mtx_init(&ifnet_lock, "ifnet", NULL, MTX_DEF | MTX_RECURSE)
 #define	IFNET_WLOCK()		mtx_lock(&ifnet_lock)
 #define	IFNET_WUNLOCK()		mtx_unlock(&ifnet_lock)
 #define	IFNET_RLOCK()		IFNET_WLOCK()
 #define	IFNET_RUNLOCK()		IFNET_WUNLOCK()
 
 /* One slot of ifindex_table, as accessed by ifnet_byindex()/ifdev_byindex(). */
 struct ifindex_entry {
 	struct	ifnet *ife_ifnet;	/* interface stored at this index */
 	struct cdev *ife_dev;		/* cdev stored at this index */
 };
 
 #define ifnet_byindex(idx)	ifindex_table[(idx)].ife_ifnet
 /*
  * Given the index, ifaddr_byindex() returns the one and only
  * link-level ifaddr for the interface. You are not supposed to use
  * it to traverse the list of addresses associated to the interface.
  */
 #define ifaddr_byindex(idx)	ifnet_byindex(idx)->if_addr
 #define ifdev_byindex(idx)	ifindex_table[(idx)].ife_dev
 
 extern	struct ifnethead ifnet;
 extern	struct ifindex_entry *ifindex_table;
 extern	int ifqmaxlen;
 extern	struct ifnet *loif;	/* first loopback interface */
 extern	int if_index;
 
 int	if_addgroup(struct ifnet *, const char *);
 int	if_delgroup(struct ifnet *, const char *);
 int	if_addmulti(struct ifnet *, struct sockaddr *, struct ifmultiaddr **);
 int	if_allmulti(struct ifnet *, int);
 struct	ifnet* if_alloc(u_char);
 void	if_attach(struct ifnet *);
 int	if_delmulti(struct ifnet *, struct sockaddr *);
 void	if_delmulti_ifma(struct ifmultiaddr *);
 void	if_detach(struct ifnet *);
 void	if_purgeaddrs(struct ifnet *);
 void	if_purgemaddrs(struct ifnet *);
 void	if_down(struct ifnet *);
 struct ifmultiaddr *
 	if_findmulti(struct ifnet *, struct sockaddr *);
 void	if_free(struct ifnet *);
 void	if_free_type(struct ifnet *, u_char);
 void	if_initname(struct ifnet *, const char *, int);
 void	if_link_state_change(struct ifnet *, int);
 int	if_printf(struct ifnet *, const char *, ...) __printflike(2, 3);
 int	if_setlladdr(struct ifnet *, const u_char *, int);
 void	if_up(struct ifnet *);
 /*void	ifinit(void);*/ /* declared in systm.h for main() */
 int	ifioctl(struct socket *, u_long, caddr_t, struct thread *);
 int	ifpromisc(struct ifnet *, int);
 struct	ifnet *ifunit(const char *);
 
 struct	ifaddr *ifa_ifwithaddr(struct sockaddr *);
 struct	ifaddr *ifa_ifwithbroadaddr(struct sockaddr *);
 struct	ifaddr *ifa_ifwithdstaddr(struct sockaddr *);
 struct	ifaddr *ifa_ifwithnet(struct sockaddr *);
 struct	ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *);
+struct	ifaddr *ifa_ifwithroute_fib(int, struct sockaddr *, struct sockaddr *, u_int);
+
 struct	ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *);
 
 int	if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen);
 
 typedef	void *if_com_alloc_t(u_char type, struct ifnet *ifp);
 typedef	void if_com_free_t(void *com, u_char type);
 void	if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f);
 void	if_deregister_com_alloc(u_char type);
 
 /*
  * Applies LLADDR() to the interface's if_addr->ifa_addr, viewed as a
  * struct sockaddr_dl.  Neither if_addr nor ifa_addr is checked for NULL.
  */
 #define IF_LLADDR(ifp)							\
     LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr))
 
 #ifdef DEVICE_POLLING
 enum poll_cmd {	POLL_ONLY, POLL_AND_CHECK_STATUS };
 
 typedef	void poll_handler_t(struct ifnet *ifp, enum poll_cmd cmd, int count);
 int    ether_poll_register(poll_handler_t *h, struct ifnet *ifp);
 int    ether_poll_deregister(struct ifnet *ifp);
 #endif /* DEVICE_POLLING */
 
 #endif /* _KERNEL */
 
 #endif /* !_NET_IF_VAR_H_ */
Index: head/sys/net/radix_mpath.c
===================================================================
--- head/sys/net/radix_mpath.c	(revision 178887)
+++ head/sys/net/radix_mpath.c	(revision 178888)
@@ -1,340 +1,340 @@
 /*	$KAME: radix_mpath.c,v 1.17 2004/11/08 10:29:39 itojun Exp $	*/
 
 /*
  * Copyright (C) 2001 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE
  * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS
  * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
  * PROPERTIES.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/domain.h>
 #include <sys/syslog.h>
 #include <net/radix.h>
 #include <net/radix_mpath.h>
 #include <net/route.h>
 #include <net/if.h>
 #include <net/if_var.h>
 
 /*
  * give some jitter to hash, to avoid synchronization between routers
  */
 static u_int32_t hashjitter;
 
 /*
  * Report the multipath flag recorded in the radix head.
  * Returns the value of rnh->rnh_multipath unchanged.
  */
 int
 rn_mpath_capable(struct radix_node_head *rnh)
 {
 	int capable;
 
 	capable = rnh->rnh_multipath;
 	return (capable);
 }
 
 /*
  * Return the next node of the multipath chain that rn belongs to, i.e.
  * the duplicated-key successor of rn provided it carries the same mask
  * pointer.  Returns NULL when rn has no successor or the successor has
  * a different mask.
  */
 struct radix_node *
 rn_mpath_next(struct radix_node *rn)
 {
 	struct radix_node *dup = rn->rn_dupedkey;
 
 	if (dup == NULL || dup->rn_mask != rn->rn_mask)
 		return (NULL);
 	return (dup);
 }
 
 /*
  * Count the nodes of the multipath chain starting at rn, rn itself
  * included; the result is therefore always at least 1.
  */
 int
 rn_mpath_count(struct radix_node *rn)
 {
 	struct radix_node *cur;
 	int paths;
 
 	paths = 1;
 	for (cur = rn_mpath_next(rn); cur != NULL; cur = rn_mpath_next(cur))
 		paths++;
 	return (paths);
 }
 
 /*
  * Find the entry of rt's multipath chain whose gateway matches gate.
  *
  * If rt has no multipath successor, rt itself is returned without
  * looking at gate.  Otherwise a NULL gate yields NULL, and the chain is
  * searched starting at rt; NULL is returned when no entry matches.
  */
 struct rtentry *
 rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
 {
 	struct radix_node *node;
 	struct rtentry *cand;
 
 	if (rn_mpath_next((struct radix_node *)rt) == NULL)
 		return (rt);
 
 	if (gate == NULL)
 		return (NULL);
 
 	for (node = (struct radix_node *)rt; node != NULL;
 	    node = rn_mpath_next(node)) {
 		cand = (struct rtentry *)node;
 		if (cand->rt_gateway->sa_family == AF_LINK) {
 			/*
 			 * An address alias sharing its prefix with another
 			 * address is being removed.  rt_gateway is a
 			 * special sockaddr_dl structure here, so the
 			 * interface address is compared instead.
 			 */
 			if (memcmp(cand->rt_ifa->ifa_addr, gate,
 			    gate->sa_len) == 0)
 				return (cand);
 		} else {
 			if (cand->rt_gateway->sa_len == gate->sa_len &&
 			    memcmp(cand->rt_gateway, gate, gate->sa_len) == 0)
 				return (cand);
 		}
 	}
 
 	return (NULL);
 }
 
 /*
  * Walk the multipath chain that starts at headrt and unlink rt from it.
  * The head entry itself is never unlinked.  The caller frees rt.
  *
  * Returns 1 when rt was found and unlinked, 0 otherwise (including
  * when either argument is NULL).
  */
 int
 rt_mpath_deldup(struct rtentry *headrt, struct rtentry *rt)
 {
 	struct radix_node *prev, *cur;
 	struct radix_node *victim = (struct radix_node *)rt;
 
 	if (headrt == NULL || rt == NULL)
 		return (0);
 
 	prev = (struct radix_node *)headrt;
 	for (cur = rn_mpath_next(prev); cur != NULL;
 	    prev = cur, cur = rn_mpath_next(cur)) {
 		if (cur != victim)
 			continue;
 
 		/* Splice the node out and mark both of its radix nodes inactive. */
 		prev->rn_dupedkey = cur->rn_dupedkey;
 		cur->rn_dupedkey = NULL;
 		cur->rn_flags &= ~RNF_ACTIVE;
 		cur[1].rn_flags &= ~RNF_ACTIVE;
 		return (1);
 	}
 
 	return (0);
 }
 
 /*
  * Check whether the table already holds an entry with the same
  * key/mask/gateway as rt.
  *
  * rnh     - radix head to search (its rnh_lookup method is used)
  * rt      - candidate route; its key and gateway are compared
  * netmask - mask that goes with rt, or NULL for none
  *
  * Returns EEXIST when an entry other than rt itself has an identical
  * key, mask and gateway, and 0 in every other case (nothing found, root
  * node found, or key/mask/gateway differ).
  */
 int
 rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
     struct sockaddr *netmask)
 {
 	struct radix_node *rn, *rn1;
 	struct rtentry *rt1;
 	char *p, *q, *eq;
 	int same, l, skip;
 
 	rn = (struct radix_node *)rt;
 	rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
 	if (!rn1 || rn1->rn_flags & RNF_ROOT)
 		return 0;
 
 	/*
 	 * unlike other functions we have in this file, we have to check
 	 * all key/mask/gateway as rnh_lookup can match less specific entry.
 	 */
 	rt1 = (struct rtentry *)rn1;
 
 	/* compare key. */
 	if (rt_key(rt1)->sa_len != rt_key(rt)->sa_len ||
 	    bcmp(rt_key(rt1), rt_key(rt), rt_key(rt1)->sa_len))
 		goto different;
 
 	/* key was the same.  compare netmask.  hairy... */
 	if (rt_mask(rt1) && netmask) {
 		/* bytes before rn_offset of the tree top are not compared */
 		skip = rnh->rnh_treetop->rn_offset;
 		if (rt_mask(rt1)->sa_len > netmask->sa_len) {
 			/*
 			 * as rt_mask(rt1) is made optimal by radix.c,
 			 * there must be some 1-bits on rt_mask(rt1)
 			 * after netmask->sa_len.  therefore, in
 			 * this case, the entries are different.
 			 */
 			if (rt_mask(rt1)->sa_len > skip)
 				goto different;
 			else {
 				/* no bits to compare, i.e. same*/
 				goto maskmatched;
 			}
 		}
 
 		/* here rt_mask(rt1) is no longer than netmask */
 		l = rt_mask(rt1)->sa_len;
 		if (skip > l) {
 			/* no bits to compare, i.e. same */
 			goto maskmatched;
 		}
 		p = (char *)rt_mask(rt1);
 		q = (char *)netmask;
 		/* compare the bytes both masks have, past the skipped prefix */
 		if (bcmp(p + skip, q + skip, l - skip))
 			goto different;
 		/*
 		 * need to go through all the bits, as netmask is not
 		 * optimal and can contain trailing 0s
 		 */
 		eq = (char *)netmask + netmask->sa_len;
 		q += l;
 		same = 1;
 		/* any non-zero byte in netmask beyond l means a different mask */
 		while (eq > q)
 			if (*q++) {
 				same = 0;
 				break;
 			}
 		if (!same)
 			goto different;
 	} else if (!rt_mask(rt1) && !netmask)
 		; /* no mask to compare, i.e. same */
 	else {
 		/* one has mask and the other does not, different */
 		goto different;
 	}
 
 maskmatched:
 
 	/* key/mask were the same.  compare gateway for all multipaths */
 	do {
 		rt1 = (struct rtentry *)rn1;
 
 		/* sanity: no use in comparing the same thing */
 		if (rn1 == rn)
 			continue;
         
 		/*
 		 * For AF_LINK gateways the interface addresses are compared
 		 * rather than the gateways themselves; "continue" moves on
 		 * to the next multipath entry on any mismatch.
 		 */
 		if (rt1->rt_gateway->sa_family == AF_LINK) {
 			if (rt1->rt_ifa->ifa_addr->sa_len != rt->rt_ifa->ifa_addr->sa_len ||
 			    bcmp(rt1->rt_ifa->ifa_addr, rt->rt_ifa->ifa_addr, 
 			    rt1->rt_ifa->ifa_addr->sa_len))
 				continue;
 		} else {
 			if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len ||
 			    bcmp(rt1->rt_gateway, rt->rt_gateway,
 			    rt1->rt_gateway->sa_len))
 				continue;
 		}
 
 		/* all key/mask/gateway are the same.  conflicting entry. */
 		return EEXIST;
 	} while ((rn1 = rn_mpath_next(rn1)) != NULL);
 
 different:
 	return 0;
 }
 
 /*
  * Fill in ro->ro_rt with a route to ro->ro_dst, choosing among
  * multipath entries by hash.
  *
  * ro     - route to fill in; left untouched if it already holds an
  *          entry with an interface and RTF_UP set
  * hash   - caller-supplied value used for the modulo-N selection
  * fibnum - passed through to rtalloc1_fib() for the lookup
  *
  * On return ro->ro_rt is NULL if the lookup failed; otherwise it is
  * either the first match (non-multipath, or selection failed) or the
  * selected multipath entry with an added reference.
  */
 void
-rtalloc_mpath(struct route *ro, int hash)
+rtalloc_mpath_fib(struct route *ro, int hash, u_int fibnum)
 {
 	struct radix_node *rn0, *rn;
 	int n;
 
 	/*
 	 * XXX we don't attempt to lookup cached route again; what should
 	 * be done for sendto(3) case?
 	 */
 	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
 		return;				 /* XXX */
-	ro->ro_rt = rtalloc1(&ro->ro_dst, 1, 0UL);
+	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0UL, fibnum);
 
 	/* if the route does not exist or it is not multipath, don't care */
 	if (ro->ro_rt == NULL)
 		return;
 	/* presumably the lookup returns the entry locked, hence the unlocks below -- confirm */
 	if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) {
 		RT_UNLOCK(ro->ro_rt);
 		return;
 	}
 
 	/* beyond here, we use rn as the master copy */
 	rn0 = rn = (struct radix_node *)ro->ro_rt;
 	n = rn_mpath_count(rn0);
 
 	/* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
 	/*
 	 * NOTE(review): hash is a signed int while hashjitter is u_int32_t.
 	 * After the addition is converted back to int the value may be
 	 * negative, in which case hash % n is <= 0, the loop below does not
 	 * run and the first entry is kept.  Confirm whether that bias is
 	 * intended.
 	 */
 	hash += hashjitter;
 	hash %= n;
 	while (hash-- > 0 && rn) {
 		/* stay within the multipath routes */
 		if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
 			break;
 		rn = rn->rn_dupedkey;
 	}
 
 	/* XXX try filling rt_gwroute and avoid unreachable gw  */
 
 	/* if gw selection fails, use the first match (default) */
 	if (!rn) {
 		RT_UNLOCK(ro->ro_rt);
 		return;
 	}
 	
 	/* swap the reference on the first match for one on the selected entry */
 	rtfree(ro->ro_rt);
 	ro->ro_rt = (struct rtentry *)rn;
 	RT_LOCK(ro->ro_rt);
 	RT_ADDREF(ro->ro_rt);
 	RT_UNLOCK(ro->ro_rt);
 }
 
 extern int	in6_inithead(void **head, int off);
 extern int	in_inithead(void **head, int off);
 
 /*
  * Initialise a radix head through in_inithead() and mark it as
  * multipath.  The hash jitter is re-seeded from arc4random() on every
  * call, before the head is initialised.
  *
  * Returns 1 when in_inithead() returned 1, and 0 otherwise.
  */
 int
 rn4_mpath_inithead(void **head, int off)
 {
 	struct radix_node_head *rnh;
 
 	hashjitter = arc4random();
 	if (in_inithead(head, off) != 1)
 		return (0);
 
 	rnh = (struct radix_node_head *)*head;
 	rnh->rnh_multipath = 1;
 	return (1);
 }
 
 #ifdef INET6
 /*
  * Initialise a radix head through in6_inithead() and mark it as
  * multipath.  The hash jitter is re-seeded from arc4random() on every
  * call, before the head is initialised.
  *
  * Returns 1 when in6_inithead() returned 1, and 0 otherwise.
  */
 int
 rn6_mpath_inithead(void **head, int off)
 {
 	struct radix_node_head *rnh;
 
 	hashjitter = arc4random();
 	if (in6_inithead(head, off) != 1)
 		return (0);
 
 	rnh = (struct radix_node_head *)*head;
 	rnh->rnh_multipath = 1;
 	return (1);
 }
 
 #endif
Index: head/sys/net/radix_mpath.h
===================================================================
--- head/sys/net/radix_mpath.h	(revision 178887)
+++ head/sys/net/radix_mpath.h	(revision 178888)
@@ -1,62 +1,63 @@
 /*	$KAME: radix_mpath.h,v 1.10 2004/11/06 15:44:28 itojun Exp $	*/
 
 /*
  * Copyright (C) 2001 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE
  * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS
  * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
  * PROPERTIES.
  */
 /* $FreeBSD$ */
 
 #ifndef _NET_RADIX_MPATH_H_
 #define	_NET_RADIX_MPATH_H_
 
 #ifdef _KERNEL
 /*
  * Radix tree API with multipath support
  */
 struct route;
 struct rtentry;
 struct sockaddr;
 int	rn_mpath_capable(struct radix_node_head *);
 struct radix_node *rn_mpath_next(struct radix_node *);
 int rn_mpath_count(struct radix_node *);
 struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *);
 int rt_mpath_conflict(struct radix_node_head *, struct rtentry *,
     struct sockaddr *);
 /*
  * rtalloc_mpath_fib() takes an additional u_int argument after the
  * hash; rtalloc_mpath() is kept as a macro that calls it with 0 for
  * that argument, so existing two-argument callers still compile.
  */
-void rtalloc_mpath(struct route *, int);
+void rtalloc_mpath_fib(struct route *, int, u_int);
+#define rtalloc_mpath(_route, _hash) rtalloc_mpath_fib((_route), (_hash), 0)
 struct radix_node *rn_mpath_lookup(void *, void *,
     struct radix_node_head *);
 int rt_mpath_deldup(struct rtentry *, struct rtentry *);
 int	rn4_mpath_inithead(void **, int);
 int	rn6_mpath_inithead(void **, int);
 
 #endif
 
 #endif /* _NET_RADIX_MPATH_H_ */
Index: head/sys/net/route.c
===================================================================
--- head/sys/net/route.c	(revision 178887)
+++ head/sys/net/route.c	(revision 178888)
@@ -1,1481 +1,1743 @@
 /*-
  * Copyright (c) 1980, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)route.c	8.3.1.1 (Berkeley) 2/23/95
  * $FreeBSD$
  */
+/************************************************************************
+ * Note: In this file a 'fib' is a "forwarding information base",	*
+ * which is the new name for an in-kernel routing (next hop) table.	*
+ ***********************************************************************/
 
 #include "opt_inet.h"
+#include "opt_route.h"
 #include "opt_mrouting.h"
 #include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/sysproto.h>
+#include <sys/proc.h>
 #include <sys/domain.h>
 #include <sys/kernel.h>
 
 #include <net/if.h>
 #include <net/route.h>
 
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 
 #include <netinet/in.h>
 #include <netinet/ip_mroute.h>
 
 #include <vm/uma.h>
 
+#ifndef ROUTETABLES
+ #define RT_NUMFIBS 1
+ #define RT_MAXFIBS 1
+#else
+ /* while we use 4 bits in the mbuf flags,
+  * we are limited to 16
+  */
+ #if ROUTETABLES > RT_MAXFIBS
+  #define RT_NUMFIBS RT_MAXFIBS
+  #error "ROUTETABLES defined too big"
+ #else
+  #if ROUTETABLES == 0
+   #define RT_NUMFIBS 1
+  #else
+   #define RT_NUMFIBS ROUTETABLES
+  #endif
+ #endif
+#endif
+
+u_int rt_numfibs = RT_NUMFIBS;
+SYSCTL_INT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
+/* Eventually this will be a tunable */
+TUNABLE_INT("net.fibs", &rt_numfibs);
+
 static struct rtstat rtstat;
-struct radix_node_head *rt_tables[AF_MAX+1];
 
+/* by default only the first 'row' of tables will be accessed. */
+/* 
+ * XXXMRT When we fix netstat, and do this differently,
+ * we can allocate this dynamically. As long as we are keeping
+ * things backwards compatible we need to allocate this
+ * statically.
+ */
+struct radix_node_head *rt_tables[RT_MAXFIBS][AF_MAX+1];
+
 static int	rttrash;		/* routes not in table but not freed */
 
 static void rt_maskedcopy(struct sockaddr *,
 	    struct sockaddr *, struct sockaddr *);
-static void rtable_init(void **);
 
 /* compare two sockaddr structures */
 #define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
 
 /*
  * Convert a 'struct radix_node *' to a 'struct rtentry *'.
  * The operation can be done safely (in this code) because a
  * 'struct rtentry' starts with two 'struct radix_node''s, the first
  * one representing leaf nodes in the routing tree, which is
  * what the code in radix.c passes us as a 'struct radix_node'.
  *
  * But because there are a lot of assumptions in this conversion,
  * do not cast explicitly, but always use the macro below.
  */
 #define RNTORT(p)	((struct rtentry *)(p))
 
-static void
-rtable_init(void **table)
+static uma_zone_t rtzone;		/* Routing table UMA zone. */
+
+#if 0
+/* default fib for tunnels to use */
+u_int tunnel_fib = 0;
+SYSCTL_INT(_net, OID_AUTO, tunnelfib, CTLFLAG_RD, &tunnel_fib, 0, "");
+#endif
+
+/*
+ * handler for net.my_fibnum
+ */
+static int
+sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
 {
-	struct domain *dom;
-	for (dom = domains; dom; dom = dom->dom_next)
-		if (dom->dom_rtattach)
-			dom->dom_rtattach(&table[dom->dom_family],
-			    dom->dom_rtoffset);
+        int fibnum;
+        int error;
+ 
+        fibnum = curthread->td_proc->p_fibnum;
+        error = sysctl_handle_int(oidp, &fibnum, 0, req);
+        return (error);
 }
 
-static uma_zone_t rtzone;		/* Routing table UMA zone. */
+SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD,
+            NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller");
 
 static void
 route_init(void)
 {
+	int table;
+	struct domain *dom;
+	int fam;
+
+	/* whack the tunable ints into line. */
+	if (rt_numfibs > RT_MAXFIBS)
+		rt_numfibs = RT_MAXFIBS;
+	if (rt_numfibs == 0)
+		rt_numfibs = 1;
 	rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, 0);
 	rn_init();	/* initialize all zeroes, all ones, mask table */
-	rtable_init((void **)rt_tables);
+
+	for (dom = domains; dom; dom = dom->dom_next) {
+		if (dom->dom_rtattach)  {
+			for  (table = 0; table < rt_numfibs; table++) {
+				if ( (fam = dom->dom_family) == AF_INET ||
+				    table == 0) {
+ 			        	/* for now only AF_INET has > 1 table */
+					/* XXX MRT 
+					 * rtattach will be also called
+					 * from vfs_export.c but the
+					 * offset will be 0
+					 * (only for AF_INET and AF_INET6
+					 * which don't need it anyhow)
+					 */
+					dom->dom_rtattach(
+				    	    (void **)&rt_tables[table][fam],
+				    	    dom->dom_rtoffset);
+				} else {
+					break;
+				}
+			}
+		}
+	}
 }
 
+#ifndef _SYS_SYSPROTO_H_
+struct setfib_args {
+	int     fibnum;
+};
+#endif
+int
+setfib(struct thread *td, struct setfib_args *uap)
+{
+	if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs)
+		return EINVAL;
+	td->td_proc->p_fibnum = uap->fibnum;
+	return (0);
+}
+
 /*
  * Packet routing routines.
  */
 void
 rtalloc(struct route *ro)
 {
-	rtalloc_ign(ro, 0UL);
+	rtalloc_ign_fib(ro, 0UL, 0);
 }
 
 void
+rtalloc_fib(struct route *ro, u_int fibnum)
+{
+	rtalloc_ign_fib(ro, 0UL, fibnum);
+}
+
+void
 rtalloc_ign(struct route *ro, u_long ignore)
 {
 	struct rtentry *rt;
 
 	if ((rt = ro->ro_rt) != NULL) {
 		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
 			return;
 		RTFREE(rt);
 		ro->ro_rt = NULL;
 	}
-	ro->ro_rt = rtalloc1(&ro->ro_dst, 1, ignore);
+	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0);
 	if (ro->ro_rt)
 		RT_UNLOCK(ro->ro_rt);
 }
 
+void
+rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum)
+{
+	struct rtentry *rt;
+
+	if ((rt = ro->ro_rt) != NULL) {
+		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
+			return;
+		RTFREE(rt);
+		ro->ro_rt = NULL;
+	}
+	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum);
+	if (ro->ro_rt)
+		RT_UNLOCK(ro->ro_rt);
+}
+
 /*
  * Look up the route that matches the address given
  * Or, at least try.. Create a cloned route if needed.
  *
  * The returned route, if any, is locked.
  */
 struct rtentry *
 rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
 {
-	struct radix_node_head *rnh = rt_tables[dst->sa_family];
+	return (rtalloc1_fib(dst, report, ignflags, 0));
+}
+
+struct rtentry *
+rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
+		    u_int fibnum)
+{
+	struct radix_node_head *rnh;
 	struct rtentry *rt;
 	struct radix_node *rn;
 	struct rtentry *newrt;
 	struct rt_addrinfo info;
 	u_long nflags;
 	int err = 0, msgtype = RTM_MISS;
 
+	KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
+	if (dst->sa_family != AF_INET)	/* Only INET supports > 1 fib now */
+		fibnum = 0;
+	rnh = rt_tables[fibnum][dst->sa_family];
 	newrt = NULL;
 	/*
 	 * Look up the address in the table for that Address Family
 	 */
 	if (rnh == NULL) {
 		rtstat.rts_unreach++;
 		goto miss2;
 	}
 	RADIX_NODE_HEAD_LOCK(rnh);
 	if ((rn = rnh->rnh_matchaddr(dst, rnh)) &&
 	    (rn->rn_flags & RNF_ROOT) == 0) {
 		/*
 		 * If we find it and it's not the root node, then
 		 * get a reference on the rtentry associated.
 		 */
 		newrt = rt = RNTORT(rn);
 		nflags = rt->rt_flags & ~ignflags;
 		if (report && (nflags & RTF_CLONING)) {
 			/*
 			 * We are apparently adding (report = 0 in delete).
 			 * If it requires that it be cloned, do so.
 			 * (This implies it wasn't a HOST route.)
 			 */
-			err = rtrequest(RTM_RESOLVE, dst, NULL,
-					      NULL, 0, &newrt);
+			err = rtrequest_fib(RTM_RESOLVE, dst, NULL,
+					      NULL, 0, &newrt, fibnum);
 			if (err) {
 				/*
 				 * If the cloning didn't succeed, maybe
 				 * what we have will do. Return that.
 				 */
 				newrt = rt;		/* existing route */
 				RT_LOCK(newrt);
 				RT_ADDREF(newrt);
 				goto miss;
 			}
 			KASSERT(newrt, ("no route and no error"));
 			RT_LOCK(newrt);
 			if (newrt->rt_flags & RTF_XRESOLVE) {
 				/*
 				 * If the new route specifies it be
 				 * externally resolved, then go do that.
 				 */
 				msgtype = RTM_RESOLVE;
 				goto miss;
 			}
 			/* Inform listeners of the new route. */
 			bzero(&info, sizeof(info));
 			info.rti_info[RTAX_DST] = rt_key(newrt);
 			info.rti_info[RTAX_NETMASK] = rt_mask(newrt);
 			info.rti_info[RTAX_GATEWAY] = newrt->rt_gateway;
 			if (newrt->rt_ifp != NULL) {
 				info.rti_info[RTAX_IFP] =
 				    newrt->rt_ifp->if_addr->ifa_addr;
 				info.rti_info[RTAX_IFA] = newrt->rt_ifa->ifa_addr;
 			}
 			rt_missmsg(RTM_ADD, &info, newrt->rt_flags, 0);
 		} else {
 			RT_LOCK(newrt);
 			RT_ADDREF(newrt);
 		}
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 	} else {
 		/*
 		 * Either we hit the root or couldn't find any match,
 		 * Which basically means
 		 * "caint get there frm here"
 		 */
 		rtstat.rts_unreach++;
 	miss:
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 	miss2:	if (report) {
 			/*
 			 * If required, report the failure to the supervising
 			 * Authorities.
 			 * For a delete, this is not an error. (report == 0)
 			 */
 			bzero(&info, sizeof(info));
 			info.rti_info[RTAX_DST] = dst;
 			rt_missmsg(msgtype, &info, 0, err);
 		}
 	}
 	if (newrt)
 		RT_LOCK_ASSERT(newrt);
 	return (newrt);
 }
 
 /*
  * Remove a reference count from an rtentry.
  * If the count gets low enough, take it out of the routing table
  */
 void
 rtfree(struct rtentry *rt)
 {
 	struct radix_node_head *rnh;
 
 	KASSERT(rt != NULL,("%s: NULL rt", __func__));
-	rnh = rt_tables[rt_key(rt)->sa_family];
+	rnh = rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family];
 	KASSERT(rnh != NULL,("%s: NULL rnh", __func__));
 
 	RT_LOCK_ASSERT(rt);
 
 	/*
 	 * The callers should use RTFREE_LOCKED() or RTFREE(), so
 	 * we should come here exactly with the last reference.
 	 */
 	RT_REMREF(rt);
 	if (rt->rt_refcnt > 0) {
 		printf("%s: %p has %lu refs\n", __func__, rt, rt->rt_refcnt);
 		goto done;
 	}
 
 	/*
 	 * On last reference give the "close method" a chance
 	 * to cleanup private state.  This also permits (for
 	 * IPv4 and IPv6) a chance to decide if the routing table
 	 * entry should be purged immediately or at a later time.
 	 * When an immediate purge is to happen the close routine
 	 * typically calls rtexpunge which clears the RTF_UP flag
 	 * on the entry so that the code below reclaims the storage.
 	 */
 	if (rt->rt_refcnt == 0 && rnh->rnh_close)
 		rnh->rnh_close((struct radix_node *)rt, rnh);
 
 	/*
 	 * If we are no longer "up" (and ref == 0)
 	 * then we can free the resources associated
 	 * with the route.
 	 */
 	if ((rt->rt_flags & RTF_UP) == 0) {
 		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
 			panic("rtfree 2");
 		/*
 		 * the rtentry must have been removed from the routing table
 		 * so it is represented in rttrash.. remove that now.
 		 */
 		rttrash--;
 #ifdef	DIAGNOSTIC
 		if (rt->rt_refcnt < 0) {
 			printf("rtfree: %p not freed (neg refs)\n", rt);
 			goto done;
 		}
 #endif
 		/*
 		 * release references on items we hold them on..
 		 * e.g other routes and ifaddrs.
 		 */
 		if (rt->rt_ifa)
 			IFAFREE(rt->rt_ifa);
 		rt->rt_parent = NULL;		/* NB: no refcnt on parent */
 
 		/*
 		 * The key is separatly alloc'd so free it (see rt_setgate()).
 		 * This also frees the gateway, as they are always malloc'd
 		 * together.
 		 */
 		Free(rt_key(rt));
 
 		/*
 		 * and the rtentry itself of course
 		 */
 		RT_LOCK_DESTROY(rt);
 		uma_zfree(rtzone, rt);
 		return;
 	}
 done:
 	RT_UNLOCK(rt);
 }
 
 
 /*
  * Force a routing table entry to the specified
  * destination to go through the given gateway.
  * Normally called as a result of a routing redirect
  * message from the network layer.
  */
 void
 rtredirect(struct sockaddr *dst,
 	struct sockaddr *gateway,
 	struct sockaddr *netmask,
 	int flags,
 	struct sockaddr *src)
 {
+	rtredirect_fib(dst, gateway, netmask, flags, src, 0);
+}
+
+void
+rtredirect_fib(struct sockaddr *dst,
+	struct sockaddr *gateway,
+	struct sockaddr *netmask,
+	int flags,
+	struct sockaddr *src,
+	u_int fibnum)
+{
 	struct rtentry *rt, *rt0 = NULL;
 	int error = 0;
 	short *stat = NULL;
 	struct rt_addrinfo info;
 	struct ifaddr *ifa;
 
 	/* verify the gateway is directly reachable */
 	if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
 		error = ENETUNREACH;
 		goto out;
 	}
-	rt = rtalloc1(dst, 0, 0UL);	/* NB: rt is locked */
+	rt = rtalloc1_fib(dst, 0, 0UL, fibnum);	/* NB: rt is locked */
 	/*
 	 * If the redirect isn't from our current router for this dst,
 	 * it's either old or wrong.  If it redirects us to ourselves,
 	 * we have a routing loop, perhaps as a result of an interface
 	 * going down recently.
 	 */
 	if (!(flags & RTF_DONE) && rt &&
 	     (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
 		error = EINVAL;
 	else if (ifa_ifwithaddr(gateway))
 		error = EHOSTUNREACH;
 	if (error)
 		goto done;
 	/*
 	 * Create a new entry if we just got back a wildcard entry
 	 * or the the lookup failed.  This is necessary for hosts
 	 * which use routing redirects generated by smart gateways
 	 * to dynamically build the routing tables.
 	 */
 	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
 		goto create;
 	/*
 	 * Don't listen to the redirect if it's
 	 * for a route to an interface.
 	 */
 	if (rt->rt_flags & RTF_GATEWAY) {
 		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
 			/*
 			 * Changing from route to net => route to host.
 			 * Create new route, rather than smashing route to net.
 			 */
 		create:
 			rt0 = rt;
 			rt = NULL;
 		
 			flags |=  RTF_GATEWAY | RTF_DYNAMIC;
 			bzero((caddr_t)&info, sizeof(info));
 			info.rti_info[RTAX_DST] = dst;
 			info.rti_info[RTAX_GATEWAY] = gateway;
 			info.rti_info[RTAX_NETMASK] = netmask;
 			info.rti_ifa = ifa;
 			info.rti_flags = flags;
-			error = rtrequest1(RTM_ADD, &info, &rt);
+			error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
 			if (rt != NULL) {
 				RT_LOCK(rt);
 				EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst);
 				flags = rt->rt_flags;
 			}
 			if (rt0)
 				RTFREE_LOCKED(rt0);
 			
 			stat = &rtstat.rts_dynamic;
 		} else {
 			struct rtentry *gwrt;
 
 			/*
 			 * Smash the current notion of the gateway to
 			 * this destination.  Should check about netmask!!!
 			 */
 			rt->rt_flags |= RTF_MODIFIED;
 			flags |= RTF_MODIFIED;
 			stat = &rtstat.rts_newgateway;
 			/*
 			 * add the key and gateway (in one malloc'd chunk).
 			 */
 			rt_setgate(rt, rt_key(rt), gateway);
 			gwrt = rtalloc1(gateway, 1, 0);
 			EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst);
 			RTFREE_LOCKED(gwrt);
 		}
 	} else
 		error = EHOSTUNREACH;
 done:
 	if (rt)
 		RTFREE_LOCKED(rt);
 out:
 	if (error)
 		rtstat.rts_badredirect++;
 	else if (stat != NULL)
 		(*stat)++;
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_DST] = dst;
 	info.rti_info[RTAX_GATEWAY] = gateway;
 	info.rti_info[RTAX_NETMASK] = netmask;
 	info.rti_info[RTAX_AUTHOR] = src;
 	rt_missmsg(RTM_REDIRECT, &info, flags, error);
 }
 
+int
+rtioctl(u_long req, caddr_t data)
+{
+	return (rtioctl_fib(req, data, 0));
+}
+
 /*
  * Routing table ioctl interface.
  */
 int
-rtioctl(u_long req, caddr_t data)
+rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
 {
 
 	/*
 	 * If more ioctl commands are added here, make sure the proper
 	 * super-user checks are being performed because it is possible for
 	 * prison-root to make it this far if raw sockets have been enabled
 	 * in jails.
 	 */
 #ifdef INET
 	/* Multicast goop, grrr... */
-	return mrt_ioctl ? mrt_ioctl(req, data) : EOPNOTSUPP;
+	return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP;
 #else /* INET */
 	return ENXIO;
 #endif /* INET */
 }
 
 struct ifaddr *
 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
 {
+	return (ifa_ifwithroute_fib(flags, dst, gateway, 0));
+}
+
+struct ifaddr *
+ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
+				u_int fibnum)
+{
 	register struct ifaddr *ifa;
 	int not_found = 0;
 
 	if ((flags & RTF_GATEWAY) == 0) {
 		/*
 		 * If we are adding a route to an interface,
 		 * and the interface is a pt to pt link
 		 * we should search for the destination
 		 * as our clue to the interface.  Otherwise
 		 * we can use the local address.
 		 */
 		ifa = NULL;
 		if (flags & RTF_HOST)
 			ifa = ifa_ifwithdstaddr(dst);
 		if (ifa == NULL)
 			ifa = ifa_ifwithaddr(gateway);
 	} else {
 		/*
 		 * If we are adding a route to a remote net
 		 * or host, the gateway may still be on the
 		 * other end of a pt to pt link.
 		 */
 		ifa = ifa_ifwithdstaddr(gateway);
 	}
 	if (ifa == NULL)
 		ifa = ifa_ifwithnet(gateway);
 	if (ifa == NULL) {
-		struct rtentry *rt = rtalloc1(gateway, 0, 0UL);
+		struct rtentry *rt = rtalloc1_fib(gateway, 0, 0UL, fibnum);
 		if (rt == NULL)
 			return (NULL);
 		/*
 		 * dismiss a gateway that is reachable only
 		 * through the default router
 		 */
 		switch (gateway->sa_family) {
 		case AF_INET:
 			if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY)
 				not_found = 1;
 			break;
 		case AF_INET6:
 			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr))
 				not_found = 1;
 			break;
 		default:
 			break;
 		}
 		RT_REMREF(rt);
 		RT_UNLOCK(rt);
 		if (not_found)
 			return (NULL);
 		if ((ifa = rt->rt_ifa) == NULL)
 			return (NULL);
 	}
 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
 		struct ifaddr *oifa = ifa;
 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
 		if (ifa == NULL)
 			ifa = oifa;
 	}
 	return (ifa);
 }
 
 static walktree_f_t rt_fixdelete;
 static walktree_f_t rt_fixchange;
 
 struct rtfc_arg {
 	struct rtentry *rt0;
 	struct radix_node_head *rnh;
 };
 
 /*
  * Do appropriate manipulations of a routing tree given
  * all the bits of info needed
  */
 int
 rtrequest(int req,
 	struct sockaddr *dst,
 	struct sockaddr *gateway,
 	struct sockaddr *netmask,
 	int flags,
 	struct rtentry **ret_nrt)
 {
+	return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0));
+}
+
+int
+rtrequest_fib(int req,
+	struct sockaddr *dst,
+	struct sockaddr *gateway,
+	struct sockaddr *netmask,
+	int flags,
+	struct rtentry **ret_nrt,
+	u_int fibnum)
+{
 	struct rt_addrinfo info;
 
 	if (dst->sa_len == 0)
 		return(EINVAL);
 
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_flags = flags;
 	info.rti_info[RTAX_DST] = dst;
 	info.rti_info[RTAX_GATEWAY] = gateway;
 	info.rti_info[RTAX_NETMASK] = netmask;
-	return rtrequest1(req, &info, ret_nrt);
+	return rtrequest1_fib(req, &info, ret_nrt, fibnum);
 }
 
 /*
  * These (questionable) definitions of apparent local variables apply
  * to the next two functions.  XXXXXX!!!
  */
 #define	dst	info->rti_info[RTAX_DST]
 #define	gateway	info->rti_info[RTAX_GATEWAY]
 #define	netmask	info->rti_info[RTAX_NETMASK]
 #define	ifaaddr	info->rti_info[RTAX_IFA]
 #define	ifpaddr	info->rti_info[RTAX_IFP]
 #define	flags	info->rti_flags
 
 int
 rt_getifa(struct rt_addrinfo *info)
 {
+	return (rt_getifa_fib(info, 0));
+}
+
+int
+rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
+{
 	struct ifaddr *ifa;
 	int error = 0;
 
 	/*
 	 * ifp may be specified by sockaddr_dl
 	 * when protocol address is ambiguous.
 	 */
 	if (info->rti_ifp == NULL && ifpaddr != NULL &&
 	    ifpaddr->sa_family == AF_LINK &&
 	    (ifa = ifa_ifwithnet(ifpaddr)) != NULL)
 		info->rti_ifp = ifa->ifa_ifp;
 	if (info->rti_ifa == NULL && ifaaddr != NULL)
 		info->rti_ifa = ifa_ifwithaddr(ifaaddr);
 	if (info->rti_ifa == NULL) {
 		struct sockaddr *sa;
 
 		sa = ifaaddr != NULL ? ifaaddr :
 		    (gateway != NULL ? gateway : dst);
 		if (sa != NULL && info->rti_ifp != NULL)
 			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
 		else if (dst != NULL && gateway != NULL)
-			info->rti_ifa = ifa_ifwithroute(flags, dst, gateway);
+			info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway,
+							fibnum);
 		else if (sa != NULL)
-			info->rti_ifa = ifa_ifwithroute(flags, sa, sa);
+			info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa,
+							fibnum);
 	}
 	if ((ifa = info->rti_ifa) != NULL) {
 		if (info->rti_ifp == NULL)
 			info->rti_ifp = ifa->ifa_ifp;
 	} else
 		error = ENETUNREACH;
 	return (error);
 }
 
 /*
  * Expunges references to a route that's about to be reclaimed.
  * The route must be locked.
  */
 int
 rtexpunge(struct rtentry *rt)
 {
 	struct radix_node *rn;
 	struct radix_node_head *rnh;
 	struct ifaddr *ifa;
 	int error = 0;
 
 	RT_LOCK_ASSERT(rt);
 #if 0
 	/*
 	 * We cannot assume anything about the reference count
 	 * because protocols call us in many situations; often
 	 * before unwinding references to the table entry.
 	 */
 	KASSERT(rt->rt_refcnt <= 1, ("bogus refcnt %ld", rt->rt_refcnt));
 #endif
 	/*
 	 * Find the correct routing tree to use for this Address Family
 	 */
-	rnh = rt_tables[rt_key(rt)->sa_family];
+	rnh = rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family];
 	if (rnh == NULL)
 		return (EAFNOSUPPORT);
 
 	RADIX_NODE_HEAD_LOCK(rnh);
 
 	/*
 	 * Remove the item from the tree; it should be there,
 	 * but when callers invoke us blindly it may not (sigh).
 	 */
 	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
 	if (rn == NULL) {
 		error = ESRCH;
 		goto bad;
 	}
 	KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0,
 		("unexpected flags 0x%x", rn->rn_flags));
 	KASSERT(rt == RNTORT(rn),
 		("lookup mismatch, rt %p rn %p", rt, rn));
 
 	rt->rt_flags &= ~RTF_UP;
 
 	/*
 	 * Now search what's left of the subtree for any cloned
 	 * routes which might have been formed from this node.
 	 */
 	if ((rt->rt_flags & RTF_CLONING) && rt_mask(rt))
 		rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
 				       rt_fixdelete, rt);
 
 	/*
 	 * Remove any external references we may have.
 	 * This might result in another rtentry being freed if
 	 * we held its last reference.
 	 */
 	if (rt->rt_gwroute) {
 		RTFREE(rt->rt_gwroute);
 		rt->rt_gwroute = NULL;
 	}
 
 	/*
 	 * Give the protocol a chance to keep things in sync.
 	 */
 	if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) {
 		struct rt_addrinfo info;
 
 		bzero((caddr_t)&info, sizeof(info));
 		info.rti_flags = rt->rt_flags;
 		info.rti_info[RTAX_DST] = rt_key(rt);
 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 		ifa->ifa_rtrequest(RTM_DELETE, rt, &info);
 	}
 
 	/*
 	 * one more rtentry floating around that is not
 	 * linked to the routing table.
 	 */
 	rttrash++;
 bad:
 	RADIX_NODE_HEAD_UNLOCK(rnh);
 	return (error);
 }
 
 int
 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
 {
+	return (rtrequest1_fib(req, info, ret_nrt, 0));
+}
+
+int
+rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
+				u_int fibnum)
+{
 	int error = 0;
 	register struct rtentry *rt;
 	register struct radix_node *rn;
 	register struct radix_node_head *rnh;
 	struct ifaddr *ifa;
 	struct sockaddr *ndst;
 #define senderr(x) { error = x ; goto bad; }
 
+	KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
+	if (dst->sa_family != AF_INET)	/* Only INET supports > 1 fib now */
+		fibnum = 0;
 	/*
 	 * Find the correct routing tree to use for this Address Family
 	 */
-	rnh = rt_tables[dst->sa_family];
+	rnh = rt_tables[fibnum][dst->sa_family];
 	if (rnh == NULL)
 		return (EAFNOSUPPORT);
 	RADIX_NODE_HEAD_LOCK(rnh);
 	/*
 	 * If we are adding a host route then we don't want to put
 	 * a netmask in the tree, nor do we want to clone it.
 	 */
 	if (flags & RTF_HOST) {
 		netmask = NULL;
 		flags &= ~RTF_CLONING;
 	}
 	switch (req) {
 	case RTM_DELETE:
 #ifdef RADIX_MPATH
 		/*
 		 * if we got multipath routes, we require users to specify
 		 * a matching RTAX_GATEWAY.
 		 */
 		if (rn_mpath_capable(rnh)) {
 			struct rtentry *rto = NULL;
 
 			rn = rnh->rnh_matchaddr(dst, rnh);
 			if (rn == NULL)
 				senderr(ESRCH);
  			rto = rt = RNTORT(rn);
 			rt = rt_mpath_matchgate(rt, gateway);
 			if (!rt)
 				senderr(ESRCH);
 			/*
 			 * this is the first entry in the chain
 			 */
 			if (rto == rt) {
 				rn = rn_mpath_next((struct radix_node *)rt);
 				/*
 				 * there is another entry, now it's active
 				 */
 				if (rn) {
 					rto = RNTORT(rn);
 					RT_LOCK(rto);
 					rto->rt_flags |= RTF_UP;
 					RT_UNLOCK(rto);
 				} else if (rt->rt_flags & RTF_GATEWAY) {
 					/*
 					 * For gateway routes, we need to 
 					 * make sure that we we are deleting
 					 * the correct gateway. 
 					 * rt_mpath_matchgate() does not 
 					 * check the case when there is only
 					 * one route in the chain.  
 					 */
 					if (gateway &&
 					    (rt->rt_gateway->sa_len != gateway->sa_len ||
 					    memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
 						senderr(ESRCH);
 				}
 				/*
 				 * use the normal delete code to remove
 				 * the first entry
 				 */
 				goto normal_rtdel;
 			}
 			/*
 			 * if the entry is 2nd and on up
 			 */
 			if (!rt_mpath_deldup(rto, rt))
 				panic ("rtrequest1: rt_mpath_deldup");
 			RT_LOCK(rt);
 			RT_ADDREF(rt);
 			rt->rt_flags &= ~RTF_UP;
 			goto deldone;  /* done with the RTM_DELETE command */
 		}
 
 normal_rtdel:
 #endif
 		/*
 		 * Remove the item from the tree and return it.
 		 * Complain if it is not there and do no more processing.
 		 */
 		rn = rnh->rnh_deladdr(dst, netmask, rnh);
 		if (rn == NULL)
 			senderr(ESRCH);
 		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
 			panic ("rtrequest delete");
 		rt = RNTORT(rn);
 		RT_LOCK(rt);
 		RT_ADDREF(rt);
 		rt->rt_flags &= ~RTF_UP;
 
 		/*
 		 * Now search what's left of the subtree for any cloned
 		 * routes which might have been formed from this node.
 		 */
 		if ((rt->rt_flags & RTF_CLONING) &&
 		    rt_mask(rt)) {
 			rnh->rnh_walktree_from(rnh, dst, rt_mask(rt),
 					       rt_fixdelete, rt);
 		}
 
 		/*
 		 * Remove any external references we may have.
 		 * This might result in another rtentry being freed if
 		 * we held its last reference.
 		 */
 		if (rt->rt_gwroute) {
 			RTFREE(rt->rt_gwroute);
 			rt->rt_gwroute = NULL;
 		}
 
 		/*
 		 * give the protocol a chance to keep things in sync.
 		 */
 		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
 			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
 
 #ifdef RADIX_MPATH
 deldone:
 #endif
 		/*
 		 * One more rtentry floating around that is not
 		 * linked to the routing table. rttrash will be decremented
 		 * when RTFREE(rt) is eventually called.
 		 */
 		rttrash++;
 
 		/*
 		 * If the caller wants it, then it can have it,
 		 * but it's up to it to free the rtentry as we won't be
 		 * doing it.
 		 */
 		if (ret_nrt) {
 			*ret_nrt = rt;
 			RT_UNLOCK(rt);
 		} else
 			RTFREE_LOCKED(rt);
 		break;
 
 	case RTM_RESOLVE:
 		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
 			senderr(EINVAL);
 		ifa = rt->rt_ifa;
 		/* XXX locking? */
 		flags = rt->rt_flags &
 		    ~(RTF_CLONING | RTF_STATIC);
 		flags |= RTF_WASCLONED;
 		gateway = rt->rt_gateway;
 		if ((netmask = rt->rt_genmask) == NULL)
 			flags |= RTF_HOST;
 		goto makeroute;
 
 	case RTM_ADD:
 		if ((flags & RTF_GATEWAY) && !gateway)
 			senderr(EINVAL);
 		if (dst && gateway && (dst->sa_family != gateway->sa_family) && 
 		    (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
 			senderr(EINVAL);
 
-		if (info->rti_ifa == NULL && (error = rt_getifa(info)))
+		if (info->rti_ifa == NULL && (error = rt_getifa_fib(info, fibnum)))
 			senderr(error);
 		ifa = info->rti_ifa;
 
 	makeroute:
 		rt = uma_zalloc(rtzone, M_NOWAIT | M_ZERO);
 		if (rt == NULL)
 			senderr(ENOBUFS);
 		RT_LOCK_INIT(rt);
 		rt->rt_flags = RTF_UP | flags;
+		rt->rt_fibnum = fibnum;
 		/*
 		 * Add the gateway. Possibly re-malloc-ing the storage for it
 		 * also add the rt_gwroute if possible.
 		 */
 		RT_LOCK(rt);
 		if ((error = rt_setgate(rt, dst, gateway)) != 0) {
 			RT_LOCK_DESTROY(rt);
 			uma_zfree(rtzone, rt);
 			senderr(error);
 		}
 
 		/*
 		 * point to the (possibly newly malloc'd) dest address.
 		 */
 		ndst = (struct sockaddr *)rt_key(rt);
 
 		/*
 		 * make sure it contains the value we want (masked if needed).
 		 */
 		if (netmask) {
 			rt_maskedcopy(dst, ndst, netmask);
 		} else
 			bcopy(dst, ndst, dst->sa_len);
 
 		/*
 		 * Note that we now have a reference to the ifa.
 		 * This moved from below so that rnh->rnh_addaddr() can
 		 * examine the ifa and  ifa->ifa_ifp if it so desires.
 		 */
 		IFAREF(ifa);
 		rt->rt_ifa = ifa;
 		rt->rt_ifp = ifa->ifa_ifp;
 
 #ifdef RADIX_MPATH
 		/* do not permit exactly the same dst/mask/gw pair */
 		if (rn_mpath_capable(rnh) &&
 			rt_mpath_conflict(rnh, rt, netmask)) {
 			if (rt->rt_gwroute)
 				RTFREE(rt->rt_gwroute);
 			if (rt->rt_ifa) {
 				IFAFREE(rt->rt_ifa);
 			}
 			Free(rt_key(rt));
 			RT_LOCK_DESTROY(rt);
 			uma_zfree(rtzone, rt);
 			senderr(EEXIST);
 		}
 #endif
 
 		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
 		rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
 		if (rn == NULL) {
 			struct rtentry *rt2;
 			/*
 			 * Uh-oh, we already have one of these in the tree.
 			 * We do a special hack: if the route that's already
 			 * there was generated by the cloning mechanism
 			 * then we just blow it away and retry the insertion
 			 * of the new one.
 			 */
-			rt2 = rtalloc1(dst, 0, 0);
+			rt2 = rtalloc1_fib(dst, 0, 0, fibnum);
 			if (rt2 && rt2->rt_parent) {
 				rtexpunge(rt2);
 				RT_UNLOCK(rt2);
 				rn = rnh->rnh_addaddr(ndst, netmask,
 						      rnh, rt->rt_nodes);
 			} else if (rt2) {
 				/* undo the extra ref we got */
 				RTFREE_LOCKED(rt2);
 			}
 		}
 
 		/*
 		 * If it still failed to go into the tree,
 		 * then un-make it (this should be a function)
 		 */
 		if (rn == NULL) {
 			if (rt->rt_gwroute)
 				RTFREE(rt->rt_gwroute);
 			if (rt->rt_ifa)
 				IFAFREE(rt->rt_ifa);
 			Free(rt_key(rt));
 			RT_LOCK_DESTROY(rt);
 			uma_zfree(rtzone, rt);
 			senderr(EEXIST);
 		}
 
 		rt->rt_parent = NULL;
 
 		/*
 		 * If we got here from RESOLVE, then we are cloning
 		 * so clone the rest, and note that we
 		 * are a clone (and increment the parent's references)
 		 */
 		if (req == RTM_RESOLVE) {
 			KASSERT(ret_nrt && *ret_nrt,
 				("no route to clone from"));
 			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
 			rt->rt_rmx.rmx_pksent = 0; /* reset packet counter */
 			if ((*ret_nrt)->rt_flags & RTF_CLONING) {
 				/*
 				 * NB: We do not bump the refcnt on the parent
 				 * entry under the assumption that it will
 				 * remain so long as we do.  This is
 				 * important when deleting the parent route
 				 * as this operation requires traversing
 				 * the tree to delete all clones and futzing
 				 * with refcnts requires us to double-lock
 				 * parent through this back reference.
 				 */
 				rt->rt_parent = *ret_nrt;
 			}
 		}
 
 		/*
 		 * If this protocol has something to add to this then
 		 * allow it to do that as well.
 		 */
 		if (ifa->ifa_rtrequest)
 			ifa->ifa_rtrequest(req, rt, info);
 
 		/*
 		 * We repeat the same procedure from rt_setgate() here because
 		 * it doesn't fire when we call it there because the node
 		 * hasn't been added to the tree yet.
 		 */
 		if (req == RTM_ADD &&
 		    !(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) {
 			struct rtfc_arg arg;
 			arg.rnh = rnh;
 			arg.rt0 = rt;
 			rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
 					       rt_fixchange, &arg);
 		}
 
 		/*
 		 * actually return a resultant rtentry and
 		 * give the caller a single reference.
 		 */
 		if (ret_nrt) {
 			*ret_nrt = rt;
 			RT_ADDREF(rt);
 		}
 		RT_UNLOCK(rt);
 		break;
 	default:
 		error = EOPNOTSUPP;
 	}
 bad:
 	RADIX_NODE_HEAD_UNLOCK(rnh);
 	return (error);
 #undef senderr
 }
 
 #undef dst
 #undef gateway
 #undef netmask
 #undef ifaaddr
 #undef ifpaddr
 #undef flags
 
 /*
  * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
  * (i.e., the routes related to it by the operation of cloning).  This
  * routine is iterated over all potential former-child-routes by way of
  * rnh->rnh_walktree_from() above, and those that actually are children of
  * the late parent (passed in as VP here) are themselves deleted.
  */
 static int
 rt_fixdelete(struct radix_node *rn, void *vp)
 {
 	struct rtentry *rt = RNTORT(rn);
 	struct rtentry *rt0 = vp;
 
 	if (rt->rt_parent == rt0 &&
 	    !(rt->rt_flags & (RTF_PINNED | RTF_CLONING))) {
-		return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
-				 rt->rt_flags, NULL);
+		return rtrequest_fib(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
+				 rt->rt_flags, NULL, rt->rt_fibnum);
 	}
 	return 0;
 }
 
 /*
  * This routine is called from rt_setgate() to do the analogous thing for
  * adds and changes.  There is the added complication in this case of a
  * middle insert; i.e., insertion of a new network route between an older
  * network route and (cloned) host routes.  For this reason, a simple check
  * of rt->rt_parent is insufficient; each candidate route must be tested
  * against the (mask, value) of the new route (passed as before in vp)
  * to see if the new route matches it.
  *
  * XXX - it may be possible to do fixdelete() for changes and reserve this
  * routine just for adds.  I'm not sure why I thought it was necessary to do
  * changes this way.
  */
 
 static int
 rt_fixchange(struct radix_node *rn, void *vp)
 {
 	struct rtentry *rt = RNTORT(rn);
 	struct rtfc_arg *ap = vp;
 	struct rtentry *rt0 = ap->rt0;
 	struct radix_node_head *rnh = ap->rnh;
 	u_char *xk1, *xm1, *xk2, *xmp;
 	int i, len, mlen;
 
 	/* make sure we have a parent, and route is not pinned or cloning */
 	if (!rt->rt_parent ||
 	    (rt->rt_flags & (RTF_PINNED | RTF_CLONING)))
 		return 0;
 
 	if (rt->rt_parent == rt0)	/* parent match */
 		goto delete_rt;
 	/*
 	 * There probably is a function somewhere which does this...
 	 * if not, there should be.
 	 */
 	len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);
 
 	xk1 = (u_char *)rt_key(rt0);
 	xm1 = (u_char *)rt_mask(rt0);
 	xk2 = (u_char *)rt_key(rt);
 
 	/* avoid applying a less specific route */
 	xmp = (u_char *)rt_mask(rt->rt_parent);
 	mlen = rt_key(rt->rt_parent)->sa_len;
 	if (mlen > rt_key(rt0)->sa_len)		/* less specific route */
 		return 0;
 	for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++)
 		if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i])
 			return 0;	/* less specific route */
 
 	for (i = rnh->rnh_treetop->rn_offset; i < len; i++)
 		if ((xk2[i] & xm1[i]) != xk1[i])
 			return 0;	/* no match */
 
 	/*
 	 * OK, this node is a clone, and matches the node currently being
 	 * changed/added under the node's mask.  So, get rid of it.
 	 */
 delete_rt:
-	return rtrequest(RTM_DELETE, rt_key(rt), NULL,
-			 rt_mask(rt), rt->rt_flags, NULL);
+	return rtrequest_fib(RTM_DELETE, rt_key(rt), NULL,
+			 rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum);
 }
 
 int
 rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
 {
 	/* XXX dst may be overwritten, can we move this to below */
-	struct radix_node_head *rnh = rt_tables[dst->sa_family];
+	struct radix_node_head *rnh = rt_tables[rt->rt_fibnum][dst->sa_family];
 	int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
 
 again:
 	RT_LOCK_ASSERT(rt);
 
 	/*
 	 * A host route with the destination equal to the gateway
 	 * will interfere with keeping LLINFO in the routing
 	 * table, so disallow it.
 	 */
 	if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
 					(RTF_HOST|RTF_GATEWAY)) &&
 	    dst->sa_len == gate->sa_len &&
 	    bcmp(dst, gate, dst->sa_len) == 0) {
 		/*
 		 * The route might already exist if this is an RTM_CHANGE
 		 * or a routing redirect, so try to delete it.
 		 */
 		if (rt_key(rt))
 			rtexpunge(rt);
 		return EADDRNOTAVAIL;
 	}
 
 	/*
 	 * Cloning loop avoidance in case of bad configuration.
 	 */
 	if (rt->rt_flags & RTF_GATEWAY) {
 		struct rtentry *gwrt;
 
 		RT_UNLOCK(rt);		/* XXX workaround LOR */
-		gwrt = rtalloc1(gate, 1, 0);
+		gwrt = rtalloc1_fib(gate, 1, 0, rt->rt_fibnum);
 		if (gwrt == rt) {
 			RT_REMREF(rt);
 			return (EADDRINUSE); /* failure */
 		}
 		/*
 		 * Try to reacquire the lock on rt, and if it fails,
 		 * clean state and restart from scratch.
 		 */
 		if (!RT_TRYLOCK(rt)) {
 			RTFREE_LOCKED(gwrt);
 			RT_LOCK(rt);
 			goto again;
 		}
 		/*
 		 * If there is already a gwroute, then drop it. If we
 		 * are asked to replace route with itself, then do
 		 * not leak its refcounter.
 		 */
 		if (rt->rt_gwroute != NULL) {
 			if (rt->rt_gwroute == gwrt) {
 				RT_REMREF(rt->rt_gwroute);
 			} else
 				RTFREE(rt->rt_gwroute);
 		}
 
 		if ((rt->rt_gwroute = gwrt) != NULL)
 			RT_UNLOCK(rt->rt_gwroute);
 	}
 
 	/*
 	 * Prepare to store the gateway in rt->rt_gateway.
 	 * Both dst and gateway are stored one after the other in the same
 	 * malloc'd chunk. If we have room, we can reuse the old buffer,
 	 * rt_gateway already points to the right place.
 	 * Otherwise, malloc a new block and update the 'dst' address.
 	 */
 	if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) {
 		caddr_t new;
 
 		R_Malloc(new, caddr_t, dlen + glen);
 		if (new == NULL)
 			return ENOBUFS;
 		/*
 		 * XXX note, we copy from *dst and not *rt_key(rt) because
 		 * rt_setgate() can be called to initialize a newly
 		 * allocated route entry, in which case rt_key(rt) == NULL
 		 * (and also rt->rt_gateway == NULL).
 		 * Free()/free() handle a NULL argument just fine.
 		 */
 		bcopy(dst, new, dlen);
 		Free(rt_key(rt));	/* free old block, if any */
 		rt_key(rt) = (struct sockaddr *)new;
 		rt->rt_gateway = (struct sockaddr *)(new + dlen);
 	}
 
 	/*
 	 * Copy the new gateway value into the memory chunk.
 	 */
 	bcopy(gate, rt->rt_gateway, glen);
 
 	/*
 	 * This isn't going to do anything useful for host routes, so
 	 * don't bother.  Also make sure we have a reasonable mask
 	 * (we don't yet have one during adds).
 	 */
 	if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
 		struct rtfc_arg arg;
 
 		arg.rnh = rnh;
 		arg.rt0 = rt;
 		RT_UNLOCK(rt);		/* XXX workaround LOR */
 		RADIX_NODE_HEAD_LOCK(rnh);
 		RT_LOCK(rt);
 		rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
 				       rt_fixchange, &arg);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 	}
 
 	return 0;
 }
 
 static void
 rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
 {
 	register u_char *cp1 = (u_char *)src;
 	register u_char *cp2 = (u_char *)dst;
 	register u_char *cp3 = (u_char *)netmask;
 	u_char *cplim = cp2 + *cp3;
 	u_char *cplim2 = cp2 + *cp1;
 
 	*cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
 	cp3 += 2;
 	if (cplim > cplim2)
 		cplim = cplim2;
 	while (cp2 < cplim)
 		*cp2++ = *cp1++ & *cp3++;
 	if (cp2 < cplim2)
 		bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2));
 }
 
 /*
  * Set up a routing table entry, normally
  * for an interface.
  */
-int
-rtinit(struct ifaddr *ifa, int cmd, int flags)
+#define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */
+static inline  int
+rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
 {
 	struct sockaddr *dst;
 	struct sockaddr *netmask;
-	struct mbuf *m = NULL;
 	struct rtentry *rt = NULL;
 	struct rt_addrinfo info;
-	int error=0;
+	int error = 0;
+	int startfib, endfib;
+	char tempbuf[_SOCKADDR_TMPSIZE];
+	int didwork = 0;
+	int a_failure = 0;
 
 	if (flags & RTF_HOST) {
 		dst = ifa->ifa_dstaddr;
 		netmask = NULL;
 	} else {
 		dst = ifa->ifa_addr;
 		netmask = ifa->ifa_netmask;
 	}
+	if ( dst->sa_family != AF_INET)
+		fibnum = 0;
+	if (fibnum == -1) {
+		startfib = 0;
+		endfib = rt_numfibs - 1;
+	} else {
+		KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum"));
+		startfib = fibnum;
+		endfib = fibnum;
+	}
 	if (dst->sa_len == 0)
 		return(EINVAL);
 
 	/*
-	 * If it's a delete, check that if it exists, it's on the correct
-	 * interface or we might scrub a route to another ifa which would
+	 * If it's a delete, check that if it exists,
+	 * it's on the correct interface or we might scrub
+	 * a route to another ifa which would
 	 * be confusing at best and possibly worse.
 	 */
 	if (cmd == RTM_DELETE) {
-		struct sockaddr *deldst;
-		struct radix_node_head *rnh;
-		struct radix_node *rn;
-
 		/*
 		 * It's a delete, so it should already exist..
 		 * If it's a net, mask off the host bits
 		 * (Assuming we have a mask)
+		 * XXX this is kinda inet specific..
 		 */
 		if (netmask != NULL) {
-			m = m_get(M_DONTWAIT, MT_SONAME);
-			if (m == NULL)
-				return(ENOBUFS);
-			deldst = mtod(m, struct sockaddr *);
-			rt_maskedcopy(dst, deldst, netmask);
-			dst = deldst;
+			rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask);
+			dst = (struct sockaddr *)tempbuf;
 		}
-		/*
-		 * Look up an rtentry that is in the routing tree and
-		 * contains the correct info.
-		 */
-		if ((rnh = rt_tables[dst->sa_family]) == NULL)
-			goto bad;
-		RADIX_NODE_HEAD_LOCK(rnh);
+	}
+	/*
+	 * Now go through all the requested tables (fibs) and do the
+	 * requested action. Realistically, this will either be fib 0
+	 * for protocols that don't do multiple tables or all the
+	 * tables for those that do. XXX For this version only AF_INET.
+	 * When that changes code should be refactored to protocol
+	 * independent parts and protocol dependent parts.
+	 */
+	for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
+		if (cmd == RTM_DELETE) {
+			struct radix_node_head *rnh;
+			struct radix_node *rn;
+			/*
+			 * Look up an rtentry that is in the routing tree and
+			 * contains the correct info.
+			 */
+			if ((rnh = rt_tables[fibnum][dst->sa_family]) == NULL)
+				/* this table doesn't exist but others might */
+				continue;
+			RADIX_NODE_HEAD_LOCK(rnh);
 #ifdef RADIX_MPATH
-		if (rn_mpath_capable(rnh)) {
+			if (rn_mpath_capable(rnh)) {
 
-			rn = rnh->rnh_matchaddr(dst, rnh);
-			if (rn == NULL) 
-				error = ESRCH;
-			else {
-				rt = RNTORT(rn);
-				/*
-				 * for interface route the rt->rt_gateway is
-				 * sockaddr_intf for cloning ARP entries, so
-				 * rt_mpath_matchgate must use the interface
-				 * address
-				 */
-				rt = rt_mpath_matchgate(rt, ifa->ifa_addr);
-				if (!rt) 
+				rn = rnh->rnh_matchaddr(dst, rnh);
+				if (rn == NULL) 
 					error = ESRCH;
+				else {
+					rt = RNTORT(rn);
+					/*
+					 * for interface route the
+					 * rt->rt_gateway is sockaddr_intf
+					 * for cloning ARP entries, so
+					 * rt_mpath_matchgate must use the
+					 * interface address
+					 */
+					rt = rt_mpath_matchgate(rt,
+					    ifa->ifa_addr);
+					if (!rt) 
+						error = ESRCH;
+				}
 			}
-		}
-		else
+			else
 #endif
-		error = ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL ||
-		    (rn->rn_flags & RNF_ROOT) ||
-		    RNTORT(rn)->rt_ifa != ifa ||
-		    !sa_equal((struct sockaddr *)rn->rn_key, dst));
-
-		RADIX_NODE_HEAD_UNLOCK(rnh);
-		if (error) {
-bad:
-			if (m)
-				(void) m_free(m);
-			return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+			rn = rnh->rnh_lookup(dst, netmask, rnh);
+			error = (rn == NULL ||
+			    (rn->rn_flags & RNF_ROOT) ||
+			    RNTORT(rn)->rt_ifa != ifa ||
+			    !sa_equal((struct sockaddr *)rn->rn_key, dst));
+			RADIX_NODE_HEAD_UNLOCK(rnh);
+			if (error) {
+				/* this is only an error if bad on ALL tables */
+				continue;
+			}
 		}
-	}
-	/*
-	 * Do the actual request
-	 */
-	bzero((caddr_t)&info, sizeof(info));
-	info.rti_ifa = ifa;
-	info.rti_flags = flags | ifa->ifa_flags;
-	info.rti_info[RTAX_DST] = dst;
-	info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
-	info.rti_info[RTAX_NETMASK] = netmask;
-	error = rtrequest1(cmd, &info, &rt);
-	if (error == 0 && rt != NULL) {
 		/*
-		 * notify any listening routing agents of the change
+		 * Do the actual request
 		 */
-		RT_LOCK(rt);
+		bzero((caddr_t)&info, sizeof(info));
+		info.rti_ifa = ifa;
+		info.rti_flags = flags | ifa->ifa_flags;
+		info.rti_info[RTAX_DST] = dst;
+		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
+		info.rti_info[RTAX_NETMASK] = netmask;
+		error = rtrequest1_fib(cmd, &info, &rt, fibnum);
+		if (error == 0 && rt != NULL) {
+			/*
+			 * notify any listening routing agents of the change
+			 */
+			RT_LOCK(rt);
 #ifdef RADIX_MPATH
-		/*
-		 * in case address alias finds the first address
-		 * e.g. ifconfig bge0 192.103.54.246/24
-		 * e.g. ifconfig bge0 192.103.54.247/24
-		 * the address set in the route is 192.103.54.246
-		 * so we need to replace it with 192.103.54.247
-		 */
-		if (memcmp(rt->rt_ifa->ifa_addr, ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
-			IFAFREE(rt->rt_ifa);
-			IFAREF(ifa);
-			rt->rt_ifp = ifa->ifa_ifp;
-			rt->rt_ifa = ifa;
-		}
-#endif
-		rt_newaddrmsg(cmd, ifa, error, rt);
-		if (cmd == RTM_DELETE) {
 			/*
-			 * If we are deleting, and we found an entry, then
-			 * it's been removed from the tree.. now throw it away.
+			 * in case address alias finds the first address
+			 * e.g. ifconfig bge0 192.103.54.246/24
+			 * e.g. ifconfig bge0 192.103.54.247/24
+			 * the address set in the route is 192.103.54.246
+			 * so we need to replace it with 192.103.54.247
 			 */
-			RTFREE_LOCKED(rt);
-		} else {
-			if (cmd == RTM_ADD) {
+			if (memcmp(rt->rt_ifa->ifa_addr,
+			    ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
+				IFAFREE(rt->rt_ifa);
+				IFAREF(ifa);
+				rt->rt_ifp = ifa->ifa_ifp;
+				rt->rt_ifa = ifa;
+			}
+#endif
+			rt_newaddrmsg(cmd, ifa, error, rt);
+			if (cmd == RTM_DELETE) {
 				/*
-				 * We just wanted to add it.. we don't actually
-				 * need a reference.
+				 * If we are deleting, and we found an entry,
+				 * then it's been removed from the tree..
+				 * now throw it away.
 				 */
-				RT_REMREF(rt);
+				RTFREE_LOCKED(rt);
+			} else {
+				if (cmd == RTM_ADD) {
+					/*
+					 * We just wanted to add it..
+					 * we don't actually need a reference.
+					 */
+					RT_REMREF(rt);
+				}
+				RT_UNLOCK(rt);
 			}
-			RT_UNLOCK(rt);
+			didwork = 1;
 		}
+		if (error)
+			a_failure = error;
 	}
-	if (m)
-		(void) m_free(m);
+	if (cmd == RTM_DELETE) {
+		if (didwork) {
+			error = 0;
+		} else {
+			/* we only give an error if it wasn't in any table */
+			error = ((flags & RTF_HOST) ?
+			    EHOSTUNREACH : ENETUNREACH);
+		}
+	} else {
+		if (a_failure) {
+			/* return an error if any of them failed */
+			error = a_failure;
+		}
+	}
 	return (error);
 }
 
+/* Special one for inet internal use; may not be used. */
+int
+rtinit_fib(struct ifaddr *ifa, int cmd, int flags)
+{
+	return (rtinit1(ifa, cmd, flags, -1));
+}
+
 /*
+ * Set up a routing table entry, normally
+ * for an interface.
+ */
+int
+rtinit(struct ifaddr *ifa, int cmd, int flags)
+{
+	struct sockaddr *dst;
+	int fib = 0;
+
+	if (flags & RTF_HOST) {
+		dst = ifa->ifa_dstaddr;
+	} else {
+		dst = ifa->ifa_addr;
+	}
+
+	if (dst->sa_family == AF_INET)
+		fib = -1;
+	return (rtinit1(ifa, cmd, flags, fib));
+}
+
+/*
  * rt_check() is invoked on each layer 2 output path, prior to
  * encapsulating outbound packets.
  *
  * The function is mostly used to find a routing entry for the gateway,
  * which in some protocol families could also point to the link-level
  * address for the gateway itself (the side effect of revalidating the
  * route to the destination is rather pointless at this stage, we did it
  * already a moment before in the pr_output() routine to locate the ifp
  * and gateway to use).
  *
  * When we remove the layer-3 to layer-2 mapping tables from the
  * routing table, this function can be removed.
  *
  * === On input ===
  *   *dst is the address of the NEXT HOP (which coincides with the
  *	final destination if directly reachable);
  *   *lrt0 points to the cached route to the final destination;
  *   *lrt is not meaningful;
+ *    fibnum is the index to the correct network fib for this packet
  *
  * === Operation ===
  * If the route is marked down try to find a new route.  If the route
  * to the gateway is gone, try to setup a new route.  Otherwise,
  * if the route is marked for packets to be rejected, enforce that.
  *
  * === On return ===
  *   *dst is unchanged;
  *   *lrt0 points to the (possibly new) route to the final destination
  *   *lrt points to the route to the next hop
  *
  * Their values are meaningful ONLY if no error is returned.
  */
 int
 rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
 {
+	return (rt_check_fib(lrt, lrt0, dst, 0));
+}
+
+int
+rt_check_fib(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst,
+		u_int fibnum)
+{
 	struct rtentry *rt;
 	struct rtentry *rt0;
 	int error;
 
 	KASSERT(*lrt0 != NULL, ("rt_check"));
 	rt = rt0 = *lrt0;
 
 	/* NB: the locking here is tortuous... */
 	RT_LOCK(rt);
 	if ((rt->rt_flags & RTF_UP) == 0) {
 		RT_UNLOCK(rt);
-		rt = rtalloc1(dst, 1, 0UL);
+		rt = rtalloc1_fib(dst, 1, 0UL, fibnum);
 		if (rt != NULL) {
 			RT_REMREF(rt);
 			/* XXX what about if change? */
 		} else
 			return (EHOSTUNREACH);
 		rt0 = rt;
 	}
 	/* XXX BSD/OS checks dst->sa_family != AF_NS */
 	if (rt->rt_flags & RTF_GATEWAY) {
 		if (rt->rt_gwroute == NULL)
 			goto lookup;
 		rt = rt->rt_gwroute;
 		RT_LOCK(rt);		/* NB: gwroute */
 		if ((rt->rt_flags & RTF_UP) == 0) {
 			RTFREE_LOCKED(rt);	/* unlock gwroute */
 			rt = rt0;
 			rt0->rt_gwroute = NULL;
 		lookup:
 			RT_UNLOCK(rt0);
-			rt = rtalloc1(rt->rt_gateway, 1, 0UL);
+/* XXX MRT link level looked up in table 0 */
+			rt = rtalloc1_fib(rt->rt_gateway, 1, 0UL, 0);
 			if (rt == rt0) {
 				RT_REMREF(rt0);
 				RT_UNLOCK(rt0);
 				return (ENETUNREACH);
 			}
 			RT_LOCK(rt0);
 			if (rt0->rt_gwroute != NULL)
 				RTFREE(rt0->rt_gwroute);
 			rt0->rt_gwroute = rt;
 			if (rt == NULL) {
 				RT_UNLOCK(rt0);
 				return (EHOSTUNREACH);
 			}
 		}
 		RT_UNLOCK(rt0);
 	}
 	/* XXX why are we inspecting rmx_expire? */
 	error = (rt->rt_flags & RTF_REJECT) &&
 		(rt->rt_rmx.rmx_expire == 0 ||
 			time_uptime < rt->rt_rmx.rmx_expire);
 	if (error) {
 		RT_UNLOCK(rt);
 		return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
 	}
 
 	*lrt = rt;
 	*lrt0 = rt0;
 	return (0);
 }
 
 /* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */
 SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
Index: head/sys/net/route.h
===================================================================
--- head/sys/net/route.h	(revision 178887)
+++ head/sys/net/route.h	(revision 178888)
@@ -1,374 +1,401 @@
 /*-
  * Copyright (c) 1980, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)route.h	8.4 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #ifndef _NET_ROUTE_H_
 #define _NET_ROUTE_H_
 
 /*
  * Kernel resident routing tables.
  *
  * The routing tables are initialized when interface addresses
  * are set by making entries for all directly connected interfaces.
  */
 
 /*
  * A route consists of a destination address and a reference
  * to a routing entry.  These are often held by protocols
  * in their control blocks, e.g. inpcb.
  */
 struct route {
 	struct	rtentry *ro_rt;
 	struct	sockaddr ro_dst;
 };
 
 /*
  * These numbers are used by reliable protocols for determining
  * retransmission behavior and are included in the routing structure.
  */
 struct rt_metrics_lite {
 	u_long	rmx_mtu;	/* MTU for this path */
 	u_long	rmx_expire;	/* lifetime for route, e.g. redirect */
 	u_long	rmx_pksent;	/* packets sent using this route */
 };
 
 struct rt_metrics {
 	u_long	rmx_locks;	/* Kernel must leave these values alone */
 	u_long	rmx_mtu;	/* MTU for this path */
 	u_long	rmx_hopcount;	/* max hops expected */
 	u_long	rmx_expire;	/* lifetime for route, e.g. redirect */
 	u_long	rmx_recvpipe;	/* inbound delay-bandwidth product */
 	u_long	rmx_sendpipe;	/* outbound delay-bandwidth product */
 	u_long	rmx_ssthresh;	/* outbound gateway buffer limit */
 	u_long	rmx_rtt;	/* estimated round trip time */
 	u_long	rmx_rttvar;	/* estimated rtt variance */
 	u_long	rmx_pksent;	/* packets sent using this route */
 	u_long	rmx_filler[4];	/* will be used for T/TCP later */
 };
 
 /*
  * rmx_rtt and rmx_rttvar are stored as microseconds;
  * RTTTOPRHZ(rtt) converts to a value suitable for use
  * by a protocol slowtimo counter.
  */
 #define	RTM_RTTUNIT	1000000	/* units for rtt, rttvar, as units per sec */
 #define	RTTTOPRHZ(r)	((r) / (RTM_RTTUNIT / PR_SLOWHZ))
 
+#define RT_MAXFIBS 16
+extern u_int rt_numfibs;	/* number of usable routing tables */
+extern u_int tunnel_fib;	/* tunnels use these */
+extern u_int fwd_fib;		/* packets being forwarded use these routes */
 /*
  * XXX kernel function pointer `rt_output' is visible to applications.
  */
 struct mbuf;
 
 /*
  * We distinguish between routes to hosts and routes to networks,
  * preferring the former if available.  For each route we infer
  * the interface to use from the gateway address supplied when
  * the route was entered.  Routes that forward packets through
  * gateways are marked so that the output routines know to address the
  * gateway rather than the ultimate destination.
  */
 #ifndef RNF_NORMAL
 #include <net/radix.h>
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 #endif
 struct rtentry {
 	struct	radix_node rt_nodes[2];	/* tree glue, and other values */
 	/*
 	 * XXX struct rtentry must begin with a struct radix_node (or two!)
 	 * because the code does some casts of a 'struct radix_node *'
 	 * to a 'struct rtentry *'
 	 */
 #define	rt_key(r)	(*((struct sockaddr **)(&(r)->rt_nodes->rn_key)))
 #define	rt_mask(r)	(*((struct sockaddr **)(&(r)->rt_nodes->rn_mask)))
 	struct	sockaddr *rt_gateway;	/* value */
 	u_long	rt_flags;		/* up/down?, host/net */
 	struct	ifnet *rt_ifp;		/* the answer: interface to use */
 	struct	ifaddr *rt_ifa;		/* the answer: interface address to use */
 	struct	rt_metrics_lite rt_rmx;	/* metrics used by rx'ing protocols */
 	long	rt_refcnt;		/* # held references */
 	struct	sockaddr *rt_genmask;	/* for generation of cloned routes */
 	caddr_t	rt_llinfo;		/* pointer to link level info cache */
 	struct	rtentry *rt_gwroute;	/* implied entry for gatewayed routes */
 	struct	rtentry *rt_parent; 	/* cloning parent of this route */
+	u_int	rt_fibnum;		/* which FIB */
 #ifdef _KERNEL
 	/* XXX ugly, user apps use this definition but don't have a mtx def */
 	struct	mtx rt_mtx;		/* mutex for routing entry */
 #endif
 };
 
 /*
  * Following structure necessary for 4.3 compatibility;
  * We should eventually move it to a compat file.
  */
 struct ortentry {
 	u_long	rt_hash;		/* to speed lookups */
 	struct	sockaddr rt_dst;	/* key */
 	struct	sockaddr rt_gateway;	/* value */
 	short	rt_flags;		/* up/down?, host/net */
 	short	rt_refcnt;		/* # held references */
 	u_long	rt_use;			/* raw # packets forwarded */
 	struct	ifnet *rt_ifp;		/* the answer: interface to use */
 };
 
 #define rt_use rt_rmx.rmx_pksent
 
 #define	RTF_UP		0x1		/* route usable */
 #define	RTF_GATEWAY	0x2		/* destination is a gateway */
 #define	RTF_HOST	0x4		/* host entry (net otherwise) */
 #define	RTF_REJECT	0x8		/* host or net unreachable */
 #define	RTF_DYNAMIC	0x10		/* created dynamically (by redirect) */
 #define	RTF_MODIFIED	0x20		/* modified dynamically (by redirect) */
 #define RTF_DONE	0x40		/* message confirmed */
 /*			0x80		   unused, was RTF_DELCLONE */
 #define RTF_CLONING	0x100		/* generate new routes on use */
 #define RTF_XRESOLVE	0x200		/* external daemon resolves name */
 #define RTF_LLINFO	0x400		/* generated by link layer (e.g. ARP) */
 #define RTF_STATIC	0x800		/* manually added */
 #define RTF_BLACKHOLE	0x1000		/* just discard pkts (during updates) */
 #define RTF_PROTO2	0x4000		/* protocol specific routing flag */
 #define RTF_PROTO1	0x8000		/* protocol specific routing flag */
 
 /* XXX: temporary to stay API/ABI compatible with userland */
 #ifndef _KERNEL
 #define RTF_PRCLONING	0x10000		/* unused, for compatibility */
 #endif
 
 #define RTF_WASCLONED	0x20000		/* route generated through cloning */
 #define RTF_PROTO3	0x40000		/* protocol specific routing flag */
 /*			0x80000		   unused */
 #define RTF_PINNED	0x100000	/* future use */
 #define	RTF_LOCAL	0x200000 	/* route represents a local address */
 #define	RTF_BROADCAST	0x400000	/* route represents a bcast address */
 #define	RTF_MULTICAST	0x800000	/* route represents a mcast address */
 					/* 0x1000000 and up unassigned */
 
 /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */
 #define RTF_FMASK	\
 	(RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \
 	 RTF_REJECT | RTF_STATIC)
 
 /*
  * Routing statistics.
  */
 struct	rtstat {
 	short	rts_badredirect;	/* bogus redirect calls */
 	short	rts_dynamic;		/* routes created by redirects */
 	short	rts_newgateway;		/* routes modified by redirects */
 	short	rts_unreach;		/* lookups which failed */
 	short	rts_wildcard;		/* lookups satisfied by a wildcard */
 };
 /*
  * Structures for routing messages.
  */
 struct rt_msghdr {
 	u_short	rtm_msglen;	/* to skip over non-understood messages */
 	u_char	rtm_version;	/* future binary compatibility */
 	u_char	rtm_type;	/* message type */
 	u_short	rtm_index;	/* index for associated ifp */
 	int	rtm_flags;	/* flags, incl. kern & message, e.g. DONE */
 	int	rtm_addrs;	/* bitmask identifying sockaddrs in msg */
 	pid_t	rtm_pid;	/* identify sender */
 	int	rtm_seq;	/* for sender to identify action */
 	int	rtm_errno;	/* why failed */
 	int	rtm_fmask;	/* bitmask used in RTM_CHANGE message */
 #define	rtm_use	rtm_fmask	/* deprecated, use rtm_rmx->rmx_pksent */
 	u_long	rtm_inits;	/* which metrics we are initializing */
 	struct	rt_metrics rtm_rmx; /* metrics themselves */
 };
 
 #define RTM_VERSION	5	/* Up the ante and ignore older versions */
 
 /*
  * Message types.
  */
 #define RTM_ADD		0x1	/* Add Route */
 #define RTM_DELETE	0x2	/* Delete Route */
 #define RTM_CHANGE	0x3	/* Change Metrics or flags */
 #define RTM_GET		0x4	/* Report Metrics */
 #define RTM_LOSING	0x5	/* Kernel Suspects Partitioning */
 #define RTM_REDIRECT	0x6	/* Told to use different route */
 #define RTM_MISS	0x7	/* Lookup failed on this address */
 #define RTM_LOCK	0x8	/* fix specified metrics */
 #define RTM_OLDADD	0x9	/* caused by SIOCADDRT */
 #define RTM_OLDDEL	0xa	/* caused by SIOCDELRT */
 #define RTM_RESOLVE	0xb	/* req to resolve dst to LL addr */
 #define RTM_NEWADDR	0xc	/* address being added to iface */
 #define RTM_DELADDR	0xd	/* address being removed from iface */
 #define RTM_IFINFO	0xe	/* iface going up/down etc. */
 #define	RTM_NEWMADDR	0xf	/* mcast group membership being added to if */
 #define	RTM_DELMADDR	0x10	/* mcast group membership being deleted */
 #define	RTM_IFANNOUNCE	0x11	/* iface arrival/departure */
 #define	RTM_IEEE80211	0x12	/* IEEE80211 wireless event */
 
 /*
  * Bitmask values for rtm_inits and rmx_locks.
  */
 #define RTV_MTU		0x1	/* init or lock _mtu */
 #define RTV_HOPCOUNT	0x2	/* init or lock _hopcount */
 #define RTV_EXPIRE	0x4	/* init or lock _expire */
 #define RTV_RPIPE	0x8	/* init or lock _recvpipe */
 #define RTV_SPIPE	0x10	/* init or lock _sendpipe */
 #define RTV_SSTHRESH	0x20	/* init or lock _ssthresh */
 #define RTV_RTT		0x40	/* init or lock _rtt */
 #define RTV_RTTVAR	0x80	/* init or lock _rttvar */
 
 /*
  * Bitmask values for rtm_addrs.
  */
 #define RTA_DST		0x1	/* destination sockaddr present */
 #define RTA_GATEWAY	0x2	/* gateway sockaddr present */
 #define RTA_NETMASK	0x4	/* netmask sockaddr present */
 #define RTA_GENMASK	0x8	/* cloning mask sockaddr present */
 #define RTA_IFP		0x10	/* interface name sockaddr present */
 #define RTA_IFA		0x20	/* interface addr sockaddr present */
 #define RTA_AUTHOR	0x40	/* sockaddr for author of redirect */
 #define RTA_BRD		0x80	/* for NEWADDR, broadcast or p-p dest addr */
 
 /*
  * Index offsets for sockaddr array for alternate internal encoding.
  */
 #define RTAX_DST	0	/* destination sockaddr present */
 #define RTAX_GATEWAY	1	/* gateway sockaddr present */
 #define RTAX_NETMASK	2	/* netmask sockaddr present */
 #define RTAX_GENMASK	3	/* cloning mask sockaddr present */
 #define RTAX_IFP	4	/* interface name sockaddr present */
 #define RTAX_IFA	5	/* interface addr sockaddr present */
 #define RTAX_AUTHOR	6	/* sockaddr for author of redirect */
 #define RTAX_BRD	7	/* for NEWADDR, broadcast or p-p dest addr */
 #define RTAX_MAX	8	/* size of array to allocate */
 
 struct rt_addrinfo {
 	int	rti_addrs;
 	struct	sockaddr *rti_info[RTAX_MAX];
 	int	rti_flags;
 	struct	ifaddr *rti_ifa;
 	struct	ifnet *rti_ifp;
 };
 
 /*
  * This macro returns the size of a struct sockaddr when passed
  * through a routing socket. Basically we round up sa_len to
  * a multiple of sizeof(long), with a minimum of sizeof(long).
  * The check for a NULL pointer is just a convenience, probably never used.
  * The case sa_len == 0 should only apply to empty structures.
  */
 #define SA_SIZE(sa)						\
     (  (!(sa) || ((struct sockaddr *)(sa))->sa_len == 0) ?	\
 	sizeof(long)		:				\
 	1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(long) - 1) ) )
 
 #ifdef _KERNEL
 
 #define	RT_LOCK_INIT(_rt) \
 	mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK)
 #define	RT_LOCK(_rt)		mtx_lock(&(_rt)->rt_mtx)
 #define	RT_TRYLOCK(_rt)		mtx_trylock(&(_rt)->rt_mtx)
 #define	RT_UNLOCK(_rt)		mtx_unlock(&(_rt)->rt_mtx)
 #define	RT_LOCK_DESTROY(_rt)	mtx_destroy(&(_rt)->rt_mtx)
 #define	RT_LOCK_ASSERT(_rt)	mtx_assert(&(_rt)->rt_mtx, MA_OWNED)
 
 #define	RT_ADDREF(_rt)	do {					\
 	RT_LOCK_ASSERT(_rt);					\
 	KASSERT((_rt)->rt_refcnt >= 0,				\
 		("negative refcnt %ld", (_rt)->rt_refcnt));	\
 	(_rt)->rt_refcnt++;					\
 } while (0)
 #define	RT_REMREF(_rt)	do {					\
 	RT_LOCK_ASSERT(_rt);					\
 	KASSERT((_rt)->rt_refcnt > 0,				\
 		("bogus refcnt %ld", (_rt)->rt_refcnt));	\
 	(_rt)->rt_refcnt--;					\
 } while (0)
 
 #define	RTFREE_LOCKED(_rt) do {					\
 		if ((_rt)->rt_refcnt <= 1)			\
 			rtfree(_rt);				\
 		else {						\
 			RT_REMREF(_rt);				\
 			RT_UNLOCK(_rt);				\
 		}						\
 		/* guard against invalid refs */		\
 		_rt = 0;					\
 	} while (0)
 #define	RTFREE(_rt) do {					\
 		RT_LOCK(_rt);					\
 		RTFREE_LOCKED(_rt);				\
 	} while (0)
 
-extern struct radix_node_head *rt_tables[AF_MAX+1];
+extern struct radix_node_head *rt_tables[RT_MAXFIBS][AF_MAX+1];
 
 struct ifmultiaddr;
 
-int	 rt_getifa(struct rt_addrinfo *);
 void	 rt_ieee80211msg(struct ifnet *, int, void *, size_t);
 void	 rt_ifannouncemsg(struct ifnet *, int);
 void	 rt_ifmsg(struct ifnet *);
 void	 rt_missmsg(int, struct rt_addrinfo *, int, int);
 void	 rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *);
 void	 rt_newmaddrmsg(int, struct ifmultiaddr *);
 int	 rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
 
 /*
  * Note the following locking behavior:
  *
  *    rtalloc_ign() and rtalloc() return ro->ro_rt unlocked
  *
  *    rtalloc1() returns a locked rtentry
  *
  *    rtfree() and RTFREE_LOCKED() require a locked rtentry
  *
  *    RTFREE() uses an unlocked entry.
  */
 
+int	 rtexpunge(struct rtentry *);
+void	 rtfree(struct rtentry *);
+
+/* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */
+/* These are used by old code not yet converted to use multiple FIBs */
+int	 rt_getifa(struct rt_addrinfo *);
 void	 rtalloc_ign(struct route *ro, u_long ignflags);
 void	 rtalloc(struct route *ro); /* XXX deprecated, use rtalloc_ign(ro, 0) */
 struct rtentry *rtalloc1(struct sockaddr *, int, u_long);
-int	 rtexpunge(struct rtentry *);
-void	 rtfree(struct rtentry *);
 int	 rtinit(struct ifaddr *, int, int);
 int	 rtioctl(u_long, caddr_t);
 void	 rtredirect(struct sockaddr *, struct sockaddr *,
 	    struct sockaddr *, int, struct sockaddr *);
 int	 rtrequest(int, struct sockaddr *,
 	    struct sockaddr *, struct sockaddr *, int, struct rtentry **);
 int	 rtrequest1(int, struct rt_addrinfo *, struct rtentry **);
 int	 rt_check(struct rtentry **, struct rtentry **, struct sockaddr *);
+
+/* defaults to "all" FIBs */
+int	 rtinit_fib(struct ifaddr *, int, int);
+
+/* XXX MRT NEW VERSIONS THAT USE FIBs
+ * For now the protocol independent versions are the same as the AF_INET ones
+ * but this will change.
+ */
+int	 rt_getifa_fib(struct rt_addrinfo *, u_int fibnum);
+void	 rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum);
+void	 rtalloc_fib(struct route *ro, u_int fibnum);
+struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int);
+int	 rtioctl_fib(u_long, caddr_t, u_int);
+void	 rtredirect_fib(struct sockaddr *, struct sockaddr *,
+	    struct sockaddr *, int, struct sockaddr *, u_int);
+int	 rtrequest_fib(int, struct sockaddr *,
+	    struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int);
+int	 rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int);
+int	 rt_check_fib(struct rtentry **, struct rtentry **, struct sockaddr *, u_int);
 
 #include <sys/eventhandler.h>
 typedef void (*rtevent_arp_update_fn)(void *, struct rtentry *, uint8_t *, struct sockaddr *);
 typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *);
 EVENTHANDLER_DECLARE(route_arp_update_event, rtevent_arp_update_fn);
 EVENTHANDLER_DECLARE(route_redirect_event, rtevent_redirect_fn);
 #endif
 
 #endif
Index: head/sys/net/rtsock.c
===================================================================
--- head/sys/net/rtsock.c	(revision 178887)
+++ head/sys/net/rtsock.c	(revision 178888)
@@ -1,1330 +1,1334 @@
 /*-
  * Copyright (c) 1988, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
  * $FreeBSD$
  */
 #include "opt_sctp.h"
 #include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/domain.h>
 #include <sys/kernel.h>
 #include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <net/if.h>
 #include <net/netisr.h>
 #include <net/raw_cb.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 
 #ifdef SCTP
 extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
 #endif /* SCTP */
 
 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
 
 /* NB: these are not modified */
 static struct	sockaddr route_dst = { 2, PF_ROUTE, };
 static struct	sockaddr route_src = { 2, PF_ROUTE, };
 static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
 
 static struct {
 	int	ip_count;	/* attached w/ AF_INET */
 	int	ip6_count;	/* attached w/ AF_INET6 */
 	int	ipx_count;	/* attached w/ AF_IPX */
 	int	any_count;	/* total attached */
 } route_cb;
 
 struct mtx rtsock_mtx;
 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
 
 #define	RTSOCK_LOCK()	mtx_lock(&rtsock_mtx)
 #define	RTSOCK_UNLOCK()	mtx_unlock(&rtsock_mtx)
 #define	RTSOCK_LOCK_ASSERT()	mtx_assert(&rtsock_mtx, MA_OWNED)
 
 static struct	ifqueue rtsintrq;
 
 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
 SYSCTL_INT(_net_route, OID_AUTO, netisr_maxqlen, CTLFLAG_RW,
     &rtsintrq.ifq_maxlen, 0, "maximum routing socket dispatch queue length");
 
 struct walkarg {
 	int	w_tmemsize;
 	int	w_op, w_arg;
 	caddr_t	w_tmem;
 	struct sysctl_req *w_req;
 };
 
 static void	rts_input(struct mbuf *m);
 static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
 static int	rt_msg2(int type, struct rt_addrinfo *rtinfo,
 			caddr_t cp, struct walkarg *w);
 static int	rt_xaddrs(caddr_t cp, caddr_t cplim,
 			struct rt_addrinfo *rtinfo);
 static int	sysctl_dumpentry(struct radix_node *rn, void *vw);
 static int	sysctl_iflist(int af, struct walkarg *w);
 static int	sysctl_ifmalist(int af, struct walkarg *w);
 static int	route_output(struct mbuf *m, struct socket *so);
 static void	rt_setmetrics(u_long which, const struct rt_metrics *in,
 			struct rt_metrics_lite *out);
 static void	rt_getmetrics(const struct rt_metrics_lite *in,
 			struct rt_metrics *out);
 static void	rt_dispatch(struct mbuf *, const struct sockaddr *);
 
 static void
 rts_init(void)
 {
 	int tmp;
 
 	rtsintrq.ifq_maxlen = 256;
 	if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
 		rtsintrq.ifq_maxlen = tmp;
 	mtx_init(&rtsintrq.ifq_mtx, "rts_inq", NULL, MTX_DEF);
 	netisr_register(NETISR_ROUTE, rts_input, &rtsintrq, NETISR_MPSAFE);
 }
 SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
 
 static void
 rts_input(struct mbuf *m)
 {
 	struct sockproto route_proto;
 	unsigned short *family;
 	struct m_tag *tag;
 
 	route_proto.sp_family = PF_ROUTE;
 	tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
 	if (tag != NULL) {
 		family = (unsigned short *)(tag + 1);
 		route_proto.sp_protocol = *family;
 		m_tag_delete(m, tag);
 	} else
 		route_proto.sp_protocol = 0;
 
 	raw_input(m, &route_proto, &route_src, &route_dst);
 }
 
 /*
  * It really doesn't make any sense at all for this code to share much
  * with raw_usrreq.c, since its functionality is so restricted.  XXX
  */
 static void
 rts_abort(struct socket *so)
 {
 
 	raw_usrreqs.pru_abort(so);
 }
 
 static void
 rts_close(struct socket *so)
 {
 
 	raw_usrreqs.pru_close(so);
 }
 
 /* pru_accept is EOPNOTSUPP */
 
 static int
 rts_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct rawcb *rp;
 	int s, error;
 
 	KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
 
 	/* XXX */
 	MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
 	if (rp == NULL)
 		return ENOBUFS;
 
 	/*
 	 * The splnet() is necessary to block protocols from sending
 	 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
 	 * this PCB is extant but incompletely initialized.
 	 * Probably we should try to do more of this work beforehand and
 	 * eliminate the spl.
 	 */
 	s = splnet();
 	so->so_pcb = (caddr_t)rp;
+	so->so_fibnum = td->td_proc->p_fibnum;
 	error = raw_attach(so, proto);
 	rp = sotorawcb(so);
 	if (error) {
 		splx(s);
 		so->so_pcb = NULL;
 		free(rp, M_PCB);
 		return error;
 	}
 	RTSOCK_LOCK();
 	switch(rp->rcb_proto.sp_protocol) {
 	case AF_INET:
 		route_cb.ip_count++;
 		break;
 	case AF_INET6:
 		route_cb.ip6_count++;
 		break;
 	case AF_IPX:
 		route_cb.ipx_count++;
 		break;
 	}
 	rp->rcb_faddr = &route_src;
 	route_cb.any_count++;
 	RTSOCK_UNLOCK();
 	soisconnected(so);
 	so->so_options |= SO_USELOOPBACK;
 	splx(s);
 	return 0;
 }
 
 static int
 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
 }
 
 static int
 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
 }
 
 /* pru_connect2 is EOPNOTSUPP */
 /* pru_control is EOPNOTSUPP */
 
 static void
 rts_detach(struct socket *so)
 {
 	struct rawcb *rp = sotorawcb(so);
 
 	KASSERT(rp != NULL, ("rts_detach: rp == NULL"));
 
 	RTSOCK_LOCK();
 	switch(rp->rcb_proto.sp_protocol) {
 	case AF_INET:
 		route_cb.ip_count--;
 		break;
 	case AF_INET6:
 		route_cb.ip6_count--;
 		break;
 	case AF_IPX:
 		route_cb.ipx_count--;
 		break;
 	}
 	route_cb.any_count--;
 	RTSOCK_UNLOCK();
 	raw_usrreqs.pru_detach(so);
 }
 
 static int
 rts_disconnect(struct socket *so)
 {
 
 	return (raw_usrreqs.pru_disconnect(so));
 }
 
 /* pru_listen is EOPNOTSUPP */
 
 static int
 rts_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 
 	return (raw_usrreqs.pru_peeraddr(so, nam));
 }
 
 /* pru_rcvd is EOPNOTSUPP */
 /* pru_rcvoob is EOPNOTSUPP */
 
 static int
 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 	 struct mbuf *control, struct thread *td)
 {
 
 	return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
 }
 
 /* pru_sense is null */
 
 static int
 rts_shutdown(struct socket *so)
 {
 
 	return (raw_usrreqs.pru_shutdown(so));
 }
 
 static int
 rts_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 
 	return (raw_usrreqs.pru_sockaddr(so, nam));
 }
 
 static struct pr_usrreqs route_usrreqs = {
 	.pru_abort =		rts_abort,
 	.pru_attach =		rts_attach,
 	.pru_bind =		rts_bind,
 	.pru_connect =		rts_connect,
 	.pru_detach =		rts_detach,
 	.pru_disconnect =	rts_disconnect,
 	.pru_peeraddr =		rts_peeraddr,
 	.pru_send =		rts_send,
 	.pru_shutdown =		rts_shutdown,
 	.pru_sockaddr =		rts_sockaddr,
 	.pru_close =		rts_close,
 };
 
 /*ARGSUSED*/
 static int
 route_output(struct mbuf *m, struct socket *so)
 {
 #define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
 	struct rt_msghdr *rtm = NULL;
 	struct rtentry *rt = NULL;
 	struct radix_node_head *rnh;
 	struct rt_addrinfo info;
 	int len, error = 0;
 	struct ifnet *ifp = NULL;
 	struct sockaddr_in jail;
 
 #define senderr(e) { error = e; goto flush;}
 	if (m == NULL || ((m->m_len < sizeof(long)) &&
 		       (m = m_pullup(m, sizeof(long))) == NULL))
 		return (ENOBUFS);
 	if ((m->m_flags & M_PKTHDR) == 0)
 		panic("route_output");
 	len = m->m_pkthdr.len;
 	if (len < sizeof(*rtm) ||
 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
 		info.rti_info[RTAX_DST] = NULL;
 		senderr(EINVAL);
 	}
 	R_Malloc(rtm, struct rt_msghdr *, len);
 	if (rtm == NULL) {
 		info.rti_info[RTAX_DST] = NULL;
 		senderr(ENOBUFS);
 	}
 	m_copydata(m, 0, len, (caddr_t)rtm);
 	if (rtm->rtm_version != RTM_VERSION) {
 		info.rti_info[RTAX_DST] = NULL;
 		senderr(EPROTONOSUPPORT);
 	}
 	rtm->rtm_pid = curproc->p_pid;
 	bzero(&info, sizeof(info));
 	info.rti_addrs = rtm->rtm_addrs;
 	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
 		info.rti_info[RTAX_DST] = NULL;
 		senderr(EINVAL);
 	}
 	info.rti_flags = rtm->rtm_flags;
 	if (info.rti_info[RTAX_DST] == NULL ||
 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
 	     info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
 		senderr(EINVAL);
 	if (info.rti_info[RTAX_GENMASK]) {
 		struct radix_node *t;
 		t = rn_addmask((caddr_t) info.rti_info[RTAX_GENMASK], 0, 1);
 		if (t != NULL &&
 		    bcmp((char *)(void *)info.rti_info[RTAX_GENMASK] + 1,
 		    (char *)(void *)t->rn_key + 1,
 		    ((struct sockaddr *)t->rn_key)->sa_len - 1) == 0)
 			info.rti_info[RTAX_GENMASK] =
 			    (struct sockaddr *)t->rn_key;
 		else
 			senderr(ENOBUFS);
 	}
 
 	/*
 	 * Verify that the caller has the appropriate privilege; RTM_GET
 	 * is the only operation the non-superuser is allowed.
 	 */
 	if (rtm->rtm_type != RTM_GET) {
 		error = priv_check(curthread, PRIV_NET_ROUTE);
 		if (error)
 			senderr(error);
 	}
 
 	switch (rtm->rtm_type) {
 		struct rtentry *saved_nrt;
 
 	case RTM_ADD:
 		if (info.rti_info[RTAX_GATEWAY] == NULL)
 			senderr(EINVAL);
 		saved_nrt = NULL;
-		error = rtrequest1(RTM_ADD, &info, &saved_nrt);
+		error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt,
+		    so->so_fibnum);
 		if (error == 0 && saved_nrt) {
 			RT_LOCK(saved_nrt);
 			rt_setmetrics(rtm->rtm_inits,
 				&rtm->rtm_rmx, &saved_nrt->rt_rmx);
 			rtm->rtm_index = saved_nrt->rt_ifp->if_index;
 			RT_REMREF(saved_nrt);
 			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
 			RT_UNLOCK(saved_nrt);
 		}
 		break;
 
 	case RTM_DELETE:
 		saved_nrt = NULL;
-		error = rtrequest1(RTM_DELETE, &info, &saved_nrt);
+		error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt,
+		    so->so_fibnum);
 		if (error == 0) {
 			RT_LOCK(saved_nrt);
 			rt = saved_nrt;
 			goto report;
 		}
 		break;
 
 	case RTM_GET:
 	case RTM_CHANGE:
 	case RTM_LOCK:
-		rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
+		rnh = rt_tables[so->so_fibnum][info.rti_info[RTAX_DST]->sa_family];
 		if (rnh == NULL)
 			senderr(EAFNOSUPPORT);
 		RADIX_NODE_HEAD_LOCK(rnh);
 		rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
 			info.rti_info[RTAX_NETMASK], rnh);
 		if (rt == NULL) {	/* XXX looks bogus */
 			RADIX_NODE_HEAD_UNLOCK(rnh);
 			senderr(ESRCH);
 		}
 #ifdef RADIX_MPATH
 		/*
 		 * for RTM_CHANGE/LOCK, if we got multipath routes,
 		 * we require users to specify a matching RTAX_GATEWAY.
 		 *
 		 * for RTM_GET, gate is optional even with multipath.
 		 * if gate == NULL the first match is returned.
 		 * (no need to call rt_mpath_matchgate if gate == NULL)
 		 */
 		if (rn_mpath_capable(rnh) &&
 		    (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
 			rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
 			if (!rt) {
 				RADIX_NODE_HEAD_UNLOCK(rnh);
 				senderr(ESRCH);
 			}
 		}
 #endif
 		RT_LOCK(rt);
 		RT_ADDREF(rt);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 
 		/* 
 		 * Fix for PR: 82974
 		 *
 		 * RTM_CHANGE/LOCK need a perfect match, rn_lookup()
 		 * returns a perfect match in case a netmask is
 		 * specified.  For host routes only a longest prefix
 		 * match is returned so it is necessary to compare the
 		 * existence of the netmask.  If both have a netmask
 		 * rnh_lookup() did a perfect match and if none of them
 		 * have a netmask both are host routes which is also a
 		 * perfect match.
 		 */
 
 		if (rtm->rtm_type != RTM_GET && 
 		    (!rt_mask(rt) != !info.rti_info[RTAX_NETMASK])) {
 			RT_UNLOCK(rt);
 			senderr(ESRCH);
 		}
 
 		switch(rtm->rtm_type) {
 
 		case RTM_GET:
 		report:
 			RT_LOCK_ASSERT(rt);
 			info.rti_info[RTAX_DST] = rt_key(rt);
 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 			info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
 			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
 				ifp = rt->rt_ifp;
 				if (ifp) {
 					info.rti_info[RTAX_IFP] =
 					    ifp->if_addr->ifa_addr;
 					if (jailed(so->so_cred)) {
 						bzero(&jail, sizeof(jail));
 						jail.sin_family = PF_INET;
 						jail.sin_len = sizeof(jail);
 						jail.sin_addr.s_addr =
 						htonl(prison_getip(so->so_cred));
 						info.rti_info[RTAX_IFA] =
 						    (struct sockaddr *)&jail;
 					} else
 						info.rti_info[RTAX_IFA] =
 						    rt->rt_ifa->ifa_addr;
 					if (ifp->if_flags & IFF_POINTOPOINT)
 						info.rti_info[RTAX_BRD] =
 						    rt->rt_ifa->ifa_dstaddr;
 					rtm->rtm_index = ifp->if_index;
 				} else {
 					info.rti_info[RTAX_IFP] = NULL;
 					info.rti_info[RTAX_IFA] = NULL;
 				}
 			} else if ((ifp = rt->rt_ifp) != NULL) {
 				rtm->rtm_index = ifp->if_index;
 			}
 			len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
 			if (len > rtm->rtm_msglen) {
 				struct rt_msghdr *new_rtm;
 				R_Malloc(new_rtm, struct rt_msghdr *, len);
 				if (new_rtm == NULL) {
 					RT_UNLOCK(rt);
 					senderr(ENOBUFS);
 				}
 				bcopy(rtm, new_rtm, rtm->rtm_msglen);
 				Free(rtm); rtm = new_rtm;
 			}
 			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
 			rtm->rtm_flags = rt->rt_flags;
 			rtm->rtm_use = 0;
 			rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
 			rtm->rtm_addrs = info.rti_addrs;
 			break;
 
 		case RTM_CHANGE:
 			/*
 			 * New gateway could require new ifaddr, ifp;
 			 * flags may also be different; ifp may be specified
 			 * by ll sockaddr when protocol address is ambiguous
 			 */
 			if (((rt->rt_flags & RTF_GATEWAY) &&
 			     info.rti_info[RTAX_GATEWAY] != NULL) ||
 			    info.rti_info[RTAX_IFP] != NULL ||
 			    (info.rti_info[RTAX_IFA] != NULL &&
 			     !sa_equal(info.rti_info[RTAX_IFA],
 				       rt->rt_ifa->ifa_addr))) {
 				RT_UNLOCK(rt);
-				if ((error = rt_getifa(&info)) != 0)
+				if ((error = rt_getifa_fib(&info,
+				    rt->rt_fibnum)) != 0)
 					senderr(error);
 				RT_LOCK(rt);
 			}
 			if (info.rti_ifa != NULL &&
 			    info.rti_ifa != rt->rt_ifa &&
 			    rt->rt_ifa != NULL &&
 			    rt->rt_ifa->ifa_rtrequest != NULL) {
 				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
 				    &info);
 				IFAFREE(rt->rt_ifa);
 			}
 			if (info.rti_info[RTAX_GATEWAY] != NULL) {
 				if ((error = rt_setgate(rt, rt_key(rt),
 					info.rti_info[RTAX_GATEWAY])) != 0) {
 					RT_UNLOCK(rt);
 					senderr(error);
 				}
 				if (!(rt->rt_flags & RTF_LLINFO))
 					rt->rt_flags |= RTF_GATEWAY;
 			}
 			if (info.rti_ifa != NULL &&
 			    info.rti_ifa != rt->rt_ifa) {
 				IFAREF(info.rti_ifa);
 				rt->rt_ifa = info.rti_ifa;
 				rt->rt_ifp = info.rti_ifp;
 			}
 			/* Allow some flags to be toggled on change. */
 			if (rtm->rtm_fmask & RTF_FMASK)
 				rt->rt_flags = (rt->rt_flags &
 				    ~rtm->rtm_fmask) |
 				    (rtm->rtm_flags & rtm->rtm_fmask);
 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
 					&rt->rt_rmx);
 			rtm->rtm_index = rt->rt_ifp->if_index;
 			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
 			       rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
 			if (info.rti_info[RTAX_GENMASK])
 				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
 			/* FALLTHROUGH */
 		case RTM_LOCK:
 			/* We don't support locks anymore */
 			break;
 		}
 		RT_UNLOCK(rt);
 		break;
 
 	default:
 		senderr(EOPNOTSUPP);
 	}
 
 flush:
 	if (rtm) {
 		if (error)
 			rtm->rtm_errno = error;
 		else
 			rtm->rtm_flags |= RTF_DONE;
 	}
 	if (rt)		/* XXX can this be true? */
 		RTFREE(rt);
     {
 	struct rawcb *rp = NULL;
 	/*
 	 * Check to see if we don't want our own messages.
 	 */
 	if ((so->so_options & SO_USELOOPBACK) == 0) {
 		if (route_cb.any_count <= 1) {
 			if (rtm)
 				Free(rtm);
 			m_freem(m);
 			return (error);
 		}
 		/* There is another listener, so construct message */
 		rp = sotorawcb(so);
 	}
 	if (rtm) {
 		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
 			m_freem(m);
 			m = NULL;
 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
 		Free(rtm);
 	}
 	if (m) {
 		if (rp) {
 			/*
 			 * XXX insure we don't get a copy by
 			 * invalidating our protocol
 			 */
 			unsigned short family = rp->rcb_proto.sp_family;
 			rp->rcb_proto.sp_family = 0;
 			rt_dispatch(m, info.rti_info[RTAX_DST]);
 			rp->rcb_proto.sp_family = family;
 		} else
 			rt_dispatch(m, info.rti_info[RTAX_DST]);
 	}
     }
 	return (error);
 #undef	sa_equal
 }
 
 static void
 rt_setmetrics(u_long which, const struct rt_metrics *in,
 	struct rt_metrics_lite *out)
 {
 #define metric(f, e) if (which & (f)) out->e = in->e;
 	/*
 	 * Only these are stored in the routing entry since introduction
 	 * of tcp hostcache. The rest is ignored.
 	 */
 	metric(RTV_MTU, rmx_mtu);
 	/* Userland -> kernel timebase conversion. */
 	if (which & RTV_EXPIRE)
 		out->rmx_expire = in->rmx_expire ?
 		    in->rmx_expire - time_second + time_uptime : 0;
 #undef metric
 }
 
 static void
 rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
 {
 #define metric(e) out->e = in->e;
 	bzero(out, sizeof(*out));
 	metric(rmx_mtu);
 	/* Kernel -> userland timebase conversion. */
 	out->rmx_expire = in->rmx_expire ?
 	    in->rmx_expire - time_uptime + time_second : 0;
 #undef metric
 }
 
 /*
  * Extract the addresses of the passed sockaddrs.
  * Do a little sanity checking so as to avoid bad memory references.
  * This data is derived straight from userland.
  */
 static int
 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
 {
 	struct sockaddr *sa;
 	int i;
 
 	for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
 			continue;
 		sa = (struct sockaddr *)cp;
 		/*
 		 * It won't fit.
 		 */
 		if (cp + sa->sa_len > cplim)
 			return (EINVAL);
 		/*
 		 * there are no more.. quit now
 		 * If there are more bits, they are in error.
 		 * I've seen this. route(1) can evidently generate these. 
 		 * This causes kernel to core dump.
 		 * for compatibility, If we see this, point to a safe address.
 		 */
 		if (sa->sa_len == 0) {
 			rtinfo->rti_info[i] = &sa_zero;
 			return (0); /* should be EINVAL but for compat */
 		}
 		/* accept it */
 		rtinfo->rti_info[i] = sa;
 		cp += SA_SIZE(sa);
 	}
 	return (0);
 }
 
 static struct mbuf *
 rt_msg1(int type, struct rt_addrinfo *rtinfo)
 {
 	struct rt_msghdr *rtm;
 	struct mbuf *m;
 	int i;
 	struct sockaddr *sa;
 	int len, dlen;
 
 	switch (type) {
 
 	case RTM_DELADDR:
 	case RTM_NEWADDR:
 		len = sizeof(struct ifa_msghdr);
 		break;
 
 	case RTM_DELMADDR:
 	case RTM_NEWMADDR:
 		len = sizeof(struct ifma_msghdr);
 		break;
 
 	case RTM_IFINFO:
 		len = sizeof(struct if_msghdr);
 		break;
 
 	case RTM_IFANNOUNCE:
 	case RTM_IEEE80211:
 		len = sizeof(struct if_announcemsghdr);
 		break;
 
 	default:
 		len = sizeof(struct rt_msghdr);
 	}
 	if (len > MCLBYTES)
 		panic("rt_msg1");
 	m = m_gethdr(M_DONTWAIT, MT_DATA);
 	if (m && len > MHLEN) {
 		MCLGET(m, M_DONTWAIT);
 		if ((m->m_flags & M_EXT) == 0) {
 			m_free(m);
 			m = NULL;
 		}
 	}
 	if (m == NULL)
 		return (m);
 	m->m_pkthdr.len = m->m_len = len;
 	m->m_pkthdr.rcvif = NULL;
 	rtm = mtod(m, struct rt_msghdr *);
 	bzero((caddr_t)rtm, len);
 	for (i = 0; i < RTAX_MAX; i++) {
 		if ((sa = rtinfo->rti_info[i]) == NULL)
 			continue;
 		rtinfo->rti_addrs |= (1 << i);
 		dlen = SA_SIZE(sa);
 		m_copyback(m, len, dlen, (caddr_t)sa);
 		len += dlen;
 	}
 	if (m->m_pkthdr.len != len) {
 		m_freem(m);
 		return (NULL);
 	}
 	rtm->rtm_msglen = len;
 	rtm->rtm_version = RTM_VERSION;
 	rtm->rtm_type = type;
 	return (m);
 }
 
 static int
 rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
 {
 	int i;
 	int len, dlen, second_time = 0;
 	caddr_t cp0;
 
 	rtinfo->rti_addrs = 0;
 again:
 	switch (type) {
 
 	case RTM_DELADDR:
 	case RTM_NEWADDR:
 		len = sizeof(struct ifa_msghdr);
 		break;
 
 	case RTM_IFINFO:
 		len = sizeof(struct if_msghdr);
 		break;
 
 	case RTM_NEWMADDR:
 		len = sizeof(struct ifma_msghdr);
 		break;
 
 	default:
 		len = sizeof(struct rt_msghdr);
 	}
 	cp0 = cp;
 	if (cp0)
 		cp += len;
 	for (i = 0; i < RTAX_MAX; i++) {
 		struct sockaddr *sa;
 
 		if ((sa = rtinfo->rti_info[i]) == NULL)
 			continue;
 		rtinfo->rti_addrs |= (1 << i);
 		dlen = SA_SIZE(sa);
 		if (cp) {
 			bcopy((caddr_t)sa, cp, (unsigned)dlen);
 			cp += dlen;
 		}
 		len += dlen;
 	}
 	len = ALIGN(len);
 	if (cp == NULL && w != NULL && !second_time) {
 		struct walkarg *rw = w;
 
 		if (rw->w_req) {
 			if (rw->w_tmemsize < len) {
 				if (rw->w_tmem)
 					free(rw->w_tmem, M_RTABLE);
 				rw->w_tmem = (caddr_t)
 					malloc(len, M_RTABLE, M_NOWAIT);
 				if (rw->w_tmem)
 					rw->w_tmemsize = len;
 			}
 			if (rw->w_tmem) {
 				cp = rw->w_tmem;
 				second_time = 1;
 				goto again;
 			}
 		}
 	}
 	if (cp) {
 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
 
 		rtm->rtm_version = RTM_VERSION;
 		rtm->rtm_type = type;
 		rtm->rtm_msglen = len;
 	}
 	return (len);
 }
 
 /*
  * This routine is called to generate a message from the routing
  * socket indicating that a redirect has occured, a routing lookup
  * has failed, or that a protocol has detected timeouts to a particular
  * destination.
  */
 void
 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
 {
 	struct rt_msghdr *rtm;
 	struct mbuf *m;
 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
 
 	if (route_cb.any_count == 0)
 		return;
 	m = rt_msg1(type, rtinfo);
 	if (m == NULL)
 		return;
 	rtm = mtod(m, struct rt_msghdr *);
 	rtm->rtm_flags = RTF_DONE | flags;
 	rtm->rtm_errno = error;
 	rtm->rtm_addrs = rtinfo->rti_addrs;
 	rt_dispatch(m, sa);
 }
 
 /*
  * This routine is called to generate a message from the routing
  * socket indicating that the status of a network interface has changed.
  */
 void
 rt_ifmsg(struct ifnet *ifp)
 {
 	struct if_msghdr *ifm;
 	struct mbuf *m;
 	struct rt_addrinfo info;
 
 	if (route_cb.any_count == 0)
 		return;
 	bzero((caddr_t)&info, sizeof(info));
 	m = rt_msg1(RTM_IFINFO, &info);
 	if (m == NULL)
 		return;
 	ifm = mtod(m, struct if_msghdr *);
 	ifm->ifm_index = ifp->if_index;
 	ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 	ifm->ifm_data = ifp->if_data;
 	ifm->ifm_addrs = 0;
 	rt_dispatch(m, NULL);
 }
 
 /*
  * This is called to generate messages from the routing socket
  * indicating a network interface has had addresses associated with it.
  * if we ever reverse the logic and replace messages TO the routing
  * socket indicate a request to configure interfaces, then it will
  * be unnecessary as the routing socket will automatically generate
  * copies of it.
  */
 void
 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
 {
 	struct rt_addrinfo info;
 	struct sockaddr *sa = NULL;
 	int pass;
 	struct mbuf *m = NULL;
 	struct ifnet *ifp = ifa->ifa_ifp;
 
 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
 		("unexpected cmd %u", cmd));
 #ifdef SCTP
 	/*
 	 * notify the SCTP stack
 	 * this will only get called when an address is added/deleted
 	 * XXX pass the ifaddr struct instead if ifa->ifa_addr...
 	 */
 	sctp_addr_change(ifa, cmd);
 #endif /* SCTP */
 	if (route_cb.any_count == 0)
 		return;
 	for (pass = 1; pass < 3; pass++) {
 		bzero((caddr_t)&info, sizeof(info));
 		if ((cmd == RTM_ADD && pass == 1) ||
 		    (cmd == RTM_DELETE && pass == 2)) {
 			struct ifa_msghdr *ifam;
 			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
 
 			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
 			info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
 			if ((m = rt_msg1(ncmd, &info)) == NULL)
 				continue;
 			ifam = mtod(m, struct ifa_msghdr *);
 			ifam->ifam_index = ifp->if_index;
 			ifam->ifam_metric = ifa->ifa_metric;
 			ifam->ifam_flags = ifa->ifa_flags;
 			ifam->ifam_addrs = info.rti_addrs;
 		}
 		if ((cmd == RTM_ADD && pass == 2) ||
 		    (cmd == RTM_DELETE && pass == 1)) {
 			struct rt_msghdr *rtm;
 
 			if (rt == NULL)
 				continue;
 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 			info.rti_info[RTAX_DST] = sa = rt_key(rt);
 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 			if ((m = rt_msg1(cmd, &info)) == NULL)
 				continue;
 			rtm = mtod(m, struct rt_msghdr *);
 			rtm->rtm_index = ifp->if_index;
 			rtm->rtm_flags |= rt->rt_flags;
 			rtm->rtm_errno = error;
 			rtm->rtm_addrs = info.rti_addrs;
 		}
 		rt_dispatch(m, sa);
 	}
 }
 
 /*
  * This is the analogue to the rt_newaddrmsg which performs the same
  * function but for multicast group memberhips.  This is easier since
  * there is no route state to worry about.
  */
 void
 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
 {
 	struct rt_addrinfo info;
 	struct mbuf *m = NULL;
 	struct ifnet *ifp = ifma->ifma_ifp;
 	struct ifma_msghdr *ifmam;
 
 	if (route_cb.any_count == 0)
 		return;
 
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
 	info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL;
 	/*
 	 * If a link-layer address is present, present it as a ``gateway''
 	 * (similarly to how ARP entries, e.g., are presented).
 	 */
 	info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
 	m = rt_msg1(cmd, &info);
 	if (m == NULL)
 		return;
 	ifmam = mtod(m, struct ifma_msghdr *);
 	KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
 	    __func__));
 	ifmam->ifmam_index = ifp->if_index;
 	ifmam->ifmam_addrs = info.rti_addrs;
 	rt_dispatch(m, ifma->ifma_addr);
 }
 
 static struct mbuf *
 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
 	struct rt_addrinfo *info)
 {
 	struct if_announcemsghdr *ifan;
 	struct mbuf *m;
 
 	if (route_cb.any_count == 0)
 		return NULL;
 	bzero((caddr_t)info, sizeof(*info));
 	m = rt_msg1(type, info);
 	if (m != NULL) {
 		ifan = mtod(m, struct if_announcemsghdr *);
 		ifan->ifan_index = ifp->if_index;
 		strlcpy(ifan->ifan_name, ifp->if_xname,
 			sizeof(ifan->ifan_name));
 		ifan->ifan_what = what;
 	}
 	return m;
 }
 
 /*
  * This is called to generate routing socket messages indicating
  * IEEE80211 wireless events.
  * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
  */
 void
 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
 {
 	struct mbuf *m;
 	struct rt_addrinfo info;
 
 	/*
 	 * Build the RTM_IEEE80211 announce header for ifp; 'what' is stored
 	 * in ifan_what.  Up to data_len bytes of 'data' are appended after
 	 * the header and the result is dispatched with no address family.
 	 * The message is silently dropped if no mbuf can be obtained or if
 	 * the payload cannot fit in a single extra mbuf.
 	 */
 	m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
 	if (m != NULL) {
 		/*
 		 * Append the ieee80211 data.  Try to stick it in the
 		 * mbuf containing the ifannounce msg; otherwise allocate
 		 * a new mbuf and append.
 		 *
 		 * NB: we assume m is a single mbuf.
 		 */
 		if (data_len > M_TRAILINGSPACE(m)) {
 			struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
 			if (n == NULL) {
 				m_freem(m);
 				return;
 			}
 			/*
 			 * The fresh mbuf has a fixed amount of room; never
 			 * copy more than it can hold.  Drop the message
 			 * rather than overrun the mbuf data area.
 			 */
 			if (data_len > M_TRAILINGSPACE(n)) {
 				m_freem(n);
 				m_freem(m);
 				return;
 			}
 			bcopy(data, mtod(n, void *), data_len);
 			n->m_len = data_len;
 			m->m_next = n;
 		} else if (data_len > 0) {
 			bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
 			m->m_len += data_len;
 		}
 		/* Keep the packet-header and message lengths in sync. */
 		if (m->m_flags & M_PKTHDR)
 			m->m_pkthdr.len += data_len;
 		mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
 		rt_dispatch(m, NULL);
 	}
 }
 
 /*
  * This is called to generate routing socket messages indicating
  * network interface arrival and departure.
  */
 void
 rt_ifannouncemsg(struct ifnet *ifp, int what)
 {
 	struct rt_addrinfo info;
 	struct mbuf *msg;
 
 	/* Build the RTM_IFANNOUNCE message; NULL means nothing to send. */
 	msg = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
 	if (msg == NULL)
 		return;
 	rt_dispatch(msg, NULL);
 }
 
 static void
 rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
 {
 	struct m_tag *famtag;
 	unsigned short *famp;
 
 	/*
 	 * When a sockaddr is supplied, record its family in an m_tag so it
 	 * is available when the mbuf is injected into the routing socket
 	 * buffer from the netisr.
 	 */
 	if (sa != NULL) {
 		famtag = m_tag_get(PACKET_TAG_RTSOCKFAM,
 		    sizeof(unsigned short), M_NOWAIT);
 		if (famtag == NULL) {
 			/* No tag available: drop the message. */
 			m_freem(m);
 			return;
 		}
 		/* The tag payload sits directly after the tag header. */
 		famp = (unsigned short *)(famtag + 1);
 		*famp = sa->sa_family;
 		m_tag_prepend(m, famtag);
 	}
 	netisr_queue(NETISR_ROUTE, m);	/* mbuf is free'd on failure. */
 }
 
 /*
  * This is used in dumping the kernel table via sysctl().
  */
 static int
 sysctl_dumpentry(struct radix_node *rn, void *vw)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 	struct walkarg *w = vw;
 	struct rt_addrinfo info;
 	struct rt_msghdr *rtm;
 	int size;
 
 	/* For NET_RT_FLAGS only entries matching the flag mask are dumped. */
 	if (w->w_op == NET_RT_FLAGS && (rt->rt_flags & w->w_arg) == 0)
 		return 0;
 
 	/* Collect the addresses describing this route. */
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_DST] = rt_key(rt);
 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
 	if (rt->rt_ifp) {
 		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
 	}
 
 	size = rt_msg2(RTM_GET, &info, NULL, w);
 
 	/* Without a request and a staging buffer there is nothing to copy. */
 	if (!w->w_req || !w->w_tmem)
 		return (0);
 
 	/*
 	 * Complete the header held in the staging buffer and copy it out.
 	 * NOTE(review): rt_ifp is dereferenced here without the NULL check
 	 * used above -- confirm it cannot be NULL on this path.
 	 */
 	rtm = (struct rt_msghdr *)w->w_tmem;
 	rtm->rtm_flags = rt->rt_flags;
 	rtm->rtm_use = rt->rt_rmx.rmx_pksent;
 	rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
 	rtm->rtm_index = rt->rt_ifp->if_index;
 	rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
 	rtm->rtm_addrs = info.rti_addrs;
 	return (SYSCTL_OUT(w->w_req, (caddr_t)rtm, size));
 }
 
 /*
  * Dump interface information for sysctl: for every interface on the
  * global ifnet list (or only the one whose index equals w->w_arg when
  * that is non-zero) emit one RTM_IFINFO message followed by one
  * RTM_NEWADDR message per address that follows ifp->if_addr on the
  * address list.  Addresses are filtered by family (af, 0 = any) and by
  * jail visibility.  Returns 0 or the first error from SYSCTL_OUT.
  */
 static int
 sysctl_iflist(int af, struct walkarg *w)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct rt_addrinfo info;
 	int len, error = 0;
 
 	bzero((caddr_t)&info, sizeof(info));
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		if (w->w_arg && w->w_arg != ifp->if_index)
 			continue;
 		ifa = ifp->if_addr;
 		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
 		len = rt_msg2(RTM_IFINFO, &info, NULL, w);
 		/* RTAX_IFP is only wanted in the RTM_IFINFO message. */
 		info.rti_info[RTAX_IFP] = NULL;
 		if (w->w_req && w->w_tmem) {
 			struct if_msghdr *ifm;
 
 			/* Complete the header in the staging buffer. */
 			ifm = (struct if_msghdr *)w->w_tmem;
 			ifm->ifm_index = ifp->if_index;
 			ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 			ifm->ifm_data = ifp->if_data;
 			ifm->ifm_addrs = info.rti_addrs;
 			error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len);
 			if (error)
 				goto done;
 		}
 		/* Walk the addresses that follow ifp->if_addr on the list. */
 		while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
 			if (af && af != ifa->ifa_addr->sa_family)
 				continue;
 			/* Skip addresses prison_if() rejects for a jail. */
 			if (jailed(curthread->td_ucred) &&
 			    prison_if(curthread->td_ucred, ifa->ifa_addr))
 				continue;
 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
 			len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
 			if (w->w_req && w->w_tmem) {
 				struct ifa_msghdr *ifam;
 
 				ifam = (struct ifa_msghdr *)w->w_tmem;
 				ifam->ifam_index = ifa->ifa_ifp->if_index;
 				ifam->ifam_flags = ifa->ifa_flags;
 				ifam->ifam_metric = ifa->ifa_metric;
 				ifam->ifam_addrs = info.rti_addrs;
 				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
 				if (error)
 					goto done;
 			}
 		}
 		/* Reset the per-address slots before the next interface. */
 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
 			info.rti_info[RTAX_BRD] = NULL;
 	}
 done:
 	/* Single exit so the ifnet read lock is always released. */
 	IFNET_RUNLOCK();
 	return (error);
 }
 
 /*
  * Dump multicast group memberships for sysctl: for every interface on
  * the global ifnet list (or only the one whose index equals w->w_arg
  * when that is non-zero) emit one RTM_NEWMADDR message per entry on
  * the interface's multicast address list.  Entries are filtered by
  * family (af, 0 = any) and by jail visibility.  Returns 0 or the first
  * error from SYSCTL_OUT.
  */
 int
 sysctl_ifmalist(int af, struct walkarg *w)
 {
 	struct ifnet *ifp;
 	struct ifmultiaddr *ifma;
 	struct	rt_addrinfo info;
 	int	len, error = 0;
 	struct ifaddr *ifa;
 
 	bzero((caddr_t)&info, sizeof(info));
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
 		if (w->w_arg && w->w_arg != ifp->if_index)
 			continue;
 		ifa = ifp->if_addr;
 		info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
 		/* The multicast list is walked under the address lock. */
 		IF_ADDR_LOCK(ifp);
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			if (af && af != ifma->ifma_addr->sa_family)
 				continue;
 			/*
 			 * Use the calling thread's credential, matching
 			 * sysctl_iflist(), rather than reaching into the
 			 * process credential.
 			 */
 			if (jailed(curthread->td_ucred) &&
 			    prison_if(curthread->td_ucred, ifma->ifma_addr))
 				continue;
 			info.rti_info[RTAX_IFA] = ifma->ifma_addr;
 			/*
 			 * A link-layer mapping is only reported for
 			 * memberships that are not themselves AF_LINK.
 			 */
 			info.rti_info[RTAX_GATEWAY] =
 			    (ifma->ifma_addr->sa_family != AF_LINK) ?
 			    ifma->ifma_lladdr : NULL;
 			len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
 			if (w->w_req && w->w_tmem) {
 				struct ifma_msghdr *ifmam;
 
 				ifmam = (struct ifma_msghdr *)w->w_tmem;
 				ifmam->ifmam_index = ifma->ifma_ifp->if_index;
 				ifmam->ifmam_flags = 0;
 				ifmam->ifmam_addrs = info.rti_addrs;
 				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
 				if (error) {
 					/* Drop the inner lock before leaving. */
 					IF_ADDR_UNLOCK(ifp);
 					goto done;
 				}
 			}
 		}
 		IF_ADDR_UNLOCK(ifp);
 	}
 done:
 	IFNET_RUNLOCK();
 	return (error);
 }
 
 /*
  * Read-only sysctl handler for the routing-socket node.  After the
  * first name component is skipped, exactly three integers must remain:
  * the address family, the operation (NET_RT_*) and an operation
  * argument.  Writes are rejected with EPERM.  Any staging buffer left
  * in w.w_tmem is freed before returning.
  */
 static int
 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
 {
 	int	*name = (int *)arg1;
 	u_int	namelen = arg2;
 	struct radix_node_head *rnh;
 	int	i, lim, error = EINVAL;
 	u_char	af;
 	struct	walkarg w;
 
 	/* Skip the leading name component. */
 	name ++;
 	namelen--;
 	if (req->newptr)
 		return (EPERM);
 	if (namelen != 3)
 		return ((namelen < 3) ? EISDIR : ENOTDIR);
 	af = name[0];
 	if (af > AF_MAX)
 		return (EINVAL);
 	bzero(&w, sizeof(w));
 	w.w_op = name[1];
 	w.w_arg = name[2];
 	w.w_req = req;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error)
 		return (error);
 	switch (w.w_op) {
 
 	case NET_RT_DUMP:
 	case NET_RT_FLAGS:
 		if (af == 0) {			/* dump all tables */
 			i = 1;
 			lim = AF_MAX;
 		} else				/* dump only one table */
 			i = lim = af;
 		/*
 		 * Walk each selected table under its lock.  A missing table
 		 * is an error only when one specific family was requested.
 		 */
 		for (error = 0; error == 0 && i <= lim; i++)
-			if ((rnh = rt_tables[i]) != NULL) {
+			if ((rnh = rt_tables[curthread->td_proc->p_fibnum][i]) != NULL) {
 				RADIX_NODE_HEAD_LOCK(rnh); 
 			    	error = rnh->rnh_walktree(rnh,
 				    sysctl_dumpentry, &w);
 				RADIX_NODE_HEAD_UNLOCK(rnh);
 			} else if (af != 0)
 				error = EAFNOSUPPORT;
 		break;
 
 	case NET_RT_IFLIST:
 		error = sysctl_iflist(af, &w);
 		break;
 
 	case NET_RT_IFMALIST:
 		error = sysctl_ifmalist(af, &w);
 		break;
 	}
 	if (w.w_tmem)
 		free(w.w_tmem, M_RTABLE);
 	return (error);
 }
 
 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
 
 /*
  * Definitions of protocols supported in the ROUTE domain.
  */
 
 static struct domain routedomain;		/* or at least forward */
 
 /*
  * Protocol switch for the ROUTE domain: a single SOCK_RAW entry whose
  * output path is route_output and whose control-input and init hooks
  * are the raw_* routines.
  */
 static struct protosw routesw[] = {
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&routedomain,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_output =		route_output,
 	.pr_ctlinput =		raw_ctlinput,
 	.pr_init =		raw_init,
 	.pr_usrreqs =		&route_usrreqs
 }
 };
 
 /*
  * Domain descriptor for PF_ROUTE.  dom_protoswNPROTOSW points one past
  * the last element of routesw.
  */
 static struct domain routedomain = {
 	.dom_family =		PF_ROUTE,
 	.dom_name =		 "route",
 	.dom_protosw =		routesw,
 	.dom_protoswNPROTOSW =	&routesw[sizeof(routesw)/sizeof(routesw[0])]
 };
 
 DOMAIN_SET(route);
Index: head/sys/netatalk/at_extern.h
===================================================================
--- head/sys/netatalk/at_extern.h	(revision 178887)
+++ head/sys/netatalk/at_extern.h	(revision 178888)
@@ -1,64 +1,65 @@
 /*-
  * Copyright (c) 1990,1994 Regents of The University of Michigan.
  * All Rights Reserved.
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby granted,
  * provided that the above copyright notice appears in all copies and
  * that both that copyright notice and this permission notice appear
  * in supporting documentation, and that the name of The University
  * of Michigan not be used in advertising or publicity pertaining to
  * distribution of the software without specific, written prior
  * permission. This software is supplied as is without expressed or
  * implied warranties of any kind.
  *
  * This product includes software developed by the University of
  * California, Berkeley and its contributors.
  *
  *	Research Systems Unix Group
  *	The University of Michigan
  *	c/o Wesley Craig
  *	535 W. William Street
  *	Ann Arbor, Michigan
  *	+1-313-764-2278
  *	netatalk@umich.edu
  *
  * $FreeBSD$
  */
 
 #ifndef _NETATALK_AT_EXTERN_H_
 #define	_NETATALK_AT_EXTERN_H_
 
 extern struct mtx	aarptab_mtx;
 
 #define	AARPTAB_LOCK()		mtx_lock(&aarptab_mtx)
 #define	AARPTAB_UNLOCK()	mtx_unlock(&aarptab_mtx)
 #define	AARPTAB_LOCK_ASSERT()	mtx_assert(&aarptab_mtx, MA_OWNED)
 #define	AARPTAB_UNLOCK_ASSERT()	mtx_assert(&aarptab_mtx, MA_NOTOWNED)
 
 struct at_ifaddr;
 struct ifnet;
 struct mbuf;
 struct route;
 struct thread;
 struct sockaddr_at;
 struct socket;
 void		 aarpintr(struct mbuf *);
 void		 aarpprobe(void *arg);
 int		 aarpresolve(struct ifnet *, struct mbuf *,
 		    struct sockaddr_at *, u_char *);
 void		 aarp_clean(void);
 void		 at1intr(struct mbuf *);
 void		 at2intr(struct mbuf *);
 int		 at_broadcast(struct sockaddr_at  *);
 u_short		 at_cksum(struct mbuf *m, int skip);
 int		 at_control(struct socket *so, u_long cmd, caddr_t data,
 		    struct ifnet *ifp, struct thread *td);
 struct at_ifaddr	*at_ifawithnet(struct sockaddr_at *);
+int		 at_inithead(void**, int);
 void		 ddp_init(void);
 int		 ddp_output(struct mbuf *m, struct socket *so); 
 int		 ddp_route(struct mbuf *m, struct route *ro);
 struct ddpcb	*ddp_search(struct sockaddr_at *, struct sockaddr_at *,
 		    struct at_ifaddr *);
 
 #endif /* !_NETATALK_AT_EXTERN_H_ */
Index: head/sys/netatalk/at_proto.c
===================================================================
--- head/sys/netatalk/at_proto.c	(revision 178887)
+++ head/sys/netatalk/at_proto.c	(revision 178888)
@@ -1,64 +1,64 @@
 /*-
  * Copyright (c) 1990, 1991 Regents of The University of Michigan.
  * All Rights Reserved.
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby granted,
  * provided that the above copyright notice appears in all copies and
  * that both that copyright notice and this permission notice appear
  * in supporting documentation, and that the name of The University
  * of Michigan not be used in advertising or publicity pertaining to
  * distribution of the software without specific, written prior
  * permission. This software is supplied as is without expressed or
  * implied warranties of any kind.
  *
  *	Research Systems Unix Group
  *	The University of Michigan
  *	c/o Mike Clark
  *	535 W. William Street
  *	Ann Arbor, Michigan
  *	+1-313-763-0525
  *	netatalk@itd.umich.edu
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/protosw.h>
 #include <sys/domain.h>
 #include <sys/socket.h>
 
 #include <sys/kernel.h>
 
 #include <net/route.h>
 
 #include <netatalk/at.h>
 #include <netatalk/ddp_var.h>
 #include <netatalk/at_extern.h>
 
 static struct domain	atalkdomain;
 
 /*
  * Protocol switch for the AppleTalk domain: a single SOCK_DGRAM entry
  * for ATPROTO_DDP, served by the ddp_* routines.
  */
 static struct protosw	atalksw[] = {
 	{
 		/* Identifiers */
 		.pr_type =		SOCK_DGRAM,
 		.pr_domain =		&atalkdomain,
 		.pr_protocol =		ATPROTO_DDP,
 		.pr_flags =		PR_ATOMIC|PR_ADDR,
 		.pr_output =		ddp_output,
 		.pr_init =		ddp_init,
 		.pr_usrreqs =		&ddp_usrreqs,
 	},
 };
 
 /*
  * Domain descriptor for AF_APPLETALK.  dom_rtoffset is the bit offset
  * of sat_addr within struct sockaddr_at (byte offset << 3), and
  * dom_maxrtkey is the size of a sockaddr_at.
  */
 static struct domain	atalkdomain = {
 	.dom_family =		AF_APPLETALK,
 	.dom_name =		"appletalk",
 	.dom_protosw =		atalksw,
 	.dom_protoswNPROTOSW =	&atalksw[sizeof(atalksw)/sizeof(atalksw[0])],
-	.dom_rtattach =		rn_inithead,
+	.dom_rtattach =		at_inithead,
 	.dom_rtoffset =		offsetof(struct sockaddr_at, sat_addr) << 3,
 	.dom_maxrtkey =		sizeof(struct sockaddr_at),
 };
 
 DOMAIN_SET(atalk);
Index: head/sys/netgraph/netflow/netflow.c
===================================================================
--- head/sys/netgraph/netflow/netflow.c	(revision 178887)
+++ head/sys/netgraph/netflow/netflow.c	(revision 178888)
@@ -1,716 +1,718 @@
 /*-
  * Copyright (c) 2004-2005 Gleb Smirnoff <glebius@FreeBSD.org>
  * Copyright (c) 2001-2003 Roman V. Palagin <romanp@unshadow.net>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $SourceForge: netflow.c,v 1.41 2004/09/05 11:41:10 glebius Exp $
  */
 
 static const char rcs_id[] =
     "@(#) $FreeBSD$";
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/mbuf.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
 
 #include <machine/atomic.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 
 #include <netgraph/netflow/netflow.h>
 #include <netgraph/netflow/ng_netflow.h>
 
 #define	NBUCKETS	(65536)		/* must be power of 2 */
 
 /* This hash is for TCP or UDP packets. */
 #define FULL_HASH(addr1, addr2, port1, port2)	\
 	(((addr1 ^ (addr1 >> 16) ^ 		\
 	htons(addr2 ^ (addr2 >> 16))) ^ 	\
 	port1 ^ htons(port2)) &			\
 	(NBUCKETS - 1))
 
 /* This hash is for all other IP packets. */
 #define ADDR_HASH(addr1, addr2)			\
 	((addr1 ^ (addr1 >> 16) ^ 		\
 	htons(addr2 ^ (addr2 >> 16))) &		\
 	(NBUCKETS - 1))
 
 /* Macros to shorten logical constructions */
 /* XXX: priv must exist in namespace */
 #define	INACTIVE(fle)	(time_uptime - fle->f.last > priv->info.nfinfo_inact_t)
 #define	AGED(fle)	(time_uptime - fle->f.first > priv->info.nfinfo_act_t)
 #define	ISFREE(fle)	(fle->f.packets == 0)
 
 /*
  * 4 is a magical number: statistically number of 4-packet flows is
  * bigger than 5,6,7...-packet flows by an order of magnitude. Most UDP/ICMP
  * scans are 1 packet (~ 90% of flow cache). TCP scans are 2-packet in case
  * of reachable host and 4-packet otherwise.
  */
 #define	SMALL(fle)	(fle->f.packets <= 4)
 
 /*
  * Cisco uses milliseconds for uptime. Bad idea, since it overflows
  * every 48+ days. But we will do same to keep compatibility. This macro
  * does overflowable multiplication by 1000.
  */
 #define	MILLIUPTIME(t)	(((t) << 9) +	/* 512 */	\
 			 ((t) << 8) +	/* 256 */	\
 			 ((t) << 7) +	/* 128 */	\
 			 ((t) << 6) +	/* 64  */	\
 			 ((t) << 5) +	/* 32  */	\
 			 ((t) << 3))	/* 8   */
 
 MALLOC_DECLARE(M_NETFLOW_HASH);
 MALLOC_DEFINE(M_NETFLOW_HASH, "netflow_hash", "NetFlow hash");
 
 static int export_add(item_p, struct flow_entry *);
 static int export_send(priv_p, item_p, int flags);
 
 /* Generate hash for a given flow record. */
 static __inline uint32_t
 ip_hash(struct flow_rec *r)
 {
 	/* TCP and UDP records also mix the port pair into the hash. */
 	if (r->r_ip_p == IPPROTO_TCP || r->r_ip_p == IPPROTO_UDP)
 		return FULL_HASH(r->r_src.s_addr, r->r_dst.s_addr,
 		    r->r_sport, r->r_dport);
 
 	/* Everything else is hashed on the address pair alone. */
 	return ADDR_HASH(r->r_src.s_addr, r->r_dst.s_addr);
 }
 
 /* This is callback from uma(9), called on alloc. */
 static int
 uma_ctor_flow(void *mem, int size, void *arg, int how)
 {
 	priv_p priv = (priv_p )arg;
 
 	if (atomic_load_acq_32(&priv->info.nfinfo_used) >= CACHESIZE)
 		return (ENOMEM);
 
 	atomic_add_32(&priv->info.nfinfo_used, 1);
 
 	return (0);
 }
 
 /* This is callback from uma(9), called on free. */
 static void
 uma_dtor_flow(void *mem, int size, void *arg)
 {
 	priv_p priv = (priv_p )arg;
 
 	atomic_subtract_32(&priv->info.nfinfo_used, 1);
 }
 
 /*
  * Detach export datagram from priv, if there is any.
  * If there is no, allocate a new one.
  */
 static item_p
 get_export_dgram(priv_p priv)
 {
 	struct netflow_v5_export_dgram *dgram;
 	struct mbuf *m;
 	item_p item;
 
 	/* Take whatever is parked on priv, leaving the slot empty. */
 	mtx_lock(&priv->export_mtx);
 	item = priv->export_item;
 	priv->export_item = NULL;
 	mtx_unlock(&priv->export_mtx);
 
 	if (item != NULL)
 		return (item);
 
 	/* Nothing was parked: start a fresh, empty v5 datagram. */
 	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 	if (m == NULL)
 		return (NULL);
 	item = ng_package_data(m, NG_NOFLAGS);
 	if (item == NULL)
 		return (NULL);
 
 	dgram = mtod(m, struct netflow_v5_export_dgram *);
 	dgram->header.count = 0;
 	dgram->header.version = htons(NETFLOW_V5);
 
 	return (item);
 }
 
 /*
  * Re-attach incomplete datagram back to priv.
  * If there is already another one, then send incomplete. */
 static void
 return_export_dgram(priv_p priv, item_p item, int flags)
 {
 	int parked = 0;
 
 	/*
 	 * On SMP another thread may already have parked its own item.
 	 * Only an empty slot accepts ours; otherwise the datagram is
 	 * sent to the collector as it is.
 	 */
 	mtx_lock(&priv->export_mtx);
 	if (priv->export_item == NULL) {
 		priv->export_item = item;
 		parked = 1;
 	}
 	mtx_unlock(&priv->export_mtx);
 
 	if (!parked)
 		export_send(priv, item, flags);
 }
 
 /*
  * The flow is over. Call export_add() and free it. If datagram is
  * full, then call export_send().
  */
 static __inline void
 expire_flow(priv_p priv, item_p *item, struct flow_entry *fle, int flags)
 {
 	/* Obtain a datagram to export into if the caller has none yet. */
 	if (*item == NULL)
 		*item = get_export_dgram(priv);
 
 	if (*item == NULL) {
 		/* No datagram available: the record is lost, count it. */
 		atomic_add_32(&priv->info.nfinfo_export_failed, 1);
 	} else if (export_add(*item, fle) > 0) {
 		/* Datagram became full: ship it and start over next time. */
 		export_send(priv, *item, flags);
 		*item = NULL;
 	}
 
 	/* The flow entry is released in every case. */
 	uma_zfree_arg(priv->zone, fle, priv);
 }
 
 /* Get a snapshot of node statistics */
 void
 ng_netflow_copyinfo(priv_p priv, struct ng_netflow_info *i)
 {
 	/* XXX: atomic */
 	/* Plain structure copy of the node counters into the caller's buffer. */
 	memcpy(i, &priv->info, sizeof(priv->info));
 }
 
 /*
  * Insert a record into defined slot.
  *
  * First we get for us a free flow entry, then fill in all
  * possible fields in it.
  *
  * TODO: consider dropping hash mutex while filling in datagram,
  * as this was done in previous version. Need to test & profile
  * to be sure.
  */
 static __inline int
 hash_insert(priv_p priv, struct flow_hash_entry  *hsh, struct flow_rec *r,
 	int plen, uint8_t tcp_flags)
 {
 	struct flow_entry *fle;
 	struct sockaddr_in sin;
 	struct rtentry *rt;
 
 	/* The caller must hold the bucket mutex. */
 	mtx_assert(&hsh->mtx, MA_OWNED);
 
 	fle = uma_zalloc_arg(priv->zone, priv, M_NOWAIT);
 	if (fle == NULL) {
 		atomic_add_32(&priv->info.nfinfo_alloc_failed, 1);
 		return (ENOMEM);
 	}
 
 	/*
 	 * Now fle is totally ours. It is detached from all lists,
 	 * we can safely edit it.
 	 *
 	 * NOTE(review): the allocation above does not request zeroing, and
 	 * next_hop, fle_o_ifx, dst_mask and src_mask are assigned below only
 	 * when the route lookups succeed -- confirm they are initialized
 	 * elsewhere before being exported.
 	 */
 
 	bcopy(r, &fle->f.r, sizeof(struct flow_rec));
 	fle->f.bytes = plen;
 	fle->f.packets = 1;
 	fle->f.tcp_flags = tcp_flags;
 
 	fle->f.first = fle->f.last = time_uptime;
 
 	/*
 	 * First we do route table lookup on destination address. So we can
 	 * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases.
 	 */
 	bzero(&sin, sizeof(sin));
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_family = AF_INET;
 	sin.sin_addr = fle->f.r.r_dst;
-	rt = rtalloc1((struct sockaddr *)&sin, 0, RTF_CLONING);
+	/* XXX MRT 0 as a default.. need the m here to get fib */
+	rt = rtalloc1_fib((struct sockaddr *)&sin, 0, RTF_CLONING, 0);
 	if (rt != NULL) {
 		fle->f.fle_o_ifx = rt->rt_ifp->if_index;
 
 		/* Record the next hop only for IPv4 gateway routes. */
 		if (rt->rt_flags & RTF_GATEWAY &&
 		    rt->rt_gateway->sa_family == AF_INET)
 			fle->f.next_hop =
 			    ((struct sockaddr_in *)(rt->rt_gateway))->sin_addr;
 
 		/* Prefix length is the number of set bits in the netmask. */
 		if (rt_mask(rt))
 			fle->f.dst_mask = bitcount32(((struct sockaddr_in *)
 			    rt_mask(rt))->sin_addr.s_addr);
 		else if (rt->rt_flags & RTF_HOST)
 			/* Give up. We can't determine mask :( */
 			fle->f.dst_mask = 32;
 
 		RTFREE_LOCKED(rt);
 	}
 
 	/* Do route lookup on source address, to fill in src_mask. */
 	bzero(&sin, sizeof(sin));
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_family = AF_INET;
 	sin.sin_addr = fle->f.r.r_src;
-	rt = rtalloc1((struct sockaddr *)&sin, 0, RTF_CLONING);
+	/* XXX MRT 0 as a default  revisit.  need the mbuf for fib*/
+	rt = rtalloc1_fib((struct sockaddr *)&sin, 0, RTF_CLONING, 0);
 	if (rt != NULL) {
 		if (rt_mask(rt))
 			fle->f.src_mask = bitcount32(((struct sockaddr_in *)
 			    rt_mask(rt))->sin_addr.s_addr);
 		else if (rt->rt_flags & RTF_HOST)
 			/* Give up. We can't determine mask :( */
 			fle->f.src_mask = 32;
 
 		RTFREE_LOCKED(rt);
 	}
 
 	/* Push new flow at the end of the hash bucket list. */
 	TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
 
 	return (0);
 }
 
 
 /*
  * Non-static functions called from ng_netflow.c
  */
 
 /* Allocate memory and set up flow cache */
 int
 ng_netflow_cache_init(priv_p priv)
 {
 	struct flow_hash_entry	*bucket;
 	int idx;
 
 	/* Flow entries come from a dedicated zone capped at CACHESIZE. */
 	priv->zone = uma_zcreate("NetFlow cache", sizeof(struct flow_entry),
 	    uma_ctor_flow, uma_dtor_flow, NULL, NULL, UMA_ALIGN_CACHE, 0);
 	uma_zone_set_max(priv->zone, CACHESIZE);
 
 	/* Zero-filled array of NBUCKETS hash buckets. */
 	MALLOC(priv->hash, struct flow_hash_entry *,
 	    NBUCKETS * sizeof(struct flow_hash_entry),
 	    M_NETFLOW_HASH, M_WAITOK | M_ZERO);
 
 	if (priv->hash == NULL) {
 		uma_zdestroy(priv->zone);
 		return (ENOMEM);
 	}
 
 	/* Every bucket gets its own mutex and an empty list. */
 	for (idx = 0; idx < NBUCKETS; idx++) {
 		bucket = &priv->hash[idx];
 		mtx_init(&bucket->mtx, "hash mutex", NULL, MTX_DEF);
 		TAILQ_INIT(&bucket->head);
 	}
 
 	mtx_init(&priv->export_mtx, "export dgram lock", NULL, MTX_DEF);
 
 	return (0);
 }
 
 /* Free all flow cache memory. Called from node close method. */
 void
 ng_netflow_cache_flush(priv_p priv)
 {
 	struct flow_hash_entry	*bucket;
 	struct flow_entry	*fle;
 	item_p			item = NULL;
 	int idx;
 
 	/*
 	 * The cached records are probably billable, so each one is
 	 * expired (exported) before its memory is released.
 	 * No locking is required since callout is already drained.
 	 */
 	for (idx = 0; idx < NBUCKETS; idx++) {
 		bucket = &priv->hash[idx];
 		while ((fle = TAILQ_FIRST(&bucket->head)) != NULL) {
 			TAILQ_REMOVE(&bucket->head, fle, fle_hash);
 			expire_flow(priv, &item, fle, NG_QUEUE);
 		}
 	}
 
 	/* Ship the last, partially filled datagram. */
 	if (item != NULL)
 		export_send(priv, item, NG_QUEUE);
 
 	uma_zdestroy(priv->zone);
 
 	/* Tear down the per-bucket mutexes. */
 	for (idx = 0; idx < NBUCKETS; idx++)
 		mtx_destroy(&priv->hash[idx].mtx);
 
 	/* Release the bucket array itself. */
 	if (priv->hash)
 		FREE(priv->hash, M_NETFLOW_HASH);
 
 	mtx_destroy(&priv->export_mtx);
 }
 
 /* Insert packet from into flow cache. */
 /*
  * Account one IPv4 packet in the flow cache.  Builds a flow record from
  * the IP header (and TCP/UDP ports for unfragmented-offset packets),
  * then either updates the matching cache entry or inserts a new one.
  * Stale entries met during the bucket search are expired on the way.
  * Returns EINVAL for a non-IPv4 or short header, otherwise 0 or the
  * error from hash_insert().
  */
 int
 ng_netflow_flow_add(priv_p priv, struct ip *ip, iface_p iface,
 	struct ifnet *ifp)
 {
 	register struct flow_entry	*fle, *fle1;
 	struct flow_hash_entry		*hsh;
 	struct flow_rec		r;
 	item_p			item = NULL;
 	int			hlen, plen;
 	int			error = 0;
 	uint8_t			tcp_flags = 0;
 
 	/* Try to fill flow_rec r */
 	bzero(&r, sizeof(r));
 	/* check version */
 	if (ip->ip_v != IPVERSION)
 		return (EINVAL);
 
 	/* verify min header length */
 	hlen = ip->ip_hl << 2;
 
 	if (hlen < sizeof(struct ip))
 		return (EINVAL);
 
 	r.r_src = ip->ip_src;
 	r.r_dst = ip->ip_dst;
 
 	/* save packet length */
 	plen = ntohs(ip->ip_len);
 
 	r.r_ip_p = ip->ip_p;
 	r.r_tos = ip->ip_tos;
 
 	/* Configured in_ifx overrides mbuf's */
 	if (iface->info.ifinfo_index == 0) {
 		if (ifp != NULL)
 			r.r_i_ifx = ifp->if_index;
 	} else
 		r.r_i_ifx = iface->info.ifinfo_index;
 
 	/*
 	 * XXX NOTE: only first fragment of fragmented TCP, UDP and
 	 * ICMP packet will be recorded with proper s_port and d_port.
 	 * Following fragments will be recorded simply as IP packet with
 	 * ip_proto = ip->ip_p and s_port, d_port set to zero.
 	 * I know, it looks like bug. But I don't want to re-implement
 	 * ip packet assembling here. Anyway, (in)famous trafd works this way -
 	 * and nobody complains yet :)
 	 */
 	if ((ip->ip_off & htons(IP_OFFMASK)) == 0)
 		switch(r.r_ip_p) {
 		case IPPROTO_TCP:
 		{
 			register struct tcphdr *tcp;
 
 			tcp = (struct tcphdr *)((caddr_t )ip + hlen);
 			r.r_sport = tcp->th_sport;
 			r.r_dport = tcp->th_dport;
 			tcp_flags = tcp->th_flags;
 			break;
 		}
 			case IPPROTO_UDP:
 			/* Both 16-bit ports are copied as one 32-bit word. */
 			r.r_ports = *(uint32_t *)((caddr_t )ip + hlen);
 			break;
 		}
 
 	/* Update node statistics. XXX: race... */
 	priv->info.nfinfo_packets ++;
 	priv->info.nfinfo_bytes += plen;
 
 	/* Find hash slot. */
 	hsh = &priv->hash[ip_hash(&r)];
 
 	mtx_lock(&hsh->mtx);
 
 	/*
 	 * Go through hash and find our entry. If we encounter an
 	 * entry, that should be expired, purge it. We do a reverse
 	 * search since most active entries are first, and most
 	 * searches are done on most active entries.
 	 */
 	TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) {
 		if (bcmp(&r, &fle->f.r, sizeof(struct flow_rec)) == 0)
 			break;
 		if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			expire_flow(priv, &item, fle, NG_QUEUE);
 			atomic_add_32(&priv->info.nfinfo_act_exp, 1);
 		}
 	}
 
 	/* fle is NULL here when the loop ran off the list without a match. */
 	if (fle) {			/* An existent entry. */
 
 		fle->f.bytes += plen;
 		fle->f.packets ++;
 		fle->f.tcp_flags |= tcp_flags;
 		fle->f.last = time_uptime;
 
 		/*
 		 * We have the following reasons to expire flow in active way:
 		 * - it hit active timeout
 		 * - a TCP connection closed
 		 * - it is going to overflow counter
 		 */
 		if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle) ||
 		    (fle->f.bytes >= (UINT_MAX - IF_MAXMTU)) ) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			expire_flow(priv, &item, fle, NG_QUEUE);
 			atomic_add_32(&priv->info.nfinfo_act_exp, 1);
 		} else {
 			/*
 			 * It is the newest, move it to the tail,
 			 * if it isn't there already. Next search will
 			 * locate it quicker.
 			 */
 			if (fle != TAILQ_LAST(&hsh->head, fhead)) {
 				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 				TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
 			}
 		}
 	} else				/* A new flow entry. */
 		error = hash_insert(priv, hsh, &r, plen, tcp_flags);
 
 	mtx_unlock(&hsh->mtx);
 
 	/* Park or send the export datagram only after the bucket is unlocked. */
 	if (item != NULL)
 		return_export_dgram(priv, item, NG_QUEUE);
 
 	return (error);
 }
 
 /*
  * Return records from cache to userland.
  *
  * TODO: matching particular IP should be done in kernel, here.
  */
 /*
  * Copy cached flow records into the response message, starting at
  * bucket index 'last' (0 on the first call).  At most NREC_AT_ONCE
  * records are copied; when that limit is hit, data->last is set to the
  * next bucket index for the caller to pass back (it stays 0 if the
  * table is exhausted).  Returns EINVAL if 'last' is out of range.
  */
 int
 ng_netflow_flow_show(priv_p priv, uint32_t last, struct ng_mesg *resp)
 {
 	struct flow_hash_entry *hsh;
 	struct flow_entry *fle;
 	struct ngnf_flows *data;
 	int i;
 
 	data = (struct ngnf_flows *)resp->data;
 	data->last = 0;
 	data->nentries = 0;
 
 	/* Check if this is a first run */
 	if (last == 0) {
 		hsh = priv->hash;
 		i = 0;
 	} else {
 		if (last > NBUCKETS-1)
 			return (EINVAL);
 		hsh = priv->hash + last;
 		i = last;
 	}
 
 	/*
 	 * We will transfer not more than NREC_AT_ONCE. More data
 	 * will come in next message.
 	 * We send current hash index to userland, and userland should
 	 * return it back to us. Then, we will restart with new entry.
 	 *
 	 * The resulting cache snapshot is inaccurate for the
 	 * following reasons:
 	 *  - we skip locked hash entries
 	 *  - we bail out, if someone wants our entry
 	 *  - we skip rest of entry, when hit NREC_AT_ONCE
 	 */
 	for (; i < NBUCKETS; hsh++, i++) {
 		if (mtx_trylock(&hsh->mtx) == 0)
 			continue;
 
 		TAILQ_FOREACH(fle, &hsh->head, fle_hash) {
 			/* Another thread is waiting for this bucket: yield. */
 			if (hsh->mtx.mtx_lock & MTX_CONTESTED)
 				break;
 
 			bcopy(&fle->f, &(data->entries[data->nentries]),
 			    sizeof(fle->f));
 			data->nentries++;
 			if (data->nentries == NREC_AT_ONCE) {
 				mtx_unlock(&hsh->mtx);
 				/* Resume from the following bucket next time. */
 				if (++i < NBUCKETS)
 					data->last = i;
 				return (0);
 			}
 		}
 		mtx_unlock(&hsh->mtx);
 	}
 
 	return (0);
 }
 
 /* We have full datagram in privdata. Send it to export hook. */
 static int
 export_send(priv_p priv, item_p item, int flags)
 {
 	struct mbuf *m = NGI_M(item);
 	struct netflow_v5_export_dgram *dgram;
 	struct netflow_v5_header *hdr;
 	struct timespec now;
 	int error = 0;
 
 	dgram = mtod(m, struct netflow_v5_export_dgram *);
 	hdr = &dgram->header;
 
 	/* The mbuf carries the header plus 'count' records. */
 	m->m_len = m->m_pkthdr.len = sizeof(struct netflow_v5_record) *
 	   hdr->count + sizeof(struct netflow_v5_header);
 
 	/* Timestamps and fixed fields of the export header. */
 	hdr->sys_uptime = htonl(MILLIUPTIME(time_uptime));
 	getnanotime(&now);
 	hdr->unix_secs  = htonl(now.tv_sec);
 	hdr->unix_nsecs = htonl(now.tv_nsec);
 	hdr->engine_type = 0;
 	hdr->engine_id = 0;
 	hdr->pad = 0;
 
 	/*
 	 * count is still in host order here; it is converted only after
 	 * the sequence counter has been advanced by it.
 	 */
 	hdr->flow_seq = htonl(atomic_fetchadd_32(&priv->flow_seq,
 	    hdr->count));
 	hdr->count = htons(hdr->count);
 
 	/* Without an export hook the datagram is simply discarded. */
 	if (priv->export == NULL) {
 		NG_FREE_ITEM(item);
 	} else {
 		NG_FWD_ITEM_HOOK_FLAGS(error, item, priv->export, flags);
 	}
 
 	return (error);
 }
 
 
 /* Add export record to dgram. */
 static int
 export_add(item_p item, struct flow_entry *fle)
 {
 	struct netflow_v5_export_dgram *dgram;
 	struct netflow_v5_header *hdr;
 	struct netflow_v5_record *slot;
 
 	dgram = mtod(NGI_M(item), struct netflow_v5_export_dgram *);
 	hdr = &dgram->header;
 
 	/* Claim the next free record slot and bump the counter. */
 	slot = &dgram->r[hdr->count++];
 
 	KASSERT(hdr->count <= NETFLOW_V5_MAX_RECORDS,
 	    ("ng_netflow: export too big"));
 
 	/* Copy the flow into the record, converting where htons/htonl apply. */
 	slot->src_addr = fle->f.r.r_src.s_addr;
 	slot->dst_addr = fle->f.r.r_dst.s_addr;
 	slot->next_hop = fle->f.next_hop.s_addr;
 	slot->i_ifx    = htons(fle->f.fle_i_ifx);
 	slot->o_ifx    = htons(fle->f.fle_o_ifx);
 	slot->packets  = htonl(fle->f.packets);
 	slot->octets   = htonl(fle->f.bytes);
 	slot->first    = htonl(MILLIUPTIME(fle->f.first));
 	slot->last     = htonl(MILLIUPTIME(fle->f.last));
 	slot->s_port   = fle->f.r.r_sport;
 	slot->d_port   = fle->f.r.r_dport;
 	slot->flags    = fle->f.tcp_flags;
 	slot->prot     = fle->f.r.r_ip_p;
 	slot->tos      = fle->f.r.r_tos;
 	slot->dst_mask = fle->f.dst_mask;
 	slot->src_mask = fle->f.src_mask;
 
 	/* Not supported fields. */
 	slot->src_as = slot->dst_as = 0;
 
 	/* 1 tells the caller the datagram is now full, 0 otherwise. */
 	return (hdr->count == NETFLOW_V5_MAX_RECORDS ? 1 : 0);
 }
 
 /* Periodic flow expiry run. */
 /*
  * Callout handler: sweep every hash bucket that can be locked without
  * waiting, expire flows that are inactive or aged, park or send the
  * resulting export datagram, and re-arm itself to run again in one
  * second (1*hz ticks).  'arg' is the node private data.
  */
 void
 ng_netflow_expire(void *arg)
 {
 	struct flow_entry	*fle, *fle1;
 	struct flow_hash_entry	*hsh;
 	priv_p			priv = (priv_p )arg;
 	item_p			item = NULL;
 	uint32_t		used;
 	int			i;
 
 	/*
 	 * Going through all the cache.
 	 */
 	for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) {
 		/*
 		 * Skip entries, that are already being worked on.
 		 */
 		if (mtx_trylock(&hsh->mtx) == 0)
 			continue;
 
 		/* Snapshot of the in-use count; decremented locally below. */
 		used = atomic_load_acq_32(&priv->info.nfinfo_used);
 		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
 			/*
 			 * Interrupt thread wants this entry!
 			 * Quick! Quick! Bail out!
 			 */
 			if (hsh->mtx.mtx_lock & MTX_CONTESTED)
 				break;
 
 			/*
 			 * Don't expire aggressively while hash collision
 			 * ratio is predicted small.
 			 */
 			if (used <= (NBUCKETS*2) && !INACTIVE(fle))
 				break;
 
 			/*
 			 * Expire inactive flows that are small, or any
 			 * inactive flow once the cache is crowded, and
 			 * always expire aged flows.
 			 */
 			if ((INACTIVE(fle) && (SMALL(fle) ||
 			    (used > (NBUCKETS*2)))) || AGED(fle)) {
 				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 				expire_flow(priv, &item, fle, NG_NOFLAGS);
 				used--;
 				atomic_add_32(&priv->info.nfinfo_inact_exp, 1);
 			}
 		}
 		mtx_unlock(&hsh->mtx);
 	}
 
 	if (item != NULL)
 		return_export_dgram(priv, item, NG_NOFLAGS);
 
 	/* Schedule next expire. */
 	callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire,
 	    (void *)priv);
 }
Index: head/sys/netinet/if_atm.c
===================================================================
--- head/sys/netinet/if_atm.c	(revision 178887)
+++ head/sys/netinet/if_atm.c	(revision 178888)
@@ -1,370 +1,370 @@
 /*      $NetBSD: if_atm.c,v 1.6 1996/10/13 02:03:01 christos Exp $       */
 
 /*-
  *
  * Copyright (c) 1996 Charles D. Cranor and Washington University.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Charles D. Cranor and
  *      Washington University.
  * 4. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * IP <=> ATM address resolution.
  */
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_natm.h"
 
 #if defined(INET) || defined(INET6)
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/queue.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/if_atm.h>
 
 #include <netinet/in.h>
 #include <netinet/if_atm.h>
 
 #ifdef NATM
 #include <netnatm/natm.h>
 #endif
 
 #define SDL(s) ((struct sockaddr_dl *)s)
 
 #define	GET3BYTE(V, A, L)	do {				\
 	(V) = ((A)[0] << 16) | ((A)[1] << 8) | (A)[2];		\
 	(A) += 3;						\
 	(L) -= 3;						\
     } while (0)
 
 #define GET2BYTE(V, A, L)	do {				\
 	(V) = ((A)[0] << 8) | (A)[1];				\
 	(A) += 2;						\
 	(L) -= 2;						\
     } while (0)
 
 #define GET1BYTE(V, A, L)	do {				\
 	(V) = *(A)++;						\
 	(L)--;							\
     } while (0)
 
 
 /*
  * atm_rtrequest: handle ATM rt request (in support of generic code)
  *   inputs: "req" = request code
  *           "rt" = route entry
  *           "info" = rt_addrinfo
  */
 void
 atm_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
 {
 	struct sockaddr *gate = rt->rt_gateway;
 	struct atmio_openvcc op;
 	struct atmio_closevcc cl;
 	u_char *addr;
 	u_int alen;
 #ifdef NATM
 	struct sockaddr_in *sin;
 	struct natmpcb *npcb = NULL;
 #endif
 	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
 
 	if (rt->rt_flags & RTF_GATEWAY)   /* link level requests only */
 		return;
 
 	switch (req) {
 
 	case RTM_RESOLVE: /* resolve: only happens when cloning */
 		printf("atm_rtrequest: RTM_RESOLVE request detected?\n");
 		break;
 
 	case RTM_ADD:
 		/*
 		 * route added by a command (e.g. ifconfig, route, arp...).
 		 *
 		 * first check to see if this is not a host route, in which
 		 * case we are being called via "ifconfig" to set the address.
 		 */
 		if ((rt->rt_flags & RTF_HOST) == 0) {
 			rt_setgate(rt,rt_key(rt),(struct sockaddr *)&null_sdl);
 			gate = rt->rt_gateway;
 			SDL(gate)->sdl_type = rt->rt_ifp->if_type;
 			SDL(gate)->sdl_index = rt->rt_ifp->if_index;
 			break;
 		}
 
 		if ((rt->rt_flags & RTF_CLONING) != 0) {
 			printf("atm_rtrequest: cloning route detected?\n");
 			break;
 		}
 		if (gate->sa_family != AF_LINK ||
 		    gate->sa_len < sizeof(null_sdl)) {
 			log(LOG_DEBUG, "atm_rtrequest: bad gateway value");
 			break;
 		}
 
 		KASSERT(rt->rt_ifp->if_ioctl != NULL,
 		    ("atm_rtrequest: null ioctl"));
 
 		/*
 		 * Parse and verify the link level address as
 		 * an open request
 		 */
 #ifdef NATM
 		NATM_LOCK();
 #endif
 		bzero(&op, sizeof(op));
 		addr = LLADDR(SDL(gate));
 		alen = SDL(gate)->sdl_alen;
 		if (alen < 4) {
 			printf("%s: bad link-level address\n", __func__);
 			goto failed;
 		}
 
 		if (alen == 4) {
 			/* old type address */
 			GET1BYTE(op.param.flags, addr, alen);
 			GET1BYTE(op.param.vpi, addr, alen);
 			GET2BYTE(op.param.vci, addr, alen);
 			op.param.traffic = ATMIO_TRAFFIC_UBR;
 			op.param.aal = (op.param.flags & ATM_PH_AAL5) ?
 			    ATMIO_AAL_5 : ATMIO_AAL_0;
 		} else {
 			/* new address */
 			op.param.aal = ATMIO_AAL_5;
 
 			GET1BYTE(op.param.flags, addr, alen);
 			op.param.flags &= ATM_PH_LLCSNAP;
 
 			GET1BYTE(op.param.vpi, addr, alen);
 			GET2BYTE(op.param.vci, addr, alen);
 
 			GET1BYTE(op.param.traffic, addr, alen);
 
 			switch (op.param.traffic) {
 
 			  case ATMIO_TRAFFIC_UBR:
 				if (alen >= 3)
 					GET3BYTE(op.param.tparam.pcr,
 					    addr, alen);
 				break;
 
 			  case ATMIO_TRAFFIC_CBR:
 				if (alen < 3)
 					goto bad_param;
 				GET3BYTE(op.param.tparam.pcr, addr, alen);
 				break;
 
 			  case ATMIO_TRAFFIC_VBR:
 				if (alen < 3 * 3)
 					goto bad_param;
 				GET3BYTE(op.param.tparam.pcr, addr, alen);
 				GET3BYTE(op.param.tparam.scr, addr, alen);
 				GET3BYTE(op.param.tparam.mbs, addr, alen);
 				break;
 
 			  case ATMIO_TRAFFIC_ABR:
 				if (alen < 4 * 3 + 2 + 1 * 2 + 3)
 					goto bad_param;
 				GET3BYTE(op.param.tparam.pcr, addr, alen);
 				GET3BYTE(op.param.tparam.mcr, addr, alen);
 				GET3BYTE(op.param.tparam.icr, addr, alen);
 				GET3BYTE(op.param.tparam.tbe, addr, alen);
 				GET1BYTE(op.param.tparam.nrm, addr, alen);
 				GET1BYTE(op.param.tparam.trm, addr, alen);
 				GET2BYTE(op.param.tparam.adtf, addr, alen);
 				GET1BYTE(op.param.tparam.rif, addr, alen);
 				GET1BYTE(op.param.tparam.rdf, addr, alen);
 				GET1BYTE(op.param.tparam.cdf, addr, alen);
 				break;
 
 			  default:
 			  bad_param:
 				printf("%s: bad traffic params\n", __func__);
 				goto failed;
 			}
 		}
 		op.param.rmtu = op.param.tmtu = rt->rt_ifp->if_mtu;
 #ifdef NATM
 		/*
 		 * let native ATM know we are using this VCI/VPI
 		 * (i.e. reserve it)
 		 */
 		sin = (struct sockaddr_in *) rt_key(rt);
 		if (sin->sin_family != AF_INET)
 			goto failed;
 		npcb = npcb_add(NULL, rt->rt_ifp, op.param.vci,  op.param.vpi);
 		if (npcb == NULL)
 			goto failed;
 		npcb->npcb_flags |= NPCB_IP;
 		npcb->ipaddr.s_addr = sin->sin_addr.s_addr;
 		/* XXX: move npcb to llinfo when ATM ARP is ready */
 		rt->rt_llinfo = (caddr_t) npcb;
 		rt->rt_flags |= RTF_LLINFO;
 #endif
 		/*
 		 * let the lower level know this circuit is active
 		 */
 		op.rxhand = NULL;
 		op.param.flags |= ATMIO_FLAG_ASYNC;
 		if (rt->rt_ifp->if_ioctl(rt->rt_ifp, SIOCATMOPENVCC,
 		    (caddr_t)&op) != 0) {
 			printf("atm: couldn't add VC\n");
 			goto failed;
 		}
 
 		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
 		SDL(gate)->sdl_index = rt->rt_ifp->if_index;
 
 #ifdef NATM
 		NATM_UNLOCK();
 #endif
 		break;
 
 failed:
 #ifdef NATM
 		if (npcb) {
 			npcb_free(npcb, NPCB_DESTROY);
 			rt->rt_llinfo = NULL;
 			rt->rt_flags &= ~RTF_LLINFO;
 		}
 		NATM_UNLOCK();
 #endif
 		/* mark as invalid. We cannot RTM_DELETE the route from
 		 * here, because the recursive call to rtrequest1 does
 		 * not really work. */
 		rt->rt_flags |= RTF_REJECT;
 		break;
 
 	case RTM_DELETE:
 #ifdef NATM
 		/*
 		 * tell native ATM we are done with this VC
 		 */
 		if (rt->rt_flags & RTF_LLINFO) {
 			NATM_LOCK();
 			npcb_free((struct natmpcb *)rt->rt_llinfo,
 			    NPCB_DESTROY);
 			rt->rt_llinfo = NULL;
 			rt->rt_flags &= ~RTF_LLINFO;
 			NATM_UNLOCK();
 		}
 #endif
 		/*
 		 * tell the lower layer to disable this circuit
 		 */
 		bzero(&op, sizeof(op));
 		addr = LLADDR(SDL(gate));
 		addr++;
 		cl.vpi = *addr++;
 		cl.vci = *addr++ << 8;
 		cl.vci |= *addr++;
 		(void)rt->rt_ifp->if_ioctl(rt->rt_ifp, SIOCATMCLOSEVCC,
 		    (caddr_t)&cl);
 		break;
 	}
 }
 
 /*
  * atmresolve:
  *   inputs:
  *     [1] "rt" = the link level route to use (or null if need to look one up)
  *     [2] "m" = mbuf containing the data to be sent
  *     [3] "dst" = sockaddr_in (IP) address of dest.
  *   output:
  *     [4] "desten" = ATM pseudo header which we will fill in VPI/VCI info
  *   return:
  *     0 == resolve FAILED; note that "m" gets m_freem'd in this case
  *     1 == resolve OK; desten contains result
  *
  *   XXX: will need more work if we wish to support ATMARP in the kernel,
  *   but this is enough for PVCs entered via the "route" command.
  */
 int
 atmresolve(struct rtentry *rt, struct mbuf *m, struct sockaddr *dst,
     struct atm_pseudohdr *desten)
 {
 	struct sockaddr_dl *sdl;
 
 	if (m->m_flags & (M_BCAST | M_MCAST)) {
 		log(LOG_INFO,
 		    "atmresolve: BCAST/MCAST packet detected/dumped\n");
 		goto bad;
 	}
 
 	if (rt == NULL) {
-		rt = RTALLOC1(dst, 0);
+		rt = RTALLOC1(dst, 0); /* link level on table 0 XXX MRT */
 		if (rt == NULL)
 			goto bad;	/* failed */
 		RT_REMREF(rt);		/* don't keep LL references */
 		if ((rt->rt_flags & RTF_GATEWAY) != 0 ||
 		    (rt->rt_flags & RTF_LLINFO) == 0 ||
 		    /* XXX: are we using LLINFO? */
 		    rt->rt_gateway->sa_family != AF_LINK) {
 			RT_UNLOCK(rt);
 			goto bad;
 		}
 		RT_UNLOCK(rt);
 	}
 
 	/*
 	 * note that rt_gateway is a sockaddr_dl which contains the
 	 * atm_pseudohdr data structure for this route.   we currently
 	 * don't need any rt_llinfo info (but will if we want to support
 	 * ATM ARP [c.f. if_ether.c]).
 	 */
 	sdl = SDL(rt->rt_gateway);
 
 	/*
 	 * Check the address family and length is valid, the address
 	 * is resolved; otherwise, try to resolve.
 	 */
 	if (sdl->sdl_family == AF_LINK && sdl->sdl_alen >= sizeof(*desten)) {
 		bcopy(LLADDR(sdl), desten, sizeof(*desten));
 		return (1);	/* ok, go for it! */
 	}
 
 	/*
 	 * we got an entry, but it doesn't have valid link address
 	 * info in it (it is prob. the interface route, which has
 	 * sdl_alen == 0).    dump packet.  (fall through to "bad").
 	 */
 bad:
 	m_freem(m);
 	return (0);
 }
 #endif /* INET */
Index: head/sys/netinet/if_ether.c
===================================================================
--- head/sys/netinet/if_ether.c	(revision 178887)
+++ head/sys/netinet/if_ether.c	(revision 178888)
@@ -1,982 +1,1045 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_ether.c	8.1 (Berkeley) 6/10/93
  */
 
 /*
  * Ethernet address resolution protocol.
  * TODO:
  *	add "inuse/lock" bit (or ref. count) along with valid bit
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_mac.h"
 #include "opt_carp.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/netisr.h>
 #include <net/if_llc.h>
 #include <net/ethernet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 
 #include <net/if_arc.h>
 #include <net/iso88025.h>
 
 #ifdef DEV_CARP
 #include <netinet/ip_carp.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 #define SIN(s) ((struct sockaddr_in *)s)
 #define SDL(s) ((struct sockaddr_dl *)s)
 
 SYSCTL_DECL(_net_link_ether);
 SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
 
 /* timer values */
 static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */
 
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, 
 	   &arpt_keep, 0, "ARP entry lifetime in seconds");
 
 #define	rt_expire rt_rmx.rmx_expire
 
 struct llinfo_arp {
 	struct	callout la_timer;
 	struct	rtentry *la_rt;
 	struct	mbuf *la_hold;	/* last packet until resolved/timeout */
 	u_short	la_preempt;	/* countdown for pre-expiry arps */
 	u_short	la_asked;	/* # requests sent */
 };
 
 static struct	ifqueue arpintrq;
 static int	arp_allocated;
 
 static int	arp_maxtries = 5;
 static int	useloopback = 1; /* use loopback interface for local traffic */
 static int	arp_proxyall = 0;
 
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW,
 	   &arp_maxtries, 0, "ARP resolution attempts before returning error");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW,
 	   &useloopback, 0, "Use the loopback interface for local traffic");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW,
 	   &arp_proxyall, 0, "Enable proxy ARP for all suitable requests");
 
 static void	arp_init(void);
 static void	arp_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
 static void	arprequest(struct ifnet *,
 			struct in_addr *, struct in_addr *, u_char *);
 static void	arpintr(struct mbuf *);
 static void	arptimer(void *);
 static struct rtentry
-		*arplookup(u_long, int, int);
+		*arplookup(u_long, int, int, int);
 #ifdef INET
 static void	in_arpinput(struct mbuf *);
 #endif
 
 /*
  * Timeout routine.
  */
 static void
 arptimer(void *arg)
 {
 	struct rtentry *rt = (struct rtentry *)arg;
 
 	RT_LOCK_ASSERT(rt);
 	/*
 	 * The lock is needed to close a theoretical race
 	 * between spontaneous expiry and intentional removal.
 	 * We still got an extra reference on rtentry, so can
 	 * safely pass pointers to its contents.
 	 */
 	RT_UNLOCK(rt);
 
-	rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0, NULL);
+	in_rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0, NULL,
+	    rt->rt_fibnum);
 }
 
 /*
  * Parallel to llc_rtrequest.
  */
 static void
 arp_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
 {
 	struct sockaddr *gate;
 	struct llinfo_arp *la;
 	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
 	struct in_ifaddr *ia;
 	struct ifaddr *ifa;
 
 	RT_LOCK_ASSERT(rt);
 
 	if (rt->rt_flags & RTF_GATEWAY)
 		return;
 	gate = rt->rt_gateway;
 	la = (struct llinfo_arp *)rt->rt_llinfo;
 	switch (req) {
 
 	case RTM_ADD:
 		/*
 		 * XXX: If this is a manually added route to interface
 		 * such as older version of routed or gated might provide,
 		 * restore cloning bit.
 		 */
 		if ((rt->rt_flags & RTF_HOST) == 0 &&
 		    rt_mask(rt) != NULL &&
 		    SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
 			rt->rt_flags |= RTF_CLONING;
 		if (rt->rt_flags & RTF_CLONING) {
 			/*
 			 * Case 1: This route should come from a route to iface.
 			 */
 			rt_setgate(rt, rt_key(rt),
 					(struct sockaddr *)&null_sdl);
 			gate = rt->rt_gateway;
 			SDL(gate)->sdl_type = rt->rt_ifp->if_type;
 			SDL(gate)->sdl_index = rt->rt_ifp->if_index;
 			rt->rt_expire = time_uptime;
 			break;
 		}
 		/* Announce a new entry if requested. */
 		if (rt->rt_flags & RTF_ANNOUNCE)
 			arprequest(rt->rt_ifp,
 			    &SIN(rt_key(rt))->sin_addr,
 			    &SIN(rt_key(rt))->sin_addr,
 			    (u_char *)LLADDR(SDL(gate)));
 		/*FALLTHROUGH*/
 	case RTM_RESOLVE:
 		if (gate->sa_family != AF_LINK ||
 		    gate->sa_len < sizeof(null_sdl)) {
 			log(LOG_DEBUG, "%s: bad gateway %s%s\n", __func__,
 			    inet_ntoa(SIN(rt_key(rt))->sin_addr),
 			    (gate->sa_family != AF_LINK) ?
 			    " (!AF_LINK)": "");
 			break;
 		}
 		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
 		SDL(gate)->sdl_index = rt->rt_ifp->if_index;
 		if (la != 0)
 			break; /* This happens on a route change */
 		/*
 		 * Case 2:  This route may come from cloning, or a manual route
 		 * add with a LL address.
 		 */
 		R_Zalloc(la, struct llinfo_arp *, sizeof(*la));
 		rt->rt_llinfo = (caddr_t)la;
 		if (la == 0) {
 			log(LOG_DEBUG, "%s: malloc failed\n", __func__);
 			break;
 		}
 		arp_allocated++;
 		/*
 		 * We are storing a route entry outside of radix tree. So,
 		 * it can be found and accessed by other means than radix
 		 * lookup. The routing code assumes that any rtentry detached
 		 * from radix can be destroyed safely. To prevent this, we
 		 * add an additional reference.
 		 */
 		RT_ADDREF(rt);
 		la->la_rt = rt;
 		rt->rt_flags |= RTF_LLINFO;
 		callout_init_mtx(&la->la_timer, &rt->rt_mtx,
 		    CALLOUT_RETURNUNLOCKED);
 
 #ifdef INET
 		/*
 		 * This keeps the multicast addresses from showing up
 		 * in `arp -a' listings as unresolved.  It's not actually
 		 * functional.  Then the same for broadcast.
 		 */
 		if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr)) &&
 		    rt->rt_ifp->if_type != IFT_ARCNET) {
 			ETHER_MAP_IP_MULTICAST(&SIN(rt_key(rt))->sin_addr,
 					       LLADDR(SDL(gate)));
 			SDL(gate)->sdl_alen = 6;
 			rt->rt_expire = 0;
 		}
 		if (in_broadcast(SIN(rt_key(rt))->sin_addr, rt->rt_ifp)) {
 			memcpy(LLADDR(SDL(gate)), rt->rt_ifp->if_broadcastaddr,
 			       rt->rt_ifp->if_addrlen);
 			SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen;
 			rt->rt_expire = 0;
 		}
 #endif
 
 		TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
 			if (ia->ia_ifp == rt->rt_ifp &&
 			    SIN(rt_key(rt))->sin_addr.s_addr ==
 			    (IA_SIN(ia))->sin_addr.s_addr)
 				break;
 		}
 		if (ia) {
 		    /*
 		     * This test used to be
 		     *	if (loif.if_flags & IFF_UP)
 		     * It allowed local traffic to be forced
 		     * through the hardware by configuring the loopback down.
 		     * However, it causes problems during network configuration
 		     * for boards that can't receive packets they send.
 		     * It is now necessary to clear "useloopback" and remove
 		     * the route to force traffic out to the hardware.
 		     */
 			rt->rt_expire = 0;
 			bcopy(IF_LLADDR(rt->rt_ifp), LLADDR(SDL(gate)),
 			      SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen);
 			if (useloopback) {
 				rt->rt_ifp = loif;
 				rt->rt_rmx.rmx_mtu = loif->if_mtu;
 			}
 
 		    /*
 		     * make sure to set rt->rt_ifa to the interface
 		     * address we are using, otherwise we will have trouble
 		     * with source address selection.
 		     */
 			ifa = &ia->ia_ifa;
 			if (ifa != rt->rt_ifa) {
 				IFAFREE(rt->rt_ifa);
 				IFAREF(ifa);
 				rt->rt_ifa = ifa;
 			}
 		}
 		break;
 
 	case RTM_DELETE:
 		if (la == NULL)	/* XXX: at least CARP does this. */
 			break;
 		callout_stop(&la->la_timer);
 		rt->rt_llinfo = NULL;
 		rt->rt_flags &= ~RTF_LLINFO;
 		RT_REMREF(rt);
 		if (la->la_hold)
 			m_freem(la->la_hold);
 		Free((caddr_t)la);
 	}
 }
 
 /*
  * Broadcast an ARP request. Caller specifies:
  *	- arp header source ip address
  *	- arp header target ip address
  *	- arp header source ethernet address
  */
 static void
 arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip,
     u_char *enaddr)
 {
 	struct mbuf *m;
 	struct arphdr *ah;
 	struct sockaddr sa;
 
 	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
 		return;
 	m->m_len = sizeof(*ah) + 2*sizeof(struct in_addr) +
 		2*ifp->if_data.ifi_addrlen;
 	m->m_pkthdr.len = m->m_len;
 	MH_ALIGN(m, m->m_len);
 	ah = mtod(m, struct arphdr *);
 	bzero((caddr_t)ah, m->m_len);
 #ifdef MAC
 	mac_netinet_arp_send(ifp, m);
 #endif
 	ah->ar_pro = htons(ETHERTYPE_IP);
 	ah->ar_hln = ifp->if_addrlen;		/* hardware address length */
 	ah->ar_pln = sizeof(struct in_addr);	/* protocol address length */
 	ah->ar_op = htons(ARPOP_REQUEST);
 	bcopy((caddr_t)enaddr, (caddr_t)ar_sha(ah), ah->ar_hln);
 	bcopy((caddr_t)sip, (caddr_t)ar_spa(ah), ah->ar_pln);
 	bcopy((caddr_t)tip, (caddr_t)ar_tpa(ah), ah->ar_pln);
 	sa.sa_family = AF_ARP;
 	sa.sa_len = 2;
 	m->m_flags |= M_BCAST;
 	(*ifp->if_output)(ifp, m, &sa, (struct rtentry *)0);
 
 	return;
 }
 
 /*
  * Resolve an IP address into an ethernet address.
  * On input:
  *    ifp is the interface we use
  *    rt0 is the route to the final destination (possibly useless)
  *    m is the mbuf. May be NULL if we don't have a packet.
  *    dst is the next hop,
  *    desten is where we want the address.
  *
  * On success, desten is filled in and the function returns 0;
  * If the packet must be held pending resolution, we return EWOULDBLOCK
  * On other errors, we return the corresponding error code.
  * Note that m_freem() handles NULL.
  */
 int
 arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
     struct sockaddr *dst, u_char *desten)
 {
 	struct llinfo_arp *la = NULL;
 	struct rtentry *rt = NULL;
 	struct sockaddr_dl *sdl;
 	int error;
+	int fibnum = 0;
 
 	if (m) {
 		if (m->m_flags & M_BCAST) {
 			/* broadcast */
 			(void)memcpy(desten,
 			    ifp->if_broadcastaddr, ifp->if_addrlen);
 			return (0);
 		}
 		if (m->m_flags & M_MCAST && ifp->if_type != IFT_ARCNET) {
 			/* multicast */
 			ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
 			return (0);
 		}
+		fibnum = M_GETFIB(m);
 	}
 
 	if (rt0 != NULL) {
-		error = rt_check(&rt, &rt0, dst);
+		/* Look for a cached arp (ll) entry. */
+		if (m == NULL)
+			fibnum = rt0->rt_fibnum;
+		error = in_rt_check(&rt, &rt0, dst, fibnum);
 		if (error) {
 			m_freem(m);
 			return error;
 		}
 		la = (struct llinfo_arp *)rt->rt_llinfo;
 		if (la == NULL)
 			RT_UNLOCK(rt);
 	}
 	if (la == NULL) {
 		/*
-		 * We enter this block in case if rt0 was NULL,
-		 * or if rt found by rt_check() didn't have llinfo.
+		 * We enter this block if rt0 was NULL,
+		 * or if rt found by in_rt_check() didn't have llinfo.
+		 * We should get a cloned route which, since it should
+		 * come from the local interface, should have an ll entry.
+		 * It may be incomplete, but that's OK.
+		 * XXXMRT if we haven't found a fibnum is that OK?
 		 */
-		rt = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0);
+		rt = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0, fibnum);
 		if (rt == NULL) {
 			log(LOG_DEBUG,
 			    "arpresolve: can't allocate route for %s\n",
 			    inet_ntoa(SIN(dst)->sin_addr));
 			m_freem(m);
 			return (EINVAL); /* XXX */
 		}
 		la = (struct llinfo_arp *)rt->rt_llinfo;
 		if (la == NULL) {
 			RT_UNLOCK(rt);
 			log(LOG_DEBUG,
 			    "arpresolve: can't allocate llinfo for %s\n",
 			    inet_ntoa(SIN(dst)->sin_addr));
 			m_freem(m);
 			return (EINVAL); /* XXX */
 		}
 	}
 	sdl = SDL(rt->rt_gateway);
 	/*
 	 * Check the address family and length is valid, the address
 	 * is resolved; otherwise, try to resolve.
 	 */
 	if ((rt->rt_expire == 0 || rt->rt_expire > time_uptime) &&
 	    sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
 
 		bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
 
 		/*
 		 * If entry has an expiry time and it is approaching,
 		 * send an ARP request.
 		 */
 		if ((rt->rt_expire != 0) &&
 		    (time_uptime + la->la_preempt > rt->rt_expire)) {
 			struct in_addr sin = 
 			    SIN(rt->rt_ifa->ifa_addr)->sin_addr;
 
 			la->la_preempt--;
 			RT_UNLOCK(rt);
 			arprequest(ifp, &sin, &SIN(dst)->sin_addr,
 			    IF_LLADDR(ifp));
 			return (0);
 		} 
 
 		RT_UNLOCK(rt);
 		return (0);
 	}
 	/*
 	 * If ARP is disabled or static on this interface, stop.
 	 * XXX
 	 * Probably should not allocate empty llinfo struct if we are
 	 * not going to be sending out an arp request.
 	 */
 	if (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) {
 		RT_UNLOCK(rt);
 		m_freem(m);
 		return (EINVAL);
 	}
 	/*
 	 * There is an arptab entry, but no ethernet address
 	 * response yet.  Replace the held mbuf with this
 	 * latest one.
 	 */
 	if (m) {
 		if (la->la_hold)
 			m_freem(la->la_hold);
 		la->la_hold = m;
 	}
 	KASSERT(rt->rt_expire > 0, ("sending ARP request for static entry"));
 
 	/*
 	 * Return EWOULDBLOCK if we have tried less than arp_maxtries. It
 	 * will be masked by ether_output(). Return EHOSTDOWN/EHOSTUNREACH
 	 * if we have already sent arp_maxtries ARP requests. Retransmit the
 	 * ARP request, but not faster than one request per second.
 	 */
 	if (la->la_asked < arp_maxtries)
 		error = EWOULDBLOCK;	/* First request. */
 	else
 		error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
 
 	if (la->la_asked == 0 || rt->rt_expire != time_uptime) {
 		struct in_addr sin =
 		    SIN(rt->rt_ifa->ifa_addr)->sin_addr;
 
 		rt->rt_expire = time_uptime;
 		callout_reset(&la->la_timer, hz, arptimer, rt);
 		la->la_asked++;
 		RT_UNLOCK(rt);
 
 		arprequest(ifp, &sin, &SIN(dst)->sin_addr,
 		    IF_LLADDR(ifp));
 	} else
 		RT_UNLOCK(rt);
 
 	return (error);
 }
 
 /*
  * Common length and type checks are done here,
  * then the protocol-specific routine is called.
  */
 static void
 arpintr(struct mbuf *m)
 {
 	struct arphdr *ar;
 
 	if (m->m_len < sizeof(struct arphdr) &&
 	    ((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) {
 		log(LOG_ERR, "arp: runt packet -- m_pullup failed\n");
 		return;
 	}
 	ar = mtod(m, struct arphdr *);
 
 	if (ntohs(ar->ar_hrd) != ARPHRD_ETHER &&
 	    ntohs(ar->ar_hrd) != ARPHRD_IEEE802 &&
 	    ntohs(ar->ar_hrd) != ARPHRD_ARCNET &&
 	    ntohs(ar->ar_hrd) != ARPHRD_IEEE1394) {
 		log(LOG_ERR, "arp: unknown hardware address format (0x%2D)\n",
 		    (unsigned char *)&ar->ar_hrd, "");
 		m_freem(m);
 		return;
 	}
 
 	if (m->m_len < arphdr_len(ar)) {
 		if ((m = m_pullup(m, arphdr_len(ar))) == NULL) {
 			log(LOG_ERR, "arp: runt packet\n");
 			m_freem(m);
 			return;
 		}
 		ar = mtod(m, struct arphdr *);
 	}
 
 	switch (ntohs(ar->ar_pro)) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		in_arpinput(m);
 		return;
 #endif
 	}
 	m_freem(m);
 }
 
 #ifdef INET
 /*
  * ARP for Internet protocols on 10 Mb/s Ethernet.
  * Algorithm is that given in RFC 826.
  * In addition, a sanity check is performed on the sender
  * protocol address, to catch impersonators.
  * We no longer handle negotiations for use of trailer protocol:
  * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent
  * along with IP replies if we wanted trailers sent to us,
  * and also sent them in response to IP replies.
  * This allowed either end to announce the desire to receive
  * trailer packets.
  * We no longer reply to requests for ETHERTYPE_TRAIL protocol either,
  * but formerly didn't normally send requests.
  */
 static int log_arp_wrong_iface = 1;
 static int log_arp_movements = 1;
 static int log_arp_permanent_modify = 1;
 
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW,
 	&log_arp_wrong_iface, 0,
 	"log arp packets arriving on the wrong interface");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_movements, CTLFLAG_RW,
         &log_arp_movements, 0,
         "log arp replies from MACs different than the one in the cache");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_permanent_modify, CTLFLAG_RW,
         &log_arp_permanent_modify, 0,
         "log arp replies from MACs different than the one in the permanent arp entry");
 
 
 static void
 in_arpinput(struct mbuf *m)
 {
 	struct arphdr *ah;
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct llinfo_arp *la;
 	struct rtentry *rt;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	struct sockaddr_dl *sdl;
 	struct sockaddr sa;
 	struct in_addr isaddr, itaddr, myaddr;
 	struct mbuf *hold;
 	u_int8_t *enaddr = NULL;
 	int op, rif_len;
 	int req_len;
 	int bridged = 0;
+	u_int fibnum;
+	u_int goodfib = 0;
+	int firstpass = 1;
 #ifdef DEV_CARP
 	int carp_match = 0;
 #endif
 	struct sockaddr_in sin;
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_family = AF_INET;
 	sin.sin_addr.s_addr = 0;
 	
 	if (ifp->if_bridge)
 		bridged = 1;
 
 	req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
 	if (m->m_len < req_len && (m = m_pullup(m, req_len)) == NULL) {
 		log(LOG_ERR, "in_arp: runt packet -- m_pullup failed\n");
 		return;
 	}
 
 	ah = mtod(m, struct arphdr *);
 	op = ntohs(ah->ar_op);
 	(void)memcpy(&isaddr, ar_spa(ah), sizeof (isaddr));
 	(void)memcpy(&itaddr, ar_tpa(ah), sizeof (itaddr));
 
 	/*
 	 * For a bridge, we want to check the address irrespective
 	 * of the receive interface. (This will change slightly
 	 * when we have clusters of interfaces).
 	 * If the interface does not match, but the recieving interface
 	 * is part of carp, we call carp_iamatch to see if this is a
 	 * request for the virtual host ip.
 	 * XXX: This is really ugly!
 	 */
 	LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
 		if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
 		    (ia->ia_ifp == ifp)) &&
 		    itaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
 			goto match;
 #ifdef DEV_CARP
 		if (ifp->if_carp != NULL &&
 		    carp_iamatch(ifp->if_carp, ia, &isaddr, &enaddr) &&
 		    itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
 			carp_match = 1;
 			goto match;
 		}
 #endif
 	}
 	LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
 		if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
 		    (ia->ia_ifp == ifp)) &&
 		    isaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
 			goto match;
 	/*
 	 * No match, use the first inet address on the receive interface
 	 * as a dummy address for the rest of the function.
 	 */
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (ifa->ifa_addr->sa_family == AF_INET) {
 			ia = ifatoia(ifa);
 			goto match;
 		}
 	/*
 	 * If bridging, fall back to using any inet address.
 	 */
 	if (!bridged || (ia = TAILQ_FIRST(&in_ifaddrhead)) == NULL)
 		goto drop;
 match:
 	if (!enaddr)
 		enaddr = (u_int8_t *)IF_LLADDR(ifp);
 	myaddr = ia->ia_addr.sin_addr;
 	if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
 		goto drop;	/* it's from me, ignore it. */
 	if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
 		log(LOG_ERR,
 		    "arp: link address is broadcast for IP address %s!\n",
 		    inet_ntoa(isaddr));
 		goto drop;
 	}
 	/*
 	 * Warn if another host is using the same IP address, but only if the
 	 * IP address isn't 0.0.0.0, which is used for DHCP only, in which
 	 * case we suppress the warning to avoid false positive complaints of
 	 * potential misconfiguration.
 	 */
 	if (!bridged && isaddr.s_addr == myaddr.s_addr && myaddr.s_addr != 0) {
 		log(LOG_ERR,
 		   "arp: %*D is using my IP address %s on %s!\n",
 		   ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 		   inet_ntoa(isaddr), ifp->if_xname);
 		itaddr = myaddr;
 		goto reply;
 	}
 	if (ifp->if_flags & IFF_STATICARP)
 		goto reply;
-	rt = arplookup(isaddr.s_addr, itaddr.s_addr == myaddr.s_addr, 0);
-	if (rt != NULL) {
-		sin.sin_addr.s_addr = isaddr.s_addr;
-		EVENTHANDLER_INVOKE(route_arp_update_event, rt,
-		    ar_sha(ah), (struct sockaddr *)&sin);
+	/*
+	 * We look for any FIB that has this address to find
+	 * the interface etc.
+	 * For sanity checks that are FIB independent we abort the loop.
+	 */
+	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
+		rt = arplookup(isaddr.s_addr,
+		    itaddr.s_addr == myaddr.s_addr, 0, fibnum);
+		if (rt == NULL)
+			continue;
 		
+		sdl = SDL(rt->rt_gateway);
+		/* Only call this once */
+		if (firstpass) {
+			sin.sin_addr.s_addr = isaddr.s_addr;
+			EVENTHANDLER_INVOKE(route_arp_update_event, rt,
+			    ar_sha(ah), (struct sockaddr *)&sin);
+		}
+		
 		la = (struct llinfo_arp *)rt->rt_llinfo;
 		if (la == NULL) {
 			RT_UNLOCK(rt);
-			goto reply;
+			continue;
 		}
-	} else
-		goto reply;
 
-	/* The following is not an error when doing bridging. */
-	if (!bridged && rt->rt_ifp != ifp
+		if (firstpass) {
+			/* The following is not an error when doing bridging. */
+			if (!bridged && rt->rt_ifp != ifp
 #ifdef DEV_CARP
-	    && (ifp->if_type != IFT_CARP || !carp_match)
+			    && (ifp->if_type != IFT_CARP || !carp_match)
 #endif
-							) {
-		if (log_arp_wrong_iface)
-			log(LOG_ERR, "arp: %s is on %s but got reply from %*D on %s\n",
-			    inet_ntoa(isaddr),
-			    rt->rt_ifp->if_xname,
-			    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
-			    ifp->if_xname);
-		RT_UNLOCK(rt);
-		goto reply;
-	}
-	sdl = SDL(rt->rt_gateway);
-	if (sdl->sdl_alen &&
-	    bcmp(ar_sha(ah), LLADDR(sdl), sdl->sdl_alen)) {
-		if (rt->rt_expire) {
-		    if (log_arp_movements)
-		        log(LOG_INFO, "arp: %s moved from %*D to %*D on %s\n",
-			    inet_ntoa(isaddr),
-			    ifp->if_addrlen, (u_char *)LLADDR(sdl), ":",
-			    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
-			    ifp->if_xname);
-		} else {
-			RT_UNLOCK(rt);
-			if (log_arp_permanent_modify)
-				log(LOG_ERR, "arp: %*D attempts to modify "
-				    "permanent entry for %s on %s\n",
-				    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
-				    inet_ntoa(isaddr), ifp->if_xname);
-			goto reply;
-		}
-	}
-	/*
-	 * sanity check for the address length.
-	 * XXX this does not work for protocols with variable address
-	 * length. -is
-	 */
-	if (sdl->sdl_alen &&
-	    sdl->sdl_alen != ah->ar_hln) {
-		log(LOG_WARNING,
-		    "arp from %*D: new addr len %d, was %d",
-		    ifp->if_addrlen, (u_char *) ar_sha(ah), ":",
-		    ah->ar_hln, sdl->sdl_alen);
-	}
-	if (ifp->if_addrlen != ah->ar_hln) {
-		log(LOG_WARNING,
-		    "arp from %*D: addr len: new %d, i/f %d (ignored)",
-		    ifp->if_addrlen, (u_char *) ar_sha(ah), ":",
-		    ah->ar_hln, ifp->if_addrlen);
-		RT_UNLOCK(rt);
-		goto reply;
-	}
-	(void)memcpy(LLADDR(sdl), ar_sha(ah),
-	    sdl->sdl_alen = ah->ar_hln);
-	/*
-	 * If we receive an arp from a token-ring station over
-	 * a token-ring nic then try to save the source
-	 * routing info.
-	 */
-	if (ifp->if_type == IFT_ISO88025) {
-		struct iso88025_header *th = NULL;
-		struct iso88025_sockaddr_dl_data *trld;
-
-		th = (struct iso88025_header *)m->m_pkthdr.header;
-		trld = SDL_ISO88025(sdl);
-		rif_len = TR_RCF_RIFLEN(th->rcf);
-		if ((th->iso88025_shost[0] & TR_RII) &&
-		    (rif_len > 2)) {
-			trld->trld_rcf = th->rcf;
-			trld->trld_rcf ^= htons(TR_RCF_DIR);
-			memcpy(trld->trld_route, th->rd, rif_len - 2);
-			trld->trld_rcf &= ~htons(TR_RCF_BCST_MASK);
+			    ) {
+				if (log_arp_wrong_iface)
+					log(LOG_ERR, "arp: %s is on %s "
+						"but got reply from %*D "
+						"on %s\n",
+					    inet_ntoa(isaddr),
+					    rt->rt_ifp->if_xname,
+					    ifp->if_addrlen,
+					    (u_char *)ar_sha(ah), ":",
+					    ifp->if_xname);
+				RT_UNLOCK(rt);
+				break;
+			}
+			if (sdl->sdl_alen &&
+			    bcmp(ar_sha(ah), LLADDR(sdl), sdl->sdl_alen)) {
+				if (rt->rt_expire) {
+				    if (log_arp_movements)
+					log(LOG_INFO,
+					    "arp: %s moved from %*D to %*D "
+					    "on %s\n",
+					    inet_ntoa(isaddr),
+					    ifp->if_addrlen,
+					    (u_char *)LLADDR(sdl), ":",
+					    ifp->if_addrlen,
+					    (u_char *)ar_sha(ah), ":",
+					    ifp->if_xname);
+				} else {
+					RT_UNLOCK(rt);
+					if (log_arp_permanent_modify)
+						log(LOG_ERR,
+						    "arp: %*D attempts to "
+						    "modify permanent entry "
+						    "for %s on %s\n",
+						    ifp->if_addrlen,
+						    (u_char *)ar_sha(ah), ":",
+						    inet_ntoa(isaddr),
+						    ifp->if_xname);
+					break;
+				}
+			}
 			/*
-			 * Set up source routing information for
-			 * reply packet (XXX)
+			 * sanity check for the address length.
+			 * XXX this does not work for protocols
+			 * with variable address length. -is
 			 */
-			m->m_data -= rif_len;
-			m->m_len  += rif_len;
-			m->m_pkthdr.len += rif_len;
-		} else {
-			th->iso88025_shost[0] &= ~TR_RII;
-			trld->trld_rcf = 0;
+			if (sdl->sdl_alen &&
+			    sdl->sdl_alen != ah->ar_hln) {
+				log(LOG_WARNING,
+				    "arp from %*D: new addr len %d, was %d",
+				    ifp->if_addrlen, (u_char *) ar_sha(ah),
+				    ":", ah->ar_hln, sdl->sdl_alen);
+			}
+			if (ifp->if_addrlen != ah->ar_hln) {
+				log(LOG_WARNING,
+				    "arp from %*D: addr len: "
+				    "new %d, i/f %d (ignored)",
+				    ifp->if_addrlen, (u_char *) ar_sha(ah),
+				    ":", ah->ar_hln, ifp->if_addrlen);
+				RT_UNLOCK(rt);
+				break;
+			}
+			firstpass = 0;
+			goodfib = fibnum;
 		}
-		m->m_data -= 8;
-		m->m_len  += 8;
-		m->m_pkthdr.len += 8;
-		th->rcf = trld->trld_rcf;
-	}
-	if (rt->rt_expire) {
-		rt->rt_expire = time_uptime + arpt_keep;
-		callout_reset(&la->la_timer, hz * arpt_keep, arptimer, rt);
-	}
-	la->la_asked = 0;
-	la->la_preempt = arp_maxtries;
-	hold = la->la_hold;
-	la->la_hold = NULL;
-	RT_UNLOCK(rt);
-	if (hold != NULL)
-		(*ifp->if_output)(ifp, hold, rt_key(rt), rt);
 
+		/* Copy in the information received. */
+		(void)memcpy(LLADDR(sdl), ar_sha(ah),
+		    sdl->sdl_alen = ah->ar_hln);
+		/*
+		 * If we receive an arp from a token-ring station over
+		 * a token-ring nic then try to save the source routing info.
+		 * XXXMRT Only minimal Token Ring support for MRT.
+		 * Only do this on the first pass as it modifies the mbuf.
+		 */
+		if (ifp->if_type == IFT_ISO88025) {
+			struct iso88025_header *th = NULL;
+			struct iso88025_sockaddr_dl_data *trld;
+
+			/* force the fib loop to end after this pass */
+			fibnum = rt_numfibs - 1;
+
+			th = (struct iso88025_header *)m->m_pkthdr.header;
+			trld = SDL_ISO88025(sdl);
+			rif_len = TR_RCF_RIFLEN(th->rcf);
+			if ((th->iso88025_shost[0] & TR_RII) &&
+			    (rif_len > 2)) {
+				trld->trld_rcf = th->rcf;
+				trld->trld_rcf ^= htons(TR_RCF_DIR);
+				memcpy(trld->trld_route, th->rd, rif_len - 2);
+				trld->trld_rcf &= ~htons(TR_RCF_BCST_MASK);
+				/*
+				 * Set up source routing information for
+				 * reply packet (XXX)
+				 */
+				m->m_data -= rif_len;
+				m->m_len  += rif_len;
+				m->m_pkthdr.len += rif_len;
+			} else {
+				th->iso88025_shost[0] &= ~TR_RII;
+				trld->trld_rcf = 0;
+			}
+			m->m_data -= 8;
+			m->m_len  += 8;
+			m->m_pkthdr.len += 8;
+			th->rcf = trld->trld_rcf;
+		}
+
+		if (rt->rt_expire) {
+			rt->rt_expire = time_uptime + arpt_keep;
+			callout_reset(&la->la_timer, hz * arpt_keep,
+			    arptimer, rt);
+		}
+		la->la_asked = 0;
+		la->la_preempt = arp_maxtries;
+		hold = la->la_hold;
+		la->la_hold = NULL;
+		RT_UNLOCK(rt);
+		if (hold != NULL)
+			(*ifp->if_output)(ifp, hold, rt_key(rt), rt);
+	} /* end of FIB loop */
 reply:
+
+	/*
+	 * Decide if we have to respond to something.
+	 */
 	if (op != ARPOP_REQUEST)
 		goto drop;
 	if (itaddr.s_addr == myaddr.s_addr) {
-		/* I am the target */
+		/* Shortcut.. the receiving interface is the target. */
 		(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 		(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
 	} else {
-		rt = arplookup(itaddr.s_addr, 0, SIN_PROXY);
+		/* It's not asking for our address. But it still may
+		 * be something we should answer.
+		 *
+		 * XXX MRT
+		 * We assume that link level info is independent of
+		 * the table used and so we use whichever we can and don't
+		 * have a better option.
+		 */
+		/* Have we been asked to proxy for the target. */
+		rt = arplookup(itaddr.s_addr, 0, SIN_PROXY, goodfib);
 		if (rt == NULL) {
+			/* Nope, only interested now if proxying everything. */
 			struct sockaddr_in sin;
 
 			if (!arp_proxyall)
 				goto drop;
 
 			bzero(&sin, sizeof sin);
 			sin.sin_family = AF_INET;
 			sin.sin_len = sizeof sin;
 			sin.sin_addr = itaddr;
 
-			rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
+			/* XXX MRT use table 0 for arp reply  */
+			rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
 			if (!rt)
 				goto drop;
 			/*
 			 * Don't send proxies for nodes on the same interface
 			 * as this one came out of, or we'll get into a fight
 			 * over who claims what Ether address.
 			 */
 			if (rt->rt_ifp == ifp) {
 				rtfree(rt);
 				goto drop;
 			}
 			(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 			(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
 			rtfree(rt);
 
 			/*
 			 * Also check that the node which sent the ARP packet
 			 * is on the the interface we expect it to be on. This
 			 * avoids ARP chaos if an interface is connected to the
 			 * wrong network.
 			 */
 			sin.sin_addr = isaddr;
 
-			rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
+			/* XXX MRT use table 0 for arp checks */
+			rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
 			if (!rt)
 				goto drop;
 			if (rt->rt_ifp != ifp) {
 				log(LOG_INFO, "arp_proxy: ignoring request"
 				    " from %s via %s, expecting %s\n",
 				    inet_ntoa(isaddr), ifp->if_xname,
 				    rt->rt_ifp->if_xname);
 				rtfree(rt);
 				goto drop;
 			}
 			rtfree(rt);
 
 #ifdef DEBUG_PROXY
 			printf("arp: proxying for %s\n",
 			       inet_ntoa(itaddr));
 #endif
 		} else {
 			/*
 			 * Return proxied ARP replies only on the interface
 			 * or bridge cluster where this network resides.
 			 * Otherwise we may conflict with the host we are
 			 * proxying for.
 			 */
 			if (rt->rt_ifp != ifp &&
 			    (rt->rt_ifp->if_bridge != ifp->if_bridge ||
 			    ifp->if_bridge == NULL)) {
 				RT_UNLOCK(rt);
 				goto drop;
 			}
 			sdl = SDL(rt->rt_gateway);
 			(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 			(void)memcpy(ar_sha(ah), LLADDR(sdl), ah->ar_hln);
 			RT_UNLOCK(rt);
 		}
 	}
 
 	if (itaddr.s_addr == myaddr.s_addr &&
 	    IN_LINKLOCAL(ntohl(itaddr.s_addr))) {
 		/* RFC 3927 link-local IPv4; always reply by broadcast. */
 #ifdef DEBUG_LINKLOCAL
 		printf("arp: sending reply for link-local addr %s\n",
 		    inet_ntoa(itaddr));
 #endif
 		m->m_flags |= M_BCAST;
 		m->m_flags &= ~M_MCAST;
 	} else {
 		/* default behaviour; never reply by broadcast. */
 		m->m_flags &= ~(M_BCAST|M_MCAST);
 	}
 	(void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
 	(void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
 	ah->ar_op = htons(ARPOP_REPLY);
 	ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */
 	m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln);   
 	m->m_pkthdr.len = m->m_len;   
 	sa.sa_family = AF_ARP;
 	sa.sa_len = 2;
 	(*ifp->if_output)(ifp, m, &sa, (struct rtentry *)0);
 	return;
 
 drop:
 	m_freem(m);
 }
 #endif
 
 /*
  * Lookup or enter a new address in arptab.
  */
 static struct rtentry *
-arplookup(u_long addr, int create, int proxy)
+arplookup(u_long addr, int create, int proxy, int fibnum)
 {
 	struct rtentry *rt;
 	struct sockaddr_inarp sin;
 	const char *why = 0;
 
 	bzero(&sin, sizeof(sin));
 	sin.sin_len = sizeof(sin);
 	sin.sin_family = AF_INET;
 	sin.sin_addr.s_addr = addr;
 	if (proxy)
 		sin.sin_other = SIN_PROXY;
-	rt = rtalloc1((struct sockaddr *)&sin, create, 0UL);
+	rt = in_rtalloc1((struct sockaddr *)&sin, create, 0UL, fibnum);
 	if (rt == 0)
 		return (0);
 
 	if (rt->rt_flags & RTF_GATEWAY)
 		why = "host is not on local network";
 	else if ((rt->rt_flags & RTF_LLINFO) == 0)
 		why = "could not allocate llinfo";
 	else if (rt->rt_gateway->sa_family != AF_LINK)
 		why = "gateway route is not ours";
 
 	if (why) {
 #define	ISDYNCLONE(_rt) \
 	(((_rt)->rt_flags & (RTF_STATIC | RTF_WASCLONED)) == RTF_WASCLONED)
 		if (create)
 			log(LOG_DEBUG, "arplookup %s failed: %s\n",
 			    inet_ntoa(sin.sin_addr), why);
 		/*
 		 * If there are no references to this Layer 2 route,
 		 * and it is a cloned route, and not static, and
 		 * arplookup() is creating the route, then purge
 		 * it from the routing table as it is probably bogus.
 		 */
 		if (rt->rt_refcnt == 1 && ISDYNCLONE(rt))
 			rtexpunge(rt);
 		RTFREE_LOCKED(rt);
 		return (0);
 #undef ISDYNCLONE
 	} else {
 		RT_REMREF(rt);
 		return (rt);
 	}
 }
 
 void
 arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
 {
 	if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY)
 		arprequest(ifp, &IA_SIN(ifa)->sin_addr,
 				&IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp));
 	ifa->ifa_rtrequest = arp_rtrequest;
 	ifa->ifa_flags |= RTF_CLONING;
 }
 
 void
 arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr)
 {
 	if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY)
 		arprequest(ifp, &IA_SIN(ifa)->sin_addr,
 				&IA_SIN(ifa)->sin_addr, enaddr);
 	ifa->ifa_rtrequest = arp_rtrequest;
 	ifa->ifa_flags |= RTF_CLONING;
 }
 
 static void
 arp_init(void)
 {
 
 	arpintrq.ifq_maxlen = 50;
 	mtx_init(&arpintrq.ifq_mtx, "arp_inq", NULL, MTX_DEF);
 	netisr_register(NETISR_ARP, arpintr, &arpintrq, NETISR_MPSAFE);
 }
 SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
Index: head/sys/netinet/in_gif.c
===================================================================
--- head/sys/netinet/in_gif.c	(revision 178887)
+++ head/sys/netinet/in_gif.c	(revision 178888)
@@ -1,428 +1,432 @@
 /*	$KAME: in_gif.c,v 1.54 2001/05/14 14:02:16 itojun Exp $	*/
 
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_mrouting.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/protosw.h>
 
 #include <sys/malloc.h>
 
 #include <net/if.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/in_gif.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_encap.h>
 #include <netinet/ip_ecn.h>
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 
 #ifdef MROUTING
 #include <netinet/ip_mroute.h>
 #endif /* MROUTING */
 
 #include <net/if_gif.h>	
 
 static int gif_validate4(const struct ip *, struct gif_softc *,
 	struct ifnet *);
 
 extern  struct domain inetdomain;
 struct protosw in_gif_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		0/* IPPROTO_IPV[46] */,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		in_gif_input,
 	.pr_output =		(pr_output_t*)rip_output,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 };
 
 static int ip_gif_ttl = GIF_TTL;
 SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_RW,
 	&ip_gif_ttl,	0, "");
 
 int
 in_gif_output(struct ifnet *ifp, int family, struct mbuf *m)
 {
 	struct gif_softc *sc = ifp->if_softc;
 	struct sockaddr_in *dst = (struct sockaddr_in *)&sc->gif_ro.ro_dst;
 	struct sockaddr_in *sin_src = (struct sockaddr_in *)sc->gif_psrc;
 	struct sockaddr_in *sin_dst = (struct sockaddr_in *)sc->gif_pdst;
 	struct ip iphdr;	/* capsule IP header, host byte ordered */
 	struct etherip_header eiphdr;
 	int proto, error;
 	u_int8_t tos;
 
 	GIF_LOCK_ASSERT(sc);
 
 	if (sin_src == NULL || sin_dst == NULL ||
 	    sin_src->sin_family != AF_INET ||
 	    sin_dst->sin_family != AF_INET) {
 		m_freem(m);
 		return EAFNOSUPPORT;
 	}
 
 	switch (family) {
 #ifdef INET
 	case AF_INET:
 	    {
 		struct ip *ip;
 
 		proto = IPPROTO_IPV4;
 		if (m->m_len < sizeof(*ip)) {
 			m = m_pullup(m, sizeof(*ip));
 			if (!m)
 				return ENOBUFS;
 		}
 		ip = mtod(m, struct ip *);
 		tos = ip->ip_tos;
 		break;
 	    }
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 	    {
 		struct ip6_hdr *ip6;
 		proto = IPPROTO_IPV6;
 		if (m->m_len < sizeof(*ip6)) {
 			m = m_pullup(m, sizeof(*ip6));
 			if (!m)
 				return ENOBUFS;
 		}
 		ip6 = mtod(m, struct ip6_hdr *);
 		tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 		break;
 	    }
 #endif /* INET6 */
 	case AF_LINK:
  		proto = IPPROTO_ETHERIP;
  		eiphdr.eip_ver = ETHERIP_VERSION & ETHERIP_VER_VERS_MASK;
  		eiphdr.eip_pad = 0;
  		/* prepend Ethernet-in-IP header */
  		M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);
  		if (m && m->m_len < sizeof(struct etherip_header))
  			m = m_pullup(m, sizeof(struct etherip_header));
  		if (m == NULL)
  			return ENOBUFS;
  		bcopy(&eiphdr, mtod(m, struct etherip_header *),
 		    sizeof(struct etherip_header));
 		break;
 
 	default:
 #ifdef DEBUG
 		printf("in_gif_output: warning: unknown family %d passed\n",
 			family);
 #endif
 		m_freem(m);
 		return EAFNOSUPPORT;
 	}
 
 	bzero(&iphdr, sizeof(iphdr));
 	iphdr.ip_src = sin_src->sin_addr;
 	/* bidirectional configured tunnel mode */
 	if (sin_dst->sin_addr.s_addr != INADDR_ANY)
 		iphdr.ip_dst = sin_dst->sin_addr;
 	else {
 		m_freem(m);
 		return ENETUNREACH;
 	}
 	iphdr.ip_p = proto;
 	/* version will be set in ip_output() */
 	iphdr.ip_ttl = ip_gif_ttl;
 	iphdr.ip_len = m->m_pkthdr.len + sizeof(struct ip);
 	ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE,
 		       &iphdr.ip_tos, &tos);
 
 	/* prepend new IP header */
 	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
 	if (m && m->m_len < sizeof(struct ip))
 		m = m_pullup(m, sizeof(struct ip));
 	if (m == NULL) {
 		printf("ENOBUFS in in_gif_output %d\n", __LINE__);
 		return ENOBUFS;
 	}
 	bcopy(&iphdr, mtod(m, struct ip *), sizeof(struct ip));
 
+	M_SETFIB(m, sc->gif_fibnum);
+
 	if (dst->sin_family != sin_dst->sin_family ||
 	    dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr) {
 		/* cache route doesn't match */
 		bzero(dst, sizeof(*dst));
 		dst->sin_family = sin_dst->sin_family;
 		dst->sin_len = sizeof(struct sockaddr_in);
 		dst->sin_addr = sin_dst->sin_addr;
 		if (sc->gif_ro.ro_rt) {
 			RTFREE(sc->gif_ro.ro_rt);
 			sc->gif_ro.ro_rt = NULL;
 		}
 #if 0
 		GIF2IFP(sc)->if_mtu = GIF_MTU;
 #endif
 	}
 
 	if (sc->gif_ro.ro_rt == NULL) {
-		rtalloc_ign(&sc->gif_ro, 0);
+		in_rtalloc_ign(&sc->gif_ro, 0, sc->gif_fibnum);
 		if (sc->gif_ro.ro_rt == NULL) {
 			m_freem(m);
 			return ENETUNREACH;
 		}
 
 		/* if it constitutes infinite encapsulation, punt. */
 		if (sc->gif_ro.ro_rt->rt_ifp == ifp) {
 			m_freem(m);
 			return ENETUNREACH;	/* XXX */
 		}
 #if 0
 		ifp->if_mtu = sc->gif_ro.ro_rt->rt_ifp->if_mtu
 			- sizeof(struct ip);
 #endif
 	}
 
 	error = ip_output(m, NULL, &sc->gif_ro, 0, NULL, NULL);
 
 	if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) &&
 	    sc->gif_ro.ro_rt != NULL) {
 		RTFREE(sc->gif_ro.ro_rt);
 		sc->gif_ro.ro_rt = NULL;
 	}
 
 	return (error);
 }
 
 void
 in_gif_input(struct mbuf *m, int off)
 {
 	struct ifnet *gifp = NULL;
 	struct gif_softc *sc;
 	struct ip *ip;
 	int af;
 	u_int8_t otos;
 	int proto;
 
 	ip = mtod(m, struct ip *);
 	proto = ip->ip_p;
 
 	sc = (struct gif_softc *)encap_getarg(m);
 	if (sc == NULL) {
 		m_freem(m);
 		ipstat.ips_nogif++;
 		return;
 	}
 
 	gifp = GIF2IFP(sc);
 	if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		ipstat.ips_nogif++;
 		return;
 	}
 
 	otos = ip->ip_tos;
 	m_adj(m, off);
 
 	switch (proto) {
 #ifdef INET
 	case IPPROTO_IPV4:
 	    {
 		struct ip *ip;
 		af = AF_INET;
 		if (m->m_len < sizeof(*ip)) {
 			m = m_pullup(m, sizeof(*ip));
 			if (!m)
 				return;
 		}
 		ip = mtod(m, struct ip *);
 		if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
 				  ECN_ALLOWED : ECN_NOCARE,
 				  &otos, &ip->ip_tos) == 0) {
 			m_freem(m);
 			return;
 		}
 		break;
 	    }
 #endif
 #ifdef INET6
 	case IPPROTO_IPV6:
 	    {
 		struct ip6_hdr *ip6;
 		u_int8_t itos, oitos;
 
 		af = AF_INET6;
 		if (m->m_len < sizeof(*ip6)) {
 			m = m_pullup(m, sizeof(*ip6));
 			if (!m)
 				return;
 		}
 		ip6 = mtod(m, struct ip6_hdr *);
 		itos = oitos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 		if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
 				  ECN_ALLOWED : ECN_NOCARE,
 				  &otos, &itos) == 0) {
 			m_freem(m);
 			return;
 		}
 		if (itos != oitos) {
 			ip6->ip6_flow &= ~htonl(0xff << 20);
 			ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
 		}
 		break;
 	    }
 #endif /* INET6 */
  	case IPPROTO_ETHERIP:
  		af = AF_LINK;
  		break;	
 
 	default:
 		ipstat.ips_nogif++;
 		m_freem(m);
 		return;
 	}
 	gif_input(m, af, gifp);
 	return;
 }
 
 /*
  * validate outer address.
  */
 static int
 gif_validate4(const struct ip *ip, struct gif_softc *sc, struct ifnet *ifp)
 {
 	struct sockaddr_in *src, *dst;
 	struct in_ifaddr *ia4;
 
 	src = (struct sockaddr_in *)sc->gif_psrc;
 	dst = (struct sockaddr_in *)sc->gif_pdst;
 
 	/* check for address match */
 	if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
 	    dst->sin_addr.s_addr != ip->ip_src.s_addr)
 		return 0;
 
 	/* martian filters on outer source - NOT done in ip_input! */
 	if (IN_MULTICAST(ntohl(ip->ip_src.s_addr)))
 		return 0;
 	switch ((ntohl(ip->ip_src.s_addr) & 0xff000000) >> 24) {
 	case 0: case 127: case 255:
 		return 0;
 	}
 	/* reject packets with broadcast on source */
 	TAILQ_FOREACH(ia4, &in_ifaddrhead, ia_link) {
 		if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
 			continue;
 		if (ip->ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr)
 			return 0;
 	}
 
 	/* ingress filters on outer source */
 	if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0 && ifp) {
 		struct sockaddr_in sin;
 		struct rtentry *rt;
 
 		bzero(&sin, sizeof(sin));
 		sin.sin_family = AF_INET;
 		sin.sin_len = sizeof(struct sockaddr_in);
 		sin.sin_addr = ip->ip_src;
-		rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
+		/* XXX MRT  check for the interface we would use on output */
+		rt = in_rtalloc1((struct sockaddr *)&sin, 0,
+		    0UL, sc->gif_fibnum);
 		if (!rt || rt->rt_ifp != ifp) {
 #if 0
 			log(LOG_WARNING, "%s: packet from 0x%x dropped "
 			    "due to ingress filter\n", if_name(GIF2IFP(sc)),
 			    (u_int32_t)ntohl(sin.sin_addr.s_addr));
 #endif
 			if (rt)
 				RTFREE_LOCKED(rt);
 			return 0;
 		}
 		RTFREE_LOCKED(rt);
 	}
 
 	return 32 * 2;
 }
 
 /*
  * we know that we are in IFF_UP, outer address available, and outer family
  * matched the physical addr family.  see gif_encapcheck().
  */
 int
 gif_encapcheck4(const struct mbuf *m, int off, int proto, void *arg)
 {
 	struct ip ip;
 	struct gif_softc *sc;
 	struct ifnet *ifp;
 
 	/* sanity check done in caller */
 	sc = (struct gif_softc *)arg;
 
 	/* LINTED const cast */
 	m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
 	ifp = ((m->m_flags & M_PKTHDR) != 0) ? m->m_pkthdr.rcvif : NULL;
 
 	return gif_validate4(&ip, sc, ifp);
 }
 
 int
 in_gif_attach(struct gif_softc *sc)
 {
 	sc->encap_cookie4 = encap_attach_func(AF_INET, -1, gif_encapcheck,
 	    &in_gif_protosw, sc);
 	if (sc->encap_cookie4 == NULL)
 		return EEXIST;
 	return 0;
 }
 
 int
 in_gif_detach(struct gif_softc *sc)
 {
 	int error;
 
 	error = encap_detach(sc->encap_cookie4);
 	if (error == 0)
 		sc->encap_cookie4 = NULL;
 	return error;
 }
Index: head/sys/netinet/in_mcast.c
===================================================================
--- head/sys/netinet/in_mcast.c	(revision 178887)
+++ head/sys/netinet/in_mcast.c	(revision 178888)
@@ -1,1822 +1,1823 @@
 /*-
  * Copyright (c) 2007 Bruce M. Simpson.
  * Copyright (c) 2005 Robert N. M. Watson.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote
  *    products derived from this software without specific prior written
  *    permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * IPv4 multicast socket, group, and socket option processing module.
  * Until further notice, this file requires INET to compile.
  * TODO: Make this infrastructure independent of address family.
  * TODO: Teach netinet6 to use this code.
  * TODO: Hook up SSM logic to IGMPv3/MLDv2.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/igmp_var.h>
 
 #ifndef __SOCKUNION_DECLARED
 /*
  * Overlay of the socket address layouts used in this file, so that one
  * object can be accessed through whichever view is convenient.
  * Guarded by __SOCKUNION_DECLARED so it is only declared once.
  */
 union sockunion {
 	struct sockaddr_storage	ss;	/* generic storage view */
 	struct sockaddr		sa;	/* generic sockaddr view */
 	struct sockaddr_dl	sdl;	/* link-layer view */
 	struct sockaddr_in	sin;	/* IPv4 view */
 #ifdef INET6
 	struct sockaddr_in6	sin6;	/* IPv6 view, only with INET6 */
 #endif
 };
 typedef union sockunion sockunion_t;
 #define __SOCKUNION_DECLARED
 #endif /* __SOCKUNION_DECLARED */
 
 /*
  * Malloc types used in this file:
  *  M_IPMADDR   - struct in_multi group records.
  *  M_IPMOPTS   - struct ip_moptions and its membership vector.
  *  M_IPMSOURCE - struct in_mfilter vectors and struct in_msource entries.
  */
 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
 static MALLOC_DEFINE(M_IPMSOURCE, "in_msource", "IPv4 multicast source filter");
 
 /*
  * The IPv4 multicast list (in_multihead and associated structures) are
  * protected by the global in_multi_mtx.  See in_var.h for more details.  For
  * now, in_multi_mtx is marked as recursible due to IGMP's calling back into
  * ip_output() to send IGMP packets while holding the lock; this probably is
  * not quite desirable.
  */
 struct in_multihead in_multihead;	/* XXX BSS initialization */
 struct mtx in_multi_mtx;
 /* Initialized at boot as a default mutex that allows recursion. */
 MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE);
 
 /*
  * Functions with non-static linkage defined in this file should be
  * declared in in_var.h:
  *  imo_match_group()
  *  imo_match_source()
  *  in_addmulti()
  *  in_delmulti()
  *  in_delmulti_locked()
  * and ip_var.h:
  *  inp_freemoptions()
  *  inp_getmoptions()
  *  inp_setmoptions()
  */
 /* Forward declarations of the helpers private to this file. */
 static int	imo_grow(struct ip_moptions *);
 static int	imo_join_source(struct ip_moptions *, size_t, sockunion_t *);
 static int	imo_leave_source(struct ip_moptions *, size_t, sockunion_t *);
 static int	inp_change_source_filter(struct inpcb *, struct sockopt *);
 static struct ip_moptions *
 		inp_findmoptions(struct inpcb *);
 static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
 static int	inp_join_group(struct inpcb *, struct sockopt *);
 static int	inp_leave_group(struct inpcb *, struct sockopt *);
 static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
 static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
 
 /*
  * Resize the ip_moptions vector to the next power-of-two minus 1.
  * May be called with locks held; do not sleep.
  */
 static int
 imo_grow(struct ip_moptions *imo)
 {
 	struct in_multi		**nmships;
 	struct in_multi		**omships;
 	struct in_mfilter	 *nmfilters;
 	struct in_mfilter	 *omfilters;
 	size_t			  idx;
 	size_t			  newmax;
 	size_t			  oldmax;
 
 	nmships = NULL;
 	nmfilters = NULL;
 	omships = imo->imo_membership;
 	omfilters = imo->imo_mfilters;
 	oldmax = imo->imo_max_memberships;
 	newmax = ((oldmax + 1) * 2) - 1;
 
 	if (newmax <= IP_MAX_MEMBERSHIPS) {
 		nmships = (struct in_multi **)realloc(omships,
 		    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
 		nmfilters = (struct in_mfilter *)realloc(omfilters,
 		    sizeof(struct in_mfilter) * newmax, M_IPMSOURCE, M_NOWAIT);
 		if (nmships != NULL && nmfilters != NULL) {
 			/* Initialize newly allocated source filter heads. */
 			for (idx = oldmax; idx < newmax; idx++) {
 				nmfilters[idx].imf_fmode = MCAST_EXCLUDE;
 				nmfilters[idx].imf_nsources = 0;
 				TAILQ_INIT(&nmfilters[idx].imf_sources);
 			}
 			imo->imo_max_memberships = newmax;
 			imo->imo_membership = nmships;
 			imo->imo_mfilters = nmfilters;
 		}
 	}
 
 	if (nmships == NULL || nmfilters == NULL) {
 		if (nmships != NULL)
 			free(nmships, M_IPMOPTS);
 		if (nmfilters != NULL)
 			free(nmfilters, M_IPMSOURCE);
 		return (ETOOMANYREFS);
 	}
 
 	return (0);
 }
 
 /*
  * Append a source address to the filter list of the membership at
  * index `gidx'.  Assumes the associated inpcb is locked.
  *
  * Returns 0 on success, ENOBUFS when the list already holds
  * IP_MAX_SOURCE_FILTER entries or no memory is available, and
  * EADDRNOTAVAIL when the source is already on the list.
  */
 static int
 imo_join_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
 {
 	struct in_mfilter	*filter;
 	struct in_msource	*entry;
 
 	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
 	KASSERT(imo->imo_mfilters != NULL,
 	    ("%s: imo_mfilters vector not allocated", __func__));
 
 	filter = &imo->imo_mfilters[gidx];
 
 	/* The per-group list is capped. */
 	if (filter->imf_nsources == IP_MAX_SOURCE_FILTER)
 		return (ENOBUFS);
 
 	/* Duplicates are rejected. */
 	if (imo_match_source(imo, gidx, &src->sa) != NULL)
 		return (EADDRNOTAVAIL);
 
 	/* Do not sleep with inp lock held. */
 	MALLOC(entry, struct in_msource *, sizeof(struct in_msource),
 	    M_IPMSOURCE, M_NOWAIT | M_ZERO);
 	if (entry == NULL)
 		return (ENOBUFS);
 
 	entry->ims_addr = src->ss;
 	TAILQ_INSERT_TAIL(&filter->imf_sources, entry, ims_next);
 	filter->imf_nsources++;
 
 	return (0);
 }
 
 /*
  * Remove a source address from the filter list of the membership at
  * index `gidx'.
  *
  * Returns 0 on success, or EADDRNOTAVAIL when the source is not on the
  * list.  Removal is allowed regardless of how full the list is; the
  * IP_MAX_SOURCE_FILTER cap only applies when adding entries.
  */
 static int
 imo_leave_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
 {
 	struct in_msource	*ims;
 	struct in_mfilter	*imf;
 
 	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
 	KASSERT(imo->imo_mfilters != NULL,
 	    ("%s: imo_mfilters vector not allocated", __func__));
 
 	imf = &imo->imo_mfilters[gidx];
 
 	ims = imo_match_source(imo, gidx, &src->sa);
 	if (ims == NULL)
 		return (EADDRNOTAVAIL);
 
 	TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
 	FREE(ims, M_IPMSOURCE);
 	imf->imf_nsources--;
 
 	return (0);
 }
 
 /*
  * Look up the membership slot of this ip_moptions instance that matches
  * the given IPv4 group and, when `ifp' is not NULL, the given interface.
  * Returns the slot index, or -1 (converted to size_t) if nothing matches.
  */
 size_t
 imo_match_group(struct ip_moptions *imo, struct ifnet *ifp,
     struct sockaddr *group)
 {
 	sockunion_t	*wanted;
 	struct in_multi	*inm;
 	int		 slot;
 	int		 nslots;
 
 	wanted = (sockunion_t *)group;
 
 	/* The imo_membership array may be lazy allocated. */
 	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
 		return (-1);
 
 	nslots = imo->imo_num_memberships;
 	for (slot = 0; slot < nslots; slot++) {
 		inm = imo->imo_membership[slot];
 		if (inm == NULL)
 			continue;
 #if 0
 		printf("%s: trying ifp = %p, inaddr = %s ", __func__,
 		    ifp, inet_ntoa(wanted->sin.sin_addr));
 		printf("against %p, %s\n",
 		    inm->inm_ifp, inet_ntoa(inm->inm_addr));
 #endif
 		/* A NULL ifp acts as a wildcard for the interface. */
 		if (ifp != NULL && inm->inm_ifp != ifp)
 			continue;
 		if (inm->inm_addr.s_addr == wanted->sin.sin_addr.s_addr)
 			return (slot);
 	}
 
 	return (-1);
 }
 
 /*
  * Search the source filter list of the membership at index `gidx' for
  * an entry whose IPv4 address equals `src'.
  * Returns the entry, or NULL if there is none or no filter vector exists.
  */
 struct in_msource *
 imo_match_source(struct ip_moptions *imo, size_t gidx, struct sockaddr *src)
 {
 	struct in_msource	*cur;
 
 	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
 	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
 	    ("%s: invalid index %d\n", __func__, (int)gidx));
 
 	/* The imo_mfilters array may be lazy allocated. */
 	if (imo->imo_mfilters == NULL)
 		return (NULL);
 
 	/*
 	 * Compare the raw IPv4 addresses only.
 	 * TODO: Do the same for IPv6.
 	 * sa_equal() is not used as it is not aware of the deeper
 	 * structure in sockaddr_in or sockaddr_in6.
 	 */
 	TAILQ_FOREACH(cur, &imo->imo_mfilters[gidx].imf_sources, ims_next) {
 		if (((struct sockaddr_in *)&cur->ims_addr)->sin_addr.s_addr ==
 		    ((struct sockaddr_in *)src)->sin_addr.s_addr)
 			return (cur);
 	}
 
 	return (NULL);
 }
 
 /*
  * Join an IPv4 multicast group.
  *
  * Looks up (or creates) the in_multi record for group `*ap' on interface
  * `ifp' and takes a reference on it.  Returns the record, or NULL if the
  * link-layer join or the allocation of a new record failed.
  * Takes Giant (if the interface requires it) and then the IN_MULTI lock
  * for the duration of the call.
  */
 struct in_multi *
 in_addmulti(struct in_addr *ap, struct ifnet *ifp)
 {
 	struct in_multi *inm;
 
 	inm = NULL;
 
 	IFF_LOCKGIANT(ifp);
 	IN_MULTI_LOCK();
 
 	IN_LOOKUP_MULTI(*ap, ifp, inm);
 	if (inm != NULL) {
 		/*
 		 * If we already joined this group, just bump the
 		 * refcount and return it.
 		 */
 		KASSERT(inm->inm_refcount >= 1,
 		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
 		++inm->inm_refcount;
 	} else do {
 		/*
 		 * Single-pass do/while: `break' leaves this block and falls
 		 * through to the common unlock path below.
 		 */
 		sockunion_t		 gsa;
 		struct ifmultiaddr	*ifma;
 		struct in_multi		*ninm;
 		int			 error;
 
 		memset(&gsa, 0, sizeof(gsa));
 		gsa.sin.sin_family = AF_INET;
 		gsa.sin.sin_len = sizeof(struct sockaddr_in);
 		gsa.sin.sin_addr = *ap;
 
 		/*
 		 * Check if a link-layer group is already associated
 		 * with this network-layer group on the given ifnet.
 		 * If so, bump the refcount on the existing network-layer
 		 * group association and return it.
 		 */
 		error = if_addmulti(ifp, &gsa.sa, &ifma);
 		if (error)
 			break;		/* inm is still NULL */
 		if (ifma->ifma_protospec != NULL) {
 			inm = (struct in_multi *)ifma->ifma_protospec;
 #ifdef INVARIANTS
 			if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
 			    inm->inm_addr.s_addr != ap->s_addr)
 				panic("%s: ifma is inconsistent", __func__);
 #endif
 			++inm->inm_refcount;
 			break;
 		}
 
 		/*
 		 * A new membership is needed; construct it and
 		 * perform the IGMP join.
 		 */
 		ninm = malloc(sizeof(*ninm), M_IPMADDR, M_NOWAIT | M_ZERO);
 		if (ninm == NULL) {
 			/* Undo the link-layer join taken above. */
 			if_delmulti_ifma(ifma);
 			break;
 		}
 		ninm->inm_addr = *ap;
 		ninm->inm_ifp = ifp;
 		ninm->inm_ifma = ifma;
 		ninm->inm_refcount = 1;
 		ifma->ifma_protospec = ninm;
 		LIST_INSERT_HEAD(&in_multihead, ninm, inm_link);
 
 		igmp_joingroup(ninm);
 
 		inm = ninm;
 	} while (0);
 
 	IN_MULTI_UNLOCK();
 	IFF_UNLOCKGIANT(ifp);
 
 	return (inm);
 }
 
 /*
  * Leave an IPv4 multicast group.
  * It is OK to call this routine if the underlying ifnet went away.
  *
  * XXX: To deal with the ifp going away, we cheat; the link-layer code in net
  * will set ifma_ifp to NULL when the associated ifnet instance is detached
  * from the system.
  *
  * The only reason we need to violate layers and check ifma_ifp here at all
  * is because certain hardware drivers still require Giant to be held,
  * and it must always be taken before other locks.
  */
 void
 in_delmulti(struct in_multi *inm)
 {
 	struct ifnet *live_ifp;
 
 	KASSERT(inm != NULL, ("%s: inm is NULL", __func__));
 	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
 
 	/* NULL here means the interface has already been detached. */
 	live_ifp = inm->inm_ifma->ifma_ifp;
 	if (live_ifp != NULL) {
 		/* netinet's and net's notion of the ifp must agree. */
 		KASSERT(inm->inm_ifp == live_ifp, ("%s: bad ifp", __func__));
 		IFF_LOCKGIANT(live_ifp);
 	}
 
 	IN_MULTI_LOCK();
 	in_delmulti_locked(inm);
 	IN_MULTI_UNLOCK();
 
 	if (live_ifp != NULL)
 		IFF_UNLOCKGIANT(live_ifp);
 }
 
 /*
  * Drop one reference on a multicast address record, with locks held.
  * When the last reference goes away the group is left via IGMP, the
  * record is unlinked and freed, and the link-layer membership released.
  *
  * It is OK to call this routine if the ifp went away.
  * Assumes that caller holds the IN_MULTI lock, and that
  * Giant was taken before other locks if required by the hardware.
  */
 void
 in_delmulti_locked(struct in_multi *inm)
 {
 	struct ifmultiaddr *link_ma;
 
 	IN_MULTI_LOCK_ASSERT();
 	KASSERT(inm->inm_refcount >= 1, ("%s: freeing freed inm", __func__));
 
 	/* Other references remain; nothing more to do. */
 	if (--inm->inm_refcount != 0)
 		return;
 
 	igmp_leavegroup(inm);
 
 	/* Keep the ifma pointer: inm is freed before it is released. */
 	link_ma = inm->inm_ifma;
 #ifdef DIAGNOSTIC
 	if (bootverbose)
 		printf("%s: purging ifma %p\n", __func__, link_ma);
 #endif
 	KASSERT(link_ma->ifma_protospec == inm,
 	    ("%s: ifma_protospec != inm", __func__));
 	link_ma->ifma_protospec = NULL;
 
 	LIST_REMOVE(inm, inm_link);
 	free(inm, M_IPMADDR);
 
 	if_delmulti_ifma(link_ma);
 }
 
 /*
  * Block or unblock an ASM/SSM multicast source on an inpcb.
  *
  * Handles IP_BLOCK_SOURCE / IP_UNBLOCK_SOURCE (struct ip_mreq_source) and
  * MCAST_BLOCK_SOURCE / MCAST_UNBLOCK_SOURCE (struct group_source_req).
  * The request is copied in and validated before the inpcb is locked;
  * inp_findmoptions() then returns with the inpcb write-locked and every
  * later exit goes through out_locked, which drops that lock.
  * Returns 0 or an errno value.
  */
 static int
 inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct group_source_req		 gsr;
 	sockunion_t			*gsa, *ssa;
 	struct ifnet			*ifp;
 	struct in_mfilter		*imf;
 	struct ip_moptions		*imo;
 	struct in_msource		*ims;
 	size_t				 idx;
 	int				 error;
 	int				 block;
 
 	ifp = NULL;
 	error = 0;
 	block = 0;
 
 	/* gsa/ssa alias the group and source fields of gsr. */
 	memset(&gsr, 0, sizeof(struct group_source_req));
 	gsa = (sockunion_t *)&gsr.gsr_group;
 	ssa = (sockunion_t *)&gsr.gsr_source;
 
 	switch (sopt->sopt_name) {
 	case IP_BLOCK_SOURCE:
 	case IP_UNBLOCK_SOURCE: {
 		struct ip_mreq_source	 mreqs;
 
 		error = sooptcopyin(sopt, &mreqs,
 		    sizeof(struct ip_mreq_source),
 		    sizeof(struct ip_mreq_source));
 		if (error)
 			return (error);
 
 		/* Translate the legacy request into sockaddr form. */
 		gsa->sin.sin_family = AF_INET;
 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
 
 		ssa->sin.sin_family = AF_INET;
 		ssa->sin.sin_len = sizeof(struct sockaddr_in);
 		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
 
 		/* With INADDR_ANY, ifp stays NULL (matches any interface). */
 		if (mreqs.imr_interface.s_addr != INADDR_ANY)
 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
 
 		if (sopt->sopt_name == IP_BLOCK_SOURCE)
 			block = 1;
 
 #ifdef DIAGNOSTIC
 		if (bootverbose) {
 			printf("%s: imr_interface = %s, ifp = %p\n",
 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
 		}
 #endif
 		break;
 	    }
 
 	case MCAST_BLOCK_SOURCE:
 	case MCAST_UNBLOCK_SOURCE:
 		error = sooptcopyin(sopt, &gsr,
 		    sizeof(struct group_source_req),
 		    sizeof(struct group_source_req));
 		if (error)
 			return (error);
 
 		if (gsa->sin.sin_family != AF_INET ||
 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
 			return (EINVAL);
 
 		if (ssa->sin.sin_family != AF_INET ||
 		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
 			return (EINVAL);
 
 		if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
 			return (EADDRNOTAVAIL);
 
 		/*
 		 * NOTE(review): the result is not checked for NULL here; a
 		 * NULL ifp is treated as "any interface" by
 		 * imo_match_group() -- confirm this is intended.
 		 */
 		ifp = ifnet_byindex(gsr.gsr_interface);
 
 		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
 			block = 1;
 		break;
 
 	default:
 #ifdef DIAGNOSTIC
 		if (bootverbose) {
 			printf("%s: unknown sopt_name %d\n", __func__,
 			    sopt->sopt_name);
 		}
 #endif
 		return (EOPNOTSUPP);
 		break;
 	}
 
 	/* XXX INET6 */
 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 		return (EINVAL);
 
 	/*
 	 * Check if we are actually a member of this group.
 	 * The inpcb is write-locked from here on.
 	 */
 	imo = inp_findmoptions(inp);
 	idx = imo_match_group(imo, ifp, &gsa->sa);
 	if (idx == -1 || imo->imo_mfilters == NULL) {
 		error = EADDRNOTAVAIL;
 		goto out_locked;
 	}
 
 	KASSERT(imo->imo_mfilters != NULL,
 	    ("%s: imo_mfilters not allocated", __func__));
 	imf = &imo->imo_mfilters[idx];
 
 	/*
 	 * SSM multicast truth table for block/unblock operations.
 	 *
 	 * Operation   Filter Mode  Entry exists?   Action
 	 *
 	 * block       exclude      no              add source to filter
 	 * unblock     include      no              add source to filter
 	 * block       include      no              EINVAL
 	 * unblock     exclude      no              EINVAL
 	 * block       exclude      yes             EADDRNOTAVAIL
 	 * unblock     include      yes             EADDRNOTAVAIL
 	 * block       include      yes             remove source from filter
 	 * unblock     exclude      yes             remove source from filter
 	 *
 	 * FreeBSD does not explicitly distinguish between ASM and SSM
 	 * mode sockets; all sockets are assumed to have a filter list.
 	 */
 #ifdef DIAGNOSTIC
 	if (bootverbose) {
 		printf("%s: imf_fmode is %s\n", __func__,
 		    imf->imf_fmode == MCAST_INCLUDE ? "include" : "exclude");
 	}
 #endif
 	ims = imo_match_source(imo, idx, &ssa->sa);
 	if (ims == NULL) {
 		/* No entry yet: rows 1-4 of the table above. */
 		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
 		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
 #ifdef DIAGNOSTIC
 			if (bootverbose) {
 				printf("%s: adding %s to filter list\n",
 				    __func__, inet_ntoa(ssa->sin.sin_addr));
 			}
 #endif
 			error = imo_join_source(imo, idx, ssa);
 		}
 		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
 		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
 			/*
 			 * If the socket is in inclusive mode:
 			 *  the source is already blocked as it has no entry.
 			 * If the socket is in exclusive mode:
 			 *  the source is already unblocked as it has no entry.
 			 */
 #ifdef DIAGNOSTIC
 			if (bootverbose) {
 				printf("%s: ims %p; %s already [un]blocked\n",
 				    __func__, ims,
 				    inet_ntoa(ssa->sin.sin_addr));
 			}
 #endif
 			error = EINVAL;
 		}
 	} else {
 		/* Entry exists: rows 5-8 of the table above. */
 		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
 		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
 			/*
 			 * If the socket is in exclusive mode:
 			 *  the source is already blocked as it has an entry.
 			 * If the socket is in inclusive mode:
 			 *  the source is already unblocked as it has an entry.
 			 */
 #ifdef DIAGNOSTIC
 			if (bootverbose) {
 				printf("%s: ims %p; %s already [un]blocked\n",
 				    __func__, ims,
 				    inet_ntoa(ssa->sin.sin_addr));
 			}
 #endif
 			error = EADDRNOTAVAIL;
 		}
 		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
 		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
 #ifdef DIAGNOSTIC
 			if (bootverbose) {
 				printf("%s: removing %s from filter list\n",
 				    __func__, inet_ntoa(ssa->sin.sin_addr));
 			}
 #endif
 			error = imo_leave_source(imo, idx, ssa);
 		}
 	}
 
 out_locked:
 	/* Drops the lock taken by inp_findmoptions(). */
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 /*
  * Return the multicast options structure of an inpcb, creating it with
  * default settings if the inpcb has none yet.  Accepts an unlocked inpcb
  * pointer, but will return it write-locked.  May sleep.
  */
 static struct ip_moptions *
 inp_findmoptions(struct inpcb *inp)
 {
 	struct ip_moptions	 *fresh;
 	struct in_multi		**memberships;
 	struct in_mfilter	 *filters;
 	size_t			  i;
 
 	INP_WLOCK(inp);
 	if (inp->inp_moptions != NULL)
 		return (inp->inp_moptions);
 
 	/* The M_WAITOK allocations below are made with the lock dropped. */
 	INP_WUNLOCK(inp);
 
 	fresh = (struct ip_moptions *)malloc(sizeof(*fresh), M_IPMOPTS,
 	    M_WAITOK);
 	memberships = (struct in_multi **)malloc(
 	    sizeof(*memberships) * IP_MIN_MEMBERSHIPS,
 	    M_IPMOPTS, M_WAITOK | M_ZERO);
 	filters = (struct in_mfilter *)malloc(
 	    sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
 	    M_IPMSOURCE, M_WAITOK);
 
 	/* Initialize per-group source filters. */
 	for (i = 0; i < IP_MIN_MEMBERSHIPS; i++) {
 		filters[i].imf_fmode = MCAST_EXCLUDE;
 		filters[i].imf_nsources = 0;
 		TAILQ_INIT(&filters[i].imf_sources);
 	}
 
 	/* Default option values; no memberships yet. */
 	fresh->imo_multicast_ifp = NULL;
 	fresh->imo_multicast_addr.s_addr = INADDR_ANY;
 	fresh->imo_multicast_vif = -1;
 	fresh->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
 	fresh->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
 	fresh->imo_num_memberships = 0;
 	fresh->imo_max_memberships = IP_MIN_MEMBERSHIPS;
 	fresh->imo_membership = memberships;
 	fresh->imo_mfilters = filters;
 
 	INP_WLOCK(inp);
 	if (inp->inp_moptions == NULL) {
 		inp->inp_moptions = fresh;
 		return (fresh);
 	}
 
 	/* Options appeared while the lock was dropped; discard ours. */
 	free(filters, M_IPMSOURCE);
 	free(memberships, M_IPMOPTS);
 	free(fresh, M_IPMOPTS);
 	return (inp->inp_moptions);
 }
 
 /*
  * Discard the IP multicast options (and source filters).
  * Every membership held by `imo' is left, every source filter entry is
  * freed, and finally the vectors and the structure itself are released.
  */
 void
 inp_freemoptions(struct ip_moptions *imo)
 {
 	struct in_mfilter	*filter;
 	struct in_msource	*src;
 	size_t			 i, count;
 
 	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
 
 	count = imo->imo_num_memberships;
 	for (i = 0; i < count; ++i) {
 		in_delmulti(imo->imo_membership[i]);
 
 		if (imo->imo_mfilters == NULL)
 			continue;
 
 		/* Drain this group's source list from the front. */
 		filter = &imo->imo_mfilters[i];
 		while ((src = TAILQ_FIRST(&filter->imf_sources)) != NULL) {
 			TAILQ_REMOVE(&filter->imf_sources, src, ims_next);
 			FREE(src, M_IPMSOURCE);
 			filter->imf_nsources--;
 		}
 		KASSERT(filter->imf_nsources == 0,
 		    ("%s: did not free all imf_nsources", __func__));
 	}
 
 	if (imo->imo_mfilters != NULL)
 		free(imo->imo_mfilters, M_IPMSOURCE);
 	free(imo->imo_membership, M_IPMOPTS);
 	free(imo, M_IPMOPTS);
 }
 
 /*
  * Atomically get source filters on a socket for an IPv4 multicast group.
  * Called with INP lock held; returns with lock released.
  *
  * On input, msfr_nsrcs is the number of entries the caller's msfr_srcs
  * buffer can hold.  On output, msfr_fmode is the filter mode and
  * msfr_nsrcs the total number of entries the kernel holds for the group.
  * At most the number of entries the caller made room for is copied out.
  *
  * Returns 0 or an errno value: EINVAL for a bad interface index,
  * EADDRNOTAVAIL if the socket is not a member of the group, ENOBUFS if
  * the temporary buffer cannot be allocated, or a copyin/copyout error.
  */
 static int
 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct __msfilterreq	 msfr;
 	sockunion_t		*gsa;
 	struct ifnet		*ifp;
 	struct ip_moptions	*imo;
 	struct in_mfilter	*imf;
 	struct in_msource	*ims;
 	struct sockaddr_storage	*ptss;
 	struct sockaddr_storage	*tss;
 	int			 error;
 	size_t			 idx;
 	size_t			 ncopy;
 	size_t			 nreq;
 
 	INP_WLOCK_ASSERT(inp);
 
 	imo = inp->inp_moptions;
 	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
 
 	/* Drop the lock while copying the request in. */
 	INP_WUNLOCK(inp);
 
 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
 	    sizeof(struct __msfilterreq));
 	if (error)
 		return (error);
 
 	if (msfr.msfr_ifindex == 0 || if_index < msfr.msfr_ifindex)
 		return (EINVAL);
 
 	ifp = ifnet_byindex(msfr.msfr_ifindex);
 	if (ifp == NULL)
 		return (EINVAL);
 
 	/*
 	 * Remember the capacity of the caller's buffer before msfr_nsrcs
 	 * is overwritten with the kernel's entry count below.
 	 */
 	nreq = msfr.msfr_nsrcs;
 
 	INP_WLOCK(inp);
 
 	/*
 	 * Lookup group on the socket.
 	 */
 	gsa = (sockunion_t *)&msfr.msfr_group;
 	idx = imo_match_group(imo, ifp, &gsa->sa);
 	if (idx == -1 || imo->imo_mfilters == NULL) {
 		INP_WUNLOCK(inp);
 		return (EADDRNOTAVAIL);
 	}
 
 	imf = &imo->imo_mfilters[idx];
 	msfr.msfr_fmode = imf->imf_fmode;
 	msfr.msfr_nsrcs = imf->imf_nsources;
 
 	/* Never copy out more than the caller made room for. */
 	ncopy = imf->imf_nsources;
 	if (ncopy > nreq)
 		ncopy = nreq;
 
 	/*
 	 * If the user specified a buffer, copy out the source filter
 	 * entries to userland gracefully.
 	 * msfr.msfr_nsrcs is always set to the total number of filter
 	 * entries which the kernel currently has for this group.
 	 */
 	tss = NULL;
 	if (msfr.msfr_srcs != NULL && ncopy > 0) {
 		/*
 		 * Make a copy of the source vector so that we do not
 		 * thrash the inpcb lock whilst copying it out.
 		 * We only copy out the number of entries which userland
 		 * has asked for, but we always tell userland how big the
 		 * buffer really needs to be.
 		 * The buffer is zeroed so that no stale kernel memory can
 		 * reach userland should the list be shorter than expected.
 		 */
 		MALLOC(tss, struct sockaddr_storage *,
 		    sizeof(struct sockaddr_storage) * ncopy,
 		    M_TEMP, M_NOWAIT | M_ZERO);
 		if (tss == NULL) {
 			error = ENOBUFS;
 		} else {
 			ptss = tss;
 			TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
 				if (ptss == tss + ncopy)
 					break;
 				memcpy(ptss++, &ims->ims_addr,
 				    sizeof(struct sockaddr_storage));
 			}
 		}
 	}
 
 	INP_WUNLOCK(inp);
 
 	if (tss != NULL) {
 		error = copyout(tss, msfr.msfr_srcs,
 		    sizeof(struct sockaddr_storage) * ncopy);
 		FREE(tss, M_TEMP);
 	}
 
 	if (error)
 		return (error);
 
 	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
 
 	return (error);
 }
 
 /*
  * Return the IP multicast options in response to user getsockopt().
  */
 int
 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct ip_mreqn		 mreqn;
 	struct ip_moptions	*imo;
 	struct ifnet		*ifp;
 	struct in_ifaddr	*ia;
 	int			 error, optval;
 	u_char			 coptval;
 
 	INP_WLOCK(inp);
 	imo = inp->inp_moptions;
 	/*
 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
 	 * or is a divert socket, reject it.
 	 */
 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
 		INP_WUNLOCK(inp);
 		return (EOPNOTSUPP);
 	}
 
 	error = 0;
 	switch (sopt->sopt_name) {
 	case IP_MULTICAST_VIF:
 		if (imo != NULL)
 			optval = imo->imo_multicast_vif;
 		else
 			optval = -1;
 		INP_WUNLOCK(inp);
 		error = sooptcopyout(sopt, &optval, sizeof(int));
 		break;
 
 	case IP_MULTICAST_IF:
 		memset(&mreqn, 0, sizeof(struct ip_mreqn));
 		if (imo != NULL) {
 			ifp = imo->imo_multicast_ifp;
 			if (imo->imo_multicast_addr.s_addr != INADDR_ANY) {
 				mreqn.imr_address = imo->imo_multicast_addr;
 			} else if (ifp != NULL) {
 				mreqn.imr_ifindex = ifp->if_index;
 				IFP_TO_IA(ifp, ia);
 				if (ia != NULL) {
 					mreqn.imr_address =
 					    IA_SIN(ia)->sin_addr;
 				}
 			}
 		}
 		INP_WUNLOCK(inp);
 		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
 			error = sooptcopyout(sopt, &mreqn,
 			    sizeof(struct ip_mreqn));
 		} else {
 			error = sooptcopyout(sopt, &mreqn.imr_address,
 			    sizeof(struct in_addr));
 		}
 		break;
 
 	case IP_MULTICAST_TTL:
 		if (imo == 0)
 			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
 		else
 			optval = coptval = imo->imo_multicast_ttl;
 		INP_WUNLOCK(inp);
 		if (sopt->sopt_valsize == sizeof(u_char))
 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
 		else
 			error = sooptcopyout(sopt, &optval, sizeof(int));
 		break;
 
 	case IP_MULTICAST_LOOP:
 		if (imo == 0)
 			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
 		else
 			optval = coptval = imo->imo_multicast_loop;
 		INP_WUNLOCK(inp);
 		if (sopt->sopt_valsize == sizeof(u_char))
 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
 		else
 			error = sooptcopyout(sopt, &optval, sizeof(int));
 		break;
 
 	case IP_MSFILTER:
 		if (imo == NULL) {
 			error = EADDRNOTAVAIL;
 			INP_WUNLOCK(inp);
 		} else {
 			error = inp_get_source_filters(inp, sopt);
 		}
 		break;
 
 	default:
 		INP_WUNLOCK(inp);
 		error = ENOPROTOOPT;
 		break;
 	}
 
 	INP_UNLOCK_ASSERT(inp);
 
 	return (error);
 }
 
 /*
  * Join an IPv4 multicast group, possibly with a source.
  *
  * Handles IP_ADD_MEMBERSHIP / IP_ADD_SOURCE_MEMBERSHIP (struct ip_mreq,
  * struct ip_mreq_source) and MCAST_JOIN_GROUP / MCAST_JOIN_SOURCE_GROUP
  * (struct group_req, struct group_source_req).  The request is copied in
  * and validated before the inpcb is locked; inp_findmoptions() returns
  * with the inpcb write-locked, and the lock is dropped again on every
  * exit after that point.
  *
  * A join is "any-source" when no source was supplied (the source
  * sockaddr stays AF_UNSPEC) and "source-specific" otherwise.
  * Returns 0 or an errno value.
  */
 static int
 inp_join_group(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct group_source_req		 gsr;
 	sockunion_t			*gsa, *ssa;
 	struct ifnet			*ifp;
 	struct in_mfilter		*imf;
 	struct ip_moptions		*imo;
 	struct in_multi			*inm;
 	size_t				 idx;
 	int				 error;
 
 	ifp = NULL;
 	error = 0;
 
 	/* gsa/ssa alias the group and source fields of gsr. */
 	memset(&gsr, 0, sizeof(struct group_source_req));
 	gsa = (sockunion_t *)&gsr.gsr_group;
 	gsa->ss.ss_family = AF_UNSPEC;
 	ssa = (sockunion_t *)&gsr.gsr_source;
 	ssa->ss.ss_family = AF_UNSPEC;
 
 	switch (sopt->sopt_name) {
 	case IP_ADD_MEMBERSHIP:
 	case IP_ADD_SOURCE_MEMBERSHIP: {
 		struct ip_mreq_source	 mreqs;
 
 		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
 			error = sooptcopyin(sopt, &mreqs,
 			    sizeof(struct ip_mreq),
 			    sizeof(struct ip_mreq));
 			/*
 			 * Do argument switcharoo from ip_mreq into
 			 * ip_mreq_source to avoid using two instances.
 			 */
 			mreqs.imr_interface = mreqs.imr_sourceaddr;
 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
 		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
 			error = sooptcopyin(sopt, &mreqs,
 			    sizeof(struct ip_mreq_source),
 			    sizeof(struct ip_mreq_source));
 		}
 		if (error)
 			return (error);
 
 		gsa->sin.sin_family = AF_INET;
 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
 
 		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
 			ssa->sin.sin_family = AF_INET;
 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
 		}
 
 		/*
 		 * Obtain ifp. If no interface address was provided,
 		 * use the interface of the route in the unicast FIB for
 		 * the given multicast destination; usually, this is the
 		 * default route.
 		 * If this lookup fails, attempt to use the first non-loopback
 		 * interface with multicast capability in the system as a
 		 * last resort. The legacy IPv4 ASM API requires that we do
 		 * this in order to allow groups to be joined when the routing
 		 * table has not yet been populated during boot.
 		 * If all of these conditions fail, return EADDRNOTAVAIL, and
 		 * reject the IPv4 multicast join.
 		 */
 		if (mreqs.imr_interface.s_addr != INADDR_ANY) {
 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
 		} else {
 			struct route ro;
 
 			ro.ro_rt = NULL;
 			*(struct sockaddr_in *)&ro.ro_dst = gsa->sin;
-			rtalloc_ign(&ro, RTF_CLONING);
+			in_rtalloc_ign(&ro, RTF_CLONING,
+			   inp->inp_inc.inc_fibnum);
 			if (ro.ro_rt != NULL) {
 				ifp = ro.ro_rt->rt_ifp;
 				KASSERT(ifp != NULL, ("%s: null ifp",
 				    __func__));
 				RTFREE(ro.ro_rt);
 			} else {
 				struct in_ifaddr *ia;
 				struct ifnet *mfp = NULL;
 				TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
 					mfp = ia->ia_ifp;
 					if (!(mfp->if_flags & IFF_LOOPBACK) &&
 					     (mfp->if_flags & IFF_MULTICAST)) {
 						ifp = mfp;
 						break;
 					}
 				}
 			}
 		}
 #ifdef DIAGNOSTIC
 		if (bootverbose) {
 			printf("%s: imr_interface = %s, ifp = %p\n",
 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
 		}
 #endif
 		break;
 	}
 
 	case MCAST_JOIN_GROUP:
 	case MCAST_JOIN_SOURCE_GROUP:
 		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
 			error = sooptcopyin(sopt, &gsr,
 			    sizeof(struct group_req),
 			    sizeof(struct group_req));
 		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
 			error = sooptcopyin(sopt, &gsr,
 			    sizeof(struct group_source_req),
 			    sizeof(struct group_source_req));
 		}
 		if (error)
 			return (error);
 
 		if (gsa->sin.sin_family != AF_INET ||
 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
 			return (EINVAL);
 
 		/*
 		 * Overwrite the port field if present, as the sockaddr
 		 * being copied in may be matched with a binary comparison.
 		 * XXX INET6
 		 */
 		gsa->sin.sin_port = 0;
 		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
 			if (ssa->sin.sin_family != AF_INET ||
 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
 				return (EINVAL);
 			ssa->sin.sin_port = 0;
 		}
 
 		/*
 		 * Obtain the ifp.
 		 */
 		if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
 			return (EADDRNOTAVAIL);
 		ifp = ifnet_byindex(gsr.gsr_interface);
 
 		break;
 
 	default:
 #ifdef DIAGNOSTIC
 		if (bootverbose) {
 			printf("%s: unknown sopt_name %d\n", __func__,
 			    sopt->sopt_name);
 		}
 #endif
 		return (EOPNOTSUPP);
 		break;
 	}
 
 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 		return (EINVAL);
 
 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
 		return (EADDRNOTAVAIL);
 
 	/*
 	 * Check if we already hold membership of this group for this inpcb.
 	 * If so, we do not need to perform the initial join.
 	 * The inpcb is write-locked from here on.
 	 */
 	imo = inp_findmoptions(inp);
 	idx = imo_match_group(imo, ifp, &gsa->sa);
 	if (idx != -1) {
 		if (ssa->ss.ss_family == AF_UNSPEC) {
 			/*
 			 * No source was supplied: attempting to join an
 			 * ASM group (when already an ASM or SSM member)
 			 * is an error.
 			 */
 			error = EADDRNOTAVAIL;
 		} else {
 			imf = &imo->imo_mfilters[idx];
 			if (imf->imf_nsources == 0) {
 				/*
 				 * Attempting to join an SSM group (when
 				 * already an ASM member) is an error.
 				 */
 				error = EINVAL;
 			} else {
 				/*
 				 * Attempting to join an SSM group (when
 				 * already an SSM member) means "add this
 				 * source to the inclusive filter list".
 				 */
 				error = imo_join_source(imo, idx, ssa);
 			}
 		}
 		goto out_locked;
 	}
 
 	/*
 	 * Call imo_grow() to reallocate the membership and source filter
 	 * vectors if they are full. If the size would exceed the hard limit,
 	 * then we know we've really run out of entries. We keep the INP
 	 * lock held to avoid introducing a race condition.
 	 */
 	if (imo->imo_num_memberships == imo->imo_max_memberships) {
 		error = imo_grow(imo);
 		if (error)
 			goto out_locked;
 	}
 
 	/*
 	 * So far, so good: perform the layer 3 join, layer 2 join,
 	 * and make an IGMP announcement if needed.
 	 */
 	inm = in_addmulti(&gsa->sin.sin_addr, ifp);
 	if (inm == NULL) {
 		error = ENOBUFS;
 		goto out_locked;
 	}
 	idx = imo->imo_num_memberships;
 	imo->imo_membership[idx] = inm;
 	imo->imo_num_memberships++;
 
 	KASSERT(imo->imo_mfilters != NULL,
 	    ("%s: imf_mfilters vector was not allocated", __func__));
 	imf = &imo->imo_mfilters[idx];
 	KASSERT(TAILQ_EMPTY(&imf->imf_sources),
 	    ("%s: imf_sources not empty", __func__));
 
 	/*
 	 * If this is a new SSM group join (i.e. a source was specified
 	 * with this group), add this source to the filter list.
 	 */
 	if (ssa->ss.ss_family != AF_UNSPEC) {
 		/*
 		 * An initial SSM join implies that this socket's membership
 		 * of the multicast group is now in inclusive mode.
 		 */
 		imf->imf_fmode = MCAST_INCLUDE;
 
 		error = imo_join_source(imo, idx, ssa);
 		if (error) {
 			/*
 			 * Give the slot back in its default (exclusive)
 			 * state so that a later join reusing it does not
 			 * inherit inclusive mode with an empty list.
 			 */
 			imf->imf_fmode = MCAST_EXCLUDE;
 			/*
 			 * Drop inp lock before calling in_delmulti(),
 			 * to prevent a lock order reversal.
 			 */
 			--imo->imo_num_memberships;
 			INP_WUNLOCK(inp);
 			in_delmulti(inm);
 			return (error);
 		}
 	}
 
 out_locked:
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 /*
  * Leave an IPv4 multicast group on an inpcb, possibly with a source.
  *
  * Handles IP_DROP_MEMBERSHIP, IP_DROP_SOURCE_MEMBERSHIP, MCAST_LEAVE_GROUP
  * and MCAST_LEAVE_SOURCE_GROUP.  Whatever form the request arrives in, it
  * is normalised into a group_source_req; a source family of AF_UNSPEC
  * means that no source was supplied and the whole group is to be left.
  *
  * Returns 0 on success, otherwise an errno value: EINVAL for malformed
  * arguments or an invalid source leave, EADDRNOTAVAIL for a bad interface
  * index or when the socket is not a member of the group, EOPNOTSUPP for
  * an unrecognised option, or the error from sooptcopyin() or
  * imo_leave_source().
  *
  * Every return taken after the inp_findmoptions() call goes through
  * out_locked, which releases the inpcb write lock.
  */
 static int
 inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct group_source_req		 gsr;
 	struct ip_mreq_source		 mreqs;
 	sockunion_t			*gsa, *ssa;
 	struct ifnet			*ifp;
 	struct in_mfilter		*imf, *dimf, *simf;
 	struct ip_moptions		*imo;
 	struct in_msource		*ims, *tims;
 	struct in_multi			*inm;
 	size_t				 idx;
 	int				 error;
 
 	ifp = NULL;
 	error = 0;
 
 	memset(&gsr, 0, sizeof(struct group_source_req));
 	gsa = (sockunion_t *)&gsr.gsr_group;
 	gsa->ss.ss_family = AF_UNSPEC;
 	ssa = (sockunion_t *)&gsr.gsr_source;
 	ssa->ss.ss_family = AF_UNSPEC;
 
 	switch (sopt->sopt_name) {
 	case IP_DROP_MEMBERSHIP:
 	case IP_DROP_SOURCE_MEMBERSHIP:
 		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
 			error = sooptcopyin(sopt, &mreqs,
 			    sizeof(struct ip_mreq),
 			    sizeof(struct ip_mreq));
 			/*
 			 * Swap interface and sourceaddr arguments,
 			 * as ip_mreq and ip_mreq_source are laid
 			 * out differently.
 			 */
 			mreqs.imr_interface = mreqs.imr_sourceaddr;
 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
 		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
 			error = sooptcopyin(sopt, &mreqs,
 			    sizeof(struct ip_mreq_source),
 			    sizeof(struct ip_mreq_source));
 		}
 		if (error)
 			return (error);
 
 		gsa->sin.sin_family = AF_INET;
 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
 
 		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
 			ssa->sin.sin_family = AF_INET;
 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
 		}
 
 		/* ifp stays NULL unless an interface address resolves. */
 		if (gsa->sin.sin_addr.s_addr != INADDR_ANY)
 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
 
 #ifdef DIAGNOSTIC
 		if (bootverbose) {
 			printf("%s: imr_interface = %s, ifp = %p\n",
 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
 		}
 #endif
 		break;
 
 	case MCAST_LEAVE_GROUP:
 	case MCAST_LEAVE_SOURCE_GROUP:
 		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
 			error = sooptcopyin(sopt, &gsr,
 			    sizeof(struct group_req),
 			    sizeof(struct group_req));
 		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
 			error = sooptcopyin(sopt, &gsr,
 			    sizeof(struct group_source_req),
 			    sizeof(struct group_source_req));
 		}
 		if (error)
 			return (error);
 
 		if (gsa->sin.sin_family != AF_INET ||
 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
 			return (EINVAL);
 
 		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
 			if (ssa->sin.sin_family != AF_INET ||
 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
 				return (EINVAL);
 		}
 
 		if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
 			return (EADDRNOTAVAIL);
 
 		ifp = ifnet_byindex(gsr.gsr_interface);
 		break;
 
 	default:
 #ifdef DIAGNOSTIC
 		if (bootverbose) {
 			printf("%s: unknown sopt_name %d\n", __func__,
 			    sopt->sopt_name);
 		}
 #endif
 		return (EOPNOTSUPP);
 		break;
 	}
 
 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 		return (EINVAL);
 
 	/*
 	 * Find the membership in the membership array.
 	 * idx is a size_t, so the "not found" value -1 is compared after
 	 * conversion to the unsigned type.
 	 */
 	imo = inp_findmoptions(inp);
 	idx = imo_match_group(imo, ifp, &gsa->sa);
 	if (idx == -1) {
 		error = EADDRNOTAVAIL;
 		goto out_locked;
 	}
 	imf = &imo->imo_mfilters[idx];
 
 	/*
 	 * If we were instructed only to leave a given source, do so.
 	 */
 	if (ssa->ss.ss_family != AF_UNSPEC) {
 		if (imf->imf_nsources == 0 ||
 		    imf->imf_fmode == MCAST_EXCLUDE) {
 			/*
 			 * Attempting to SSM leave an ASM group
 			 * is an error; should use *_BLOCK_SOURCE instead.
 			 * Attempting to SSM leave a source in a group when
 			 * the socket is in 'exclude mode' is also an error.
 			 */
 			error = EINVAL;
 		} else {
 			error = imo_leave_source(imo, idx, ssa);
 		}
 		/*
 		 * If an error occurred, or this source is not the last
 		 * source in the group, do not leave the whole group.
 		 */
 		if (error || imf->imf_nsources > 0)
 			goto out_locked;
 	}
 
 	/*
 	 * Give up the multicast address record to which the membership points.
 	 */
 	inm = imo->imo_membership[idx];
 	in_delmulti(inm);
 
 	/*
 	 * Free any source filters for this group if they exist.
 	 * Revert inpcb to the default MCAST_EXCLUDE state.
 	 */
 	if (imo->imo_mfilters != NULL) {
 		TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
 			TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
 			FREE(ims, M_IPMSOURCE);
 			imf->imf_nsources--;
 		}
 		KASSERT(imf->imf_nsources == 0,
 		    ("%s: imf_nsources not 0", __func__));
 		KASSERT(TAILQ_EMPTY(&imf->imf_sources),
 		    ("%s: imf_sources not empty", __func__));
 		imf->imf_fmode = MCAST_EXCLUDE;
 	}
 
 	/*
 	 * Remove the gap in the membership array.  The source filter vector
 	 * is indexed in parallel with it, so each following group's filter
 	 * state is moved down one slot as well; otherwise the remaining
 	 * memberships would be paired with a neighbour's filter list and
 	 * the vacated last slot could keep stale sources.
 	 *
 	 * The destination slot is always empty at this point: slot idx was
 	 * drained above, and each pass leaves its source slot empty for the
 	 * next one.  The list is spliced with TAILQ_CONCAT() rather than
 	 * copied by structure assignment, since a tail queue head must not
 	 * simply be relocated in memory.
 	 *
 	 * NOTE(review): assumes imf_sources, imf_nsources and imf_fmode are
 	 * the only per-group filter state -- confirm against the definition
 	 * of struct in_mfilter.
 	 */
 	for (++idx; idx < imo->imo_num_memberships; ++idx) {
 		imo->imo_membership[idx-1] = imo->imo_membership[idx];
 		if (imo->imo_mfilters == NULL)
 			continue;
 		dimf = &imo->imo_mfilters[idx-1];
 		simf = &imo->imo_mfilters[idx];
 		TAILQ_CONCAT(&dimf->imf_sources, &simf->imf_sources, ims_next);
 		dimf->imf_nsources = simf->imf_nsources;
 		dimf->imf_fmode = simf->imf_fmode;
 		simf->imf_nsources = 0;
 		simf->imf_fmode = MCAST_EXCLUDE;
 	}
 	imo->imo_num_memberships--;
 
 out_locked:
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 /*
  * Select the interface for transmitting IPv4 multicast datagrams.
  *
  * Either an instance of struct in_addr or an instance of struct ip_mreqn
  * may be passed to this socket option. An address of INADDR_ANY or an
  * interface index of 0 is used to remove a previous selection.
  * When no interface is selected, one is chosen for every send.
  */
 static int
 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct in_addr		 addr;
 	struct ip_mreqn		 mreqn;
 	struct ifnet		*ifp;
 	struct ip_moptions	*imo;
 	int			 error;
 
 	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
 		/*
 		 * An interface index was specified using the
 		 * Linux-derived ip_mreqn structure.
 		 */
 		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
 		    sizeof(struct ip_mreqn));
 		if (error)
 			return (error);
 
 		if (mreqn.imr_ifindex < 0 || if_index < mreqn.imr_ifindex)
 			return (EINVAL);
 
 		if (mreqn.imr_ifindex == 0) {
 			ifp = NULL;
 		} else {
 			ifp = ifnet_byindex(mreqn.imr_ifindex);
 			if (ifp == NULL)
 				return (EADDRNOTAVAIL);
 		}
 	} else {
 		/*
 		 * An interface was specified by IPv4 address.
 		 * This is the traditional BSD usage.
 		 */
 		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
 		    sizeof(struct in_addr));
 		if (error)
 			return (error);
 		if (addr.s_addr == INADDR_ANY) {
 			ifp = NULL;
 		} else {
 			INADDR_TO_IFP(addr, ifp);
 			if (ifp == NULL)
 				return (EADDRNOTAVAIL);
 		}
 #ifdef DIAGNOSTIC
 		if (bootverbose) {
 			printf("%s: ifp = %p, addr = %s\n",
 			    __func__, ifp, inet_ntoa(addr)); /* XXX INET6 */
 		}
 #endif
 	}
 
 	/* Reject interfaces which do not support multicast. */
 	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
 		return (EOPNOTSUPP);
 
 	imo = inp_findmoptions(inp);
 	imo->imo_multicast_ifp = ifp;
 	imo->imo_multicast_addr.s_addr = INADDR_ANY;
 	INP_WUNLOCK(inp);
 
 	return (0);
 }
 
 /*
  * Atomically set source filters on a socket for an IPv4 multicast group.
  */
 static int
 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct __msfilterreq	 msfr;
 	sockunion_t		*gsa;
 	struct ifnet		*ifp;
 	struct in_mfilter	*imf;
 	struct ip_moptions	*imo;
 	struct in_msource	*ims, *tims;
 	size_t			 idx;
 	int			 error;
 
 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
 	    sizeof(struct __msfilterreq));
 	if (error)
 		return (error);
 
 	if (msfr.msfr_nsrcs > IP_MAX_SOURCE_FILTER ||
 	    (msfr.msfr_fmode != MCAST_EXCLUDE &&
 	     msfr.msfr_fmode != MCAST_INCLUDE))
 		return (EINVAL);
 
 	if (msfr.msfr_group.ss_family != AF_INET ||
 	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
 		return (EINVAL);
 
 	gsa = (sockunion_t *)&msfr.msfr_group;
 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
 		return (EINVAL);
 
 	gsa->sin.sin_port = 0;	/* ignore port */
 
 	if (msfr.msfr_ifindex == 0 || if_index < msfr.msfr_ifindex)
 		return (EADDRNOTAVAIL);
 
 	ifp = ifnet_byindex(msfr.msfr_ifindex);
 	if (ifp == NULL)
 		return (EADDRNOTAVAIL);
 
 	/*
 	 * Take the INP lock.
 	 * Check if this socket is a member of this group.
 	 */
 	imo = inp_findmoptions(inp);
 	idx = imo_match_group(imo, ifp, &gsa->sa);
 	if (idx == -1 || imo->imo_mfilters == NULL) {
 		error = EADDRNOTAVAIL;
 		goto out_locked;
 	}
 	imf = &imo->imo_mfilters[idx];
 
 #ifdef DIAGNOSTIC
 	if (bootverbose)
 		printf("%s: clearing source list\n", __func__);
 #endif
 
 	/*
 	 * Remove any existing source filters.
 	 */
 	TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
 		TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
 		FREE(ims, M_IPMSOURCE);
 		imf->imf_nsources--;
 	}
 	KASSERT(imf->imf_nsources == 0,
 	    ("%s: source list not cleared", __func__));
 
 	/*
 	 * Apply any new source filters, if present.
 	 */
 	if (msfr.msfr_nsrcs > 0) {
 		struct in_msource	**pnims;
 		struct in_msource	*nims;
 		struct sockaddr_storage	*kss;
 		struct sockaddr_storage	*pkss;
 		sockunion_t		*psu;
 		int			 i, j;
 
 		/*
 		 * Drop the inp lock so we may sleep if we need to
 		 * in order to satisfy a malloc request.
 		 * We will re-take it before changing socket state.
 		 */
 		INP_WUNLOCK(inp);
 #ifdef DIAGNOSTIC
 		if (bootverbose) {
 			printf("%s: loading %lu source list entries\n",
 			    __func__, (unsigned long)msfr.msfr_nsrcs);
 		}
 #endif
 		/*
 		 * Make a copy of the user-space source vector so
 		 * that we may copy them with a single copyin. This
 		 * allows us to deal with page faults up-front.
 		 */
 		MALLOC(kss, struct sockaddr_storage *,
 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
 		    M_TEMP, M_WAITOK);
 		error = copyin(msfr.msfr_srcs, kss,
 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
 		if (error) {
 			FREE(kss, M_TEMP);
 			return (error);
 		}
 
 		/*
 		 * Perform argument checking on every sockaddr_storage
 		 * structure in the vector provided to us. Overwrite
 		 * fields which should not apply to source entries.
 		 * TODO: Check for duplicate sources on this pass.
 		 */
 		psu = (sockunion_t *)kss;
 		for (i = 0; i < msfr.msfr_nsrcs; i++, psu++) {
 			switch (psu->ss.ss_family) {
 			case AF_INET:
 				if (psu->sin.sin_len !=
 				    sizeof(struct sockaddr_in)) {
 					error = EINVAL;
 				} else {
 					psu->sin.sin_port = 0;
 				}
 				break;
 #ifdef notyet
 			case AF_INET6;
 				if (psu->sin6.sin6_len !=
 				    sizeof(struct sockaddr_in6)) {
 					error = EINVAL;
 				} else {
 					psu->sin6.sin6_port = 0;
 					psu->sin6.sin6_flowinfo = 0;
 				}
 				break;
 #endif
 			default:
 				error = EAFNOSUPPORT;
 				break;
 			}
 			if (error)
 				break;
 		}
 		if (error) {
 			FREE(kss, M_TEMP);
 			return (error);
 		}
 
 		/*
 		 * Allocate a block to track all the in_msource
 		 * entries we are about to allocate, in case we
 		 * abruptly need to free them.
 		 */
 		MALLOC(pnims, struct in_msource **,
 		    sizeof(struct in_msource *) * msfr.msfr_nsrcs,
 		    M_TEMP, M_WAITOK | M_ZERO);
 
 		/*
 		 * Allocate up to nsrcs individual chunks.
 		 * If we encounter an error, backtrack out of
 		 * all allocations cleanly; updates must be atomic.
 		 */
 		pkss = kss;
 		nims = NULL;
 		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
 			MALLOC(nims, struct in_msource *,
 			    sizeof(struct in_msource) * msfr.msfr_nsrcs,
 			    M_IPMSOURCE, M_WAITOK | M_ZERO);
 			pnims[i] = nims;
 		}
 		if (i < msfr.msfr_nsrcs) {
 			for (j = 0; j < i; j++) {
 				if (pnims[j] != NULL)
 					FREE(pnims[j], M_IPMSOURCE);
 			}
 			FREE(pnims, M_TEMP);
 			FREE(kss, M_TEMP);
 			return (ENOBUFS);
 		}
 
 		INP_UNLOCK_ASSERT(inp);
 
 		/*
 		 * Finally, apply the filters to the socket.
 		 * Re-take the inp lock; we are changing socket state.
 		 */
 		pkss = kss;
 		INP_WLOCK(inp);
 		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
 			memcpy(&(pnims[i]->ims_addr), pkss,
 			    sizeof(struct sockaddr_storage));
 			TAILQ_INSERT_TAIL(&imf->imf_sources, pnims[i],
 			    ims_next);
 			imf->imf_nsources++;
 		}
 		FREE(pnims, M_TEMP);
 		FREE(kss, M_TEMP);
 	}
 
 	/*
 	 * Update the filter mode on the socket before releasing the inpcb.
 	 */
 	INP_WLOCK_ASSERT(inp);
 	imf->imf_fmode = msfr.msfr_fmode;
 
 out_locked:
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 /*
  * Set the IP multicast options in response to user setsockopt().
  *
  * Many of the socket options handled in this function duplicate the
  * functionality of socket options in the regular unicast API. However,
  * it is not possible to merge the duplicate code, because the idempotence
  * of the IPv4 multicast part of the BSD Sockets API must be preserved;
  * the effects of these options must be treated as separate and distinct.
  *
  * Returns 0 on success or an errno value; EOPNOTSUPP is returned for
  * sockets that are not SOCK_RAW/SOCK_DGRAM, for divert sockets and for
  * unknown option names.  The inpcb is expected to be unlocked on entry
  * and is unlocked again on return (asserted before returning); the
  * cases handled inline unlock it themselves after inp_findmoptions().
  */
 int
 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct ip_moptions	*imo;
 	int			 error;
 
 	error = 0;
 
 	/*
 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
 	 * or is a divert socket, reject it.
 	 * XXX Unlocked read of inp_socket believed OK.
 	 */
 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
 		return (EOPNOTSUPP);
 
 	switch (sopt->sopt_name) {
 	case IP_MULTICAST_VIF: {
 		int vifi;
 		/*
 		 * Select a multicast VIF for transmission.
 		 * Only useful if multicast forwarding is active.
 		 */
 		if (legal_vif_num == NULL) {
 			error = EOPNOTSUPP;
 			break;
 		}
 		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
 		if (error)
 			break;
 		/* -1 is accepted in addition to the legal VIF numbers. */
 		if (!legal_vif_num(vifi) && (vifi != -1)) {
 			error = EINVAL;
 			break;
 		}
 		imo = inp_findmoptions(inp);
 		imo->imo_multicast_vif = vifi;
 		INP_WUNLOCK(inp);
 		break;
 	}
 
 	case IP_MULTICAST_IF:
 		error = inp_set_multicast_if(inp, sopt);
 		break;
 
 	case IP_MULTICAST_TTL: {
 		u_char ttl;
 
 		/*
 		 * Set the IP time-to-live for outgoing multicast packets.
 		 * The original multicast API required a char argument,
 		 * which is inconsistent with the rest of the socket API.
 		 * We allow either a char or an int.
 		 */
 		if (sopt->sopt_valsize == sizeof(u_char)) {
 			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
 			    sizeof(u_char));
 			if (error)
 				break;
 		} else {
 			u_int ittl;
 
 			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
 			    sizeof(u_int));
 			if (error)
 				break;
 			if (ittl > 255) {
 				error = EINVAL;
 				break;
 			}
 			ttl = (u_char)ittl;
 		}
 		imo = inp_findmoptions(inp);
 		imo->imo_multicast_ttl = ttl;
 		INP_WUNLOCK(inp);
 		break;
 	}
 
 	case IP_MULTICAST_LOOP: {
 		u_char loop;
 
 		/*
 		 * Set the loopback flag for outgoing multicast packets.
 		 * Must be zero or one.  The original multicast API required a
 		 * char argument, which is inconsistent with the rest
 		 * of the socket API.  We allow either a char or an int.
 		 */
 		if (sopt->sopt_valsize == sizeof(u_char)) {
 			error = sooptcopyin(sopt, &loop, sizeof(u_char),
 			    sizeof(u_char));
 			if (error)
 				break;
 		} else {
 			u_int iloop;
 
 			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
 					    sizeof(u_int));
 			if (error)
 				break;
 			/*
 			 * Reduce to a boolean before narrowing.  A plain
 			 * cast to u_char would keep only the low byte, so
 			 * a non-zero value such as 0x100 would be stored
 			 * as "loopback off".
 			 */
 			loop = (iloop != 0);
 		}
 		imo = inp_findmoptions(inp);
 		imo->imo_multicast_loop = !!loop;
 		INP_WUNLOCK(inp);
 		break;
 	}
 
 	case IP_ADD_MEMBERSHIP:
 	case IP_ADD_SOURCE_MEMBERSHIP:
 	case MCAST_JOIN_GROUP:
 	case MCAST_JOIN_SOURCE_GROUP:
 		error = inp_join_group(inp, sopt);
 		break;
 
 	case IP_DROP_MEMBERSHIP:
 	case IP_DROP_SOURCE_MEMBERSHIP:
 	case MCAST_LEAVE_GROUP:
 	case MCAST_LEAVE_SOURCE_GROUP:
 		error = inp_leave_group(inp, sopt);
 		break;
 
 	case IP_BLOCK_SOURCE:
 	case IP_UNBLOCK_SOURCE:
 	case MCAST_BLOCK_SOURCE:
 	case MCAST_UNBLOCK_SOURCE:
 		error = inp_change_source_filter(inp, sopt);
 		break;
 
 	case IP_MSFILTER:
 		error = inp_set_source_filters(inp, sopt);
 		break;
 
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 	INP_UNLOCK_ASSERT(inp);
 
 	return (error);
 }
Index: head/sys/netinet/in_pcb.c
===================================================================
--- head/sys/netinet/in_pcb.c	(revision 178887)
+++ head/sys/netinet/in_pcb.c	(revision 178888)
@@ -1,1502 +1,1503 @@
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993, 1995
  *	The Regents of the University of California.
  * Copyright (c) 2007 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_ipsec.h"
 #include "opt_inet6.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #endif /* INET6 */
 
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/key.h>
 #endif /* IPSEC */
 
 #include <security/mac/mac_framework.h>
 
 /*
  * These configure the range of local port addresses assigned to
  * "unspecified" outgoing connections/packets/whatever.
  *
  * in_pcbbind_setup() picks one first/last pair depending on the pcb's
  * INP_HIGHPORT / INP_LOWPORT flags; sysctl_net_ipport_check() clamps the
  * values after a sysctl write.
  */
 int	ipport_lowfirstauto  = IPPORT_RESERVED - 1;	/* 1023 */
 int	ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
 int	ipport_firstauto = IPPORT_EPHEMERALFIRST;	/* 10000 */
 int	ipport_lastauto  = IPPORT_EPHEMERALLAST;	/* 65535 */
 int	ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
 int	ipport_hilastauto  = IPPORT_HILASTAUTO;		/* 65535 */
 
 /*
  * Reserved ports accessible only to root. There are significant
  * security considerations that must be accounted for when changing these,
  * but the security benefits can be great. Please be careful.
  *
  * in_pcbbind_setup() requires PRIV_NETINET_RESERVEDPORT for an explicit
  * bind to a port within [ipport_reservedlow, ipport_reservedhigh].
  */
 int	ipport_reservedhigh = IPPORT_RESERVED - 1;	/* 1023 */
 int	ipport_reservedlow = 0;
 
 /* Variables dealing with random ephemeral port allocation. */
 int	ipport_randomized = 1;	/* user controlled via sysctl */
 int	ipport_randomcps = 10;	/* user controlled via sysctl */
 int	ipport_randomtime = 45;	/* user controlled via sysctl */
 int	ipport_stoprandom = 0;	/* toggled by ipport_tick */
 /* Incremented by in_pcbbind_setup() per non-UDP automatic port pick. */
 int	ipport_tcpallocs;
 /* NOTE(review): presumably a snapshot of ipport_tcpallocs -- confirm. */
 int	ipport_tcplastcount;
 
 /* Clamp var into the closed interval [min, max]. */
 #define RANGECHK(var, min, max) do {					\
 	if ((var) < (min))						\
 		(var) = (min);						\
 	else if ((var) > (max))						\
 		(var) = (max);						\
 } while (0)
 
 /*
  * Sysctl handler shared by the net.inet.ip.portrange first/last knobs.
  * Lets sysctl_handle_int() do the read or write, and on success forces
  * all six automatic port range variables back into their legal ranges.
  * Returns the result of sysctl_handle_int().
  */
 static int
 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
 {
 	int rc;
 
 	rc = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
 	if (rc != 0)
 		return (rc);
 
 	/* The low range must stay below IPPORT_RESERVED ... */
 	RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
 	RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
 	/* ... while the default and high ranges must stay at or above it. */
 	RANGECHK(ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX);
 	RANGECHK(ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX);
 	RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX);
 	RANGECHK(ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX);
 
 	return (rc);
 }
 
 #undef RANGECHK
 
 /*
  * The net.inet.ip.portrange sysctl subtree.  The six first/last knobs go
  * through sysctl_net_ipport_check() so that written values are clamped;
  * the remaining knobs are plain integers.
  */
 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
 
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
 	   &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
 	   &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
 	   &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
 	   &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
 	   &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
 	   &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
 	   CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedhigh, 0, "");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
 	   CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, "");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_RW,
 	   &ipport_randomized, 0, "Enable random port allocation");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomcps, CTLFLAG_RW,
 	   &ipport_randomcps, 0, "Maximum number of random port "
 	   "allocations before switching to a sequential one");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_RW,
 	   &ipport_randomtime, 0, "Minimum time to keep sequential port "
 	   "allocation before switching to a random one");
 
 /*
  * in_pcb.c: manage the Protocol Control Blocks.
  *
  * NOTE: It is assumed that most of these functions will be called with
  * the pcbinfo lock held, and often, the inpcb lock held, as these utility
  * functions often modify hash chains or addresses in pcbs.
  */
 
 /*
  * Allocate a PCB and associate it with the socket.
  * On success return with the PCB locked.
  *
  * The caller must hold the pcbinfo write lock (asserted below).
  * Returns 0 on success, ENOBUFS if the zone allocation fails, or the
  * error from mac_inpcb_init() / ipsec_init_policy() when those options
  * are compiled in; on those failures the PCB is returned to the zone.
  */
 int
 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
 {
 	struct inpcb *inp;
 	int error;
 
 	INP_INFO_WLOCK_ASSERT(pcbinfo);
 	error = 0;
 	inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
 	if (inp == NULL)
 		return (ENOBUFS);
 	/* Only the first inp_zero_size bytes of the PCB are cleared. */
 	bzero(inp, inp_zero_size);
 	inp->inp_pcbinfo = pcbinfo;
 	inp->inp_socket = so;
 	/* The PCB inherits the FIB number configured on the socket. */
+	inp->inp_inc.inc_fibnum = so->so_fibnum;
 #ifdef MAC
 	error = mac_inpcb_init(inp, M_NOWAIT);
 	if (error != 0)
 		goto out;
 	SOCK_LOCK(so);
 	mac_inpcb_create(so, inp);
 	SOCK_UNLOCK(so);
 #endif
 
 #ifdef IPSEC
 	error = ipsec_init_policy(so, &inp->inp_sp);
 	if (error != 0) {
 #ifdef MAC
 		/* Undo the MAC label set up above before bailing out. */
 		mac_inpcb_destroy(inp);
 #endif
 		goto out;
 	}
 #endif /*IPSEC*/
 #ifdef INET6
 	if (INP_SOCKAF(so) == AF_INET6) {
 		inp->inp_vflag |= INP_IPV6PROTO;
 		if (ip6_v6only)
 			inp->inp_flags |= IN6P_IPV6_V6ONLY;
 	}
 #endif
 	/* Publish the PCB on the protocol's list and attach it to the socket. */
 	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
 	pcbinfo->ipi_count++;
 	so->so_pcb = (caddr_t)inp;
 #ifdef INET6
 	if (ip6_auto_flowlabel)
 		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
 #endif
 	/* Success path: the PCB is handed back write-locked. */
 	INP_WLOCK(inp);
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	
 	/*
 	 * The label (and the free it guards) only exists when one of the
 	 * options that can fail after allocation is compiled in.  The
 	 * success path falls through here with error == 0.
 	 */
 #if defined(IPSEC) || defined(MAC)
 out:
 	if (error != 0)
 		uma_zfree(pcbinfo->ipi_zone, inp);
 #endif
 	return (error);
 }
 
 /*
  * Bind a PCB to a local address and/or port.
  *
  * The PCB must not already have a local port or address, otherwise
  * EINVAL is returned.  Address and port selection is delegated to
  * in_pcbbind_setup(); on success the PCB is entered into the hash.  If
  * the hash insertion fails the local address and port are reset and
  * EAGAIN is returned.  INP_ANONPORT is set when the caller did not ask
  * for a specific port.  Both the pcbinfo and the inpcb must be
  * write-locked by the caller.
  */
 int
 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
 {
 	int error, want_anon;
 
 	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	if (inp->inp_laddr.s_addr != INADDR_ANY || inp->inp_lport != 0)
 		return (EINVAL);
 
 	/*
 	 * Decide whether an anonymous port is wanted before calling
 	 * in_pcbbind_setup(), which may rewrite the port in *nam.
 	 */
 	want_anon = 0;
 	if (inp->inp_lport == 0) {
 		if (nam == NULL)
 			want_anon = 1;
 		else if (((struct sockaddr_in *)nam)->sin_port == 0)
 			want_anon = 1;
 	}
 
 	error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
 	    &inp->inp_lport, cred);
 	if (error != 0)
 		return (error);
 
 	if (in_pcbinshash(inp) != 0) {
 		/* Roll the binding back so the PCB looks unbound again. */
 		inp->inp_lport = 0;
 		inp->inp_laddr.s_addr = INADDR_ANY;
 		return (EAGAIN);
 	}
 
 	if (want_anon != 0)
 		inp->inp_flags |= INP_ANONPORT;
 	return (0);
 }
 
 /*
  * Set up a bind operation on a PCB, performing port allocation
  * as required, but do not actually modify the PCB. Callers can
  * either complete the bind by setting inp_laddr/inp_lport and
  * calling in_pcbinshash(), or they can just use the resulting
  * port and address to authorise the sending of a once-off packet.
  *
  * inp	  the PCB being bound (pcbinfo and inpcb locks must be held)
  * nam	  requested local address as a sockaddr_in, or NULL for "any"
  * laddrp  in: current local address; out: chosen local address
  * lportp  in: current local port; out: chosen local port (network order)
  * cred	  credential used for the jail and privilege checks
  *
  * Returns 0 on success, otherwise EINVAL, EADDRNOTAVAIL, EADDRINUSE,
  * EACCES or the error from priv_check_cred().
  *
  * On error, the values of *laddrp and *lportp are not changed.
  */
 int
 in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
     u_short *lportp, struct ucred *cred)
 {
 	struct socket *so = inp->inp_socket;
 	unsigned short *lastport;
 	struct sockaddr_in *sin;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct in_addr laddr;
 	u_short lport = 0;
 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
 	int error, prison = 0;
 	int dorandom;
 
 	/*
 	 * Because no actual state changes occur here, a write global write
 	 * lock on the pcbinfo isn't required.
 	 */
 	INP_INFO_LOCK_ASSERT(pcbinfo);
 	INP_LOCK_ASSERT(inp);
 
 	if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
 		return (EADDRNOTAVAIL);
 	laddr.s_addr = *laddrp;
 	if (nam != NULL && laddr.s_addr != INADDR_ANY)
 		return (EINVAL);
 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
 		wild = INPLOOKUP_WILDCARD;
 	if (nam) {
 		sin = (struct sockaddr_in *)nam;
 		if (nam->sa_len != sizeof (*sin))
 			return (EINVAL);
 #ifdef notdef
 		/*
 		 * We should check the family, but old programs
 		 * incorrectly fail to initialize it.
 		 */
 		if (sin->sin_family != AF_INET)
 			return (EAFNOSUPPORT);
 #endif
 		if (sin->sin_addr.s_addr != INADDR_ANY)
 			if (prison_ip(cred, 0, &sin->sin_addr.s_addr))
 				return(EINVAL);
 		if (sin->sin_port != *lportp) {
 			/* Don't allow the port to change. */
 			if (*lportp != 0)
 				return (EINVAL);
 			lport = sin->sin_port;
 		}
 		/* NB: lport is left as 0 if the port isn't being changed. */
 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
 			/*
 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
 			 * allow complete duplication of binding if
 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
 			 * and a multicast address is bound on both
 			 * new and duplicated sockets.
 			 */
 			if (so->so_options & SO_REUSEADDR)
 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
 			sin->sin_port = 0;		/* yech... */
 			bzero(&sin->sin_zero, sizeof(sin->sin_zero));
 			if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
 				return (EADDRNOTAVAIL);
 		}
 		laddr = sin->sin_addr;
 		if (lport) {
 			struct inpcb *t;
 			struct tcptw *tw;
 
 			/* GROSS */
 			if (ntohs(lport) <= ipport_reservedhigh &&
 			    ntohs(lport) >= ipport_reservedlow &&
 			    priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
 			    0))
 				return (EACCES);
 			if (jailed(cred))
 				prison = 1;
 			if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
 			    priv_check_cred(so->so_cred,
 			    PRIV_NETINET_REUSEPORT, 0) != 0) {
 				t = in_pcblookup_local(inp->inp_pcbinfo,
 				    sin->sin_addr, lport,
 				    prison ? 0 :  INPLOOKUP_WILDCARD);
 				/*
 				 * XXX
 				 * This entire block sorely needs a rewrite.
 				 */
 				if (t &&
 				    ((t->inp_vflag & INP_TIMEWAIT) == 0) &&
 				    (so->so_type != SOCK_STREAM ||
 				     ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
 				    (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
 				     ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
 				     (t->inp_socket->so_options &
 					 SO_REUSEPORT) == 0) &&
 				    (so->so_cred->cr_uid !=
 				     t->inp_socket->so_cred->cr_uid))
 					return (EADDRINUSE);
 			}
 			if (prison && prison_ip(cred, 0, &sin->sin_addr.s_addr))
 				return (EADDRNOTAVAIL);
 			t = in_pcblookup_local(pcbinfo, sin->sin_addr,
 			    lport, prison ? 0 : wild);
 			if (t && (t->inp_vflag & INP_TIMEWAIT)) {
 				/*
 				 * XXXRW: If an incpb has had its timewait
 				 * state recycled, we treat the address as
 				 * being in use (for now).  This is better
 				 * than a panic, but not desirable.
 				 *
 				 * The timewait state consulted must be that
 				 * of the conflicting pcb 't', which is the
 				 * one found to be in TIME_WAIT, not that of
 				 * the pcb being bound.
 				 */
 				tw = intotw(t);
 				if (tw == NULL ||
 				    (reuseport & tw->tw_so_options) == 0)
 					return (EADDRINUSE);
 			} else if (t &&
 			    (reuseport & t->inp_socket->so_options) == 0) {
 #ifdef INET6
 				if (ntohl(sin->sin_addr.s_addr) !=
 				    INADDR_ANY ||
 				    ntohl(t->inp_laddr.s_addr) !=
 				    INADDR_ANY ||
 				    INP_SOCKAF(so) ==
 				    INP_SOCKAF(t->inp_socket))
 #endif
 				return (EADDRINUSE);
 			}
 		}
 	}
 	if (*lportp != 0)
 		lport = *lportp;
 	if (lport == 0) {
 		u_short first, last, aux;
 		int count;
 
 		if (laddr.s_addr != INADDR_ANY)
 			if (prison_ip(cred, 0, &laddr.s_addr))
 				return (EINVAL);
 
 		if (inp->inp_flags & INP_HIGHPORT) {
 			first = ipport_hifirstauto;	/* sysctl */
 			last  = ipport_hilastauto;
 			lastport = &pcbinfo->ipi_lasthi;
 		} else if (inp->inp_flags & INP_LOWPORT) {
 			error = priv_check_cred(cred,
 			    PRIV_NETINET_RESERVEDPORT, 0);
 			if (error)
 				return error;
 			first = ipport_lowfirstauto;	/* 1023 */
 			last  = ipport_lowlastauto;	/* 600 */
 			lastport = &pcbinfo->ipi_lastlow;
 		} else {
 			first = ipport_firstauto;	/* sysctl */
 			last  = ipport_lastauto;
 			lastport = &pcbinfo->ipi_lastport;
 		}
 		/*
 		 * For UDP, use random port allocation as long as the user
 		 * allows it.  For TCP (and as of yet unknown) connections,
 		 * use random port allocation only if the user allows it AND
 		 * ipport_tick() allows it.
 		 */
 		if (ipport_randomized &&
 			(!ipport_stoprandom || pcbinfo == &udbinfo))
 			dorandom = 1;
 		else
 			dorandom = 0;
 		/*
 		 * It makes no sense to do random port allocation if
 		 * we have the only port available.
 		 * (This also keeps the modulus below from being zero.)
 		 */
 		if (first == last)
 			dorandom = 0;
 		/* Make sure to not include UDP packets in the count. */
 		if (pcbinfo != &udbinfo)
 			ipport_tcpallocs++;
 		/*
 		 * Simple check to ensure all ports are not used up causing
 		 * a deadlock here.
 		 */
 		if (first > last) {
 			aux = first;
 			first = last;
 			last = aux;
 		}
 
 		if (dorandom)
 			*lastport = first +
 				    (arc4random() % (last - first));
 
 		count = last - first;
 
 		/* Walk the range from *lastport, wrapping, until a free port. */
 		do {
 			if (count-- < 0)	/* completely used? */
 				return (EADDRNOTAVAIL);
 			++*lastport;
 			if (*lastport < first || *lastport > last)
 				*lastport = first;
 			lport = htons(*lastport);
 		} while (in_pcblookup_local(pcbinfo, laddr, lport,
 		    wild));
 	}
 	if (prison_ip(cred, 0, &laddr.s_addr))
 		return (EINVAL);
 	/* Only now, with every check passed, are the results stored. */
 	*laddrp = laddr.s_addr;
 	*lportp = lport;
 	return (0);
 }
 
 /*
  * Connect from a socket to a specified address.
  * Both address and port must be specified in argument nam.
  * If we don't have a local address for this socket yet,
  * then pick one.
  *
  * The caller must hold the pcbinfo write lock and the inpcb write lock
  * (both are asserted below).
  *
  * Returns 0 on success, the error reported by in_pcbconnect_setup(), or
  * EAGAIN if the pcb could not be entered into the hash tables.
  */
 int
 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
 {
 	u_short lport, fport;
 	in_addr_t laddr, faddr;
 	int anonport, error;
 
 	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * Work on copies of the local address and port; the pcb itself is
 	 * only modified once in_pcbconnect_setup() has succeeded.
 	 */
 	lport = inp->inp_lport;
 	laddr = inp->inp_laddr.s_addr;
 	/* Remember whether the local port was unset on entry. */
 	anonport = (lport == 0);
 	error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport,
 	    NULL, cred);
 	if (error)
 		return (error);
 
 	/* Do the initial binding of the local address if required. */
 	if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
 		inp->inp_lport = lport;
 		inp->inp_laddr.s_addr = laddr;
 		if (in_pcbinshash(inp) != 0) {
 			/* Undo the tentative binding before failing. */
 			inp->inp_laddr.s_addr = INADDR_ANY;
 			inp->inp_lport = 0;
 			return (EAGAIN);
 		}
 	}
 
 	/* Commit the remaining changes. */
 	inp->inp_lport = lport;
 	inp->inp_laddr.s_addr = laddr;
 	inp->inp_faddr.s_addr = faddr;
 	inp->inp_fport = fport;
 	/* The foreign address and port are part of the hash key. */
 	in_pcbrehash(inp);
 
 	if (anonport)
 		inp->inp_flags |= INP_ANONPORT;
 	return (0);
 }
 
 /*
  * Set up for a connect from a socket to the specified address.
  * On entry, *laddrp and *lportp should contain the current local
  * address and port for the PCB; these are updated to the values
  * that should be placed in inp_laddr and inp_lport to complete
  * the connect.
  *
  * On success, *faddrp and *fportp will be set to the remote address
  * and port. These are not updated in the error case.
  *
  * If the operation fails because the connection already exists,
  * *oinpp will be set to the PCB of that connection so that the
  * caller can decide to override it. In all other cases, *oinpp
  * is set to NULL.
  *
  * Returns 0 on success, otherwise:
  *   EINVAL        - nam->sa_len is not sizeof(struct sockaddr_in);
  *   EAFNOSUPPORT  - the address family is not AF_INET;
  *   EADDRNOTAVAIL - the destination port is zero, or the selected
  *                   multicast interface has no address on the list;
  *   ENETUNREACH   - no source address could be found for the destination;
  *   EADDRINUSE    - an identical connection already exists;
  *   or whatever in_pcbbind_setup() returns.
  */
 int
 in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
     in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp,
     struct inpcb **oinpp, struct ucred *cred)
 {
 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
 	struct in_ifaddr *ia;
 	struct sockaddr_in sa;
 	struct ucred *socred;
 	struct inpcb *oinp;
 	struct in_addr laddr, faddr;
 	u_short lport, fport;
 	int error;
 
 	/*
 	 * Because a global state change doesn't actually occur here, a read
 	 * lock is sufficient.
 	 */
 	INP_INFO_LOCK_ASSERT(inp->inp_pcbinfo);
 	INP_LOCK_ASSERT(inp);
 
 	if (oinpp != NULL)
 		*oinpp = NULL;
 	/* Validate the destination before touching anything else. */
 	if (nam->sa_len != sizeof (*sin))
 		return (EINVAL);
 	if (sin->sin_family != AF_INET)
 		return (EAFNOSUPPORT);
 	if (sin->sin_port == 0)
 		return (EADDRNOTAVAIL);
 	/* Work on local copies; the outputs are written only on success. */
 	laddr.s_addr = *laddrp;
 	lport = *lportp;
 	faddr = sin->sin_addr;
 	fport = sin->sin_port;
 	socred = inp->inp_socket->so_cred;
 	/*
 	 * A jailed socket without a local address is bound to the address
 	 * returned by prison_getip() through in_pcbbind_setup().
 	 */
 	if (laddr.s_addr == INADDR_ANY && jailed(socred)) {
 		bzero(&sa, sizeof(sa));
 		sa.sin_addr.s_addr = htonl(prison_getip(socred));
 		sa.sin_len = sizeof(sa);
 		sa.sin_family = AF_INET;
 		error = in_pcbbind_setup(inp, (struct sockaddr *)&sa,
 		    &laddr.s_addr, &lport, cred);
 		if (error)
 			return (error);
 	}
 	if (!TAILQ_EMPTY(&in_ifaddrhead)) {
 		/*
 		 * If the destination address is INADDR_ANY,
 		 * use the primary local address.
 		 * If the supplied address is INADDR_BROADCAST,
 		 * and the primary interface supports broadcast,
 		 * choose the broadcast address for that interface.
 		 */
 		if (faddr.s_addr == INADDR_ANY)
 			faddr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr;
 		else if (faddr.s_addr == (u_long)INADDR_BROADCAST &&
 		    (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags &
 		    IFF_BROADCAST))
 			faddr = satosin(&TAILQ_FIRST(
 			    &in_ifaddrhead)->ia_broadaddr)->sin_addr;
 	}
 	/* Still no local address: derive one from the destination. */
 	if (laddr.s_addr == INADDR_ANY) {
 		ia = (struct in_ifaddr *)0;
 		/*
 		 * If route is known our src addr is taken from the i/f,
 		 * else punt.
 		 *
 		 * Find out route to destination
 		 */
 		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
-			ia = ip_rtaddr(faddr);
+			ia = ip_rtaddr(faddr, inp->inp_inc.inc_fibnum);
 		/*
 		 * If we found a route, use the address corresponding to
 		 * the outgoing interface.
 		 * 
 		 * Otherwise assume faddr is reachable on a directly connected
 		 * network and try to find a corresponding interface to take
 		 * the source address from.
 		 */
 		if (ia == 0) {
 			bzero(&sa, sizeof(sa));
 			sa.sin_addr = faddr;
 			sa.sin_len = sizeof(sa);
 			sa.sin_family = AF_INET;
 
 			/* Try a destination-address match, then a network match. */
 			ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa)));
 			if (ia == 0)
 				ia = ifatoia(ifa_ifwithnet(sintosa(&sa)));
 			if (ia == 0)
 				return (ENETUNREACH);
 		}
 		/*
 		 * If the destination address is multicast and an outgoing
 		 * interface has been set as a multicast option, use the
 		 * address of that interface as our source address.
 		 */
 		if (IN_MULTICAST(ntohl(faddr.s_addr)) &&
 		    inp->inp_moptions != NULL) {
 			struct ip_moptions *imo;
 			struct ifnet *ifp;
 
 			imo = inp->inp_moptions;
 			if (imo->imo_multicast_ifp != NULL) {
 				ifp = imo->imo_multicast_ifp;
 				/* Find the first address configured on ifp. */
 				TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
 					if (ia->ia_ifp == ifp)
 						break;
 				if (ia == 0)
 					return (EADDRNOTAVAIL);
 			}
 		}
 		laddr = ia->ia_addr.sin_addr;
 	}
 
 	/*
 	 * Exact-match lookup only (wildcard argument is 0): fail if this
 	 * four-tuple is already in the hash, reporting the existing pcb
 	 * through *oinpp when the caller asked for it.
 	 */
 	oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport,
 	    0, NULL);
 	if (oinp != NULL) {
 		if (oinpp != NULL)
 			*oinpp = oinp;
 		return (EADDRINUSE);
 	}
 	/* No local port yet: let in_pcbbind_setup() choose one. */
 	if (lport == 0) {
 		error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport,
 		    cred);
 		if (error)
 			return (error);
 	}
 	/* Every check passed; publish the results to the caller. */
 	*laddrp = laddr.s_addr;
 	*lportp = lport;
 	*faddrp = faddr.s_addr;
 	*fportp = fport;
 	return (0);
 }
 
 /*
  * Drop the association with the foreign endpoint: reset the foreign port
  * and address to their wildcard values and rehash the pcb accordingly.
  * The pcbinfo write lock and the inpcb write lock must both be held.
  */
 void
 in_pcbdisconnect(struct inpcb *inp)
 {
 
 	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	/* Forget the peer ... */
 	inp->inp_fport = 0;
 	inp->inp_faddr.s_addr = INADDR_ANY;
 
 	/* ... and move to the hash bucket that matches the new key. */
 	in_pcbrehash(inp);
 }
 
 /*
  * In the old world order, in_pcbdetach() served two functions: to detach the
  * pcb from the socket/potentially free the socket, and to free the pcb
  * itself.  In the new world order, the protocol code is responsible for
  * managing the relationship with the socket, and this code simply frees the
  * pcb.
  */
 void
 in_pcbdetach(struct inpcb *inp)
 {
 
 	KASSERT(inp->inp_socket != NULL, ("in_pcbdetach: inp_socket == NULL"));
 	inp->inp_socket->so_pcb = NULL;
 	inp->inp_socket = NULL;
 }
 
 /*
  * Release an inpcb that has already been detached from its socket
  * (inp_socket must be NULL, see the KASSERT).
  *
  * The caller must hold the pcbinfo write lock and the inpcb write lock.
  * The inpcb lock is dropped here, just before the structure is handed
  * back to the pcbinfo's UMA zone; the pcbinfo lock is left held.
  */
 void
 in_pcbfree(struct inpcb *inp)
 {
 	struct inpcbinfo *ipi = inp->inp_pcbinfo;
 
 	KASSERT(inp->inp_socket == NULL, ("in_pcbfree: inp_socket != NULL"));
 
 	INP_INFO_WLOCK_ASSERT(ipi);
 	INP_WLOCK_ASSERT(inp);
 
 #ifdef IPSEC
 	ipsec4_delete_pcbpolicy(inp);
 #endif /*IPSEC*/
 	/* in_pcbremlists() bumps the generation count again. */
 	inp->inp_gencnt = ++ipi->ipi_gencnt;
 	in_pcbremlists(inp);
 	/* Release the IP options mbuf and the multicast options, if any. */
 	if (inp->inp_options)
 		(void)m_free(inp->inp_options);
 	if (inp->inp_moptions != NULL)
 		inp_freemoptions(inp->inp_moptions);
 	inp->inp_vflag = 0;
 	
 #ifdef MAC
 	mac_inpcb_destroy(inp);
 #endif
 	/* Unlock before the memory goes back to the zone. */
 	INP_WUNLOCK(inp);
 	uma_zfree(ipi->ipi_zone, inp);
 }
 
 /*
  * TCP keeps its inpcb around after the connection itself has been torn
  * down.  Such a pcb must be taken off the inpcb hashes so that it cannot
  * stop other inpcbs from binding to the same port/ip combination.
  *
  * The pcb is flagged INP_DROPPED; if it has a local port it is unlinked
  * from the connection hash and from its port list, and the port is
  * cleared.  The pcbinfo write lock and the inpcb write lock must be held.
  */
 void
 in_pcbdrop(struct inpcb *inp)
 {
 	struct inpcbport *phd;
 
 	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	inp->inp_vflag |= INP_DROPPED;
 
 	/* Without a local port the hash lists are left untouched. */
 	if (inp->inp_lport == 0)
 		return;
 
 	phd = inp->inp_phd;
 	LIST_REMOVE(inp, inp_hash);
 	LIST_REMOVE(inp, inp_portlist);
 
 	/* That was the last pcb on this port: release the port head too. */
 	if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
 		LIST_REMOVE(phd, phd_hash);
 		free(phd, M_PCB);
 	}
 	inp->inp_lport = 0;
 }
 
 /*
  * Common routines to return the socket addresses associated with inpcbs.
  *
  * in_sockaddr() allocates a zeroed sockaddr_in of type M_SONAME (using
  * M_WAITOK | M_ZERO), fills in the family, length, port and address, and
  * returns it as a generic sockaddr pointer.  The port is stored exactly
  * as passed in.
  */
 struct sockaddr *
 in_sockaddr(in_port_t port, struct in_addr *addr_p)
 {
 	struct sockaddr_in *sa_in;
 
 	MALLOC(sa_in, struct sockaddr_in *, sizeof(*sa_in), M_SONAME,
 	    M_WAITOK | M_ZERO);
 
 	sa_in->sin_len = sizeof(*sa_in);
 	sa_in->sin_family = AF_INET;
 	sa_in->sin_port = port;
 	sa_in->sin_addr = *addr_p;
 
 	return ((struct sockaddr *)sa_in);
 }
 
 int
 in_getsockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct inpcb *inp;
 	struct in_addr addr;
 	in_port_t port;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL"));
 
 	INP_RLOCK(inp);
 	port = inp->inp_lport;
 	addr = inp->inp_laddr;
 	INP_RUNLOCK(inp);
 
 	*nam = in_sockaddr(port, &addr);
 	return 0;
 }
 
 int
 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct inpcb *inp;
 	struct in_addr addr;
 	in_port_t port;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL"));
 
 	INP_RLOCK(inp);
 	port = inp->inp_fport;
 	addr = inp->inp_faddr;
 	INP_RUNLOCK(inp);
 
 	*nam = in_sockaddr(port, &addr);
 	return 0;
 }
 
 /*
  * Invoke notify(inp, errno) on every pcb in pcbinfo whose foreign address
  * equals faddr and which still has a socket attached.  With INET6, pcbs
  * that do not have INP_IPV4 set are skipped.
  *
  * The pcbinfo write lock is taken for the whole walk and each pcb is
  * write-locked before it is examined.  The list is walked with
  * LIST_FOREACH_SAFE, presumably because notify may remove the current
  * pcb from the list.
  */
 void
 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
     struct inpcb *(*notify)(struct inpcb *, int))
 {
 	struct inpcb *inp, *inp_temp;
 
 	INP_INFO_WLOCK(pcbinfo);
 	LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
 		INP_WLOCK(inp);
 #ifdef INET6
 		if ((inp->inp_vflag & INP_IPV4) == 0) {
 			INP_WUNLOCK(inp);
 			continue;
 		}
 #endif
 		if (inp->inp_faddr.s_addr != faddr.s_addr ||
 		    inp->inp_socket == NULL) {
 			INP_WUNLOCK(inp);
 			continue;
 		}
 		/*
 		 * The pcb is unlocked here only when notify returns non-NULL.
 		 * NOTE(review): a NULL return presumably means notify has
 		 * already released or destroyed the pcb - confirm against the
 		 * notify implementations.
 		 */
 		if ((*notify)(inp, errno))
 			INP_WUNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(pcbinfo);
 }
 
 /*
  * Remove every reference to interface ifp from the multicast options of
  * the IPv4 pcbs in pcbinfo: ifp is cleared as the selected outgoing
  * multicast interface, and group memberships joined through ifp are
  * dropped.
  *
  * The pcbinfo read lock is held for the walk and each pcb is write-locked
  * while its multicast options are edited.
  */
 void
 in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
 {
 	struct inpcb *inp;
 	struct ip_moptions *imo;
 	int i, gap;
 
 	INP_INFO_RLOCK(pcbinfo);
 	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
 		INP_WLOCK(inp);
 		imo = inp->inp_moptions;
 		if ((inp->inp_vflag & INP_IPV4) &&
 		    imo != NULL) {
 			/*
 			 * Unselect the outgoing interface if it is being
 			 * detached.
 			 */
 			if (imo->imo_multicast_ifp == ifp)
 				imo->imo_multicast_ifp = NULL;
 
 			/*
 			 * Drop multicast group membership if we joined
 			 * through the interface being detached.
 			 *
 			 * The array is compacted in place: gap counts the
 			 * entries dropped so far, and each surviving entry is
 			 * moved down by that many slots.
 			 */
 			for (i = 0, gap = 0; i < imo->imo_num_memberships;
 			    i++) {
 				if (imo->imo_membership[i]->inm_ifp == ifp) {
 					in_delmulti(imo->imo_membership[i]);
 					gap++;
 				} else if (gap != 0)
 					imo->imo_membership[i - gap] =
 					    imo->imo_membership[i];
 			}
 			imo->imo_num_memberships -= gap;
 		}
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(pcbinfo);
 }
 
 /*
  * Lookup a PCB based on the local address and port.
  *
  * With wild_okay == 0 only an unconnected pcb (foreign address
  * INADDR_ANY) whose local address and port match exactly is returned.
  * Otherwise every pcb on the port is scored and the one with the lowest
  * "wildcard" cost is returned; a cost of zero ends the search at once.
  *
  * Returns the matching pcb, or NULL if there is none.  The pcbinfo lock
  * must be held (asserted below).
  */
 /* Extra cost charged to a pcb that also carries INP_IPV6 (see below). */
 #define INP_LOOKUP_MAPPED_PCB_COST	3
 struct inpcb *
 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
     u_int lport_arg, int wild_okay)
 {
 	struct inpcb *inp;
 	/*
 	 * matchwild is the best (lowest) cost seen so far.  Its initial
 	 * value is one more than the highest cost the loop below can assign,
 	 * so the first candidate always replaces it.
 	 */
 #ifdef INET6
 	int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST;
 #else
 	int matchwild = 3;
 #endif
 	int wildcard;
 	u_short lport = lport_arg;
 
 	INP_INFO_LOCK_ASSERT(pcbinfo);
 
 	if (!wild_okay) {
 		struct inpcbhead *head;
 		/*
 		 * Look for an unconnected (wildcard foreign addr) PCB that
 		 * matches the local address and port we're looking for.
 		 */
 		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
 		    0, pcbinfo->ipi_hashmask)];
 		LIST_FOREACH(inp, head, inp_hash) {
 #ifdef INET6
 			if ((inp->inp_vflag & INP_IPV4) == 0)
 				continue;
 #endif
 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
 			    inp->inp_laddr.s_addr == laddr.s_addr &&
 			    inp->inp_lport == lport) {
 				/*
 				 * Found.
 				 */
 				return (inp);
 			}
 		}
 		/*
 		 * Not found.
 		 */
 		return (NULL);
 	} else {
 		struct inpcbporthead *porthash;
 		struct inpcbport *phd;
 		struct inpcb *match = NULL;
 		/*
 		 * Best fit PCB lookup.
 		 *
 		 * First see if this local port is in use by looking on the
 		 * port hash list.
 		 */
 		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
 		    pcbinfo->ipi_porthashmask)];
 		LIST_FOREACH(phd, porthash, phd_hash) {
 			if (phd->phd_port == lport)
 				break;
 		}
 		if (phd != NULL) {
 			/*
 			 * Port is in use by one or more PCBs. Look for best
 			 * fit.
 			 */
 			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
 				wildcard = 0;
 #ifdef INET6
 				if ((inp->inp_vflag & INP_IPV4) == 0)
 					continue;
 				/*
 				 * We never select the PCB that has
 				 * INP_IPV6 flag and is bound to :: if
 				 * we have another PCB which is bound
 				 * to 0.0.0.0.  If a PCB has the
 				 * INP_IPV6 flag, then we set its cost
 				 * higher than IPv4 only PCBs.
 				 *
 				 * Note that the case only happens
 				 * when a socket is bound to ::, under
 				 * the condition that the use of the
 				 * mapped address is allowed.
 				 */
 				if ((inp->inp_vflag & INP_IPV6) != 0)
 					wildcard += INP_LOOKUP_MAPPED_PCB_COST;
 #endif
 				/* A connected pcb costs one point. */
 				if (inp->inp_faddr.s_addr != INADDR_ANY)
 					wildcard++;
 				/*
 				 * Local address: two different specific
 				 * addresses never match; a specific address
 				 * on one side only costs one point.
 				 */
 				if (inp->inp_laddr.s_addr != INADDR_ANY) {
 					if (laddr.s_addr == INADDR_ANY)
 						wildcard++;
 					else if (inp->inp_laddr.s_addr != laddr.s_addr)
 						continue;
 				} else {
 					if (laddr.s_addr != INADDR_ANY)
 						wildcard++;
 				}
 				/* Keep the cheapest candidate seen so far. */
 				if (wildcard < matchwild) {
 					match = inp;
 					matchwild = wildcard;
 					if (matchwild == 0) {
 						break;
 					}
 				}
 			}
 		}
 		return (match);
 	}
 }
 #undef INP_LOOKUP_MAPPED_PCB_COST
 
 /*
  * Lookup PCB in hash list.
  *
  * An exact match on (faddr, fport, laddr, lport) is tried first.  If
  * that fails and wildcard is non-zero, unconnected pcbs on lport are
  * considered: one bound to laddr is returned immediately, otherwise one
  * bound to INADDR_ANY is returned, with (under INET6) a pcb whose socket
  * is not AF_INET6 preferred over one whose socket is.
  *
  * ifp, when non-NULL and of type IFT_FAITH, restricts wildcard matches
  * to pcbs that have INP_FAITH set.
  *
  * Returns the pcb found, or NULL.  The pcbinfo lock must be held
  * (asserted below).
  */
 struct inpcb *
 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
     struct ifnet *ifp)
 {
 	struct inpcbhead *head;
 	struct inpcb *inp;
 	u_short fport = fport_arg, lport = lport_arg;
 
 	INP_INFO_LOCK_ASSERT(pcbinfo);
 
 	/*
 	 * First look for an exact match.
 	 */
 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
 	    pcbinfo->ipi_hashmask)];
 	LIST_FOREACH(inp, head, inp_hash) {
 #ifdef INET6
 		if ((inp->inp_vflag & INP_IPV4) == 0)
 			continue;
 #endif
 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
 		    inp->inp_laddr.s_addr == laddr.s_addr &&
 		    inp->inp_fport == fport &&
 		    inp->inp_lport == lport)
 			return (inp);
 	}
 
 	/*
 	 * Then look for a wildcard match, if requested.
 	 */
 	if (wildcard) {
 		struct inpcb *local_wild = NULL;
 #ifdef INET6
 		struct inpcb *local_wild_mapped = NULL;
 #endif
 
 		/* Unconnected pcbs hash with INADDR_ANY and foreign port 0. */
 		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
 		    0, pcbinfo->ipi_hashmask)];
 		LIST_FOREACH(inp, head, inp_hash) {
 #ifdef INET6
 			if ((inp->inp_vflag & INP_IPV4) == 0)
 				continue;
 #endif
 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
 			    inp->inp_lport == lport) {
 				if (ifp && ifp->if_type == IFT_FAITH &&
 				    (inp->inp_flags & INP_FAITH) == 0)
 					continue;
 				/* A specific local address match wins outright. */
 				if (inp->inp_laddr.s_addr == laddr.s_addr)
 					return (inp);
 				else if (inp->inp_laddr.s_addr == INADDR_ANY) {
 					/*
 					 * Remember the wildcard-bound pcb;
 					 * later entries in the bucket
 					 * overwrite earlier ones.
 					 */
 #ifdef INET6
 					if (INP_CHECK_SOCKAF(inp->inp_socket,
 							     AF_INET6))
 						local_wild_mapped = inp;
 					else
 #endif
 						local_wild = inp;
 				}
 			}
 		}
 #ifdef INET6
 		if (local_wild == NULL)
 			return (local_wild_mapped);
 #endif
 		return (local_wild);
 	}
 	return (NULL);
 }
 
 /*
  * Insert PCB onto various hash lists.
  *
  * The pcb is linked onto the connection hash bucket selected by its
  * foreign address, local port and foreign port, and onto the list of
  * pcbs that share its local port.  A per-port head (struct inpcbport) is
  * allocated if no pcb is using the port yet.
  *
  * The pcbinfo write lock and the inpcb write lock must be held.
  * Returns 0 on success or ENOBUFS if the per-port head cannot be
  * allocated; in that case no list has been modified.
  */
 int
 in_pcbinshash(struct inpcb *inp)
 {
 	struct inpcbhead *pcbhash;
 	struct inpcbporthead *pcbporthash;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct inpcbport *phd;
 	u_int32_t hashkey_faddr;
 
 	INP_INFO_WLOCK_ASSERT(pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	/* For an IPv6 pcb the last 32 bits of the foreign address are hashed. */
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6)
 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
 	else
 #endif /* INET6 */
 	hashkey_faddr = inp->inp_faddr.s_addr;
 
 	pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
 		 inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
 
 	pcbporthash = &pcbinfo->ipi_porthashbase[
 	    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
 
 	/*
 	 * Go through port list and look for a head for this lport.
 	 */
 	LIST_FOREACH(phd, pcbporthash, phd_hash) {
 		if (phd->phd_port == inp->inp_lport)
 			break;
 	}
 	/*
 	 * If none exists, malloc one and tack it on.
 	 */
 	if (phd == NULL) {
 		/* M_NOWAIT: the allocation may fail instead of sleeping. */
 		MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT);
 		if (phd == NULL) {
 			return (ENOBUFS); /* XXX */
 		}
 		phd->phd_port = inp->inp_lport;
 		LIST_INIT(&phd->phd_pcblist);
 		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
 	}
 	inp->inp_phd = phd;
 	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
 	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
 	return (0);
 }
 
 /*
  * Re-file a PCB under the correct hash bucket after { faddr, fport } have
  * changed.  NOTE: a change of lport is not handled here (the hashed port
  * list would need updating as well), so lport must stay fixed once
  * in_pcbinshash() has been called.
  *
  * The pcbinfo write lock and the inpcb write lock must be held.
  */
 void
 in_pcbrehash(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcbhead *bucket;
 	u_int32_t hashkey_faddr;
 
 	pcbinfo = inp->inp_pcbinfo;
 	INP_INFO_WLOCK_ASSERT(pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	/* Default to the IPv4 foreign address; IPv6 pcbs override it. */
 	hashkey_faddr = inp->inp_faddr.s_addr;
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6)
 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
 #endif /* INET6 */
 
 	bucket = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
 	    inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
 
 	/* Unlink from the old bucket, then link at the head of the new one. */
 	LIST_REMOVE(inp, inp_hash);
 	LIST_INSERT_HEAD(bucket, inp, inp_hash);
 }
 
 /*
  * Take a PCB off every list it is on: the connection hash and the
  * per-port list (only when it has a local port), and the pcbinfo's list
  * of all pcbs.  The generation count is advanced and the pcb count is
  * decremented.
  *
  * The pcbinfo write lock and the inpcb write lock must be held.
  */
 void
 in_pcbremlists(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcbport *phd;
 
 	pcbinfo = inp->inp_pcbinfo;
 	INP_INFO_WLOCK_ASSERT(pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 
 	if (inp->inp_lport != 0) {
 		phd = inp->inp_phd;
 		LIST_REMOVE(inp, inp_hash);
 		LIST_REMOVE(inp, inp_portlist);
 		/* Last pcb on this port: release the per-port head too. */
 		if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
 			LIST_REMOVE(phd, phd_hash);
 			free(phd, M_PCB);
 		}
 	}
 
 	LIST_REMOVE(inp, inp_list);
 	pcbinfo->ipi_count--;
 }
 
 /*
  * A set label operation has occurred at the socket layer, propagate the
  * label change into the in_pcb for the socket.
  *
  * This is a no-op unless the kernel is built with MAC.  The inpcb write
  * lock is taken first and the socket lock is nested inside it.
  */
 void
 in_pcbsosetlabel(struct socket *so)
 {
 #ifdef MAC
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL"));
 
 	INP_WLOCK(inp);
 	SOCK_LOCK(so);
 	mac_inpcb_sosetlabel(so, inp);
 	SOCK_UNLOCK(so);
 	INP_WUNLOCK(inp);
 #endif
 }
 
 /*
  * ipport_tick runs once per second and decides whether random port
  * allocation may continue.  When more than ipport_randomcps ports were
  * allocated during the last second, allocation falls back to sequential
  * for ipport_randomtime seconds.  Random allocation resumes only after
  * the rate has stayed at or below ipport_randomcps for that long.
  *
  * The callout re-arms itself for hz ticks from now; xtp is unused.
  */
 void
 ipport_tick(void *xtp)
 {
 
 	if (ipport_tcpallocs > ipport_tcplastcount + ipport_randomcps) {
 		/* Too many allocations: (re)start the hold-off period. */
 		ipport_stoprandom = ipport_randomtime;
 	} else if (ipport_stoprandom > 0) {
 		/* Quiet second: count the hold-off down. */
 		ipport_stoprandom--;
 	}
 
 	/* Remember the running total for the next comparison. */
 	ipport_tcplastcount = ipport_tcpallocs;
 	callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
 }
 
 /*
  * Function wrapper around the INP_WLOCK() macro: acquire the write lock
  * on inp.
  */
 void
 inp_wlock(struct inpcb *inp)
 {
 
 	INP_WLOCK(inp);
 }
 
 /*
  * Function wrapper around the INP_WUNLOCK() macro: release the write
  * lock on inp.
  */
 void
 inp_wunlock(struct inpcb *inp)
 {
 
 	INP_WUNLOCK(inp);
 }
 
 /*
  * Function wrapper around the INP_RLOCK() macro: acquire the read lock
  * on inp.
  */
 void
 inp_rlock(struct inpcb *inp)
 {
 
 	INP_RLOCK(inp);
 }
 
 /*
  * Function wrapper around the INP_RUNLOCK() macro: release the read lock
  * on inp.
  */
 void
 inp_runlock(struct inpcb *inp)
 {
 
 	INP_RUNLOCK(inp);
 }
 
 #ifdef INVARIANTS
 /*
  * Function wrapper around INP_WLOCK_ASSERT(), compiled only under
  * INVARIANTS.  Despite the generic name, it is the write lock that is
  * asserted.
  */
 void
 inp_lock_assert(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 }
 
 /*
  * Function wrapper around INP_UNLOCK_ASSERT(), compiled only under
  * INVARIANTS.
  */
 void
 inp_unlock_assert(struct inpcb *inp)
 {
 
 	INP_UNLOCK_ASSERT(inp);
 }
 #endif
 
 #ifdef DDB
 /*
  * Emit indent space characters on the debugger console; nothing is
  * printed when indent is zero or negative.
  */
 static void
 db_print_indent(int indent)
 {
 
 	while (indent-- > 0)
 		db_printf(" ");
 }
 
 /*
  * Print an in_conninfo for the debugger: a "<name> at <pointer>" heading
  * at the given indent, followed by the local and foreign address/port
  * pairs indented two columns further.  Ports are converted to host byte
  * order for display.
  */
 static void
 db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
 {
 	char faddr_str[48], laddr_str[48];
 
 	db_print_indent(indent);
 	db_printf("%s at %p\n", name, inc);
 
 	indent += 2;
 
 	/*
 	 * NOTE(review): inc_flags is compared against the literal 1 to
 	 * select the IPv6 formatting path - presumably the value of the
 	 * "is IPv6" flag; confirm against the in_conninfo definition.
 	 */
 #ifdef INET6
 	if (inc->inc_flags == 1) {
 		/* IPv6. */
 		ip6_sprintf(laddr_str, &inc->inc6_laddr);
 		ip6_sprintf(faddr_str, &inc->inc6_faddr);
 	} else {
 #endif
 		/* IPv4. */
 		inet_ntoa_r(inc->inc_laddr, laddr_str);
 		inet_ntoa_r(inc->inc_faddr, faddr_str);
 #ifdef INET6
 	}
 #endif
 	db_print_indent(indent);
 	db_printf("inc_laddr %s   inc_lport %u\n", laddr_str,
 	    ntohs(inc->inc_lport));
 	db_print_indent(indent);
 	db_printf("inc_faddr %s   inc_fport %u\n", faddr_str,
 	    ntohs(inc->inc_fport));
 }
 
 /*
  * Print the symbolic names of the bits set in inp_flags as a
  * comma-separated list (no trailing newline).  Each name is preceded by
  * ", " when an earlier name has already been printed, so the separator
  * must always come before the name in the format string.
  */
 static void
 db_print_inpflags(int inp_flags)
 {
 	int comma;
 
 	comma = 0;
 	if (inp_flags & INP_RECVOPTS) {
 		db_printf("%sINP_RECVOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_RECVRETOPTS) {
 		db_printf("%sINP_RECVRETOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_RECVDSTADDR) {
 		db_printf("%sINP_RECVDSTADDR", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_HDRINCL) {
 		db_printf("%sINP_HDRINCL", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_HIGHPORT) {
 		db_printf("%sINP_HIGHPORT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_LOWPORT) {
 		db_printf("%sINP_LOWPORT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_ANONPORT) {
 		db_printf("%sINP_ANONPORT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_RECVIF) {
 		db_printf("%sINP_RECVIF", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_MTUDISC) {
 		db_printf("%sINP_MTUDISC", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_FAITH) {
 		db_printf("%sINP_FAITH", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_RECVTTL) {
 		db_printf("%sINP_RECVTTL", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_DONTFRAG) {
 		db_printf("%sINP_DONTFRAG", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_IPV6_V6ONLY) {
 		db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_PKTINFO) {
 		db_printf("%sIN6P_PKTINFO", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_HOPLIMIT) {
 		db_printf("%sIN6P_HOPLIMIT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_HOPOPTS) {
 		db_printf("%sIN6P_HOPOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_DSTOPTS) {
 		db_printf("%sIN6P_DSTOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_RTHDR) {
 		db_printf("%sIN6P_RTHDR", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_RTHDRDSTOPTS) {
 		db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_TCLASS) {
 		db_printf("%sIN6P_TCLASS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_AUTOFLOWLABEL) {
 		db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_RFC2292) {
 		db_printf("%sIN6P_RFC2292", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_MTU) {
 		/*
 		 * The separator goes before the name, as for every other
 		 * flag; placing it after the name glued IN6P_MTU onto the
 		 * previous entry and left a dangling ", " at the end.
 		 */
 		db_printf("%sIN6P_MTU", comma ? ", " : "");
 		comma = 1;
 	}
 }
 
 /*
  * Print the symbolic names of the bits set in inp_vflag as a
  * comma-separated list (no trailing newline).  Each name is preceded by
  * ", " when an earlier name has already been printed.
  */
 static void
 db_print_inpvflag(u_char inp_vflag)
 {
 	int comma;
 
 	comma = 0;
 	if (inp_vflag & INP_IPV4) {
 		db_printf("%sINP_IPV4", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_vflag & INP_IPV6) {
 		db_printf("%sINP_IPV6", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_vflag & INP_IPV6PROTO) {
 		db_printf("%sINP_IPV6PROTO", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_vflag & INP_TIMEWAIT) {
 		db_printf("%sINP_TIMEWAIT", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_vflag & INP_ONESBCAST) {
 		db_printf("%sINP_ONESBCAST", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_vflag & INP_DROPPED) {
 		db_printf("%sINP_DROPPED", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_vflag & INP_SOCKREF) {
 		db_printf("%sINP_SOCKREF", comma ? ", " : "");
 		comma  = 1;
 	}
 }
 
 /*
  * Dump an inpcb for the debugger.  A "<name> at <pointer>" heading is
  * printed at the given indent; the fields follow, indented two columns
  * further: flow, connection info, owner pointers, flags (numeric and
  * symbolic), TTL/protocol, the IPv4 or IPv6 option pointers depending on
  * INP_IPV6, and finally the port head and generation count.
  */
 void
 db_print_inpcb(struct inpcb *inp, const char *name, int indent)
 {
 
 	db_print_indent(indent);
 	db_printf("%s at %p\n", name, inp);
 
 	indent += 2;
 
 	db_print_indent(indent);
 	db_printf("inp_flow: 0x%x\n", inp->inp_flow);
 
 	db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);
 
 	db_print_indent(indent);
 	db_printf("inp_ppcb: %p   inp_pcbinfo: %p   inp_socket: %p\n",
 	    inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket);
 
 	db_print_indent(indent);
 	db_printf("inp_label: %p   inp_flags: 0x%x (",
 	   inp->inp_label, inp->inp_flags);
 	db_print_inpflags(inp->inp_flags);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("inp_sp: %p   inp_vflag: 0x%x (", inp->inp_sp,
 	    inp->inp_vflag);
 	db_print_inpvflag(inp->inp_vflag);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("inp_ip_ttl: %d   inp_ip_p: %d   inp_ip_minttl: %d\n",
 	    inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl);
 
 	db_print_indent(indent);
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6) {
 		db_printf("in6p_options: %p   in6p_outputopts: %p   "
 		    "in6p_moptions: %p\n", inp->in6p_options,
 		    inp->in6p_outputopts, inp->in6p_moptions);
 		/*
 		 * NOTE(review): this second line is printed without a
 		 * preceding db_print_indent(), unlike every other line
 		 * produced by this function.
 		 */
 		db_printf("in6p_icmp6filt: %p   in6p_cksum %d   "
 		    "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum,
 		    inp->in6p_hops);
 	} else
 #endif
 	{
 		db_printf("inp_ip_tos: %d   inp_ip_options: %p   "
 		    "inp_ip_moptions: %p\n", inp->inp_ip_tos,
 		    inp->inp_options, inp->inp_moptions);
 	}
 
 	db_print_indent(indent);
 	db_printf("inp_phd: %p   inp_gencnt: %ju\n", inp->inp_phd,
 	    (uintmax_t)inp->inp_gencnt);
 }
 
 /*
  * DDB command "show inpcb <addr>": treat addr as a pointer to a struct
  * inpcb and dump it with db_print_inpcb().  The address is used as given,
  * without validation; a usage message is printed if none was supplied.
  */
 DB_SHOW_COMMAND(inpcb, db_show_inpcb)
 {
 	struct inpcb *inp;
 
 	if (!have_addr) {
 		db_printf("usage: show inpcb <addr>\n");
 		return;
 	}
 	inp = (struct inpcb *)addr;
 
 	db_print_inpcb(inp, "inpcb", 0);
 }
 #endif
Index: head/sys/netinet/in_pcb.h
===================================================================
--- head/sys/netinet/in_pcb.h	(revision 178887)
+++ head/sys/netinet/in_pcb.h	(revision 178888)
@@ -1,446 +1,446 @@
 /*-
  * Copyright (c) 1982, 1986, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_pcb.h	8.1 (Berkeley) 6/10/93
  * $FreeBSD$
  */
 
 #ifndef _NETINET_IN_PCB_H_
 #define _NETINET_IN_PCB_H_
 
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_rwlock.h>
 
 #include <net/route.h>
 
 #ifdef _KERNEL
 #include <sys/rwlock.h>
 #endif
 
 #define	in6pcb		inpcb	/* for KAME src sync over BSD*'s */
 #define	in6p_sp		inp_sp	/* for KAME src sync over BSD*'s */
 struct inpcbpolicy;
 
 /*
  * Struct inpcb is the ommon structure pcb for the Internet Protocol
  * implementation.
  *
  * Pointers to local and foreign host table entries, local and foreign socket
  * numbers, and pointers up (to a socket structure) and down (to a
  * protocol-specific control block) are stored here.
  */
 LIST_HEAD(inpcbhead, inpcb);
 LIST_HEAD(inpcbporthead, inpcbport);
 typedef	u_quad_t	inp_gen_t;
 
 /*
  * PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet.
  * So, AF_INET6 null laddr is also used as AF_INET null laddr, by utilizing
  * the following structure.
  */
 struct in_addr_4in6 {
 	u_int32_t	ia46_pad32[3];
 	struct	in_addr	ia46_addr4;
 };
 
 /*
  * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553.  in_conninfo has
  * some extra padding to accomplish this.
  */
 struct in_endpoints {
 	u_int16_t	ie_fport;		/* foreign port */
 	u_int16_t	ie_lport;		/* local port */
 	/* protocol dependent part, local and foreign addr */
 	union {
 		/* foreign host table entry */
 		struct	in_addr_4in6 ie46_foreign;
 		struct	in6_addr ie6_foreign;
 	} ie_dependfaddr;
 	union {
 		/* local host table entry */
 		struct	in_addr_4in6 ie46_local;
 		struct	in6_addr ie6_local;
 	} ie_dependladdr;
 #define	ie_faddr	ie_dependfaddr.ie46_foreign.ia46_addr4
 #define	ie_laddr	ie_dependladdr.ie46_local.ia46_addr4
 #define	ie6_faddr	ie_dependfaddr.ie6_foreign
 #define	ie6_laddr	ie_dependladdr.ie6_local
 };
 
 /*
  * XXX The defines for inc_* are hacks and should be changed to direct
  * references.
  */
 struct in_conninfo {
 	u_int8_t	inc_flags;
 	u_int8_t	inc_len;
-	u_int16_t	inc_pad;	/* XXX alignment for in_endpoints */
+	u_int16_t	inc_fibnum;	/* XXX was pad, 16 bits is plenty */
 	/* protocol dependent part */
 	struct	in_endpoints inc_ie;
 };
 #define inc_isipv6	inc_flags	/* temp compatability */
 #define	inc_fport	inc_ie.ie_fport
 #define	inc_lport	inc_ie.ie_lport
 #define	inc_faddr	inc_ie.ie_faddr
 #define	inc_laddr	inc_ie.ie_laddr
 #define	inc6_faddr	inc_ie.ie6_faddr
 #define	inc6_laddr	inc_ie.ie6_laddr
 
 struct	icmp6_filter;
 
 struct inpcb {
 	LIST_ENTRY(inpcb) inp_hash;	/* hash list */
 	LIST_ENTRY(inpcb) inp_list;	/* list for all PCBs of this proto */
 	void	*inp_ppcb;		/* pointer to per-protocol pcb */
 	struct	inpcbinfo *inp_pcbinfo;	/* PCB list info */
 	struct	socket *inp_socket;	/* back pointer to socket */
 
 	u_int32_t	inp_flow;
 	int	inp_flags;		/* generic IP/datagram flags */
 
 	u_char	inp_vflag;		/* IP version flag (v4/v6) */
 #define	INP_IPV4	0x1
 #define	INP_IPV6	0x2
 #define	INP_IPV6PROTO	0x4		/* opened under IPv6 protocol */
 #define	INP_TIMEWAIT	0x8		/* .. probably doesn't go here */
 #define	INP_ONESBCAST	0x10		/* send all-ones broadcast */
 #define	INP_DROPPED	0x20		/* protocol drop flag */
 #define	INP_SOCKREF	0x40		/* strong socket reference */
 	u_char	inp_ip_ttl;		/* time to live proto */
 	u_char	inp_ip_p;		/* protocol proto */
 	u_char	inp_ip_minttl;		/* minimum TTL or drop */
 	uint32_t inp_ispare1;		/* connection id / queue id */
 	void	*inp_pspare[2];		/* rtentry / general use */
 
 	/* Local and foreign ports, local and foreign addr. */
 	struct	in_conninfo inp_inc;
 
 					/* list for this PCB's local port */
 	struct	label *inp_label;	/* MAC label */
 	struct	inpcbpolicy *inp_sp;    /* for IPSEC */
 
 	/* Protocol-dependent part; options. */
 	struct {
 		u_char	inp4_ip_tos;		/* type of service proto */
 		struct	mbuf *inp4_options;	/* IP options */
 		struct	ip_moptions *inp4_moptions; /* IP multicast options */
 	} inp_depend4;
 #define	inp_fport	inp_inc.inc_fport
 #define	inp_lport	inp_inc.inc_lport
 #define	inp_faddr	inp_inc.inc_faddr
 #define	inp_laddr	inp_inc.inc_laddr
 #define	inp_ip_tos	inp_depend4.inp4_ip_tos
 #define	inp_options	inp_depend4.inp4_options
 #define	inp_moptions	inp_depend4.inp4_moptions
 	struct {
 		/* IP options */
 		struct	mbuf *inp6_options;
 		/* IP6 options for outgoing packets */
 		struct	ip6_pktopts *inp6_outputopts;
 		/* IP multicast options */
 		struct	ip6_moptions *inp6_moptions;
 		/* ICMPv6 code type filter */
 		struct	icmp6_filter *inp6_icmp6filt;
 		/* IPV6_CHECKSUM setsockopt */
 		int	inp6_cksum;
 		short	inp6_hops;
 	} inp_depend6;
 	LIST_ENTRY(inpcb) inp_portlist;
 	struct	inpcbport *inp_phd;	/* head of this list */
 #define inp_zero_size offsetof(struct inpcb, inp_gencnt)
 	inp_gen_t	inp_gencnt;	/* generation count of this instance */
 	struct rwlock	inp_lock;
 
 #define	in6p_faddr	inp_inc.inc6_faddr
 #define	in6p_laddr	inp_inc.inc6_laddr
 #define	in6p_hops	inp_depend6.inp6_hops	/* default hop limit */
 #define	in6p_ip6_nxt	inp_ip_p
 #define	in6p_flowinfo	inp_flow
 #define	in6p_vflag	inp_vflag
 #define	in6p_options	inp_depend6.inp6_options
 #define	in6p_outputopts	inp_depend6.inp6_outputopts
 #define	in6p_moptions	inp_depend6.inp6_moptions
 #define	in6p_icmp6filt	inp_depend6.inp6_icmp6filt
 #define	in6p_cksum	inp_depend6.inp6_cksum
 #define	in6p_flags	inp_flags  /* for KAME src sync over BSD*'s */
 #define	in6p_socket	inp_socket  /* for KAME src sync over BSD*'s */
 #define	in6p_lport	inp_lport  /* for KAME src sync over BSD*'s */
 #define	in6p_fport	inp_fport  /* for KAME src sync over BSD*'s */
 #define	in6p_ppcb	inp_ppcb  /* for KAME src sync over BSD*'s */
 };
 /*
  * The range of the generation count, as used in this implementation, is 9e19.
  * We would have to create 300 billion connections per second for this number
  * to roll over in a year.  This seems sufficiently unlikely that we simply
  * don't concern ourselves with that possibility.
  */
 
 /*
  * Interface exported to userland by various protocols which use inpcbs.  Hack
  * alert -- only define if struct xsocket is in scope.
  */
 #ifdef _SYS_SOCKETVAR_H_
 struct	xinpcb {
 	size_t	xi_len;		/* length of this structure */
 	struct	inpcb xi_inp;
 	struct	xsocket xi_socket;
 	u_quad_t	xi_alignment_hack;
 };
 
 struct	xinpgen {
 	size_t	xig_len;	/* length of this structure */
 	u_int	xig_count;	/* number of PCBs at this time */
 	inp_gen_t xig_gen;	/* generation count at this time */
 	so_gen_t xig_sogen;	/* socket generation count at this time */
 };
 #endif /* _SYS_SOCKETVAR_H_ */
 
 struct inpcbport {
 	LIST_ENTRY(inpcbport) phd_hash;
 	struct inpcbhead phd_pcblist;
 	u_short phd_port;
 };
 
 /*
  * Global data structure for each high-level protocol (UDP, TCP, ...) in both
  * IPv4 and IPv6.  Holds inpcb lists and information for managing them.
  */
 struct inpcbinfo {
 	/*
 	 * Global list of inpcbs on the protocol.
 	 */
 	struct inpcbhead	*ipi_listhead;
 	u_int			 ipi_count;
 
 	/*
 	 * Global hash of inpcbs, hashed by local and foreign addresses and
 	 * port numbers.
 	 */
 	struct inpcbhead	*ipi_hashbase;
 	u_long			 ipi_hashmask;
 
 	/*
 	 * Global hash of inpcbs, hashed by only local port number.
 	 */
 	struct inpcbporthead	*ipi_porthashbase;
 	u_long			 ipi_porthashmask;
 
 	/*
 	 * Fields associated with port lookup and allocation.
 	 */
 	u_short			 ipi_lastport;
 	u_short			 ipi_lastlow;
 	u_short			 ipi_lasthi;
 
 	/*
 	 * UMA zone from which inpcbs are allocated for this protocol.
 	 */
 	struct	uma_zone	*ipi_zone;
 
 	/*
 	 * Generation count--incremented each time a connection is allocated
 	 * or freed.
 	 */
 	u_quad_t		 ipi_gencnt;
 	struct rwlock		 ipi_lock;
 
 	/*
 	 * vimage 1
 	 * general use 1
 	 */
 	void 			*ipi_pspare[2];	
 };
 
 #define INP_LOCK_INIT(inp, d, t) \
 	rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE |  RW_DUPOK)
 #define INP_LOCK_DESTROY(inp)	rw_destroy(&(inp)->inp_lock)
 #define INP_RLOCK(inp)		rw_rlock(&(inp)->inp_lock)
 #define INP_WLOCK(inp)		rw_wlock(&(inp)->inp_lock)
 #define INP_RUNLOCK(inp)	rw_runlock(&(inp)->inp_lock)
 #define INP_WUNLOCK(inp)	rw_wunlock(&(inp)->inp_lock)
 #define INP_LOCK_ASSERT(inp)	rw_assert(&(inp)->inp_lock, RA_LOCKED)
 #define	INP_RLOCK_ASSERT(inp)	rw_assert(&(inp)->inp_lock, RA_RLOCKED)
 #define	INP_WLOCK_ASSERT(inp)	rw_assert(&(inp)->inp_lock, RA_WLOCKED)
 #define	INP_UNLOCK_ASSERT(inp)	rw_assert(&(inp)->inp_lock, RA_UNLOCKED)
 
 #ifdef _KERNEL
 /*
  * These locking functions are for inpcb consumers outside of sys/netinet, 
  * more specifically, they were added for the benefit of TOE drivers. The
  * macros are reserved for use by the stack.
  */
 void inp_wlock(struct inpcb *);
 void inp_wunlock(struct inpcb *);
 void inp_rlock(struct inpcb *);
 void inp_runlock(struct inpcb *);
 
 #ifdef INVARIANTS
 void inp_lock_assert(struct inpcb *);
 void inp_unlock_assert(struct inpcb *);
 #else
 static __inline void
 inp_lock_assert(struct inpcb *inp __unused)
 {
 }
 
 static __inline void
 inp_unlock_assert(struct inpcb *inp __unused)
 {
 }
 
 #endif
 #endif /* _KERNEL */
 
 
 #define INP_INFO_LOCK_INIT(ipi, d) \
 	rw_init_flags(&(ipi)->ipi_lock, (d), RW_RECURSE)
 #define INP_INFO_LOCK_DESTROY(ipi)  rw_destroy(&(ipi)->ipi_lock)
 #define INP_INFO_RLOCK(ipi)	rw_rlock(&(ipi)->ipi_lock)
 #define INP_INFO_WLOCK(ipi)	rw_wlock(&(ipi)->ipi_lock)
 #define INP_INFO_RUNLOCK(ipi)	rw_runlock(&(ipi)->ipi_lock)
 #define INP_INFO_WUNLOCK(ipi)	rw_wunlock(&(ipi)->ipi_lock)
 #define	INP_INFO_LOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_lock, RA_LOCKED)
 #define INP_INFO_RLOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_lock, RA_RLOCKED)
 #define INP_INFO_WLOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
 #define INP_INFO_UNLOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
 
 #define INP_PCBHASH(faddr, lport, fport, mask) \
 	(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
 #define INP_PCBPORTHASH(lport, mask) \
 	(ntohs((lport)) & (mask))
 
 /* flags in inp_flags: */
 #define	INP_RECVOPTS		0x01	/* receive incoming IP options */
 #define	INP_RECVRETOPTS		0x02	/* receive IP options for reply */
 #define	INP_RECVDSTADDR		0x04	/* receive IP dst address */
 #define	INP_HDRINCL		0x08	/* user supplies entire IP header */
 #define	INP_HIGHPORT		0x10	/* user wants "high" port binding */
 #define	INP_LOWPORT		0x20	/* user wants "low" port binding */
 #define	INP_ANONPORT		0x40	/* port chosen for user */
 #define	INP_RECVIF		0x80	/* receive incoming interface */
 #define	INP_MTUDISC		0x100	/* user can do MTU discovery */
 #define	INP_FAITH		0x200	/* accept FAITH'ed connections */
 #define	INP_RECVTTL		0x400	/* receive incoming IP TTL */
 #define	INP_DONTFRAG		0x800	/* don't fragment packet */
 
 #define IN6P_IPV6_V6ONLY	0x008000 /* restrict AF_INET6 socket for v6 */
 
 #define	IN6P_PKTINFO		0x010000 /* receive IP6 dst and I/F */
 #define	IN6P_HOPLIMIT		0x020000 /* receive hoplimit */
 #define	IN6P_HOPOPTS		0x040000 /* receive hop-by-hop options */
 #define	IN6P_DSTOPTS		0x080000 /* receive dst options after rthdr */
 #define	IN6P_RTHDR		0x100000 /* receive routing header */
 #define	IN6P_RTHDRDSTOPTS	0x200000 /* receive dstoptions before rthdr */
 #define	IN6P_TCLASS		0x400000 /* receive traffic class value */
 #define	IN6P_AUTOFLOWLABEL	0x800000 /* attach flowlabel automatically */
 #define	IN6P_RFC2292		0x40000000 /* used RFC2292 API on the socket */
 #define	IN6P_MTU		0x80000000 /* receive path MTU */
 
 #define	INP_CONTROLOPTS		(INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\
 				 INP_RECVIF|INP_RECVTTL|\
 				 IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\
 				 IN6P_DSTOPTS|IN6P_RTHDR|IN6P_RTHDRDSTOPTS|\
 				 IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\
 				 IN6P_MTU)
 #define	INP_UNMAPPABLEOPTS	(IN6P_HOPOPTS|IN6P_DSTOPTS|IN6P_RTHDR|\
 				 IN6P_TCLASS|IN6P_AUTOFLOWLABEL)
 
  /* for KAME src sync over BSD*'s */
 #define	IN6P_HIGHPORT		INP_HIGHPORT
 #define	IN6P_LOWPORT		INP_LOWPORT
 #define	IN6P_ANONPORT		INP_ANONPORT
 #define	IN6P_RECVIF		INP_RECVIF
 #define	IN6P_MTUDISC		INP_MTUDISC
 #define	IN6P_FAITH		INP_FAITH
 #define	IN6P_CONTROLOPTS INP_CONTROLOPTS
 	/*
 	 * socket AF version is {newer than,or include}
 	 * actual datagram AF version
 	 */
 
 #define	INPLOOKUP_WILDCARD	1
 #define	sotoinpcb(so)	((struct inpcb *)(so)->so_pcb)
 #define	sotoin6pcb(so)	sotoinpcb(so) /* for KAME src sync over BSD*'s */
 
 #define	INP_SOCKAF(so) so->so_proto->pr_domain->dom_family
 
 #define	INP_CHECK_SOCKAF(so, af)	(INP_SOCKAF(so) == af)
 
 #ifdef _KERNEL
 extern int	ipport_reservedhigh;
 extern int	ipport_reservedlow;
 extern int	ipport_lowfirstauto;
 extern int	ipport_lowlastauto;
 extern int	ipport_firstauto;
 extern int	ipport_lastauto;
 extern int	ipport_hifirstauto;
 extern int	ipport_hilastauto;
 extern struct callout ipport_tick_callout;
 
 void	in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
 int	in_pcballoc(struct socket *, struct inpcbinfo *);
 int	in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *);
 int	in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
 	    u_short *, struct ucred *);
 int	in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *);
 int	in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
 	    u_short *, in_addr_t *, u_short *, struct inpcb **,
 	    struct ucred *);
 void	in_pcbdetach(struct inpcb *);
 void	in_pcbdisconnect(struct inpcb *);
 void	in_pcbdrop(struct inpcb *);
 void	in_pcbfree(struct inpcb *);
 int	in_pcbinshash(struct inpcb *);
 struct inpcb *
 	in_pcblookup_local(struct inpcbinfo *,
 	    struct in_addr, u_int, int);
 struct inpcb *
 	in_pcblookup_hash(struct inpcbinfo *, struct in_addr, u_int,
 	    struct in_addr, u_int, int, struct ifnet *);
 void	in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr,
 	    int, struct inpcb *(*)(struct inpcb *, int));
 void	in_pcbrehash(struct inpcb *);
 void	in_pcbsetsolabel(struct socket *so);
 int	in_getpeeraddr(struct socket *so, struct sockaddr **nam);
 int	in_getsockaddr(struct socket *so, struct sockaddr **nam);
 struct sockaddr *
 	in_sockaddr(in_port_t port, struct in_addr *addr);
 void	in_pcbsosetlabel(struct socket *so);
 void	in_pcbremlists(struct inpcb *inp);
 void	ipport_tick(void *xtp);
 
 /*
  * Debugging routines compiled in when DDB is present.
  */
 void	db_print_inpcb(struct inpcb *inp, const char *name, int indent);
 
 #endif /* _KERNEL */
 
 #endif /* !_NETINET_IN_PCB_H_ */
Index: head/sys/netinet/in_rmx.c
===================================================================
--- head/sys/netinet/in_rmx.c	(revision 178887)
+++ head/sys/netinet/in_rmx.c	(revision 178888)
@@ -1,399 +1,497 @@
 /*-
  * Copyright 1994, 1995 Massachusetts Institute of Technology
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby
  * granted, provided that both the above copyright notice and this
  * permission notice appear in all copies, that both the above
  * copyright notice and this permission notice appear in all
  * supporting documentation, and that the name of M.I.T. not be used
  * in advertising or publicity pertaining to distribution of the
  * software without specific, written prior permission.  M.I.T. makes
  * no representations about the suitability of this software for any
  * purpose.  It is provided "as is" without express or implied
  * warranty.
  *
  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * This code does two things necessary for the enhanced TCP metrics to
  * function in a useful manner:
  *  1) It marks all non-host routes as `cloning', thus ensuring that
  *     every actual reference to such a route actually gets turned
  *     into a reference to a host route to the specific destination
  *     requested.
  *  2) When such routes lose all their references, it arranges for them
  *     to be deleted in some random collection of circumstances, so that
  *     a large quantity of stale routing data is not kept in kernel memory
  *     indefinitely.  See in_rtqtimo() below for the exact mechanism.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/socket.h>
 #include <sys/mbuf.h>
 #include <sys/syslog.h>
 #include <sys/callout.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 
 extern int	in_inithead(void **head, int off);
 
 #define RTPRF_OURS		RTF_PROTO3	/* set on routes we manage */
 
 /*
  * Do what we need to do when inserting a route.
  */
 static struct radix_node *
 in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
     struct radix_node *treenodes)
 {
 	struct rtentry *rt = (struct rtentry *)treenodes;
 	struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
 	struct radix_node *ret;
 
 	/*
 	 * A little bit of help for both IP output and input:
 	 *   For host routes, we make sure that RTF_BROADCAST
 	 *   is set for anything that looks like a broadcast address.
 	 *   This way, we can avoid an expensive call to in_broadcast()
 	 *   in ip_output() most of the time (because the route passed
 	 *   to ip_output() is almost always a host route).
 	 *
 	 *   We also do the same for local addresses, with the thought
 	 *   that this might one day be used to speed up ip_input().
 	 *
 	 * We also mark routes to multicast addresses as such, because
 	 * it's easy to do and might be useful (but this is much more
 	 * dubious since it's so easy to inspect the address).
 	 */
 	if (rt->rt_flags & RTF_HOST) {
 		if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
 			rt->rt_flags |= RTF_BROADCAST;
 		} else if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr ==
 		    sin->sin_addr.s_addr) {
 			rt->rt_flags |= RTF_LOCAL;
 		}
 	}
 	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 		rt->rt_flags |= RTF_MULTICAST;
 
 	if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
 		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
 
 	ret = rn_addroute(v_arg, n_arg, head, treenodes);
 	if (ret == NULL && rt->rt_flags & RTF_HOST) {
 		struct rtentry *rt2;
 		/*
 		 * We are trying to add a host route, but can't.
 		 * Find out if it is because of an
 		 * ARP entry and delete it if so.
 		 */
-		rt2 = rtalloc1((struct sockaddr *)sin, 0, RTF_CLONING);
+		rt2 = in_rtalloc1((struct sockaddr *)sin, 0,
+		    RTF_CLONING, rt->rt_fibnum);
 		if (rt2) {
 			if (rt2->rt_flags & RTF_LLINFO &&
 			    rt2->rt_flags & RTF_HOST &&
 			    rt2->rt_gateway &&
 			    rt2->rt_gateway->sa_family == AF_LINK) {
 				rtexpunge(rt2);
 				RTFREE_LOCKED(rt2);
 				ret = rn_addroute(v_arg, n_arg, head,
 						  treenodes);
 			} else
 				RTFREE_LOCKED(rt2);
 		}
 	}
 
 	return ret;
 }
 
 /*
  * This code is the inverse of in_clsroute: on first reference, if we
  * were managing the route, stop doing so and set the expiration timer
  * back off again.
  */
 static struct radix_node *
 in_matroute(void *v_arg, struct radix_node_head *head)
 {
 	struct radix_node *rn = rn_match(v_arg, head);
 	struct rtentry *rt = (struct rtentry *)rn;
 
 	/*XXX locking? */
 	if (rt && rt->rt_refcnt == 0) {		/* this is first reference */
 		if (rt->rt_flags & RTPRF_OURS) {
 			rt->rt_flags &= ~RTPRF_OURS;
 			rt->rt_rmx.rmx_expire = 0;
 		}
 	}
 	return rn;
 }
 
 static int rtq_reallyold = 60*60;		/* one hour is "really old" */
 SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW,
     &rtq_reallyold, 0, "Default expiration time on dynamically learned routes");
 
 static int rtq_minreallyold = 10;  /* never automatically crank down to less */
 SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW,
     &rtq_minreallyold, 0,
     "Minimum time to attempt to hold onto dynamically learned routes");
 
 static int rtq_toomany = 128;		/* 128 cached routes is "too many" */
 SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW,
     &rtq_toomany, 0, "Upper limit on dynamically learned routes");
 
 /*
  * On last reference drop, mark the route as belong to us so that it can be
  * timed out.
  */
 static void
 in_clsroute(struct radix_node *rn, struct radix_node_head *head)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 
 	RT_LOCK_ASSERT(rt);
 
 	if (!(rt->rt_flags & RTF_UP))
 		return;			/* prophylactic measures */
 
 	if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
 		return;
 
 	if (rt->rt_flags & RTPRF_OURS)
 		return;
 
 	if (!(rt->rt_flags & (RTF_WASCLONED | RTF_DYNAMIC)))
 		return;
 
 	/*
 	 * If rtq_reallyold is 0, just delete the route without
 	 * waiting for a timeout cycle to kill it.
 	 */
 	if (rtq_reallyold != 0) {
 		rt->rt_flags |= RTPRF_OURS;
 		rt->rt_rmx.rmx_expire = time_uptime + rtq_reallyold;
 	} else {
 		rtexpunge(rt);
 	}
 }
 
 struct rtqk_arg {
 	struct radix_node_head *rnh;
 	int draining;
 	int killed;
 	int found;
 	int updating;
 	time_t nextstop;
 };
 
 /*
  * Get rid of old routes.  When draining, this deletes everything, even when
  * the timeout is not expired yet.  When updating, this makes sure that
  * nothing has a timeout longer than the current value of rtq_reallyold.
  */
 static int
 in_rtqkill(struct radix_node *rn, void *rock)
 {
 	struct rtqk_arg *ap = rock;
 	struct rtentry *rt = (struct rtentry *)rn;
 	int err;
 
 	if (rt->rt_flags & RTPRF_OURS) {
 		ap->found++;
 
 		if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) {
 			if (rt->rt_refcnt > 0)
 				panic("rtqkill route really not free");
 
-			err = rtrequest(RTM_DELETE,
+			err = in_rtrequest(RTM_DELETE,
 					(struct sockaddr *)rt_key(rt),
 					rt->rt_gateway, rt_mask(rt),
-					rt->rt_flags, 0);
+					rt->rt_flags, 0, rt->rt_fibnum);
 			if (err) {
 				log(LOG_WARNING, "in_rtqkill: error %d\n", err);
 			} else {
 				ap->killed++;
 			}
 		} else {
 			if (ap->updating &&
 			    (rt->rt_rmx.rmx_expire - time_uptime >
 			     rtq_reallyold)) {
 				rt->rt_rmx.rmx_expire =
 				    time_uptime + rtq_reallyold;
 			}
 			ap->nextstop = lmin(ap->nextstop,
 					    rt->rt_rmx.rmx_expire);
 		}
 	}
 
 	return 0;
 }
 
 #define RTQ_TIMEOUT	60*10	/* run no less than once every ten minutes */
 static int rtq_timeout = RTQ_TIMEOUT;
 static struct callout rtq_timer;
 
+static void in_rtqtimo_one(void *rock);
+
 static void
 in_rtqtimo(void *rock)
 {
+	int fibnum;			/* index into rt_tables[] */
+	void *newrock;			/* rt_tables[fibnum][AF_INET] */
+	struct timeval atv;
+
+	KASSERT((rock == (void *)rt_tables[0][AF_INET]),
+			("in_rtqtimo: unexpected arg"));
+	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {	/* every fib */
+		if ((newrock = rt_tables[fibnum][AF_INET]) != NULL)
+			in_rtqtimo_one(newrock);
+	}
+	atv.tv_usec = 0;
+	atv.tv_sec = rtq_timeout;	/* fixed; arg.nextstop not consulted */
+	callout_reset(&rtq_timer, tvtohz(&atv), in_rtqtimo, rock);
+}
+
+static void
+in_rtqtimo_one(void *rock)
+{
 	struct radix_node_head *rnh = rock;
 	struct rtqk_arg arg;
-	struct timeval atv;
 	static time_t last_adjusted_timeout = 0;
 
 	arg.found = arg.killed = 0;
 	arg.rnh = rnh;
 	arg.nextstop = time_uptime + rtq_timeout;
 	arg.draining = arg.updating = 0;
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
 
 	/*
 	 * Attempt to be somewhat dynamic about this:
 	 * If there are ``too many'' routes sitting around taking up space,
 	 * then crank down the timeout, and see if we can't make some more
 	 * go away.  However, we make sure that we will never adjust more
 	 * than once in rtq_timeout seconds, to keep from cranking down too
 	 * hard.
 	 */
 	if ((arg.found - arg.killed > rtq_toomany) &&
 	    (time_uptime - last_adjusted_timeout >= rtq_timeout) &&
 	    rtq_reallyold > rtq_minreallyold) {
 		rtq_reallyold = 2 * rtq_reallyold / 3;
 		if (rtq_reallyold < rtq_minreallyold) {
 			rtq_reallyold = rtq_minreallyold;
 		}
 
 		last_adjusted_timeout = time_uptime;
 #ifdef DIAGNOSTIC
 		log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n",
 		    rtq_reallyold);
 #endif
 		arg.found = arg.killed = 0;
 		arg.updating = 1;
 		RADIX_NODE_HEAD_LOCK(rnh);
 		rnh->rnh_walktree(rnh, in_rtqkill, &arg);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 	}
 
-	atv.tv_usec = 0;
-	atv.tv_sec = arg.nextstop - time_uptime;
-	callout_reset(&rtq_timer, tvtohz(&atv), in_rtqtimo, rock);
 }
 
 void
 in_rtqdrain(void)
 {
-	struct radix_node_head *rnh = rt_tables[AF_INET];
+	struct radix_node_head *rnh;
 	struct rtqk_arg arg;
+	int 	fibnum;
 
-	arg.found = arg.killed = 0;
-	arg.rnh = rnh;
-	arg.nextstop = 0;
-	arg.draining = 1;
-	arg.updating = 0;
-	RADIX_NODE_HEAD_LOCK(rnh);
-	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
-	RADIX_NODE_HEAD_UNLOCK(rnh);
+	for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {	/* every fib */
+		rnh = rt_tables[fibnum][AF_INET];	/* XXX may be NULL? */
+		arg.found = arg.killed = 0;
+		arg.rnh = rnh;
+		arg.nextstop = 0;
+		arg.draining = 1;	/* delete even unexpired routes */
+		arg.updating = 0;
+		RADIX_NODE_HEAD_LOCK(rnh);
+		rnh->rnh_walktree(rnh, in_rtqkill, &arg);
+		RADIX_NODE_HEAD_UNLOCK(rnh);
+	}
 }
 
+static int _in_rt_was_here;	/* set once rtq_timer is started */
 /*
  * Initialize our routing tree.
  */
 int
 in_inithead(void **head, int off)
 {
 	struct radix_node_head *rnh;
 
-	if (!rn_inithead(head, off))
+	/* XXX MRT
+	 * This can be called from vfs_export.c too, in which case 'off'
+	 * will be 0.  We know the correct value (32), so always pass that
+	 * to rn_inithead() and return right afterwards if 'off' was 0.
+	 * This is a hack that replaces an even worse hack on a bad hack
+	 * on a bad design.  After RELENG_7 this should be fixed, but that
+	 * will change the ABI, so for now do it this way.
+	 */
+	if (!rn_inithead(head, 32))
 		return 0;
 
-	if (head != (void **)&rt_tables[AF_INET])	/* BOGUS! */
-		return 1;	/* only do this for the real routing table */
+	if (off == 0)		/* XXX MRT  see above */
+		return 1;	/* only do the rest for a real routing table */
 
 	rnh = *head;
 	rnh->rnh_addaddr = in_addroute;
 	rnh->rnh_matchaddr = in_matroute;
 	rnh->rnh_close = in_clsroute;
-	callout_init(&rtq_timer, CALLOUT_MPSAFE);
-	in_rtqtimo(rnh);	/* kick off timeout first time */
+	if (_in_rt_was_here == 0 ) {	/* arm the timer only once */
+		callout_init(&rtq_timer, CALLOUT_MPSAFE);
+		in_rtqtimo(rnh);	/* kick off timeout first time */
+		_in_rt_was_here = 1;
+	}
 	return 1;
 }
 
 /*
  * This zaps old routes when the interface goes down or interface
  * address is deleted.  In the latter case, it deletes static routes
  * that point to this address.  If we don't do this, we may end up
  * using the old address in the future.  The ones we always want to
  * get rid of are things like ARP entries, since the user might down
  * the interface, walk over to a completely different network, and
  * plug back in.
  */
 struct in_ifadown_arg {
 	struct ifaddr *ifa;
 	int del;
 };
 
 static int
 in_ifadownkill(struct radix_node *rn, void *xap)
 {
 	struct in_ifadown_arg *ap = xap;
 	struct rtentry *rt = (struct rtentry *)rn;
 
 	RT_LOCK(rt);
 	if (rt->rt_ifa == ap->ifa &&
 	    (ap->del || !(rt->rt_flags & RTF_STATIC))) {
 		/*
 		 * We need to disable the automatic prune that happens
 		 * in this case in rtrequest() because it will blow
 		 * away the pointers that rn_walktree() needs in order
 		 * continue our descent.  We will end up deleting all
 		 * the routes that rtrequest() would have in any case,
 		 * so that behavior is not needed there.
 		 */
 		rt->rt_flags &= ~RTF_CLONING;
 		rtexpunge(rt);
 	}
 	RT_UNLOCK(rt);
 	return 0;
 }
 
 int
 in_ifadown(struct ifaddr *ifa, int delete)
 {
 	struct in_ifadown_arg arg;
 	struct radix_node_head *rnh;
+	int	fibnum;
 
 	if (ifa->ifa_addr->sa_family != AF_INET)
 		return 1;
 
-	rnh = rt_tables[AF_INET];
-	arg.ifa = ifa;
-	arg.del = delete;
-	RADIX_NODE_HEAD_LOCK(rnh);
-	rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
-	RADIX_NODE_HEAD_UNLOCK(rnh);
-	ifa->ifa_flags &= ~IFA_ROUTE;		/* XXXlocking? */
+	for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {	/* every fib */
+		rnh = rt_tables[fibnum][AF_INET];	/* XXX may be NULL? */
+		arg.ifa = ifa;
+		arg.del = delete;	/* nonzero: static routes too */
+		RADIX_NODE_HEAD_LOCK(rnh);
+		rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
+		RADIX_NODE_HEAD_UNLOCK(rnh);
+		ifa->ifa_flags &= ~IFA_ROUTE;		/* XXXlocking? */
+	}
 	return 0;
 }
+
+/*
+ * inet versions of the rt functions.  These take a fib number and,
+ * for now, just call the corresponding _fib variants.
+ * Eventually this order will be reversed.
+ */
+void
+in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum)
+{
+	rtalloc_ign_fib(ro, ignflags, fibnum);	/* plain pass-through */
+}
+
+/*
+ * inet wrapper around rtrequest_fib(): every argument, including the
+ * fib number, is handed over unchanged and the result is returned.
+ */
+int
+in_rtrequest(int req, struct sockaddr *dst, struct sockaddr *gateway,
+    struct sockaddr *netmask, int flags, struct rtentry **ret_nrt,
+    u_int fibnum)
+{
+	return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt,
+	    fibnum));
+}
+
+struct rtentry *	/* in_addroute() releases it via RTFREE_LOCKED() */
+in_rtalloc1(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum)
+{
+	return (rtalloc1_fib(dst, report, ignflags, fibnum));
+}
+
+int
+in_rt_check(struct rtentry **lrt, struct rtentry **lrt0,
+	struct sockaddr *dst, u_int fibnum)
+{
+	return (rt_check_fib(lrt, lrt0, dst, fibnum)); /* result passed up */
+}
+
+/*
+ * inet wrapper around rtredirect_fib(); all arguments, including the
+ * fib number, are passed through unchanged.
+ */
+void
+in_rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
+    struct sockaddr *netmask, int flags, struct sockaddr *src, u_int fibnum)
+{
+	rtredirect_fib(dst, gateway, netmask, flags, src, fibnum);
+}
+ 
+void
+in_rtalloc(struct route *ro, u_int fibnum)
+{
+	rtalloc_ign_fib(ro, 0UL, fibnum);	/* same as ignflags == 0 */
+}
+
+#if 0
+int	 in_rt_getifa(struct rt_addrinfo *, u_int fibnum);
+int	 in_rtioctl(u_long, caddr_t, u_int);
+int	 in_rtrequest1(int, struct rt_addrinfo *, struct rtentry **, u_int);
+#endif
+
+
Index: head/sys/netinet/in_var.h
===================================================================
--- head/sys/netinet/in_var.h	(revision 178887)
+++ head/sys/netinet/in_var.h	(revision 178888)
@@ -1,313 +1,329 @@
 /*-
  * Copyright (c) 1985, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_var.h	8.2 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #ifndef _NETINET_IN_VAR_H_
 #define _NETINET_IN_VAR_H_
 
 #include <sys/queue.h>
 #include <sys/fnv_hash.h>
 
 /*
  * Interface address, Internet version.  One of these structures
  * is allocated for each Internet address on an interface.
  * The ifaddr structure contains the protocol-independent part
  * of the structure and is assumed to be first.
  */
 struct in_ifaddr {
 	struct	ifaddr ia_ifa;		/* protocol-independent info */
 #define	ia_ifp		ia_ifa.ifa_ifp
 #define ia_flags	ia_ifa.ifa_flags
 					/* ia_{,sub}net{,mask} in host order */
 	u_long	ia_net;			/* network number of interface */
 	u_long	ia_netmask;		/* mask of net part */
 	u_long	ia_subnet;		/* subnet number, including net */
 	u_long	ia_subnetmask;		/* mask of subnet part */
 	struct	in_addr ia_netbroadcast; /* to recognize net broadcasts */
 	LIST_ENTRY(in_ifaddr) ia_hash;	/* entry in bucket of inet addresses */
 	TAILQ_ENTRY(in_ifaddr) ia_link;	/* list of internet addresses */
 	struct	sockaddr_in ia_addr;	/* reserve space for interface name */
 	struct	sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */
 #define	ia_broadaddr	ia_dstaddr
 	struct	sockaddr_in ia_sockmask; /* reserve space for general netmask */
 };
 
 struct	in_aliasreq {
 	char	ifra_name[IFNAMSIZ];		/* if name, e.g. "en0" */
 	struct	sockaddr_in ifra_addr;
 	struct	sockaddr_in ifra_broadaddr;
 #define ifra_dstaddr ifra_broadaddr
 	struct	sockaddr_in ifra_mask;
 };
 /*
  * Given a pointer to an in_ifaddr (ifaddr),
  * return a pointer to the addr as a sockaddr_in.
  */
 #define IA_SIN(ia)    (&(((struct in_ifaddr *)(ia))->ia_addr))
 #define IA_DSTSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_dstaddr))
 
 #define IN_LNAOF(in, ifa) \
 	((ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa)->ia_subnetmask))
 
 
 #ifdef	_KERNEL
 extern	u_char	inetctlerrmap[];
 
 /*
  * Hash table for IP addresses.
  */
 extern	LIST_HEAD(in_ifaddrhashhead, in_ifaddr) *in_ifaddrhashtbl;
 extern	TAILQ_HEAD(in_ifaddrhead, in_ifaddr) in_ifaddrhead;
 extern	u_long in_ifaddrhmask;			/* mask for hash table */
 
 #define INADDR_NHASH_LOG2       9
 #define INADDR_NHASH		(1 << INADDR_NHASH_LOG2)
 #define INADDR_HASHVAL(x)	fnv_32_buf((&(x)), sizeof(x), FNV1_32_INIT)
 #define INADDR_HASH(x) \
 	(&in_ifaddrhashtbl[INADDR_HASHVAL(x) & in_ifaddrhmask])
 
 /*
  * Macro for finding the internet address structure (in_ifaddr)
  * corresponding to one of our IP addresses (in_addr).
  */
 #define INADDR_TO_IFADDR(addr, ia) \
 	/* struct in_addr addr; */ \
 	/* struct in_ifaddr *ia; */ \
 do { \
 \
 	LIST_FOREACH(ia, INADDR_HASH((addr).s_addr), ia_hash) \
 		if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) \
 			break; \
 } while (0)
 
 /*
  * Macro for finding the interface (ifnet structure) corresponding to one
  * of our IP addresses.
  */
 #define INADDR_TO_IFP(addr, ifp) \
 	/* struct in_addr addr; */ \
 	/* struct ifnet *ifp; */ \
 { \
 	struct in_ifaddr *ia; \
 \
 	INADDR_TO_IFADDR(addr, ia); \
 	(ifp) = (ia == NULL) ? NULL : ia->ia_ifp; \
 }
 
 /*
  * Macro for finding the internet address structure (in_ifaddr) corresponding
  * to a given interface (ifnet structure).
  */
 #define IFP_TO_IA(ifp, ia) \
 	/* struct ifnet *ifp; */ \
 	/* struct in_ifaddr *ia; */ \
 { \
 	for ((ia) = TAILQ_FIRST(&in_ifaddrhead); \
 	    (ia) != NULL && (ia)->ia_ifp != (ifp); \
 	    (ia) = TAILQ_NEXT((ia), ia_link)) \
 		continue; \
 }
 #endif
 
 /*
  * This information should be part of the ifnet structure but we don't wish
  * to change that - as it might break a number of things
  */
 
 struct router_info {
 	struct ifnet *rti_ifp;
 	int    rti_type; /* type of router which is querier on this interface */
 	int    rti_time; /* # of slow timeouts since last old query */
 	SLIST_ENTRY(router_info) rti_list;
 #ifdef notyet
 	int	rti_timev1;	/* IGMPv1 querier present */
 	int	rti_timev2;	/* IGMPv2 querier present */
 	int	rti_timer;	/* report to general query */
 	int	rti_qrv;	/* querier robustness */
 #endif
 };
 
 /*
  * Internet multicast address structure.  There is one of these for each IP
  * multicast group to which this host belongs on a given network interface.
  * For every entry on the interface's if_multiaddrs list which represents
  * an IP multicast group, there is one of these structures.  They are also
  * kept on a system-wide list to make it easier to keep our legacy IGMP code
  * compatible with the rest of the world (see IN_FIRST_MULTI et al, below).
  */
 struct in_multi {
 	LIST_ENTRY(in_multi) inm_link;	/* queue macro glue */
 	struct	in_addr inm_addr;	/* IP multicast address, convenience */
 	struct	ifnet *inm_ifp;		/* back pointer to ifnet */
 	struct	ifmultiaddr *inm_ifma;	/* back pointer to ifmultiaddr */
 	u_int	inm_timer;		/* IGMP membership report timer */
 	u_int	inm_state;		/*  state of the membership */
 	struct	router_info *inm_rti;	/* router info*/
 	u_int	inm_refcount;		/* reference count */
 #ifdef notyet		/* IGMPv3 source-specific multicast fields */
 	TAILQ_HEAD(, in_msfentry) inm_msf;	/* all active source filters */
 	TAILQ_HEAD(, in_msfentry) inm_msf_record;	/* recorded sources */
 	TAILQ_HEAD(, in_msfentry) inm_msf_exclude;	/* exclude sources */
 	TAILQ_HEAD(, in_msfentry) inm_msf_include;	/* include sources */
 	/* XXX: should this lot go to the router_info structure? */
 	/* XXX: can/should these be callouts? */
 	/* IGMP protocol timers */
 	int32_t		inm_ti_curstate;	/* current state timer */
 	int32_t		inm_ti_statechg;	/* state change timer */
 	/* IGMP report timers */
 	uint16_t	inm_rpt_statechg;	/* state change report timer */
 	uint16_t	inm_rpt_toxx;		/* fmode change report timer */
 	/* IGMP protocol state */
 	uint16_t	inm_fmode;		/* filter mode */
 	uint32_t	inm_recsrc_count;	/* # of recorded sources */
 	uint16_t	inm_exclude_sock_count;	/* # of exclude-mode sockets */
 	uint16_t	inm_gass_count;		/* # of g-a-s queries */
 #endif
 };
 
 #ifdef notyet
 /*
  * Internet multicast source filter list. This list is used to store
  * IP multicast source addresses for each membership on an interface.
  * TODO: Allocate these structures using UMA.
  * TODO: Find an easier way of linking the struct into two lists at once.
  */
 struct in_msfentry {
 	TAILQ_ENTRY(in_msfentry) isf_link;	/* next filter in all-list */
 	TAILQ_ENTRY(in_msfentry) isf_next;	/* next filter in queue */
 	struct in_addr	isf_addr;	/* the address of this source */
 	uint16_t	isf_refcount;	/* reference count */
 	uint16_t	isf_reporttag;	/* what to report to the IGMP router */
 	uint16_t	isf_rexmit;	/* retransmission state/count */
 };
 #endif
 
 #ifdef _KERNEL
 
 #ifdef SYSCTL_DECL
 SYSCTL_DECL(_net_inet);
 SYSCTL_DECL(_net_inet_ip);
 SYSCTL_DECL(_net_inet_raw);
 #endif
 
 extern LIST_HEAD(in_multihead, in_multi) in_multihead;
 
 /*
  * Lock macros for IPv4 layer multicast address lists.  IPv4 lock goes
  * before link layer multicast locks in the lock order.  In most cases,
  * consumers of IN_*_MULTI() macros should acquire the locks before
  * calling them; users of the in_{add,del}multi() functions should not.
  */
 extern struct mtx in_multi_mtx;
 #define	IN_MULTI_LOCK()		mtx_lock(&in_multi_mtx)
 #define	IN_MULTI_UNLOCK()	mtx_unlock(&in_multi_mtx)
 #define	IN_MULTI_LOCK_ASSERT()	mtx_assert(&in_multi_mtx, MA_OWNED)
 
 /*
  * Structure used by macros below to remember position when stepping through
  * all of the in_multi records.
  */
 struct in_multistep {
 	struct in_multi *i_inm;
 };
 
 /*
  * Macro for looking up the in_multi record for a given IP multicast address
  * on a given interface.  If no matching record is found, "inm" is set null.
  */
 #define IN_LOOKUP_MULTI(addr, ifp, inm) \
 	/* struct in_addr addr; */ \
 	/* struct ifnet *ifp; */ \
 	/* struct in_multi *inm; */ \
 do { \
 	struct ifmultiaddr *ifma; \
 \
 	IN_MULTI_LOCK_ASSERT(); \
 	IF_ADDR_LOCK(ifp); \
 	TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { \
 		if (ifma->ifma_addr->sa_family == AF_INET \
 		    && ((struct sockaddr_in *)ifma->ifma_addr)->sin_addr.s_addr == \
 		    (addr).s_addr) \
 			break; \
 	} \
 	(inm) = ifma ? ifma->ifma_protospec : 0; \
 	IF_ADDR_UNLOCK(ifp); \
 } while(0)
 
 /*
  * Macro to step through all of the in_multi records, one at a time.
  * The current position is remembered in "step", which the caller must
  * provide.  IN_FIRST_MULTI(), below, must be called to initialize "step"
  * and get the first record.  Both macros return a NULL "inm" when there
  * are no remaining records.
  */
 #define IN_NEXT_MULTI(step, inm) \
 	/* struct in_multistep  step; */ \
 	/* struct in_multi *inm; */ \
 do { \
 	IN_MULTI_LOCK_ASSERT(); \
 	if (((inm) = (step).i_inm) != NULL) \
 		(step).i_inm = LIST_NEXT((step).i_inm, inm_link); \
 } while(0)
 
 #define IN_FIRST_MULTI(step, inm) \
 	/* struct in_multistep step; */ \
 	/* struct in_multi *inm; */ \
 do { \
 	IN_MULTI_LOCK_ASSERT(); \
 	(step).i_inm = LIST_FIRST(&in_multihead); \
 	IN_NEXT_MULTI((step), (inm)); \
 } while(0)
 
+struct	rtentry;
 struct	route;
 struct	ip_moptions;
 
 size_t	imo_match_group(struct ip_moptions *, struct ifnet *,
 	    struct sockaddr *);
 struct	in_msource *imo_match_source(struct ip_moptions *, size_t,
 	    struct sockaddr *);
 struct	in_multi *in_addmulti(struct in_addr *, struct ifnet *);
 void	in_delmulti(struct in_multi *);
 void	in_delmulti_locked(struct in_multi *);
 int	in_control(struct socket *, u_long, caddr_t, struct ifnet *,
 	    struct thread *);
 void	in_rtqdrain(void);
 void	ip_input(struct mbuf *);
 int	in_ifadown(struct ifaddr *ifa, int);
 void	in_ifscrub(struct ifnet *, struct in_ifaddr *);
 struct	mbuf	*ip_fastforward(struct mbuf *);
 
+/* XXX: inet versions of the rt functions; each takes a FIB number. */
+void	 in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum);
+void	 in_rtalloc(struct route *ro, u_int fibnum);
+struct rtentry *in_rtalloc1(struct sockaddr *, int, u_long, u_int);
+void	 in_rtredirect(struct sockaddr *, struct sockaddr *,
+	    struct sockaddr *, int, struct sockaddr *, u_int);
+int	 in_rtrequest(int, struct sockaddr *,
+	    struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int);
+int	 in_rt_check(struct rtentry **, struct rtentry **, struct sockaddr *, u_int);
+
+#if 0
+int	 in_rt_getifa(struct rt_addrinfo *, u_int fibnum);
+int	 in_rtioctl(u_long, caddr_t, u_int);
+int	 in_rtrequest1(int, struct rt_addrinfo *, struct rtentry **, u_int);
+#endif
 #endif /* _KERNEL */
 
 /* INET6 stuff */
 #include <netinet6/in6_var.h>
 
 #endif /* _NETINET_IN_VAR_H_ */
Index: head/sys/netinet/ip_fastfwd.c
===================================================================
--- head/sys/netinet/ip_fastfwd.c	(revision 178887)
+++ head/sys/netinet/ip_fastfwd.c	(revision 178888)
@@ -1,613 +1,613 @@
 /*-
  * Copyright (c) 2003 Andre Oppermann, Internet Business Solutions AG
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote
  *    products derived from this software without specific prior written
  *    permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * ip_fastforward gets its speed from processing the forwarded packet to
  * completion (if_output on the other side) without any queues or netisr's.
  * The receiving interface DMAs the packet into memory, the upper half of
  * driver calls ip_fastforward, we do our routing table lookup and directly
  * send it off to the outgoing interface, which DMAs the packet to the
  * network card. The only part of the packet we touch with the CPU is the
  * IP header (unless there are complex firewall rules touching other parts
  * of the packet, but that is up to you). We are essentially limited by bus
  * bandwidth and how fast the network card/driver can set up receives and
  * transmits.
  *
  * We handle basic errors, IP header errors, checksum errors,
  * destination unreachable, fragmentation and fragmentation needed and
  * report them via ICMP to the sender.
  *
  * Else if something is not pure IPv4 unicast forwarding we fall back to
  * the normal ip_input processing path. We should only be called from
  * interfaces connected to the outside world.
  *
  * Firewalling is fully supported including divert, ipfw fwd and ipfilter
  * ipnat and address rewrite.
  *
  * IPSEC is not supported if this host is a tunnel broker. IPSEC is
  * supported for connections to/from local host.
  *
  * We try to do the least expensive (in CPU ops) checks and operations
  * first to catch junk with as little overhead as possible.
  * 
  * We take full advantage of hardware support for IP checksum and
  * fragmentation offloading.
  *
  * We don't do ICMP redirect in the fast forwarding path. I have had my own
  * cases where two core routers with Zebra routing suite would send millions
  * ICMP redirects to connected hosts if the destination router was not the
  * default gateway. In one case it was filling the routing table of a host
  * with approximately 300.000 cloned redirect entries until it ran out of
  * kernel memory. However the networking code proved very robust and it didn't
  * crash or fail in other ways.
  */
 
 /*
  * Many thanks to Matt Thomas of NetBSD for basic structure of ip_flow.c which
  * is being followed here.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipfw.h"
 #include "opt_ipstealth.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 
 #include <net/pfil.h>
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_options.h>
 
 #include <machine/in_cksum.h>
 
 static int ipfastforward_active = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, fastforwarding, CTLFLAG_RW,
     &ipfastforward_active, 0, "Enable fast IP forwarding");
 
 static struct sockaddr_in *
 ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m)
 {
 	struct sockaddr_in *dst;
 	struct rtentry *rt;
 
 	/*
 	 * Find route to destination.
 	 */
 	bzero(ro, sizeof(*ro));
 	dst = (struct sockaddr_in *)&ro->ro_dst;
 	dst->sin_family = AF_INET;
 	dst->sin_len = sizeof(*dst);
 	dst->sin_addr.s_addr = dest.s_addr;
-	rtalloc_ign(ro, RTF_CLONING);
+	in_rtalloc_ign(ro, RTF_CLONING, M_GETFIB(m));
 
 	/*
 	 * Route there and interface still up?
 	 */
 	rt = ro->ro_rt;
 	if (rt && (rt->rt_flags & RTF_UP) &&
 	    (rt->rt_ifp->if_flags & IFF_UP) &&
 	    (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 		if (rt->rt_flags & RTF_GATEWAY)
 			dst = (struct sockaddr_in *)rt->rt_gateway;
 	} else {
 		ipstat.ips_noroute++;
 		ipstat.ips_cantforward++;
 		if (rt)
 			RTFREE(rt);
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
 		return NULL;
 	}
 	return dst;
 }
 
 /*
  * Try to forward a packet based on the destination address.
  * This is a fast path optimized for the plain forwarding case.
  * If the packet is handled (and consumed) here then we return 1;
  * otherwise 0 is returned and the packet should be delivered
  * to ip_input for full processing.
  */
 struct mbuf *
 ip_fastforward(struct mbuf *m)
 {
 	struct ip *ip;
 	struct mbuf *m0 = NULL;
 	struct route ro;
 	struct sockaddr_in *dst = NULL;
 	struct ifnet *ifp;
 	struct in_addr odest, dest;
 	u_short sum, ip_len;
 	int error = 0;
 	int hlen, mtu;
 #ifdef IPFIREWALL_FORWARD
 	struct m_tag *fwd_tag;
 #endif
 
 	/*
 	 * Are we active and forwarding packets?
 	 */
 	if (!ipfastforward_active || !ipforwarding)
 		return m;
 
 	M_ASSERTVALID(m);
 	M_ASSERTPKTHDR(m);
 
 	ro.ro_rt = NULL;
 
 	/*
 	 * Step 1: check for packet drop conditions (and sanity checks)
 	 */
 
 	/*
 	 * Is entire packet big enough?
 	 */
 	if (m->m_pkthdr.len < sizeof(struct ip)) {
 		ipstat.ips_tooshort++;
 		goto drop;
 	}
 
 	/*
 	 * Is first mbuf large enough for ip header and is header present?
 	 */
 	if (m->m_len < sizeof (struct ip) &&
 	   (m = m_pullup(m, sizeof (struct ip))) == NULL) {
 		ipstat.ips_toosmall++;
 		return NULL;	/* mbuf already free'd */
 	}
 
 	ip = mtod(m, struct ip *);
 
 	/*
 	 * Is it IPv4?
 	 */
 	if (ip->ip_v != IPVERSION) {
 		ipstat.ips_badvers++;
 		goto drop;
 	}
 
 	/*
 	 * Is IP header length correct and is it in first mbuf?
 	 */
 	hlen = ip->ip_hl << 2;
 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
 		ipstat.ips_badlen++;
 		goto drop;
 	}
 	if (hlen > m->m_len) {
 		if ((m = m_pullup(m, hlen)) == NULL) {
 			ipstat.ips_badhlen++;
 			return NULL;	/* mbuf already free'd */
 		}
 		ip = mtod(m, struct ip *);
 	}
 
 	/*
 	 * Checksum correct?
 	 */
 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED)
 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
 	else {
 		if (hlen == sizeof(struct ip))
 			sum = in_cksum_hdr(ip);
 		else
 			sum = in_cksum(m, hlen);
 	}
 	if (sum) {
 		ipstat.ips_badsum++;
 		goto drop;
 	}
 
 	/*
 	 * Remember that we have checked the IP header and found it valid.
 	 */
 	m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
 
 	ip_len = ntohs(ip->ip_len);
 
 	/*
 	 * Is IP length longer than packet we have got?
 	 */
 	if (m->m_pkthdr.len < ip_len) {
 		ipstat.ips_tooshort++;
 		goto drop;
 	}
 
 	/*
 	 * Is packet longer than IP header tells us? If yes, truncate packet.
 	 */
 	if (m->m_pkthdr.len > ip_len) {
 		if (m->m_len == m->m_pkthdr.len) {
 			m->m_len = ip_len;
 			m->m_pkthdr.len = ip_len;
 		} else
 			m_adj(m, ip_len - m->m_pkthdr.len);
 	}
 
 	/*
 	 * Is packet from or to 127/8?
 	 */
 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
 		ipstat.ips_badaddr++;
 		goto drop;
 	}
 
 #ifdef ALTQ
 	/*
 	 * Is packet dropped by traffic conditioner?
 	 */
 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
 		goto drop;
 #endif
 
 	/*
 	 * Step 2: fallback conditions to normal ip_input path processing
 	 */
 
 	/*
 	 * Only IP packets without options
 	 */
 	if (ip->ip_hl != (sizeof(struct ip) >> 2)) {
 		if (ip_doopts == 1)
 			return m;
 		else if (ip_doopts == 2) {
 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB,
 				0, 0);
 			return NULL;	/* mbuf already free'd */
 		}
 		/* else ignore IP options and continue */
 	}
 
 	/*
 	 * Only unicast IP, not from loopback, no L2 or IP broadcast,
 	 * no multicast, no INADDR_ANY
 	 *
 	 * XXX: Probably some of these checks could be direct drop
 	 * conditions.  However it is not clear whether there are some
 	 * hacks or obscure behaviours which make it neccessary to
 	 * let ip_input handle it.  We play safe here and let ip_input
 	 * deal with it until it is proven that we can directly drop it.
 	 */
 	if ((m->m_flags & (M_BCAST|M_MCAST)) ||
 	    (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
 	    ntohl(ip->ip_src.s_addr) == (u_long)INADDR_BROADCAST ||
 	    ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST ||
 	    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
 	    IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
 	    IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) ||
 	    IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) ||
 	    ip->ip_src.s_addr == INADDR_ANY ||
 	    ip->ip_dst.s_addr == INADDR_ANY )
 		return m;
 
 	/*
 	 * Is it for a local address on this host?
 	 */
 	if (in_localip(ip->ip_dst))
 		return m;
 
 	ipstat.ips_total++;
 
 	/*
 	 * Step 3: incoming packet firewall processing
 	 */
 
 	/*
 	 * Convert to host representation
 	 */
 	ip->ip_len = ntohs(ip->ip_len);
 	ip->ip_off = ntohs(ip->ip_off);
 
 	odest.s_addr = dest.s_addr = ip->ip_dst.s_addr;
 
 	/*
 	 * Run through list of ipfilter hooks for input packets
 	 */
 	if (!PFIL_HOOKED(&inet_pfil_hook))
 		goto passin;
 
 	if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL) ||
 	    m == NULL)
 		goto drop;
 
 	M_ASSERTVALID(m);
 	M_ASSERTPKTHDR(m);
 
 	ip = mtod(m, struct ip *);	/* m may have changed by pfil hook */
 	dest.s_addr = ip->ip_dst.s_addr;
 
 	/*
 	 * Destination address changed?
 	 */
 	if (odest.s_addr != dest.s_addr) {
 		/*
 		 * Is it now for a local address on this host?
 		 */
 		if (in_localip(dest))
 			goto forwardlocal;
 		/*
 		 * Go on with new destination address
 		 */
 	}
 #ifdef IPFIREWALL_FORWARD
 	if (m->m_flags & M_FASTFWD_OURS) {
 		/*
 		 * ipfw changed it for a local address on this host.
 		 */
 		goto forwardlocal;
 	}
 #endif /* IPFIREWALL_FORWARD */
 
 passin:
 	/*
 	 * Step 4: decrement TTL and look up route
 	 */
 
 	/*
 	 * Check TTL
 	 */
 #ifdef IPSTEALTH
 	if (!ipstealth) {
 #endif
 	if (ip->ip_ttl <= IPTTLDEC) {
 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
 		return NULL;	/* mbuf already free'd */
 	}
 
 	/*
 	 * Decrement the TTL and incrementally change the IP header checksum.
 	 * Don't bother doing this with hw checksum offloading, it's faster
 	 * doing it right here.
 	 */
 	ip->ip_ttl -= IPTTLDEC;
 	if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8))
 		ip->ip_sum -= ~htons(IPTTLDEC << 8);
 	else
 		ip->ip_sum += htons(IPTTLDEC << 8);
 #ifdef IPSTEALTH
 	}
 #endif
 
 	/*
 	 * Find route to destination.
 	 */
 	if ((dst = ip_findroute(&ro, dest, m)) == NULL)
 		return NULL;	/* icmp unreach already sent */
 	ifp = ro.ro_rt->rt_ifp;
 
 	/*
 	 * Immediately drop blackholed traffic, and directed broadcasts
 	 * for either the all-ones or all-zero subnet addresses on
 	 * locally attached networks.
 	 */
 	if ((ro.ro_rt->rt_flags & (RTF_BLACKHOLE|RTF_BROADCAST)) != 0)
 		goto drop;
 
 	/*
 	 * Step 5: outgoing firewall packet processing
 	 */
 
 	/*
 	 * Run through list of hooks for output packets.
 	 */
 	if (!PFIL_HOOKED(&inet_pfil_hook))
 		goto passout;
 
 	if (pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, NULL) || m == NULL) {
 		goto drop;
 	}
 
 	M_ASSERTVALID(m);
 	M_ASSERTPKTHDR(m);
 
 	ip = mtod(m, struct ip *);
 	dest.s_addr = ip->ip_dst.s_addr;
 
 	/*
 	 * Destination address changed?
 	 */
 #ifndef IPFIREWALL_FORWARD
 	if (odest.s_addr != dest.s_addr) {
 #else
 	fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
 	if (odest.s_addr != dest.s_addr || fwd_tag != NULL) {
 #endif /* IPFIREWALL_FORWARD */
 		/*
 		 * Is it now for a local address on this host?
 		 */
 #ifndef IPFIREWALL_FORWARD
 		if (in_localip(dest)) {
 #else
 		if (m->m_flags & M_FASTFWD_OURS || in_localip(dest)) {
 #endif /* IPFIREWALL_FORWARD */
 forwardlocal:
 			/*
 			 * Return packet for processing by ip_input().
 			 * Keep host byte order as expected at ip_input's
 			 * "ours"-label.
 			 */
 			m->m_flags |= M_FASTFWD_OURS;
 			if (ro.ro_rt)
 				RTFREE(ro.ro_rt);
 			return m;
 		}
 		/*
 		 * Redo route lookup with new destination address
 		 */
 #ifdef IPFIREWALL_FORWARD
 		if (fwd_tag) {
 			dest.s_addr = ((struct sockaddr_in *)
 				    (fwd_tag + 1))->sin_addr.s_addr;
 			m_tag_delete(m, fwd_tag);
 		}
 #endif /* IPFIREWALL_FORWARD */
 		RTFREE(ro.ro_rt);
 		if ((dst = ip_findroute(&ro, dest, m)) == NULL)
 			return NULL;	/* icmp unreach already sent */
 		ifp = ro.ro_rt->rt_ifp;
 	}
 
 passout:
 	/*
 	 * Step 6: send off the packet
 	 */
 
 	/*
 	 * Check if route is dampned (when ARP is unable to resolve)
 	 */
 	if ((ro.ro_rt->rt_flags & RTF_REJECT) &&
 	    (ro.ro_rt->rt_rmx.rmx_expire == 0 ||
 	    time_uptime < ro.ro_rt->rt_rmx.rmx_expire)) {
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
 		goto consumed;
 	}
 
 #ifndef ALTQ
 	/*
 	 * Check if there is enough space in the interface queue
 	 */
 	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
 	    ifp->if_snd.ifq_maxlen) {
 		ipstat.ips_odropped++;
 		/* would send source quench here but that is depreciated */
 		goto drop;
 	}
 #endif
 
 	/*
 	 * Check if media link state of interface is not down
 	 */
 	if (ifp->if_link_state == LINK_STATE_DOWN) {
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
 		goto consumed;
 	}
 
 	/*
 	 * Check if packet fits MTU or if hardware will fragment for us
 	 */
 	if (ro.ro_rt->rt_rmx.rmx_mtu)
 		mtu = min(ro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
 	else
 		mtu = ifp->if_mtu;
 
 	if (ip->ip_len <= mtu ||
 	    (ifp->if_hwassist & CSUM_FRAGMENT && (ip->ip_off & IP_DF) == 0)) {
 		/*
 		 * Restore packet header fields to original values
 		 */
 		ip->ip_len = htons(ip->ip_len);
 		ip->ip_off = htons(ip->ip_off);
 		/*
 		 * Send off the packet via outgoing interface
 		 */
 		error = (*ifp->if_output)(ifp, m,
 				(struct sockaddr *)dst, ro.ro_rt);
 	} else {
 		/*
 		 * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery
 		 */
 		if (ip->ip_off & IP_DF) {
 			ipstat.ips_cantfrag++;
 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
 				0, mtu);
 			goto consumed;
 		} else {
 			/*
 			 * We have to fragment the packet
 			 */
 			m->m_pkthdr.csum_flags |= CSUM_IP;
 			/*
 			 * ip_fragment expects ip_len and ip_off in host byte
 			 * order but returns all packets in network byte order
 			 */
 			if (ip_fragment(ip, &m, mtu, ifp->if_hwassist,
 					(~ifp->if_hwassist & CSUM_DELAY_IP))) {
 				goto drop;
 			}
 			KASSERT(m != NULL, ("null mbuf and no error"));
 			/*
 			 * Send off the fragments via outgoing interface
 			 */
 			error = 0;
 			do {
 				m0 = m->m_nextpkt;
 				m->m_nextpkt = NULL;
 
 				error = (*ifp->if_output)(ifp, m,
 					(struct sockaddr *)dst, ro.ro_rt);
 				if (error)
 					break;
 			} while ((m = m0) != NULL);
 			if (error) {
 				/* Reclaim remaining fragments */
 				for (m = m0; m; m = m0) {
 					m0 = m->m_nextpkt;
 					m_freem(m);
 				}
 			} else
 				ipstat.ips_fragmented++;
 		}
 	}
 
 	if (error != 0)
 		ipstat.ips_odropped++;
 	else {
 		ro.ro_rt->rt_rmx.rmx_pksent++;
 		ipstat.ips_forward++;
 		ipstat.ips_fastforward++;
 	}
 consumed:
 	RTFREE(ro.ro_rt);
 	return NULL;
 drop:
 	if (m)
 		m_freem(m);
 	if (ro.ro_rt)
 		RTFREE(ro.ro_rt);
 	return NULL;
 }
Index: head/sys/netinet/ip_fw.h
===================================================================
--- head/sys/netinet/ip_fw.h	(revision 178887)
+++ head/sys/netinet/ip_fw.h	(revision 178888)
@@ -1,662 +1,666 @@
 /*-
  * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _IPFW2_H
 #define _IPFW2_H
 
 /*
  * The kernel representation of ipfw rules is made of a list of
  * 'instructions' (for all practical purposes equivalent to BPF
  * instructions), which specify which fields of the packet
  * (or its metadata) should be analysed.
  *
  * Each instruction is stored in a structure which begins with
  * "ipfw_insn", and can contain extra fields depending on the
  * instruction type (listed below).
  * Note that the code is written so that individual instructions
  * have a size which is a multiple of 32 bits. This means that, if
  * such structures contain pointers or other 64-bit entities,
  * (there is just one instance now) they may end up unaligned on
  * 64-bit architectures, so they must be handled with care.
  *
  * "enum ipfw_opcodes" are the opcodes supported. We can have up
  * to 256 different opcodes. When adding new opcodes, they should
  * be appended to the end of the opcode list before O_LAST_OPCODE,
  * this will prevent the ABI from being broken, otherwise users
  * will have to recompile ipfw(8) when they update the kernel.
  */
 
 enum ipfw_opcodes {		/* arguments (4 byte each)	*/
 	O_NOP,
 
 	O_IP_SRC,		/* u32 = IP			*/
 	O_IP_SRC_MASK,		/* ip = IP/mask			*/
 	O_IP_SRC_ME,		/* none				*/
 	O_IP_SRC_SET,		/* u32=base, arg1=len, bitmap	*/
 
 	O_IP_DST,		/* u32 = IP			*/
 	O_IP_DST_MASK,		/* ip = IP/mask			*/
 	O_IP_DST_ME,		/* none				*/
 	O_IP_DST_SET,		/* u32=base, arg1=len, bitmap	*/
 
 	O_IP_SRCPORT,		/* (n)port list:mask 4 byte ea	*/
 	O_IP_DSTPORT,		/* (n)port list:mask 4 byte ea	*/
 	O_PROTO,		/* arg1=protocol		*/
 
 	O_MACADDR2,		/* 2 mac addr:mask		*/
 	O_MAC_TYPE,		/* same as srcport		*/
 
 	O_LAYER2,		/* none				*/
 	O_IN,			/* none				*/
 	O_FRAG,			/* none				*/
 
 	O_RECV,			/* none				*/
 	O_XMIT,			/* none				*/
 	O_VIA,			/* none				*/
 
 	O_IPOPT,		/* arg1 = 2*u8 bitmap		*/
 	O_IPLEN,		/* arg1 = len			*/
 	O_IPID,			/* arg1 = id			*/
 
 	O_IPTOS,		/* arg1 = id			*/
 	O_IPPRECEDENCE,		/* arg1 = precedence << 5	*/
 	O_IPTTL,		/* arg1 = TTL			*/
 
 	O_IPVER,		/* arg1 = version		*/
 	O_UID,			/* u32 = id			*/
 	O_GID,			/* u32 = id			*/
 	O_ESTAB,		/* none (tcp established)	*/
 	O_TCPFLAGS,		/* arg1 = 2*u8 bitmap		*/
 	O_TCPWIN,		/* arg1 = desired win		*/
 	O_TCPSEQ,		/* u32 = desired seq.		*/
 	O_TCPACK,		/* u32 = desired seq.		*/
 	O_ICMPTYPE,		/* u32 = icmp bitmap		*/
 	O_TCPOPTS,		/* arg1 = 2*u8 bitmap		*/
 
 	O_VERREVPATH,		/* none				*/
 	O_VERSRCREACH,		/* none				*/
 
 	O_PROBE_STATE,		/* none				*/
 	O_KEEP_STATE,		/* none				*/
 	O_LIMIT,		/* ipfw_insn_limit		*/
 	O_LIMIT_PARENT,		/* dyn_type, not an opcode.	*/
 
 	/*
 	 * These are really 'actions'.
 	 */
 
 	O_LOG,			/* ipfw_insn_log		*/
 	O_PROB,			/* u32 = match probability	*/
 
 	O_CHECK_STATE,		/* none				*/
 	O_ACCEPT,		/* none				*/
 	O_DENY,			/* none 			*/
 	O_REJECT,		/* arg1=icmp arg (same as deny)	*/
 	O_COUNT,		/* none				*/
 	O_SKIPTO,		/* arg1=next rule number	*/
 	O_PIPE,			/* arg1=pipe number		*/
 	O_QUEUE,		/* arg1=queue number		*/
 	O_DIVERT,		/* arg1=port number		*/
 	O_TEE,			/* arg1=port number		*/
 	O_FORWARD_IP,		/* fwd sockaddr			*/
 	O_FORWARD_MAC,		/* fwd mac			*/
 	O_NAT,                  /* nope                         */
 
 	/*
 	 * More opcodes.
 	 */
 	O_IPSEC,		/* has ipsec history 		*/
 	O_IP_SRC_LOOKUP,	/* arg1=table number, u32=value	*/
 	O_IP_DST_LOOKUP,	/* arg1=table number, u32=value	*/
 	O_ANTISPOOF,		/* none				*/
 	O_JAIL,			/* u32 = id			*/
 	O_ALTQ,			/* u32 = altq classif. qid	*/
 	O_DIVERTED,		/* arg1=bitmap (1:loop, 2:out)	*/
 	O_TCPDATALEN,		/* arg1 = tcp data len		*/
 	O_IP6_SRC,		/* address without mask		*/
 	O_IP6_SRC_ME,		/* my addresses			*/
 	O_IP6_SRC_MASK,		/* address with the mask	*/
 	O_IP6_DST,
 	O_IP6_DST_ME,
 	O_IP6_DST_MASK,
 	O_FLOW6ID,		/* for flow id tag in the ipv6 pkt */
 	O_ICMP6TYPE,		/* icmp6 packet type filtering	*/
 	O_EXT_HDR,		/* filtering for ipv6 extension header */
 	O_IP6,
 
 	/*
 	 * actions for ng_ipfw
 	 */
 	O_NETGRAPH,		/* send to ng_ipfw		*/
 	O_NGTEE,		/* copy to ng_ipfw		*/
 
 	O_IP4,
 
 	O_UNREACH6,		/* arg1=icmpv6 code arg (deny)  */
 
 	O_TAG,   		/* arg1=tag number */
 	O_TAGGED,		/* arg1=tag number */
 
+	O_SETFIB,		/* arg1=FIB number */
+	O_FIB,			/* arg1=FIB desired fib number */
+
 	O_LAST_OPCODE		/* not an opcode!		*/
 };
 
 /*
  * Extension headers are filtered only for presence, using a bit
  * vector with one flag for each header type.
  */
 #define EXT_FRAGMENT	0x1
 #define EXT_HOPOPTS	0x2
 #define EXT_ROUTING	0x4
 #define EXT_AH		0x8
 #define EXT_ESP		0x10
 #define EXT_DSTOPTS	0x20
 #define EXT_RTHDR0		0x40
 #define EXT_RTHDR2		0x80
 
 /*
  * Template for instructions.
  *
  * ipfw_insn is used for all instructions which require no operands,
  * a single 16-bit value (arg1), or a couple of 8-bit values.
  *
  * For other instructions which require different/larger arguments
  * we have derived structures, ipfw_insn_*.
  *
  * The size of the instruction (in 32-bit words) is in the low
  * 6 bits of "len". The 2 remaining bits are used to implement
  * NOT and OR on individual instructions. Given a type, you can
  * compute the length to be put in "len" using F_INSN_SIZE(t)
  *
  * F_NOT	negates the match result of the instruction.
  *
  * F_OR		is used to build or blocks. By default, instructions
  *		are evaluated as part of a logical AND. An "or" block
  *		{ X or Y or Z } contains F_OR set in all but the last
  *		instruction of the block. A match will cause the code
  *		to skip past the last instruction of the block.
  *
  * NOTA BENE: in a couple of places we assume that
  *	sizeof(ipfw_insn) == sizeof(u_int32_t)
  * this needs to be fixed.
  *
  */
 typedef struct	_ipfw_insn {	/* template for instructions */
 	enum ipfw_opcodes	opcode:8;	/* opcode, stored in 8 bits */
 	u_int8_t	len;	/* size in 32-bit words (low 6 bits) + F_NOT/F_OR */
 #define	F_NOT		0x80	/* negate the match result */
 #define	F_OR		0x40	/* instruction belongs to an "or" block */
 #define	F_LEN_MASK	0x3f	/* bits of len that hold the size */
 #define	F_LEN(cmd)	((cmd)->len & F_LEN_MASK)	/* size with flags stripped */
 
 	u_int16_t	arg1;	/* 16-bit operand; meaning depends on the opcode */
 } ipfw_insn;
 
 /*
  * The F_INSN_SIZE(type) computes the size, in 4-byte words, of
  * a given type.
  */
 #define	F_INSN_SIZE(t)	((sizeof (t))/sizeof(u_int32_t))
 
 #define MTAG_IPFW	1148380143	/* IPFW-tagged cookie */
 
 /*
  * This is used to store an array of 16-bit entries (ports etc.)
  */
 typedef struct	_ipfw_insn_u16 {
 	ipfw_insn o;
 	u_int16_t ports[2];	/* there may be more */
 } ipfw_insn_u16;
 
 /*
  * This is used to store an array of 32-bit entries
  * (uid, single IPv4 addresses etc.)
  */
 typedef struct	_ipfw_insn_u32 {
 	ipfw_insn o;
 	u_int32_t d[1];	/* one or more */
 } ipfw_insn_u32;
 
 /*
  * This is used to store IP addr-mask pairs.
  */
 typedef struct	_ipfw_insn_ip {
 	ipfw_insn o;
 	struct in_addr	addr;
 	struct in_addr	mask;
 } ipfw_insn_ip;
 
 /*
  * This is used to forward to a given address (ip).
  */
 typedef struct  _ipfw_insn_sa {
 	ipfw_insn o;
 	struct sockaddr_in sa;
 } ipfw_insn_sa;
 
 /*
  * This is used for MAC addr-mask pairs.
  */
 typedef struct	_ipfw_insn_mac {
 	ipfw_insn o;
 	u_char addr[12];	/* dst[6] + src[6] */
 	u_char mask[12];	/* dst[6] + src[6] */
 } ipfw_insn_mac;
 
 /*
  * This is used for interface match rules (recv xx, xmit xx).
  */
 typedef struct	_ipfw_insn_if {
 	ipfw_insn o;
 	union {
 		struct in_addr ip;
 		int glob;
 	} p;
 	char name[IFNAMSIZ];
 } ipfw_insn_if;
 
 /*
  * This is used for storing an altq queue id number.
  */
 typedef struct _ipfw_insn_altq {
 	ipfw_insn	o;
 	u_int32_t	qid;
 } ipfw_insn_altq;
 
 /*
  * This is used for limit rules.
  */
 typedef struct	_ipfw_insn_limit {
 	ipfw_insn o;
 	u_int8_t _pad;
 	u_int8_t limit_mask;	/* combination of DYN_* below	*/
 #define	DYN_SRC_ADDR	0x1
 #define	DYN_SRC_PORT	0x2
 #define	DYN_DST_ADDR	0x4
 #define	DYN_DST_PORT	0x8
 
 	u_int16_t conn_limit;
 } ipfw_insn_limit;
 
 /*
  * This is used for log instructions.
  */
 typedef struct  _ipfw_insn_log {
         ipfw_insn o;
 	u_int32_t max_log;	/* how many do we log -- 0 = all */
 	u_int32_t log_left;	/* how many left to log 	*/
 } ipfw_insn_log;
 
 /*
  * Data structures required by both ipfw(8) and ipfw(4) but not part of the
  * management API are protected by IPFW_INTERNAL.
  */
 #ifdef IPFW_INTERNAL
 /* Server pool support (LSNAT). */
 struct cfg_spool {
 	LIST_ENTRY(cfg_spool)   _next;          /* chain of spool instances */
 	struct in_addr          addr;
 	u_short                 port;
 };
 #endif
 
 /* Redirect modes id. */
 #define REDIR_ADDR      0x01
 #define REDIR_PORT      0x02
 #define REDIR_PROTO     0x04
 
 #ifdef IPFW_INTERNAL
 /* Nat redirect configuration. */
 struct cfg_redir {
 	LIST_ENTRY(cfg_redir)   _next;          /* chain of redir instances */
 	u_int16_t               mode;           /* type of redirect mode */
 	struct in_addr	        laddr;          /* local ip address */
 	struct in_addr	        paddr;          /* public ip address */
 	struct in_addr	        raddr;          /* remote ip address */
 	u_short                 lport;          /* local port */
 	u_short                 pport;          /* public port */
 	u_short                 rport;          /* remote port  */
 	u_short                 pport_cnt;      /* number of public ports */
 	u_short                 rport_cnt;      /* number of remote ports */
 	int                     proto;          /* protocol: tcp/udp */
 	struct alias_link       **alink;	
 	/* num of entry in spool chain */
 	u_int16_t               spool_cnt;      
 	/* chain of spool instances */
 	LIST_HEAD(spool_chain, cfg_spool) spool_chain;
 };
 #endif
 
 #define NAT_BUF_LEN     1024
 
 #ifdef IPFW_INTERNAL
 /* Nat configuration data struct. */
 struct cfg_nat {
 	/* chain of nat instances */
 	LIST_ENTRY(cfg_nat)     _next;
 	int                     id;                     /* nat id */
 	struct in_addr          ip;                     /* nat ip address */
 	char                    if_name[IF_NAMESIZE];   /* interface name */
 	int                     mode;                   /* aliasing mode */
 	struct libalias	        *lib;                   /* libalias instance */
 	/* number of entry in spool chain */
 	int                     redir_cnt;              
 	/* chain of redir instances */
 	LIST_HEAD(redir_chain, cfg_redir) redir_chain;  
 };
 #endif
 
 #define SOF_NAT         sizeof(struct cfg_nat)
 #define SOF_REDIR       sizeof(struct cfg_redir)
 #define SOF_SPOOL       sizeof(struct cfg_spool)
 
 /* Nat command. */
 typedef struct	_ipfw_insn_nat {
  	ipfw_insn	o;
  	struct cfg_nat *nat;	
 } ipfw_insn_nat;
 
 /*
  * Apply an IPv6 mask to an IPv6 address in place: each of the four
  * 32-bit words of *addr is ANDed with the corresponding word of *mask.
  * Both arguments are pointers and are evaluated several times, so they
  * must not have side effects.
  *
  * The body is wrapped in do { } while (0) so that the macro behaves as
  * one statement (for instance as the body of an unbraced "if"); the
  * caller supplies the terminating semicolon.
  */
 #define APPLY_MASK(addr,mask) do {					\
 	(addr)->__u6_addr.__u6_addr32[0] &= (mask)->__u6_addr.__u6_addr32[0]; \
 	(addr)->__u6_addr.__u6_addr32[1] &= (mask)->__u6_addr.__u6_addr32[1]; \
 	(addr)->__u6_addr.__u6_addr32[2] &= (mask)->__u6_addr.__u6_addr32[2]; \
 	(addr)->__u6_addr.__u6_addr32[3] &= (mask)->__u6_addr.__u6_addr32[3]; \
 } while (0)
 
 /* Structure for ipv6 */
 typedef struct _ipfw_insn_ip6 {
        ipfw_insn o;
        struct in6_addr addr6;
        struct in6_addr mask6;
 } ipfw_insn_ip6;
 
 /*
  * Used to support icmp6 types: the instruction header followed by the
  * seven 32-bit words d[0..6].
  * NOTE(review): presumably a bitmap with one bit per ICMPv6 type --
  * confirm against the code that fills and tests it.
  */
 typedef struct _ipfw_insn_icmp6 {
        ipfw_insn o;
        uint32_t d[7]; /* XXX This number is related to the netinet/icmp6.h
                        *     define ICMP6_MAXTYPE
                        *     as follows: n = ICMP6_MAXTYPE/32 + 1
                        *     (ICMP6_MAXTYPE is currently 203, hence n = 7)
                        */
 } ipfw_insn_icmp6;
 
 /*
  * Here we have the structure representing an ipfw rule.
  *
  * It starts with a general area (with link fields and counters)
  * followed by an array of one or more instructions, which the code
  * accesses as an array of 32-bit values.
  *
  * Given a rule pointer  r:
  *
  *  r->cmd		is the start of the first instruction.
  *  ACTION_PTR(r)	is the start of the first action (things to do
  *			once a rule matched).
  *
  * When assembling instruction, remember the following:
  *
  *  + if a rule has a "keep-state" (or "limit") option, then the
  *	first instruction (at r->cmd) MUST BE an O_PROBE_STATE
  *  + if a rule has a "log" option, then the first action
  *	(at ACTION_PTR(r)) MUST be O_LOG
  *  + if a rule has an "altq" option, it comes after "log"
  *  + if a rule has an O_TAG option, it comes after "log" and "altq"
  *
  * NOTE: we use a simple linked list of rules because we never need
  * 	to delete a rule without scanning the list. We do not use
  *	queue(3) macros for portability and readability.
  */
 
 struct ip_fw {
 	struct ip_fw	*next;		/* linked list of rules		*/
 	struct ip_fw	*next_rule;	/* ptr to next [skipto] rule	*/
 	/* 'next_rule' is used to pass up 'set_disable' status		*/
 
 	u_int16_t	act_ofs;	/* offset of action in 32-bit units */
 	u_int16_t	cmd_len;	/* # of 32-bit words in cmd	*/
 	u_int16_t	rulenum;	/* rule number			*/
 	u_int8_t	set;		/* rule set (0..31)		*/
 #define	RESVD_SET	31	/* set for default and persistent rules */
 	u_int8_t	_pad;		/* padding			*/
 
 	/* These fields are present in all rules.			*/
 	u_int64_t	pcnt;		/* Packet counter		*/
 	u_int64_t	bcnt;		/* Byte counter			*/
 	u_int32_t	timestamp;	/* tv_sec of last match		*/
 
 	ipfw_insn	cmd[1];		/* storage for commands		*/
 };
 
 /*
  * Pointer to the first action instruction of a rule.  The instruction
  * stream starting at rule->cmd is addressed in 32-bit words and
  * rule->act_ofs is the offset of the first action in those units.
  * The whole expansion is parenthesized so that a postfix operator can
  * follow it, as in ACTION_PTR(rule)->opcode.
  */
 #define ACTION_PTR(rule)				\
 	((ipfw_insn *)( (u_int32_t *)((rule)->cmd) + ((rule)->act_ofs) ))
 
 #define RULESIZE(rule)  (sizeof(struct ip_fw) + \
 	((struct ip_fw *)(rule))->cmd_len * 4 - 4)
 
 /*
  * This structure is used as a flow mask and a flow id for various
  * parts of the code.
  */
 struct ipfw_flow_id {
 	u_int32_t	dst_ip;
 	u_int32_t	src_ip;
 	u_int16_t	dst_port;
 	u_int16_t	src_port;
+	u_int8_t	fib;	/* FIB number (cf. O_SETFIB/O_FIB) -- TODO confirm */
 	u_int8_t	proto;	/* NOTE(review): offset moved by the new fib field; confirm all users of this struct are rebuilt */
 	u_int8_t	flags;	/* protocol-specific flags */
 	uint8_t		addr_type; /* 4 = ipv4, 6 = ipv6, 1=ether ? */
 	struct in6_addr dst_ip6;	/* could also store MAC addr! */
 	struct in6_addr src_ip6;
 	u_int32_t	flow_id6;
 	u_int32_t	frag_id6;
 };
 
 #define IS_IP6_FLOW_ID(id)	((id)->addr_type == 6)
 
 /*
  * Dynamic ipfw rule.
  */
 typedef struct _ipfw_dyn_rule ipfw_dyn_rule;
 
 struct _ipfw_dyn_rule {
 	ipfw_dyn_rule	*next;		/* linked list of rules.	*/
 	struct ip_fw *rule;		/* pointer to rule		*/
 	/* 'rule' is used to pass up the rule number (from the parent)	*/
 
 	ipfw_dyn_rule *parent;		/* pointer to parent rule	*/
 	u_int64_t	pcnt;		/* packet match counter		*/
 	u_int64_t	bcnt;		/* byte match counter		*/
 	struct ipfw_flow_id id;		/* (masked) flow id		*/
 	u_int32_t	expire;		/* expire time			*/
 	u_int32_t	bucket;		/* which bucket in hash table	*/
 	u_int32_t	state;		/* state of this rule (typically a
 					 * combination of TCP flags)
 					 */
 	u_int32_t	ack_fwd;	/* most recent ACKs in forward	*/
 	u_int32_t	ack_rev;	/* and reverse directions (used	*/
 					/* to generate keepalives)	*/
 	u_int16_t	dyn_type;	/* rule type			*/
 	u_int16_t	count;		/* refcount			*/
 };
 
 /*
  * Definitions for IP option names.
  */
 #define	IP_FW_IPOPT_LSRR	0x01
 #define	IP_FW_IPOPT_SSRR	0x02
 #define	IP_FW_IPOPT_RR		0x04
 #define	IP_FW_IPOPT_TS		0x08
 
 /*
  * Definitions for TCP option names.
  */
 #define	IP_FW_TCPOPT_MSS	0x01
 #define	IP_FW_TCPOPT_WINDOW	0x02
 #define	IP_FW_TCPOPT_SACK	0x04
 #define	IP_FW_TCPOPT_TS		0x08
 #define	IP_FW_TCPOPT_CC		0x10
 
 #define	ICMP_REJECT_RST		0x100	/* fake ICMP code (send a TCP RST) */
 #define	ICMP6_UNREACH_RST	0x100	/* fake ICMPv6 code (send a TCP RST) */
 
 /*
  * These are used for lookup tables.
  */
 typedef struct	_ipfw_table_entry {
 	in_addr_t	addr;		/* network address		*/
 	u_int32_t	value;		/* value			*/
 	u_int16_t	tbl;		/* table number			*/
 	u_int8_t	masklen;	/* mask length			*/
 } ipfw_table_entry;
 
 typedef struct	_ipfw_table {
 	u_int32_t	size;		/* size of entries in bytes	*/
 	u_int32_t	cnt;		/* # of entries			*/
 	u_int16_t	tbl;		/* table number			*/
 	ipfw_table_entry ent[0];	/* entries			*/
 } ipfw_table;
 
 #define IP_FW_TABLEARG	65535
 
 /*
  * Main firewall chains definitions and global var's definitions.
  */
 #ifdef _KERNEL
 
 /* Return values from ipfw_chk() */
 enum {
 	IP_FW_PASS = 0,
 	IP_FW_DENY,
 	IP_FW_DIVERT,
 	IP_FW_TEE,
 	IP_FW_DUMMYNET,
 	IP_FW_NETGRAPH,
 	IP_FW_NGTEE,
 	IP_FW_NAT,
 };
 
 /* flags for divert mtag */
 #define	IP_FW_DIVERT_LOOPBACK_FLAG	0x00080000
 #define	IP_FW_DIVERT_OUTPUT_FLAG	0x00100000
 
 /*
  * Structure for collecting parameters to dummynet for ip6_output forwarding
  */
 struct _ip6dn_args {
        struct ip6_pktopts *opt_or;
        struct route_in6 ro_or;
        int flags_or;
        struct ip6_moptions *im6o_or;
        struct ifnet *origifp_or;
        struct ifnet *ifp_or;
        struct sockaddr_in6 dst_or;
        u_long mtu_or;
        struct route_in6 ro_pmtu_or;
 };
 
 /*
  * Arguments for calling ipfw_chk() and dummynet_io(). We put them
  * all into a structure because this way it is easier and more
  * efficient to pass variables around and extend the interface.
  */
 struct ip_fw_args {
 	struct mbuf	*m;		/* the mbuf chain		*/
 	struct ifnet	*oif;		/* output interface		*/
 	struct sockaddr_in *next_hop;	/* forward address		*/
 	struct ip_fw	*rule;		/* matching rule		*/
 	struct ether_header *eh;	/* for bridged packets		*/
 
 	struct ipfw_flow_id f_id;	/* grabbed from IP header	*/
 	u_int32_t	cookie;		/* a cookie depending on rule action */
 	struct inpcb	*inp;
 
 	struct _ip6dn_args	dummypar; /* dummynet->ip6_output */
 	struct sockaddr_in hopstore;	/* store here if cannot use a pointer */
 };
 
 /*
  * Function definitions.
  */
 
 /* Firewall hooks */
 struct sockopt;
 struct dn_flow_set;
 
 int ipfw_check_in(void *, struct mbuf **, struct ifnet *, int, struct inpcb *inp);
 int ipfw_check_out(void *, struct mbuf **, struct ifnet *, int, struct inpcb *inp);
 
 int ipfw_chk(struct ip_fw_args *);
 
 int ipfw_init(void);
 void ipfw_destroy(void);
 
 typedef int ip_fw_ctl_t(struct sockopt *);
 extern ip_fw_ctl_t *ip_fw_ctl_ptr;
 extern int fw_one_pass;
 extern int fw_enable;
 #ifdef INET6
 extern int fw6_enable;
 #endif
 
 /* For kernel ipfw_ether and ipfw_bridge. */
 typedef	int ip_fw_chk_t(struct ip_fw_args *args);
 extern	ip_fw_chk_t	*ip_fw_chk_ptr;
 #define	IPFW_LOADED	(ip_fw_chk_ptr != NULL)
 
 #ifdef IPFW_INTERNAL
 
 #define	IPFW_TABLES_MAX		128
 struct ip_fw_chain {
 	struct ip_fw	*rules;		/* list of rules */
 	struct ip_fw	*reap;		/* list of rules to reap */
 	LIST_HEAD(, cfg_nat) nat;       /* list of nat entries */
 	struct radix_node_head *tables[IPFW_TABLES_MAX];
 	struct rwlock	rwmtx;
 };
 #define	IPFW_LOCK_INIT(_chain) \
 	rw_init(&(_chain)->rwmtx, "IPFW static rules")
 #define	IPFW_LOCK_DESTROY(_chain)	rw_destroy(&(_chain)->rwmtx)
 #define	IPFW_WLOCK_ASSERT(_chain)	rw_assert(&(_chain)->rwmtx, RA_WLOCKED)
 
 #define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
 #define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx)
 #define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
 #define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
 
 /*
  * Search the nat list of chain 'l' for the instance whose id equals 'i'.
  * 'p' is the iteration variable: on exit it points to the matching
  * cfg_nat, or holds whatever LIST_FOREACH leaves in it once the list is
  * exhausted (NULL with the usual queue(3) implementation).
  * 'l' is an lvalue of the chain structure, not a pointer; it is
  * parenthesized so that expressions such as *chainp can be passed.
  */
 #define LOOKUP_NAT(l, i, p) do {					\
 		LIST_FOREACH((p), &((l).nat), _next) {			\
 			if ((p)->id == (i)) {				\
 				break;					\
 			} 						\
 		}							\
 	} while (0)
 
 typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *);
 typedef int ipfw_nat_cfg_t(struct sockopt *);
 #endif
 
 #endif /* _KERNEL */
 #endif /* _IPFW2_H */
Index: head/sys/netinet/ip_fw2.c
===================================================================
--- head/sys/netinet/ip_fw2.c	(revision 178887)
+++ head/sys/netinet/ip_fw2.c	(revision 178888)
@@ -1,4568 +1,4610 @@
 /*-
  * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #define        DEB(x)
 #define        DDB(x) x
 
 /*
  * Implement IP packet firewall (new version)
  */
 
 #if !defined(KLD_MODULE)
 #include "opt_ipfw.h"
 #include "opt_ipdivert.h"
 #include "opt_ipdn.h"
 #include "opt_inet.h"
 #ifndef INET
 #error IPFIREWALL requires INET.
 #endif /* INET */
 #endif
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/condvar.h>
 #include <sys/eventhandler.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/jail.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/ucred.h>
 #include <net/if.h>
 #include <net/radix.h>
 #include <net/route.h>
 #include <net/pf_mtag.h>
 
 #define	IPFW_INTERNAL	/* Access to protected data structures in ip_fw.h. */
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_divert.h>
 #include <netinet/ip_dummynet.h>
 #include <netinet/ip_carp.h>
 #include <netinet/pim.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcpip.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <netinet/sctp.h>
 #include <netgraph/ng_ipfw.h>
 
 #include <altq/if_altq.h>
 
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #ifdef INET6
 #include <netinet6/scope6_var.h>
 #endif
 
 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */
 
 #include <machine/in_cksum.h>	/* XXX for in_cksum */
 
 #include <security/mac/mac_framework.h>
 
 /*
  * set_disable contains one bit per set value (0..31).
  * If the bit is set, all rules with the corresponding set
  * are disabled. Set RESVD_SET(31) is reserved for the default rule
  * and rules that are not deleted by the flush command,
  * and CANNOT be disabled.
  * Rules in set RESVD_SET can only be deleted explicitly.
  */
 static u_int32_t set_disable;
 
 static int fw_verbose;
 static int verbose_limit;
 
 static struct callout ipfw_timeout;
 static uma_zone_t ipfw_dyn_rule_zone;
 #define	IPFW_DEFAULT_RULE	65535
 
 /*
  * Data structure to cache our ucred related
  * information. This structure only gets used if
  * the user specified UID/GID based constraints in
  * a firewall rule.
  */
 struct ip_fw_ugid {
 	gid_t		fw_groups[NGROUPS];
 	int		fw_ngroups;
 	uid_t		fw_uid;
 	int		fw_prid;
 };
 
 /*
  * list of rules for layer 3
  */
 struct ip_fw_chain layer3_chain;
 
 MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
 MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
 #define IPFW_NAT_LOADED (ipfw_nat_ptr != NULL)
 ipfw_nat_t *ipfw_nat_ptr = NULL;
 ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
 ipfw_nat_cfg_t *ipfw_nat_del_ptr;
 ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
 ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
 
 struct table_entry {
 	struct radix_node	rn[2];
 	struct sockaddr_in	addr, mask;
 	u_int32_t		value;
 };
 
 static int fw_debug = 1;
 static int autoinc_step = 100; /* bounded to 1..1000 in add_rule() */
 
 extern int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);
 
 #ifdef SYSCTL_NODE
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &fw_enable, 0,
     ipfw_chg_hook, "I", "Enable ipfw");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW,
     &autoinc_step, 0, "Rule number autincrement step");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
     CTLFLAG_RW | CTLFLAG_SECURE3,
     &fw_one_pass, 0,
     "Only do a single pass through ipfw when using dummynet(4)");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW,
     &fw_debug, 0, "Enable printing of debug ip_fw statements");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose,
     CTLFLAG_RW | CTLFLAG_SECURE3,
     &fw_verbose, 0, "Log matches to ipfw rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW,
     &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged");
 
 /*
  * Description of dynamic rules.
  *
  * Dynamic rules are stored in lists accessed through a hash table
  * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can
  * be modified through the sysctl variable dyn_buckets which is
  * updated when the table becomes empty.
  *
  * XXX currently there is only one list, ipfw_dyn.
  *
  * When a packet is received, its address fields are first masked
  * with the mask defined for the rule, then hashed, then matched
  * against the entries in the corresponding list.
  * Dynamic rules can be used for different purposes:
  *  + stateful rules;
  *  + enforcing limits on the number of sessions;
  *  + in-kernel NAT (not implemented yet)
  *
  * The lifetime of dynamic rules is regulated by dyn_*_lifetime,
  * measured in seconds and depending on the flags.
  *
  * The total number of dynamic rules is stored in dyn_count.
  * The max number of dynamic rules is dyn_max. When we reach
  * the maximum number of rules we do not create anymore. This is
  * done to avoid consuming too much memory, but also too much
  * time when searching on each packet (ideally, we should try instead
  * to put a limit on the length of the list on each bucket...).
  *
  * Each dynamic rule holds a pointer to the parent ipfw rule so
  * we know what action to perform. Dynamic rules are removed when
  * the parent rule is deleted. XXX we should make them survive.
  *
  * There are some limitations with dynamic rules -- we do not
  * obey the 'randomized match', and we do not do multiple
  * passes through the firewall. XXX check the latter!!!
  */
 static ipfw_dyn_rule **ipfw_dyn_v = NULL;
 static u_int32_t dyn_buckets = 256; /* must be power of 2 */
 static u_int32_t curr_dyn_buckets = 256; /* must be power of 2 */
 
 static struct mtx ipfw_dyn_mtx;		/* mutex guarding dynamic rules */
 #define	IPFW_DYN_LOCK_INIT() \
 	mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF)
 #define	IPFW_DYN_LOCK_DESTROY()	mtx_destroy(&ipfw_dyn_mtx)
 #define	IPFW_DYN_LOCK()		mtx_lock(&ipfw_dyn_mtx)
 #define	IPFW_DYN_UNLOCK()	mtx_unlock(&ipfw_dyn_mtx)
 #define	IPFW_DYN_LOCK_ASSERT()	mtx_assert(&ipfw_dyn_mtx, MA_OWNED)
 
 /*
  * Timeouts for various events in handling dynamic rules.
  */
 static u_int32_t dyn_ack_lifetime = 300;
 static u_int32_t dyn_syn_lifetime = 20;
 static u_int32_t dyn_fin_lifetime = 1;
 static u_int32_t dyn_rst_lifetime = 1;
 static u_int32_t dyn_udp_lifetime = 10;
 static u_int32_t dyn_short_lifetime = 5;
 
 /*
  * Keepalives are sent if dyn_keepalive is set. They are sent every
  * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
  * seconds of lifetime of a rule.
  * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower
  * than dyn_keepalive_period.
  */
 
 static u_int32_t dyn_keepalive_interval = 20;
 static u_int32_t dyn_keepalive_period = 5;
 static u_int32_t dyn_keepalive = 1;	/* do send keepalives */
 
 static u_int32_t static_count;	/* # of static rules */
 static u_int32_t static_len;	/* size in bytes of static rules */
 static u_int32_t dyn_count;		/* # of dynamic rules */
 static u_int32_t dyn_max = 4096;	/* max # of dynamic rules */
 
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_RW,
     &dyn_buckets, 0, "Number of dyn. buckets");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD,
     &curr_dyn_buckets, 0, "Current Number of dyn. buckets");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD,
     &dyn_count, 0, "Number of dyn. rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW,
     &dyn_max, 0, "Max number of dyn. rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD,
     &static_count, 0, "Number of static rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW,
     &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW,
     &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW,
     &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW,
     &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW,
     &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW,
     &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW,
     &dyn_keepalive, 0, "Enable keepalives for dyn. rules");
 
 #ifdef INET6
 /*
  * IPv6 specific variables
  */
 SYSCTL_DECL(_net_inet6_ip6);
 
 static struct sysctl_ctx_list ip6_fw_sysctl_ctx;
 static struct sysctl_oid *ip6_fw_sysctl_tree;
 #endif /* INET6 */
 #endif /* SYSCTL_NODE */
 
 static int fw_deny_unknown_exthdrs = 1;
 
 
 /*
  * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T
  * Other macros just cast void * into the appropriate type
  */
 #define	L3HDR(T, ip)	((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
 #define	TCP(p)		((struct tcphdr *)(p))
 #define	SCTP(p)		((struct sctphdr *)(p))
 #define	UDP(p)		((struct udphdr *)(p))
 #define	ICMP(p)		((struct icmphdr *)(p))
 #define	ICMP6(p)	((struct icmp6_hdr *)(p))
 
 /*
  * Return 1 if the type of the ICMP header 'icmp' is selected in the
  * bitmap cmd->d[0], where bit N stands for ICMP type N, else 0.
  *
  * Only types 0..31 fit in the 32-bit bitmap.  A type that passes the
  * ICMP_MAXTYPE check but is 32 or larger never matches: shifting by the
  * width of the operand or more is undefined and could alias onto the
  * low bits.  The shift uses an unsigned constant so that bit 31 is
  * well defined as well.
  */
 static __inline int
 icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd)
 {
 	int type = icmp->icmp_type;
 
 	if (type < 0 || type > ICMP_MAXTYPE || type >= 32)
 		return (0);
 	return ((cmd->d[0] & (1U << type)) != 0);
 }
 
 #define TT	( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \
     (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) )
 
 static int
 is_icmp_query(struct icmphdr *icmp)
 {
 	int type = icmp->icmp_type;
 
 	return (type <= ICMP_MAXTYPE && (TT & (1<<type)) );
 }
 #undef TT
 
 /*
  * The following checks use two arrays of 8 or 16 bits to store the
  * bits that we want set or clear, respectively. They are in the
  * low and high half of cmd->arg1 or cmd->d[0].
  *
  * We scan options and store the bits we find set. We succeed if
  *
  *	(want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
  *
  * The code is sometimes optimized not to store additional variables.
  */
 
 static int
 flags_match(ipfw_insn *cmd, u_int8_t bits)
 {
 	u_char want_clear;
 	bits = ~bits;
 
 	if ( ((cmd->arg1 & 0xff) & bits) != 0)
 		return 0; /* some bits we want set were clear */
 	want_clear = (cmd->arg1 >> 8) & 0xff;
 	if ( (want_clear & bits) != want_clear)
 		return 0; /* some bits we want clear were set */
 	return 1;
 }
 
 static int
 ipopts_match(struct ip *ip, ipfw_insn *cmd)
 {
 	int optlen, bits = 0;
 	u_char *cp = (u_char *)(ip + 1);
 	int x = (ip->ip_hl << 2) - sizeof (struct ip);
 
 	for (; x > 0; x -= optlen, cp += optlen) {
 		int opt = cp[IPOPT_OPTVAL];
 
 		if (opt == IPOPT_EOL)
 			break;
 		if (opt == IPOPT_NOP)
 			optlen = 1;
 		else {
 			optlen = cp[IPOPT_OLEN];
 			if (optlen <= 0 || optlen > x)
 				return 0; /* invalid or truncated */
 		}
 		switch (opt) {
 
 		default:
 			break;
 
 		case IPOPT_LSRR:
 			bits |= IP_FW_IPOPT_LSRR;
 			break;
 
 		case IPOPT_SSRR:
 			bits |= IP_FW_IPOPT_SSRR;
 			break;
 
 		case IPOPT_RR:
 			bits |= IP_FW_IPOPT_RR;
 			break;
 
 		case IPOPT_TS:
 			bits |= IP_FW_IPOPT_TS;
 			break;
 		}
 	}
 	return (flags_match(cmd, bits));
 }
 
 static int
 tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
 {
 	int optlen, bits = 0;
 	u_char *cp = (u_char *)(tcp + 1);
 	int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
 
 	for (; x > 0; x -= optlen, cp += optlen) {
 		int opt = cp[0];
 		if (opt == TCPOPT_EOL)
 			break;
 		if (opt == TCPOPT_NOP)
 			optlen = 1;
 		else {
 			optlen = cp[1];
 			if (optlen <= 0)
 				break;
 		}
 
 		switch (opt) {
 
 		default:
 			break;
 
 		case TCPOPT_MAXSEG:
 			bits |= IP_FW_TCPOPT_MSS;
 			break;
 
 		case TCPOPT_WINDOW:
 			bits |= IP_FW_TCPOPT_WINDOW;
 			break;
 
 		case TCPOPT_SACK_PERMITTED:
 		case TCPOPT_SACK:
 			bits |= IP_FW_TCPOPT_SACK;
 			break;
 
 		case TCPOPT_TIMESTAMP:
 			bits |= IP_FW_TCPOPT_TS;
 			break;
 
 		}
 	}
 	return (flags_match(cmd, bits));
 }
 
 static int
 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
 {
 	if (ifp == NULL)	/* no iface with this packet, match fails */
 		return 0;
 	/* Check by name or by IP address */
 	if (cmd->name[0] != '\0') { /* match by name */
 		/* Check name */
 		if (cmd->p.glob) {
 			if (fnmatch(cmd->name, ifp->if_xname, 0) == 0)
 				return(1);
 		} else {
 			if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
 				return(1);
 		}
 	} else {
 		struct ifaddr *ia;
 
 		/* XXX lock? */
 		TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
 			if (ia->ifa_addr->sa_family != AF_INET)
 				continue;
 			if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
 			    (ia->ifa_addr))->sin_addr.s_addr)
 				return(1);	/* match */
 		}
 	}
 	return(0);	/* no match, fail ... */
 }
 
 /*
  * The verify_path function checks if a route to the src exists and
  * if it is reachable via ifp (when provided).
  * 
  * The 'verrevpath' option checks that the interface that an IP packet
  * arrives on is the same interface that traffic destined for the
  * packet's source address would be routed out of.  The 'versrcreach'
  * option just checks that the source address is reachable via any route
  * (except default) in the routing table.  These two are a measure to block
  * forged packets.  This is also commonly known as "anti-spoofing" or Unicast
  * Reverse Path Forwarding (Unicast RFP) in Cisco-ese. The name of the knobs
  * is purposely reminiscent of the Cisco IOS command,
  *
  *   ip verify unicast reverse-path
  *   ip verify unicast source reachable-via any
  *
  * which implements the same functionality. But note that syntax is
  * misleading. The check may be performed on all IP packets whether unicast,
  * multicast, or broadcast.
  */
 static int
-verify_path(struct in_addr src, struct ifnet *ifp)
+verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
 {
 	struct route ro;
 	struct sockaddr_in *dst;
 
 	bzero(&ro, sizeof(ro));
 
 	dst = (struct sockaddr_in *)&(ro.ro_dst);
 	dst->sin_family = AF_INET;
 	dst->sin_len = sizeof(*dst);
 	dst->sin_addr = src;
-	rtalloc_ign(&ro, RTF_CLONING);
+	in_rtalloc_ign(&ro, RTF_CLONING, fib);
 
 	if (ro.ro_rt == NULL)
 		return 0;
 
 	/*
 	 * If ifp is provided, check for equality with rtentry.
 	 * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
 	 * in order to pass packets injected back by if_simloop():
 	 * if useloopback == 1 routing entry (via lo0) for our own address
 	 * may exist, so we need to handle routing assymetry.
 	 */
 	if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
 		RTFREE(ro.ro_rt);
 		return 0;
 	}
 
 	/* if no ifp provided, check if rtentry is not default route */
 	if (ifp == NULL &&
 	     satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) {
 		RTFREE(ro.ro_rt);
 		return 0;
 	}
 
 	/* or if this is a blackhole/reject route */
 	if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 		RTFREE(ro.ro_rt);
 		return 0;
 	}
 
 	/* found valid route */
 	RTFREE(ro.ro_rt);
 	return 1;
 }
 
 #ifdef INET6
 /*
  * ipv6 specific rules here...
  */
 static __inline int
 icmp6type_match (int type, ipfw_insn_u32 *cmd)
 {
 	return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) );
 }
 
 static int
 flow6id_match( int curr_flow, ipfw_insn_u32 *cmd )
 {
 	int i;
 	for (i=0; i <= cmd->o.arg1; ++i )
 		if (curr_flow == cmd->d[i] )
 			return 1;
 	return 0;
 }
 
 /* support for IP6_*_ME opcodes */
 static int
 search_ip6_addr_net (struct in6_addr * ip6_addr)
 {
 	struct ifnet *mdc;
 	struct ifaddr *mdc2;
 	struct in6_ifaddr *fdm;
 	struct in6_addr copia;
 
 	TAILQ_FOREACH(mdc, &ifnet, if_link)
 		TAILQ_FOREACH(mdc2, &mdc->if_addrlist, ifa_list) {
 			if (mdc2->ifa_addr->sa_family == AF_INET6) {
 				fdm = (struct in6_ifaddr *)mdc2;
 				copia = fdm->ia_addr.sin6_addr;
 				/* need for leaving scope_id in the sock_addr */
 				in6_clearscope(&copia);
 				if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia))
 					return 1;
 			}
 		}
 	return 0;
 }
 
 static int
 verify_path6(struct in6_addr *src, struct ifnet *ifp)
 {
 	struct route_in6 ro;
 	struct sockaddr_in6 *dst;
 
 	bzero(&ro, sizeof(ro));
 
 	dst = (struct sockaddr_in6 * )&(ro.ro_dst);
 	dst->sin6_family = AF_INET6;
 	dst->sin6_len = sizeof(*dst);
 	dst->sin6_addr = *src;
+	/* XXX MRT 0 for ipv6 at this time */
 	rtalloc_ign((struct route *)&ro, RTF_CLONING);
 
 	if (ro.ro_rt == NULL)
 		return 0;
 
 	/* 
 	 * if ifp is provided, check for equality with rtentry
 	 * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
 	 * to support the case of sending packets to an address of our own.
 	 * (where the former interface is the first argument of if_simloop()
 	 *  (=ifp), the latter is lo0)
 	 */
 	if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
 		RTFREE(ro.ro_rt);
 		return 0;
 	}
 
 	/* if no ifp provided, check if rtentry is not default route */
 	if (ifp == NULL &&
 	    IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) {
 		RTFREE(ro.ro_rt);
 		return 0;
 	}
 
 	/* or if this is a blackhole/reject route */
 	if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 		RTFREE(ro.ro_rt);
 		return 0;
 	}
 
 	/* found valid route */
 	RTFREE(ro.ro_rt);
 	return 1;
 
 }
 static __inline int
 hash_packet6(struct ipfw_flow_id *id)
 {
 	u_int32_t i;
 	i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^
 	    (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^
 	    (id->src_ip6.__u6_addr.__u6_addr32[2]) ^
 	    (id->src_ip6.__u6_addr.__u6_addr32[3]) ^
 	    (id->dst_port) ^ (id->src_port);
 	return i;
 }
 
 static int
 is_icmp6_query(int icmp6_type)
 {
 	if ((icmp6_type <= ICMP6_MAXTYPE) &&
 	    (icmp6_type == ICMP6_ECHO_REQUEST ||
 	    icmp6_type == ICMP6_MEMBERSHIP_QUERY ||
 	    icmp6_type == ICMP6_WRUREQUEST ||
 	    icmp6_type == ICMP6_FQDN_QUERY ||
 	    icmp6_type == ICMP6_NI_QUERY))
 		return (1);
 
 	return (0);
 }
 
 static void
 send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6)
 {
 	struct mbuf *m;
 
 	m = args->m;
 	if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) {
 		struct tcphdr *tcp;
 		tcp_seq ack, seq;
 		int flags;
 		struct {
 			struct ip6_hdr ip6;
 			struct tcphdr th;
 		} ti;
 		tcp = (struct tcphdr *)((char *)ip6 + hlen);
 
 		if ((tcp->th_flags & TH_RST) != 0) {
 			m_freem(m);
 			args->m = NULL;
 			return;
 		}
 
 		ti.ip6 = *ip6;
 		ti.th = *tcp;
 		ti.th.th_seq = ntohl(ti.th.th_seq);
 		ti.th.th_ack = ntohl(ti.th.th_ack);
 		ti.ip6.ip6_nxt = IPPROTO_TCP;
 
 		if (ti.th.th_flags & TH_ACK) {
 			ack = 0;
 			seq = ti.th.th_ack;
 			flags = TH_RST;
 		} else {
 			ack = ti.th.th_seq;
 			if ((m->m_flags & M_PKTHDR) != 0) {
 				/*
 				 * total new data to ACK is:
 				 * total packet length,
 				 * minus the header length,
 				 * minus the tcp header length.
 				 */
 				ack += m->m_pkthdr.len - hlen
 					- (ti.th.th_off << 2);
 			} else if (ip6->ip6_plen) {
 				ack += ntohs(ip6->ip6_plen) + sizeof(*ip6) -
 				    hlen - (ti.th.th_off << 2);
 			} else {
 				m_freem(m);
 				return;
 			}
 			if (tcp->th_flags & TH_SYN)
 				ack++;
 			seq = 0;
 			flags = TH_RST|TH_ACK;
 		}
 		bcopy(&ti, ip6, sizeof(ti));
 		/*
 		 * m is only used to recycle the mbuf
 		 * The data in it is never read so we don't need
 		 * to correct the offsets or anything
 		 */
 		tcp_respond(NULL, ip6, tcp, m, ack, seq, flags);
 	} else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */
 #if 0
 		/*
 		 * Unlike above, the mbufs need to line up with the ip6 hdr,
 		 * as the contents are read. We need to m_adj() the
 		 * needed amount.
 		 * The mbuf will however be thrown away so we can adjust it.
 		 * Remember we did an m_pullup on it already so we
 		 * can make some assumptions about contiguousness.
 		 */
 		if (args->L3offset)
 			m_adj(m, args->L3offset);
 #endif
 		icmp6_error(m, ICMP6_DST_UNREACH, code, 0);
 	} else
 		m_freem(m);
 
 	args->m = NULL;
 }
 
 #endif /* INET6 */
 
 static u_int64_t norule_counter;	/* counter for ipfw_log(NULL...) */
 
 #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0
 #define SNP(buf) buf, sizeof(buf)
 
 /*
  * We enter here when we have a rule with O_LOG.
  * XXX this function alone takes about 2Kbytes of code!
  */
 static void
 ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
     struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg,
     struct ip *ip)
 {
 	struct ether_header *eh = args->eh;
 	char *action;
 	int limit_reached = 0;
 	char action2[40], proto[128], fragment[32];
 
 	fragment[0] = '\0';
 	proto[0] = '\0';
 
 	if (f == NULL) {	/* bogus pkt */
 		if (verbose_limit != 0 && norule_counter >= verbose_limit)
 			return;
 		norule_counter++;
 		if (norule_counter == verbose_limit)
 			limit_reached = verbose_limit;
 		action = "Refuse";
 	} else {	/* O_LOG is the first action, find the real one */
 		ipfw_insn *cmd = ACTION_PTR(f);
 		ipfw_insn_log *l = (ipfw_insn_log *)cmd;
 
 		if (l->max_log != 0 && l->log_left == 0)
 			return;
 		l->log_left--;
 		if (l->log_left == 0)
 			limit_reached = l->max_log;
 		cmd += F_LEN(cmd);	/* point to first action */
 		if (cmd->opcode == O_ALTQ) {
 			ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
 
 			snprintf(SNPARGS(action2, 0), "Altq %d",
 				altq->qid);
 			cmd += F_LEN(cmd);
 		}
 		if (cmd->opcode == O_PROB)
 			cmd += F_LEN(cmd);
 
 		if (cmd->opcode == O_TAG)
 			cmd += F_LEN(cmd);
 
 		action = action2;
 		switch (cmd->opcode) {
 		case O_DENY:
 			action = "Deny";
 			break;
 
 		case O_REJECT:
 			if (cmd->arg1==ICMP_REJECT_RST)
 				action = "Reset";
 			else if (cmd->arg1==ICMP_UNREACH_HOST)
 				action = "Reject";
 			else
 				snprintf(SNPARGS(action2, 0), "Unreach %d",
 					cmd->arg1);
 			break;
 
 		case O_UNREACH6:
 			if (cmd->arg1==ICMP6_UNREACH_RST)
 				action = "Reset";
 			else
 				snprintf(SNPARGS(action2, 0), "Unreach %d",
 					cmd->arg1);
 			break;
 
 		case O_ACCEPT:
 			action = "Accept";
 			break;
 		case O_COUNT:
 			action = "Count";
 			break;
 		case O_DIVERT:
 			snprintf(SNPARGS(action2, 0), "Divert %d",
 				cmd->arg1);
 			break;
 		case O_TEE:
 			snprintf(SNPARGS(action2, 0), "Tee %d",
 				cmd->arg1);
 			break;
+		case O_SETFIB:
+			snprintf(SNPARGS(action2, 0), "SetFib %d",
+				cmd->arg1);
+			break;
 		case O_SKIPTO:
 			snprintf(SNPARGS(action2, 0), "SkipTo %d",
 				cmd->arg1);
 			break;
 		case O_PIPE:
 			snprintf(SNPARGS(action2, 0), "Pipe %d",
 				cmd->arg1);
 			break;
 		case O_QUEUE:
 			snprintf(SNPARGS(action2, 0), "Queue %d",
 				cmd->arg1);
 			break;
 		case O_FORWARD_IP: {
 			ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
 			int len;
 			struct in_addr dummyaddr;
 			if (sa->sa.sin_addr.s_addr == INADDR_ANY)
 				dummyaddr.s_addr = htonl(tablearg);
 			else
 				dummyaddr.s_addr = sa->sa.sin_addr.s_addr;
 
 			len = snprintf(SNPARGS(action2, 0), "Forward to %s",
 				inet_ntoa(dummyaddr));
 
 			if (sa->sa.sin_port)
 				snprintf(SNPARGS(action2, len), ":%d",
 				    sa->sa.sin_port);
 			}
 			break;
 		case O_NETGRAPH:
 			snprintf(SNPARGS(action2, 0), "Netgraph %d",
 				cmd->arg1);
 			break;
 		case O_NGTEE:
 			snprintf(SNPARGS(action2, 0), "Ngtee %d",
 				cmd->arg1);
 			break;
 		case O_NAT:
 			action = "Nat";
  			break;
 		default:
 			action = "UNKNOWN";
 			break;
 		}
 	}
 
 	if (hlen == 0) {	/* non-ip */
 		snprintf(SNPARGS(proto, 0), "MAC");
 
 	} else {
 		int len;
 		char src[48], dst[48];
 		struct icmphdr *icmp;
 		struct tcphdr *tcp;
 		struct udphdr *udp;
 #ifdef INET6
 		struct ip6_hdr *ip6 = NULL;
 		struct icmp6_hdr *icmp6;
 #endif
 		src[0] = '\0';
 		dst[0] = '\0';
 #ifdef INET6
 		if (IS_IP6_FLOW_ID(&(args->f_id))) {
 			char ip6buf[INET6_ADDRSTRLEN];
 			snprintf(src, sizeof(src), "[%s]",
 			    ip6_sprintf(ip6buf, &args->f_id.src_ip6));
 			snprintf(dst, sizeof(dst), "[%s]",
 			    ip6_sprintf(ip6buf, &args->f_id.dst_ip6));
 
 			ip6 = (struct ip6_hdr *)ip;
 			tcp = (struct tcphdr *)(((char *)ip) + hlen);
 			udp = (struct udphdr *)(((char *)ip) + hlen);
 		} else
 #endif
 		{
 			tcp = L3HDR(struct tcphdr, ip);
 			udp = L3HDR(struct udphdr, ip);
 
 			inet_ntoa_r(ip->ip_src, src);
 			inet_ntoa_r(ip->ip_dst, dst);
 		}
 
 		switch (args->f_id.proto) {
 		case IPPROTO_TCP:
 			len = snprintf(SNPARGS(proto, 0), "TCP %s", src);
 			if (offset == 0)
 				snprintf(SNPARGS(proto, len), ":%d %s:%d",
 				    ntohs(tcp->th_sport),
 				    dst,
 				    ntohs(tcp->th_dport));
 			else
 				snprintf(SNPARGS(proto, len), " %s", dst);
 			break;
 
 		case IPPROTO_UDP:
 			len = snprintf(SNPARGS(proto, 0), "UDP %s", src);
 			if (offset == 0)
 				snprintf(SNPARGS(proto, len), ":%d %s:%d",
 				    ntohs(udp->uh_sport),
 				    dst,
 				    ntohs(udp->uh_dport));
 			else
 				snprintf(SNPARGS(proto, len), " %s", dst);
 			break;
 
 		case IPPROTO_ICMP:
 			icmp = L3HDR(struct icmphdr, ip);
 			if (offset == 0)
 				len = snprintf(SNPARGS(proto, 0),
 				    "ICMP:%u.%u ",
 				    icmp->icmp_type, icmp->icmp_code);
 			else
 				len = snprintf(SNPARGS(proto, 0), "ICMP ");
 			len += snprintf(SNPARGS(proto, len), "%s", src);
 			snprintf(SNPARGS(proto, len), " %s", dst);
 			break;
 #ifdef INET6
 		case IPPROTO_ICMPV6:
 			icmp6 = (struct icmp6_hdr *)(((char *)ip) + hlen);
 			if (offset == 0)
 				len = snprintf(SNPARGS(proto, 0),
 				    "ICMPv6:%u.%u ",
 				    icmp6->icmp6_type, icmp6->icmp6_code);
 			else
 				len = snprintf(SNPARGS(proto, 0), "ICMPv6 ");
 			len += snprintf(SNPARGS(proto, len), "%s", src);
 			snprintf(SNPARGS(proto, len), " %s", dst);
 			break;
 #endif
 		default:
 			len = snprintf(SNPARGS(proto, 0), "P:%d %s",
 			    args->f_id.proto, src);
 			snprintf(SNPARGS(proto, len), " %s", dst);
 			break;
 		}
 
 #ifdef INET6
 		if (IS_IP6_FLOW_ID(&(args->f_id))) {
 			if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG))
 				snprintf(SNPARGS(fragment, 0),
 				    " (frag %08x:%d@%d%s)",
 				    args->f_id.frag_id6,
 				    ntohs(ip6->ip6_plen) - hlen,
 				    ntohs(offset & IP6F_OFF_MASK) << 3,
 				    (offset & IP6F_MORE_FRAG) ? "+" : "");
 		} else
 #endif
 		{
 			int ip_off, ip_len;
 			if (eh != NULL) { /* layer 2 packets are as on the wire */
 				ip_off = ntohs(ip->ip_off);
 				ip_len = ntohs(ip->ip_len);
 			} else {
 				ip_off = ip->ip_off;
 				ip_len = ip->ip_len;
 			}
 			if (ip_off & (IP_MF | IP_OFFMASK))
 				snprintf(SNPARGS(fragment, 0),
 				    " (frag %d:%d@%d%s)",
 				    ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2),
 				    offset << 3,
 				    (ip_off & IP_MF) ? "+" : "");
 		}
 	}
 	if (oif || m->m_pkthdr.rcvif)
 		log(LOG_SECURITY | LOG_INFO,
 		    "ipfw: %d %s %s %s via %s%s\n",
 		    f ? f->rulenum : -1,
 		    action, proto, oif ? "out" : "in",
 		    oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname,
 		    fragment);
 	else
 		log(LOG_SECURITY | LOG_INFO,
 		    "ipfw: %d %s %s [no if info]%s\n",
 		    f ? f->rulenum : -1,
 		    action, proto, fragment);
 	if (limit_reached)
 		log(LOG_SECURITY | LOG_NOTICE,
 		    "ipfw: limit %d reached on entry %d\n",
 		    limit_reached, f ? f->rulenum : -1);
 }
 
 /*
  * IMPORTANT: the hash function for dynamic rules must be commutative
  * in source and destination (ip,port), because rules are bidirectional
  * and we want to find both in the same bucket.
  */
 static __inline int
 hash_packet(struct ipfw_flow_id *id)
 {
 	u_int32_t i;
 
 #ifdef INET6
 	if (IS_IP6_FLOW_ID(id)) 
 		i = hash_packet6(id);
 	else
 #endif /* INET6 */
 	i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port);
 	i &= (curr_dyn_buckets - 1);
 	return i;
 }
 
 /**
  * unlink a dynamic rule from a chain. prev is a pointer to
  * the previous one, q is a pointer to the rule to delete,
  * head is a pointer to the head of the queue.
  * Modifies q and potentially also head.
  */
 #define UNLINK_DYN_RULE(prev, head, q) {				\
 	ipfw_dyn_rule *old_q = q;					\
 									\
 	/* remove a refcount to the parent */				\
 	if (q->dyn_type == O_LIMIT)					\
 		q->parent->count--;					\
 	DEB(printf("ipfw: unlink entry 0x%08x %d -> 0x%08x %d, %d left\n",\
 		(q->id.src_ip), (q->id.src_port),			\
 		(q->id.dst_ip), (q->id.dst_port), dyn_count-1 ); )	\
 	if (prev != NULL)						\
 		prev->next = q = q->next;				\
 	else								\
 		head = q = q->next;					\
 	dyn_count--;							\
 	uma_zfree(ipfw_dyn_rule_zone, old_q); }
 
 #define TIME_LEQ(a,b)       ((int)((a)-(b)) <= 0)
 
 /**
  * Remove dynamic rules pointing to "rule", or all of them if rule == NULL.
  *
  * If keep_me == NULL, rules are deleted even if not expired,
  * otherwise only expired rules are removed.
  *
  * The value of the second parameter is also used to point to identify
  * a rule we absolutely do not want to remove (e.g. because we are
  * holding a reference to it -- this is the case with O_LIMIT_PARENT
  * rules). The pointer is only used for comparison, so any non-null
  * value will do.
  */
 static void
 remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me)
 {
 	static u_int32_t last_remove = 0;
 
 #define FORCE (keep_me == NULL)
 
 	ipfw_dyn_rule *prev, *q;
 	int i, pass = 0, max_pass = 0;
 
 	IPFW_DYN_LOCK_ASSERT();
 
 	if (ipfw_dyn_v == NULL || dyn_count == 0)
 		return;
 	/* do not expire more than once per second, it is useless */
 	if (!FORCE && last_remove == time_uptime)
 		return;
 	last_remove = time_uptime;
 
 	/*
 	 * because O_LIMIT refer to parent rules, during the first pass only
 	 * remove child and mark any pending LIMIT_PARENT, and remove
 	 * them in a second pass.
 	 */
 next_pass:
 	for (i = 0 ; i < curr_dyn_buckets ; i++) {
 		for (prev=NULL, q = ipfw_dyn_v[i] ; q ; ) {
 			/*
 			 * Logic can become complex here, so we split tests.
 			 */
 			if (q == keep_me)
 				goto next;
 			if (rule != NULL && rule != q->rule)
 				goto next; /* not the one we are looking for */
 			if (q->dyn_type == O_LIMIT_PARENT) {
 				/*
 				 * handle parent in the second pass,
 				 * record we need one.
 				 */
 				max_pass = 1;
 				if (pass == 0)
 					goto next;
 				if (FORCE && q->count != 0 ) {
 					/* XXX should not happen! */
 					printf("ipfw: OUCH! cannot remove rule,"
 					     " count %d\n", q->count);
 				}
 			} else {
 				if (!FORCE &&
 				    !TIME_LEQ( q->expire, time_uptime ))
 					goto next;
 			}
              if (q->dyn_type != O_LIMIT_PARENT || !q->count) {
                      UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q);
                      continue;
              }
 next:
 			prev=q;
 			q=q->next;
 		}
 	}
 	if (pass++ < max_pass)
 		goto next_pass;
 }
 
 
 /**
  * lookup a dynamic rule.
  */
 static ipfw_dyn_rule *
 lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction,
     struct tcphdr *tcp)
 {
 	/*
 	 * stateful ipfw extensions.
 	 * Lookup into dynamic session queue
 	 */
 #define MATCH_REVERSE	0
 #define MATCH_FORWARD	1
 #define MATCH_NONE	2
 #define MATCH_UNKNOWN	3
 	int i, dir = MATCH_NONE;
 	ipfw_dyn_rule *prev, *q=NULL;
 
 	IPFW_DYN_LOCK_ASSERT();
 
 	if (ipfw_dyn_v == NULL)
 		goto done;	/* not found */
 	i = hash_packet( pkt );
 	for (prev=NULL, q = ipfw_dyn_v[i] ; q != NULL ; ) {
 		if (q->dyn_type == O_LIMIT_PARENT && q->count)
 			goto next;
 		if (TIME_LEQ( q->expire, time_uptime)) { /* expire entry */
 			UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q);
 			continue;
 		}
 		if (pkt->proto == q->id.proto &&
 		    q->dyn_type != O_LIMIT_PARENT) {
 			if (IS_IP6_FLOW_ID(pkt)) {
 			    if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6),
 				&(q->id.src_ip6)) &&
 			    IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6),
 				&(q->id.dst_ip6)) &&
 			    pkt->src_port == q->id.src_port &&
 			    pkt->dst_port == q->id.dst_port ) {
 				dir = MATCH_FORWARD;
 				break;
 			    }
 			    if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6),
 				    &(q->id.dst_ip6)) &&
 				IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6),
 				    &(q->id.src_ip6)) &&
 				pkt->src_port == q->id.dst_port &&
 				pkt->dst_port == q->id.src_port ) {
 				    dir = MATCH_REVERSE;
 				    break;
 			    }
 			} else {
 			    if (pkt->src_ip == q->id.src_ip &&
 				pkt->dst_ip == q->id.dst_ip &&
 				pkt->src_port == q->id.src_port &&
 				pkt->dst_port == q->id.dst_port ) {
 				    dir = MATCH_FORWARD;
 				    break;
 			    }
 			    if (pkt->src_ip == q->id.dst_ip &&
 				pkt->dst_ip == q->id.src_ip &&
 				pkt->src_port == q->id.dst_port &&
 				pkt->dst_port == q->id.src_port ) {
 				    dir = MATCH_REVERSE;
 				    break;
 			    }
 			}
 		}
 next:
 		prev = q;
 		q = q->next;
 	}
 	if (q == NULL)
 		goto done; /* q = NULL, not found */
 
 	if ( prev != NULL) { /* found and not in front */
 		prev->next = q->next;
 		q->next = ipfw_dyn_v[i];
 		ipfw_dyn_v[i] = q;
 	}
 	if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */
 		u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST);
 
 #define BOTH_SYN	(TH_SYN | (TH_SYN << 8))
 #define BOTH_FIN	(TH_FIN | (TH_FIN << 8))
 		q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8);
 		switch (q->state) {
 		case TH_SYN:				/* opening */
 			q->expire = time_uptime + dyn_syn_lifetime;
 			break;
 
 		case BOTH_SYN:			/* move to established */
 		case BOTH_SYN | TH_FIN :	/* one side tries to close */
 		case BOTH_SYN | (TH_FIN << 8) :
  			if (tcp) {
 #define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0)
 			    u_int32_t ack = ntohl(tcp->th_ack);
 			    if (dir == MATCH_FORWARD) {
 				if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd))
 				    q->ack_fwd = ack;
 				else { /* ignore out-of-sequence */
 				    break;
 				}
 			    } else {
 				if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev))
 				    q->ack_rev = ack;
 				else { /* ignore out-of-sequence */
 				    break;
 				}
 			    }
 			}
 			q->expire = time_uptime + dyn_ack_lifetime;
 			break;
 
 		case BOTH_SYN | BOTH_FIN:	/* both sides closed */
 			if (dyn_fin_lifetime >= dyn_keepalive_period)
 				dyn_fin_lifetime = dyn_keepalive_period - 1;
 			q->expire = time_uptime + dyn_fin_lifetime;
 			break;
 
 		default:
 #if 0
 			/*
 			 * reset or some invalid combination, but can also
 			 * occur if we use keep-state the wrong way.
 			 */
 			if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0)
 				printf("invalid state: 0x%x\n", q->state);
 #endif
 			if (dyn_rst_lifetime >= dyn_keepalive_period)
 				dyn_rst_lifetime = dyn_keepalive_period - 1;
 			q->expire = time_uptime + dyn_rst_lifetime;
 			break;
 		}
 	} else if (pkt->proto == IPPROTO_UDP) {
 		q->expire = time_uptime + dyn_udp_lifetime;
 	} else {
 		/* other protocols */
 		q->expire = time_uptime + dyn_short_lifetime;
 	}
 done:
 	if (match_direction)
 		*match_direction = dir;
 	return q;
 }
 
 static ipfw_dyn_rule *
 lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction,
     struct tcphdr *tcp)
 {
 	ipfw_dyn_rule *q;
 
 	IPFW_DYN_LOCK();
 	q = lookup_dyn_rule_locked(pkt, match_direction, tcp);
 	if (q == NULL)
 		IPFW_DYN_UNLOCK();
 	/* NB: return table locked when q is not NULL */
 	return q;
 }
 
 static void
 realloc_dynamic_table(void)
 {
 	IPFW_DYN_LOCK_ASSERT();
 
 	/*
 	 * Try reallocation, make sure we have a power of 2 and do
 	 * not allow more than 64k entries. In case of overflow,
 	 * default to 1024.
 	 */
 
 	if (dyn_buckets > 65536)
 		dyn_buckets = 1024;
 	if ((dyn_buckets & (dyn_buckets-1)) != 0) { /* not a power of 2 */
 		dyn_buckets = curr_dyn_buckets; /* reset */
 		return;
 	}
 	curr_dyn_buckets = dyn_buckets;
 	if (ipfw_dyn_v != NULL)
 		free(ipfw_dyn_v, M_IPFW);
 	for (;;) {
 		ipfw_dyn_v = malloc(curr_dyn_buckets * sizeof(ipfw_dyn_rule *),
 		       M_IPFW, M_NOWAIT | M_ZERO);
 		if (ipfw_dyn_v != NULL || curr_dyn_buckets <= 2)
 			break;
 		curr_dyn_buckets /= 2;
 	}
 }
 
 /**
  * Install state of type 'type' for a dynamic session.
  * The hash table contains two type of rules:
  * - regular rules (O_KEEP_STATE)
  * - rules for sessions with limited number of sess per user
  *   (O_LIMIT). When they are created, the parent is
  *   increased by 1, and decreased on delete. In this case,
  *   the third parameter is the parent rule and not the chain.
  * - "parent" rules for the above (O_LIMIT_PARENT).
  */
 static ipfw_dyn_rule *
 add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule)
 {
 	ipfw_dyn_rule *r;
 	int i;
 
 	IPFW_DYN_LOCK_ASSERT();
 
 	if (ipfw_dyn_v == NULL ||
 	    (dyn_count == 0 && dyn_buckets != curr_dyn_buckets)) {
 		realloc_dynamic_table();
 		if (ipfw_dyn_v == NULL)
 			return NULL; /* failed ! */
 	}
 	i = hash_packet(id);
 
 	r = uma_zalloc(ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO);
 	if (r == NULL) {
 		printf ("ipfw: sorry cannot allocate state\n");
 		return NULL;
 	}
 
 	/* increase refcount on parent, and set pointer */
 	if (dyn_type == O_LIMIT) {
 		ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule;
 		if ( parent->dyn_type != O_LIMIT_PARENT)
 			panic("invalid parent");
 		parent->count++;
 		r->parent = parent;
 		rule = parent->rule;
 	}
 
 	r->id = *id;
 	r->expire = time_uptime + dyn_syn_lifetime;
 	r->rule = rule;
 	r->dyn_type = dyn_type;
 	r->pcnt = r->bcnt = 0;
 	r->count = 0;
 
 	r->bucket = i;
 	r->next = ipfw_dyn_v[i];
 	ipfw_dyn_v[i] = r;
 	dyn_count++;
 	DEB(printf("ipfw: add dyn entry ty %d 0x%08x %d -> 0x%08x %d, total %d\n",
 	   dyn_type,
 	   (r->id.src_ip), (r->id.src_port),
 	   (r->id.dst_ip), (r->id.dst_port),
 	   dyn_count ); )
 	return r;
 }
 
 /**
  * lookup dynamic parent rule using pkt and rule as search keys.
  * If the lookup fails, then install one.
  */
 static ipfw_dyn_rule *
 lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule)
 {
 	ipfw_dyn_rule *q;
 	int i;
 
 	IPFW_DYN_LOCK_ASSERT();
 
 	if (ipfw_dyn_v) {
 		int is_v6 = IS_IP6_FLOW_ID(pkt);
 		i = hash_packet( pkt );
 		for (q = ipfw_dyn_v[i] ; q != NULL ; q=q->next)
 			if (q->dyn_type == O_LIMIT_PARENT &&
 			    rule== q->rule &&
 			    pkt->proto == q->id.proto &&
 			    pkt->src_port == q->id.src_port &&
 			    pkt->dst_port == q->id.dst_port &&
 			    (
 				(is_v6 &&
 				 IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6),
 					&(q->id.src_ip6)) &&
 				 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6),
 					&(q->id.dst_ip6))) ||
 				(!is_v6 &&
 				 pkt->src_ip == q->id.src_ip &&
 				 pkt->dst_ip == q->id.dst_ip)
 			    )
 			) {
 				q->expire = time_uptime + dyn_short_lifetime;
 				DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);)
 				return q;
 			}
 	}
 	return add_dyn_rule(pkt, O_LIMIT_PARENT, rule);
 }
 
 /**
  * Install dynamic state for rule type cmd->o.opcode
  *
  * Returns 1 (failure) if state is not installed because of errors or because
  * session limitations are enforced.
  */
 static int
 install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
     struct ip_fw_args *args, uint32_t tablearg)
 {
 	static int last_log;
 	ipfw_dyn_rule *q;
 	struct in_addr da;
 	char src[48], dst[48];
 
 	src[0] = '\0';
 	dst[0] = '\0';
 
 	DEB(
 	printf("ipfw: %s: type %d 0x%08x %u -> 0x%08x %u\n",
 	    __func__, cmd->o.opcode,
 	    (args->f_id.src_ip), (args->f_id.src_port),
 	    (args->f_id.dst_ip), (args->f_id.dst_port));
 	)
 
 	IPFW_DYN_LOCK();
 
 	q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL);
 
 	if (q != NULL) {	/* should never occur */
 		if (last_log != time_uptime) {
 			last_log = time_uptime;
 			printf("ipfw: %s: entry already present, done\n",
 			    __func__);
 		}
 		IPFW_DYN_UNLOCK();
 		return (0);
 	}
 
 	if (dyn_count >= dyn_max)
 		/* Run out of slots, try to remove any expired rule. */
 		remove_dyn_rule(NULL, (ipfw_dyn_rule *)1);
 
 	if (dyn_count >= dyn_max) {
 		if (last_log != time_uptime) {
 			last_log = time_uptime;
 			printf("ipfw: %s: Too many dynamic rules\n", __func__);
 		}
 		IPFW_DYN_UNLOCK();
 		return (1);	/* cannot install, notify caller */
 	}
 
 	switch (cmd->o.opcode) {
 	case O_KEEP_STATE:	/* bidir rule */
 		add_dyn_rule(&args->f_id, O_KEEP_STATE, rule);
 		break;
 
 	case O_LIMIT: {		/* limit number of sessions */
 		struct ipfw_flow_id id;
 		ipfw_dyn_rule *parent;
 		uint32_t conn_limit;
 		uint16_t limit_mask = cmd->limit_mask;
 
 		conn_limit = (cmd->conn_limit == IP_FW_TABLEARG) ?
 		    tablearg : cmd->conn_limit;
 		  
 		DEB(
 		if (cmd->conn_limit == IP_FW_TABLEARG)
 			printf("ipfw: %s: O_LIMIT rule, conn_limit: %u "
 			    "(tablearg)\n", __func__, conn_limit);
 		else
 			printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n",
 			    __func__, conn_limit);
 		)
 
 		id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0;
 		id.proto = args->f_id.proto;
 		id.addr_type = args->f_id.addr_type;
+		id.fib = M_GETFIB(args->m);
 
 		if (IS_IP6_FLOW_ID (&(args->f_id))) {
 			if (limit_mask & DYN_SRC_ADDR)
 				id.src_ip6 = args->f_id.src_ip6;
 			if (limit_mask & DYN_DST_ADDR)
 				id.dst_ip6 = args->f_id.dst_ip6;
 		} else {
 			if (limit_mask & DYN_SRC_ADDR)
 				id.src_ip = args->f_id.src_ip;
 			if (limit_mask & DYN_DST_ADDR)
 				id.dst_ip = args->f_id.dst_ip;
 		}
 		if (limit_mask & DYN_SRC_PORT)
 			id.src_port = args->f_id.src_port;
 		if (limit_mask & DYN_DST_PORT)
 			id.dst_port = args->f_id.dst_port;
 		if ((parent = lookup_dyn_parent(&id, rule)) == NULL) {
 			printf("ipfw: %s: add parent failed\n", __func__);
 			IPFW_DYN_UNLOCK();
 			return (1);
 		}
 
 		if (parent->count >= conn_limit) {
 			/* See if we can remove some expired rule. */
 			remove_dyn_rule(rule, parent);
 			if (parent->count >= conn_limit) {
 				if (fw_verbose && last_log != time_uptime) {
 					last_log = time_uptime;
 #ifdef INET6
 					/*
 					 * XXX IPv6 flows are not
 					 * supported yet.
 					 */
 					if (IS_IP6_FLOW_ID(&(args->f_id))) {
 						char ip6buf[INET6_ADDRSTRLEN];
 						snprintf(src, sizeof(src),
 						    "[%s]", ip6_sprintf(ip6buf,
 							&args->f_id.src_ip6));
 						snprintf(dst, sizeof(dst),
 						    "[%s]", ip6_sprintf(ip6buf,
 							&args->f_id.dst_ip6));
 					} else
 #endif
 					{
 						da.s_addr =
 						    htonl(args->f_id.src_ip);
 						inet_ntoa_r(da, src);
 						da.s_addr =
 						    htonl(args->f_id.dst_ip);
 						inet_ntoa_r(da, dst);
 					}
 					log(LOG_SECURITY | LOG_DEBUG,
 					    "ipfw: %d %s %s:%u -> %s:%u, %s\n",
 					    parent->rule->rulenum,
 					    "drop session",
 					    src, (args->f_id.src_port),
 					    dst, (args->f_id.dst_port),
 					    "too many entries");
 				}
 				IPFW_DYN_UNLOCK();
 				return (1);
 			}
 		}
 		add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent);
 		break;
 	}
 	default:
 		printf("ipfw: %s: unknown dynamic rule type %u\n",
 		    __func__, cmd->o.opcode);
 		IPFW_DYN_UNLOCK();
 		return (1);
 	}
 
 	/* XXX just set lifetime */
 	lookup_dyn_rule_locked(&args->f_id, NULL, NULL);
 
 	IPFW_DYN_UNLOCK();
 	return (0);
 }
 
 /*
  * Generate a TCP packet, containing either a RST or a keepalive.
  * When flags & TH_RST, we are sending a RST packet, because of a
  * "reset" action matched the packet.
  * Otherwise we are sending a keepalive, and flags & TH_SYN selects the
  * direction: forward (id's source to id's destination) if set, reverse
  * if clear.
  * The 'replyto' mbuf is the mbuf being replied to, if any, and is required
  * so that MAC can label the reply appropriately.
  *
  * 'id' supplies the addresses, ports and FIB used for the new packet;
  * 'seq' and 'ack' are converted with htonl() when stored in the header.
  * Returns the newly built mbuf, flagged M_SKIP_FIREWALL, or NULL if no
  * mbuf could be obtained.  The packet is only built here, not sent.
  */
 static struct mbuf *
 send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq,
     u_int32_t ack, int flags)
 {
 	struct mbuf *m;
 	struct ip *ip;
 	struct tcphdr *tcp;
 
 	MGETHDR(m, M_DONTWAIT, MT_DATA);
 	if (m == 0)
 		return (NULL);
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 
+	M_SETFIB(m, id->fib);	/* tag the packet with the flow's FIB */
 #ifdef MAC
 	if (replyto != NULL)
 		mac_netinet_firewall_reply(replyto, m);
 	else
 		mac_netinet_firewall_send(m);
 #else
 	(void)replyto;		/* don't warn about unused arg */
 #endif
 
 	/* The packet is just an IP header followed by a TCP header. */
 	m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr);
 	m->m_data += max_linkhdr;
 
 	ip = mtod(m, struct ip *);
 	bzero(ip, m->m_len);
 	tcp = (struct tcphdr *)(ip + 1); /* no IP options */
 	ip->ip_p = IPPROTO_TCP;
 	tcp->th_off = 5;
 	/*
 	 * Assume we are sending a RST (or a keepalive in the reverse
 	 * direction), swap src and destination addresses and ports.
 	 */
 	ip->ip_src.s_addr = htonl(id->dst_ip);
 	ip->ip_dst.s_addr = htonl(id->src_ip);
 	tcp->th_sport = htons(id->dst_port);
 	tcp->th_dport = htons(id->src_port);
 	if (flags & TH_RST) {	/* we are sending a RST */
 		if (flags & TH_ACK) {
 			/* Offending segment carried an ACK: reuse it as our seq. */
 			tcp->th_seq = htonl(ack);
 			tcp->th_ack = htonl(0);
 			tcp->th_flags = TH_RST;
 		} else {
 			/* No ACK: acknowledge the peer's seq (+1 for a SYN). */
 			if (flags & TH_SYN)
 				seq++;
 			tcp->th_seq = htonl(0);
 			tcp->th_ack = htonl(seq);
 			tcp->th_flags = TH_RST | TH_ACK;
 		}
 	} else {
 		/*
 		 * We are sending a keepalive. flags & TH_SYN determines
 		 * the direction, forward if set, reverse if clear.
 		 * NOTE: seq and ack are always assumed to be correct
 		 * as set by the caller. This may be confusing...
 		 */
 		if (flags & TH_SYN) {
 			/*
 			 * we have to rewrite the correct addresses!
 			 */
 			ip->ip_dst.s_addr = htonl(id->dst_ip);
 			ip->ip_src.s_addr = htonl(id->src_ip);
 			tcp->th_dport = htons(id->dst_port);
 			tcp->th_sport = htons(id->src_port);
 		}
 		tcp->th_seq = htonl(seq);
 		tcp->th_ack = htonl(ack);
 		tcp->th_flags = TH_ACK;
 	}
 	/*
 	 * set ip_len to the payload size so we can compute
 	 * the tcp checksum on the pseudoheader
 	 * XXX check this, could save a couple of words ?
 	 */
 	ip->ip_len = htons(sizeof(struct tcphdr));
 	tcp->th_sum = in_cksum(m, m->m_pkthdr.len);
 	/*
 	 * now fill fields left out earlier
 	 */
 	ip->ip_ttl = ip_defttl;
 	ip->ip_len = m->m_pkthdr.len;	/* stored as-is, no htons() here */
 	m->m_flags |= M_SKIP_FIREWALL;	/* ipfw_chk() passes such mbufs */
 	return (m);
 }
 
 /*
  * Reject the packet held in args->m and consume that mbuf.
  *
  * For any code other than ICMP_REJECT_RST an ICMP unreachable carrying
  * 'code' is generated through icmp_error().  For ICMP_REJECT_RST a TCP
  * RST is built with send_pkt() and sent, unless the packet is not TCP
  * or already carries RST; the original mbuf is then freed.
  * In every case args->m is NULL on return.
  */
 static void
 send_reject(struct ip_fw_args *args, int code, int ip_len, struct ip *ip)
 {
 	struct tcphdr *th;
 	struct mbuf *rst;
 
 #if 0
 	/*
 	 * XXX When ip is not guaranteed to be at mtod() we will
 	 * need to account for this.
 	 * The mbuf will however be thrown away so we can adjust it.
 	 * Remember we did an m_pullup on it already so we
 	 * can make some assumptions about contiguousness.
 	 */
 	if (args->L3offset)
 		m_adj(m, args->L3offset);
 #endif
 	if (code == ICMP_REJECT_RST) {
 		if (args->f_id.proto == IPPROTO_TCP) {
 			th = L3HDR(struct tcphdr, mtod(args->m, struct ip *));
 			/* Never answer a RST with another RST. */
 			if (!(th->th_flags & TH_RST)) {
 				rst = send_pkt(args->m, &(args->f_id),
 				    ntohl(th->th_seq), ntohl(th->th_ack),
 				    th->th_flags | TH_RST);
 				if (rst != NULL)
 					ip_output(rst, NULL, NULL, 0, NULL,
 					    NULL);
 			}
 		}
 		/* TCP or not, the offending packet is dropped here. */
 		m_freem(args->m);
 	} else {
 		/* Send an ICMP unreach */
 		/* We need the IP header in host order for icmp_error(). */
 		if (args->eh != NULL) {
 			ip->ip_len = ntohs(ip->ip_len);
 			ip->ip_off = ntohs(ip->ip_off);
 		}
 		icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
 	}
 	args->m = NULL;
 }
 
 /**
  *
  * Compute the rule that follows 'me' during matching and cache it in
  * me->next_rule.
  *
  * If the action of 'me' is a skipto, the result is the first rule after
  * 'me' whose number is at least the skipto target (backward jumps are
  * not allowed, so the scan starts from me->next).  In every other case,
  * including a skipto whose target cannot be found, the result is simply
  * me->next.
  *
  * When the ruleset is changed the cached pointers are flushed, so the
  * cached value is always correct.
  */
 
 static struct ip_fw *
 lookup_next_rule(struct ip_fw *me)
 {
 	ipfw_insn *act = ACTION_PTR(me);
 	struct ip_fw *target;
 
 	/* Step over the optional log/altq/tag prefixes, in that order. */
 	if (act->opcode == O_LOG)
 		act += F_LEN(act);
 	if (act->opcode == O_ALTQ)
 		act += F_LEN(act);
 	if (act->opcode == O_TAG)
 		act += F_LEN(act);
 
 	if (act->opcode != O_SKIPTO) {
 		/* not a skipto: plain successor */
 		me->next_rule = me->next;
 		return me->next;
 	}
 
 	target = me->next;
 	while (target != NULL && target->rulenum < act->arg1)
 		target = target->next;
 	if (target == NULL)		/* jump target missing */
 		target = me->next;
 	me->next_rule = target;
 	return target;
 }
 
 /*
  * Insert the prefix addr/mlen, with its associated value, into lookup
  * table 'tbl' of chain 'ch'.
  *
  * The write lock of 'ch' (the chain owning the table) is taken around
  * the radix tree update, matching del_table_entry().
  *
  * Returns 0 on success, EINVAL if tbl is out of range or mlen is larger
  * than 32, ENOMEM if no entry could be allocated and EEXIST if
  * rnh_addaddr() refuses the insertion.
  */
 static int
 add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
     uint8_t mlen, uint32_t value)
 {
 	struct radix_node_head *rnh;
 	struct table_entry *ent;
 
 	if (tbl >= IPFW_TABLES_MAX)
 		return (EINVAL);
 	/* A longer prefix would make the shift count below negative. */
 	if (mlen > 32)
 		return (EINVAL);
 	rnh = ch->tables[tbl];
 	ent = malloc(sizeof(*ent), M_IPFW_TBL, M_NOWAIT | M_ZERO);
 	if (ent == NULL)
 		return (ENOMEM);
 	ent->value = value;
 	ent->addr.sin_len = ent->mask.sin_len = 8;
 	/* Unsigned shift: 1 << 31 on a signed int is undefined. */
 	ent->mask.sin_addr.s_addr =
 	    htonl(mlen ? ~((1U << (32 - mlen)) - 1) : 0);
 	ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr;
 	IPFW_WLOCK(ch);
 	if (rnh->rnh_addaddr(&ent->addr, &ent->mask, rnh, (void *)ent) ==
 	    NULL) {
 		IPFW_WUNLOCK(ch);
 		free(ent, M_IPFW_TBL);
 		return (EEXIST);
 	}
 	IPFW_WUNLOCK(ch);
 	return (0);
 }
 
 /*
  * Remove the prefix addr/mlen from lookup table 'tbl' of chain 'ch'.
  *
  * Returns 0 on success, EINVAL if tbl is out of range or mlen is larger
  * than 32, and ESRCH if rnh_deladdr() finds no matching entry.
  */
 static int
 del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
     uint8_t mlen)
 {
 	struct radix_node_head *rnh;
 	struct table_entry *ent;
 	struct sockaddr_in sa, mask;
 
 	if (tbl >= IPFW_TABLES_MAX)
 		return (EINVAL);
 	/* A longer prefix would make the shift count below negative. */
 	if (mlen > 32)
 		return (EINVAL);
 	rnh = ch->tables[tbl];
 	sa.sin_len = mask.sin_len = 8;
 	/* Unsigned shift: 1 << 31 on a signed int is undefined. */
 	mask.sin_addr.s_addr = htonl(mlen ? ~((1U << (32 - mlen)) - 1) : 0);
 	sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr;
 	IPFW_WLOCK(ch);
 	ent = (struct table_entry *)rnh->rnh_deladdr(&sa, &mask, rnh);
 	IPFW_WUNLOCK(ch);
 	if (ent == NULL)
 		return (ESRCH);
 	/* The entry is already unlinked, so it is freed outside the lock. */
 	free(ent, M_IPFW_TBL);
 	return (0);
 }
 
 /*
  * rnh_walktree() callback used by flush_table(): unlink node 'rn' from
  * the radix head passed in 'arg' and free the entry returned by
  * rnh_deladdr().  Always returns 0.
  */
 static int
 flush_table_entry(struct radix_node *rn, void *arg)
 {
 	struct radix_node_head * const head = arg;
 	struct table_entry *victim;
 
 	victim = (struct table_entry *)
 	    head->rnh_deladdr(rn->rn_key, rn->rn_mask, head);
 	if (victim == NULL)
 		return (0);
 	free(victim, M_IPFW_TBL);
 	return (0);
 }
 
 /*
  * Remove every entry of lookup table 'tbl' in chain 'ch' by walking its
  * radix tree with flush_table_entry().  The chain write lock is
  * asserted.  Returns 0, or EINVAL if tbl is out of range.
  */
 static int
 flush_table(struct ip_fw_chain *ch, uint16_t tbl)
 {
 	struct radix_node_head *head;
 
 	IPFW_WLOCK_ASSERT(ch);
 
 	if (tbl < IPFW_TABLES_MAX) {
 		head = ch->tables[tbl];
 		KASSERT(head != NULL, ("NULL IPFW table"));
 		head->rnh_walktree(head, flush_table_entry, head);
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * Flush all IPFW_TABLES_MAX lookup tables of chain 'ch'.
  * The chain write lock is asserted.
  */
 static void
 flush_tables(struct ip_fw_chain *ch)
 {
 	uint16_t idx = 0;
 
 	IPFW_WLOCK_ASSERT(ch);
 
 	while (idx < IPFW_TABLES_MAX) {
 		flush_table(ch, idx);
 		idx++;
 	}
 }
 
 /*
  * Create the radix head of every lookup table of chain 'ch' with
  * rn_inithead().  If one of them fails, the tables created so far are
  * flushed and ENOMEM is returned; otherwise returns 0.
  */
 static int
 init_tables(struct ip_fw_chain *ch)
 {
 	int created;
 	uint16_t undo;
 
 	for (created = 0; created < IPFW_TABLES_MAX; created++) {
 		if (rn_inithead((void **)&ch->tables[created], 32))
 			continue;
 		/* Initialisation failed: flush the tables already set up. */
 		for (undo = 0; undo < created; undo++)
 			(void) flush_table(ch, undo);
 		return (ENOMEM);
 	}
 	return (0);
 }
 
 /*
  * Look 'addr' up in lookup table 'tbl' of chain 'ch'.
  * Returns 1 and stores the entry's value in *val when rnh_lookup()
  * finds an entry; returns 0 (leaving *val untouched) when it does not
  * or when tbl is out of range.
  */
 static int
 lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
     uint32_t *val)
 {
 	struct radix_node_head *head;
 	struct table_entry *hit;
 	struct sockaddr_in key;
 
 	if (tbl >= IPFW_TABLES_MAX)
 		return (0);
 
 	key.sin_len = 8;
 	key.sin_addr.s_addr = addr;
 	head = ch->tables[tbl];
 	hit = (struct table_entry *)(head->rnh_lookup(&key, NULL, head));
 	if (hit == NULL)
 		return (0);
 	*val = hit->value;
 	return (1);
 }
 
 /*
  * rnh_walktree() callback used by count_table(): bump the counter that
  * 'arg' points to by one for each visited node.  Always returns 0.
  */
 static int
 count_table_entry(struct radix_node *rn, void *arg)
 {
 	u_int32_t * const counter = arg;
 
 	*counter += 1;
 	return (0);
 }
 
 /*
  * Store in *cnt the number of entries of lookup table 'tbl' in chain
  * 'ch', obtained by walking the radix tree with count_table_entry().
  * Returns 0, or EINVAL (leaving *cnt untouched) if tbl is out of range.
  */
 static int
 count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
 {
 	struct radix_node_head *head;
 
 	if (tbl < IPFW_TABLES_MAX) {
 		head = ch->tables[tbl];
 		*cnt = 0;
 		head->rnh_walktree(head, count_table_entry, cnt);
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * rnh_walktree() callback used by dump_table(): copy the table entry
  * behind 'rn' into the next free slot of the ipfw_table passed in 'arg'.
  * Returns 1 once the destination already holds tbl->size entries,
  * 0 after a successful copy.
  */
 static int
 dump_table_entry(struct radix_node *rn, void *arg)
 {
 	struct table_entry * const src = (struct table_entry *)rn;
 	ipfw_table * const out = arg;
 	ipfw_table_entry *slot;
 
 	if (out->cnt == out->size)
 		return (1);
 
 	slot = &out->ent[out->cnt];
 	slot->tbl = out->tbl;
 	/* Derive the prefix length from the lowest set bit of the mask. */
 	slot->masklen = in_nullhost(src->mask.sin_addr) ? 0 :
 	    33 - ffs(ntohl(src->mask.sin_addr.s_addr));
 	slot->addr = src->addr.sin_addr.s_addr;
 	slot->value = src->value;
 	out->cnt++;
 	return (0);
 }
 
 /*
  * Fill 'tbl' with the entries of the lookup table numbered tbl->tbl in
  * chain 'ch'.  tbl->cnt is reset and then advanced by
  * dump_table_entry() for each copied entry.
  * Returns 0, or EINVAL if tbl->tbl is out of range.
  */
 static int
 dump_table(struct ip_fw_chain *ch, ipfw_table *tbl)
 {
 	struct radix_node_head *head;
 
 	if (tbl->tbl < IPFW_TABLES_MAX) {
 		head = ch->tables[tbl->tbl];
 		tbl->cnt = 0;
 		head->rnh_walktree(head, dump_table_entry, tbl);
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * Copy the credentials of the socket attached to 'inp' (prison id or -1
  * when not jailed, uid, group count and group list) into 'ugp'.
  * Nothing is written when the pcb has no socket.
  */
 static void
 fill_ugid_cache(struct inpcb *inp, struct ip_fw_ugid *ugp)
 {
 	struct ucred *cred;
 
 	if (inp->inp_socket == NULL)
 		return;
 
 	cred = inp->inp_socket->so_cred;
 	if (jailed(cred))
 		ugp->fw_prid = cred->cr_prison->pr_id;
 	else
 		ugp->fw_prid = -1;
 	ugp->fw_uid = cred->cr_uid;
 	ugp->fw_ngroups = cred->cr_ngroups;
 	bcopy(cred->cr_groups, ugp->fw_groups, sizeof(ugp->fw_groups));
 }
 
 /*
  * Match an O_UID, O_GID or O_JAIL instruction against the credentials
  * of the socket that owns the packet.
  *
  * 'ugp' is a credential cache filled by fill_ugid_cache() and '*lookup'
  * records its state across calls:
  *	 0	no lookup done yet,
  *	 1	'ugp' holds valid credentials,
  *	-1	a previous lookup found no usable pcb/socket.
  * 'inp', when non-NULL, is a pcb supplied by the caller and is used
  * instead of a hash lookup.
  *
  * Returns non-zero on a match and 0 otherwise; protocols other than
  * TCP and UDP never match.
  */
 static int
 check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
     struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip,
     u_int16_t src_port, struct ip_fw_ugid *ugp, int *lookup,
     struct inpcb *inp)
 {
 	struct inpcbinfo *pi;
 	int wildcard;
 	struct inpcb *pcb;
 	int match;
 	gid_t *gp;
 
 	/*
 	 * Check to see if the UDP or TCP stack supplied us with
 	 * the PCB. If so, rather than holding a lock and looking
 	 * up the PCB, we can use the one that was supplied.
 	 */
 	if (inp && *lookup == 0) {
 		INP_LOCK_ASSERT(inp);
 		if (inp->inp_socket != NULL) {
 			fill_ugid_cache(inp, ugp);
 			*lookup = 1;
 		}
 	}
 	/*
 	 * If we have already been here and the packet has no
 	 * PCB entry associated with it, then we can safely
 	 * assume that this is a no match.
 	 */
 	if (*lookup == -1)
 		return (0);
 	/* Pick the pcb table to search; UDP lookups allow wildcards. */
 	if (proto == IPPROTO_TCP) {
 		wildcard = 0;
 		pi = &tcbinfo;
 	} else if (proto == IPPROTO_UDP) {
 		wildcard = INPLOOKUP_WILDCARD;
 		pi = &udbinfo;
 	} else
 		return 0;
 	match = 0;
 	if (*lookup == 0) {
 		INP_INFO_RLOCK(pi);
 		/*
 		 * With an outgoing interface the destination pair is passed
 		 * first, otherwise the source pair is (presumably the
 		 * foreign/local order expected by in_pcblookup_hash()).
 		 */
 		pcb =  (oif) ?
 			in_pcblookup_hash(pi,
 				dst_ip, htons(dst_port),
 				src_ip, htons(src_port),
 				wildcard, oif) :
 			in_pcblookup_hash(pi,
 				src_ip, htons(src_port),
 				dst_ip, htons(dst_port),
 				wildcard, NULL);
 		if (pcb != NULL) {
 			INP_RLOCK(pcb);
 			if (pcb->inp_socket != NULL) {
 				fill_ugid_cache(pcb, ugp);
 				*lookup = 1;
 			}
 			INP_RUNLOCK(pcb);
 		}
 		INP_INFO_RUNLOCK(pi);
 		if (*lookup == 0) {
 			/*
 			 * If the lookup did not yield any results, there
 			 * is no sense in coming back and trying again. So
 			 * we can set lookup to -1 and ensure that we won't
 			 * bother the pcb system again.
 			 */
 			*lookup = -1;
 			return (0);
 		}
 	} 
 	/* Credentials are cached in ugp: compare per opcode. */
 	if (insn->o.opcode == O_UID)
 		match = (ugp->fw_uid == (uid_t)insn->d[0]);
 	else if (insn->o.opcode == O_GID) {
 		/* Any of the cached groups may satisfy the rule. */
 		for (gp = ugp->fw_groups;
 			gp < &ugp->fw_groups[ugp->fw_ngroups]; gp++)
 			if (*gp == (gid_t)insn->d[0]) {
 				match = 1;
 				break;
 			}
 	} else if (insn->o.opcode == O_JAIL)
 		match = (ugp->fw_prid == (int)insn->d[0]);
 	return match;
 }
 
 /*
  * The main check routine for the firewall.
  *
  * All arguments are in args so we can modify them and return them
  * back to the caller.
  *
  * Parameters:
  *
  *	args->m	(in/out) The packet; we set to NULL when/if we nuke it.
  *		Starts with the IP header.
  *	args->eh (in)	Mac header if present, or NULL for layer3 packet.
  *	args->L3offset	Number of bytes bypassed if we came from L2.
  *			e.g. often sizeof(eh)  ** NOTYET **
  *	args->oif	Outgoing interface, or NULL if packet is incoming.
  *		The incoming interface is in the mbuf. (in)
  *	args->divert_rule (in/out)
  *		Skip up to the first rule past this rule number;
  *		upon return, non-zero port number for divert or tee.
  *
  *	args->rule	Pointer to the last matching rule (in/out)
  *	args->next_hop	Socket we are forwarding to (out).
  *	args->f_id	Addresses grabbed from the packet (out)
  * 	args->cookie	a cookie depending on rule action
  *
  * Return value:
  *
  *	IP_FW_PASS	the packet must be accepted
  *	IP_FW_DENY	the packet must be dropped
  *	IP_FW_DIVERT	divert packet, port in m_tag
  *	IP_FW_TEE	tee packet, port in m_tag
  *	IP_FW_DUMMYNET	to dummynet, pipe in args->cookie
  *	IP_FW_NETGRAPH	into netgraph, cookie args->cookie
  *
  */
 int
 ipfw_chk(struct ip_fw_args *args)
 {
 	/*
 	 * Local variables holding state during the processing of a packet:
 	 *
 	 * IMPORTANT NOTE: to speed up the processing of rules, there
 	 * are some assumption on the values of the variables, which
 	 * are documented here. Should you change them, please check
 	 * the implementation of the various instructions to make sure
 	 * that they still work.
 	 *
 	 * args->eh	The MAC header. It is non-null for a layer2
 	 *	packet, it is NULL for a layer-3 packet.
 	 * **notyet**
 	 * args->L3offset Offset in the packet to the L3 (IP or equiv.) header.
 	 *
 	 * m | args->m	Pointer to the mbuf, as received from the caller.
 	 *	It may change if ipfw_chk() does an m_pullup, or if it
 	 *	consumes the packet because it calls send_reject().
 	 *	XXX This has to change, so that ipfw_chk() never modifies
 	 *	or consumes the buffer.
 	 * ip	is the beginning of the ip(4 or 6) header.
 	 *	Calculated by adding the L3offset to the start of data.
 	 *	(Until we start using L3offset, the packet is
 	 *	supposed to start with the ip header).
 	 */
 	struct mbuf *m = args->m;
 	struct ip *ip = mtod(m, struct ip *);
 
 	/*
 	 * For rules which contain uid/gid or jail constraints, cache
 	 * a copy of the users credentials after the pcb lookup has been
 	 * executed. This will speed up the processing of rules with
 	 * these types of constraints, as well as decrease contention
 	 * on pcb related locks.
 	 */
 	struct ip_fw_ugid fw_ugid_cache;
 	int ugid_lookup = 0;
 
 	/*
 	 * divinput_flags	If non-zero, set to the IP_FW_DIVERT_*_FLAG
 	 *	associated with a packet input on a divert socket.  This
 	 *	will allow to distinguish traffic and its direction when
 	 *	it originates from a divert socket.
 	 */
 	u_int divinput_flags = 0;
 
 	/*
 	 * oif | args->oif	If NULL, ipfw_chk has been called on the
 	 *	inbound path (ether_input, ip_input).
 	 *	If non-NULL, ipfw_chk has been called on the outbound path
 	 *	(ether_output, ip_output).
 	 */
 	struct ifnet *oif = args->oif;
 
 	struct ip_fw *f = NULL;		/* matching rule */
 	int retval = 0;
 
 	/*
 	 * hlen	The length of the IP header.
 	 */
 	u_int hlen = 0;		/* hlen >0 means we have an IP pkt */
 
 	/*
 	 * offset	The offset of a fragment. offset != 0 means that
 	 *	we have a fragment at this offset of an IPv4 packet.
 	 *	offset == 0 means that (if this is an IPv4 packet)
 	 *	this is the first or only fragment.
 	 *	For IPv6 offset == 0 means there is no Fragment Header. 
 	 *	If offset != 0 for IPv6 always use correct mask to
 	 *	get the correct offset because we add IP6F_MORE_FRAG
 	 *	to be able to detect the first fragment which would
 	 *	otherwise have offset = 0.
 	 */
 	u_short offset = 0;
 
 	/*
 	 * Local copies of addresses. They are only valid if we have
 	 * an IP packet.
 	 *
 	 * proto	The protocol. Set to 0 for non-ip packets,
 	 *	or to the protocol read from the packet otherwise.
 	 *	proto != 0 means that we have an IPv4 packet.
 	 *
 	 * src_port, dst_port	port numbers, in HOST format. Only
 	 *	valid for TCP and UDP packets.
 	 *
 	 * src_ip, dst_ip	ip addresses, in NETWORK format.
 	 *	Only valid for IPv4 packets.
 	 */
 	u_int8_t proto;
 	u_int16_t src_port = 0, dst_port = 0;	/* NOTE: host format	*/
 	struct in_addr src_ip, dst_ip;		/* NOTE: network format	*/
 	u_int16_t ip_len=0;
 	int pktlen;
 	u_int16_t	etype = 0;	/* Host order stored ether type */
 
 	/*
 	 * dyn_dir = MATCH_UNKNOWN when rules unchecked,
 	 * 	MATCH_NONE when checked and not matched (q = NULL),
 	 *	MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL)
 	 */
 	int dyn_dir = MATCH_UNKNOWN;
 	ipfw_dyn_rule *q = NULL;
 	struct ip_fw_chain *chain = &layer3_chain;
 	struct m_tag *mtag;
 
 	/*
 	 * We store in ulp a pointer to the upper layer protocol header.
 	 * In the ipv4 case this is easy to determine from the header,
 	 * but for ipv6 we might have some additional headers in the middle.
 	 * ulp is NULL if not found.
 	 */
 	void *ulp = NULL;		/* upper layer protocol pointer. */
 	/* XXX ipv6 variables */
 	int is_ipv6 = 0;
 	u_int16_t ext_hd = 0;	/* bits vector for extension header filtering */
 	/* end of ipv6 variables */
 	int is_ipv4 = 0;
 
 	if (m->m_flags & M_SKIP_FIREWALL)
 		return (IP_FW_PASS);	/* accept */
 
 	pktlen = m->m_pkthdr.len;
+	args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */
 	proto = args->f_id.proto = 0;	/* mark f_id invalid */
 		/* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */
 
 /*
  * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
  * then it sets p to point at the offset "len" in the mbuf. WARNING: the
  * pointer might become stale after other pullups (but we never use it
  * this way).
  */
 #define PULLUP_TO(len, p, T)						\
 do {									\
 	int x = (len) + sizeof(T);					\
 	if ((m)->m_len < x) {						\
 		args->m = m = m_pullup(m, x);				\
 		if (m == NULL)						\
 			goto pullup_failed;				\
 	}								\
 	p = (mtod(m, char *) + (len));					\
 } while (0)
 
 	/*
 	 * if we have an ether header,
 	 */
 	if (args->eh)
 		etype = ntohs(args->eh->ether_type);
 
 	/* Identify IP packets and fill up variables. */
 	if (pktlen >= sizeof(struct ip6_hdr) &&
 	    (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) {
 		struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
 		is_ipv6 = 1;
 		args->f_id.addr_type = 6;
 		hlen = sizeof(struct ip6_hdr);
 		proto = ip6->ip6_nxt;
 
 		/* Search extension headers to find upper layer protocols */
 		while (ulp == NULL) {
 			switch (proto) {
 			case IPPROTO_ICMPV6:
 				PULLUP_TO(hlen, ulp, struct icmp6_hdr);
 				args->f_id.flags = ICMP6(ulp)->icmp6_type;
 				break;
 
 			case IPPROTO_TCP:
 				PULLUP_TO(hlen, ulp, struct tcphdr);
 				dst_port = TCP(ulp)->th_dport;
 				src_port = TCP(ulp)->th_sport;
 				args->f_id.flags = TCP(ulp)->th_flags;
 				break;
 
 			case IPPROTO_SCTP:
 				PULLUP_TO(hlen, ulp, struct sctphdr);
 				src_port = SCTP(ulp)->src_port;
 				dst_port = SCTP(ulp)->dest_port;
 				break;
 
 			case IPPROTO_UDP:
 				PULLUP_TO(hlen, ulp, struct udphdr);
 				dst_port = UDP(ulp)->uh_dport;
 				src_port = UDP(ulp)->uh_sport;
 				break;
 
 			case IPPROTO_HOPOPTS:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_hbh);
 				ext_hd |= EXT_HOPOPTS;
 				hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
 				proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_ROUTING:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_rthdr);
 				switch (((struct ip6_rthdr *)ulp)->ip6r_type) {
 				case 0:
 					ext_hd |= EXT_RTHDR0;
 					break;
 				case 2:
 					ext_hd |= EXT_RTHDR2;
 					break;
 				default:
 					printf("IPFW2: IPV6 - Unknown Routing "
 					    "Header type(%d)\n",
 					    ((struct ip6_rthdr *)ulp)->ip6r_type);
 					if (fw_deny_unknown_exthdrs)
 					    return (IP_FW_DENY);
 					break;
 				}
 				ext_hd |= EXT_ROUTING;
 				hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
 				proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_FRAGMENT:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_frag);
 				ext_hd |= EXT_FRAGMENT;
 				hlen += sizeof (struct ip6_frag);
 				proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
 				offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
 					IP6F_OFF_MASK;
 				/* Add IP6F_MORE_FRAG for offset of first
 				 * fragment to be != 0. */
 				offset |= ((struct ip6_frag *)ulp)->ip6f_offlg &
 					IP6F_MORE_FRAG;
 				if (offset == 0) {
 					printf("IPFW2: IPV6 - Invalid Fragment "
 					    "Header\n");
 					if (fw_deny_unknown_exthdrs)
 					    return (IP_FW_DENY);
 					break;
 				}
 				args->f_id.frag_id6 =
 				    ntohl(((struct ip6_frag *)ulp)->ip6f_ident);
 				ulp = NULL;
 				break;
 
 			case IPPROTO_DSTOPTS:	/* RFC 2460 */
 				PULLUP_TO(hlen, ulp, struct ip6_hbh);
 				ext_hd |= EXT_DSTOPTS;
 				hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
 				proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_AH:	/* RFC 2402 */
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				ext_hd |= EXT_AH;
 				hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
 				proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
 				ulp = NULL;
 				break;
 
 			case IPPROTO_ESP:	/* RFC 2406 */
 				PULLUP_TO(hlen, ulp, uint32_t);	/* SPI, Seq# */
 				/* Anything past Seq# is variable length and
 				 * data past this ext. header is encrypted. */
 				ext_hd |= EXT_ESP;
 				break;
 
 			case IPPROTO_NONE:	/* RFC 2460 */
 				/*
 				 * Packet ends here, and IPv6 header has
 				 * already been pulled up. If ip6e_len!=0
 				 * then octets must be ignored.
 				 */
 				ulp = ip; /* non-NULL to get out of loop. */
 				break;
 
 			case IPPROTO_OSPFIGP:
 				/* XXX OSPF header check? */
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				break;
 
 			case IPPROTO_PIM:
 				/* XXX PIM header check? */
 				PULLUP_TO(hlen, ulp, struct pim);
 				break;
 
 			case IPPROTO_CARP:
 				PULLUP_TO(hlen, ulp, struct carp_header);
 				if (((struct carp_header *)ulp)->carp_version !=
 				    CARP_VERSION) 
 					return (IP_FW_DENY);
 				if (((struct carp_header *)ulp)->carp_type !=
 				    CARP_ADVERTISEMENT) 
 					return (IP_FW_DENY);
 				break;
 
 			case IPPROTO_IPV6:	/* RFC 2893 */
 				PULLUP_TO(hlen, ulp, struct ip6_hdr);
 				break;
 
 			case IPPROTO_IPV4:	/* RFC 2893 */
 				PULLUP_TO(hlen, ulp, struct ip);
 				break;
 
 			default:
 				printf("IPFW2: IPV6 - Unknown Extension "
 				    "Header(%d), ext_hd=%x\n", proto, ext_hd);
 				if (fw_deny_unknown_exthdrs)
 				    return (IP_FW_DENY);
 				PULLUP_TO(hlen, ulp, struct ip6_ext);
 				break;
 			} /*switch */
 		}
 		ip = mtod(m, struct ip *);
 		ip6 = (struct ip6_hdr *)ip;
 		args->f_id.src_ip6 = ip6->ip6_src;
 		args->f_id.dst_ip6 = ip6->ip6_dst;
 		args->f_id.src_ip = 0;
 		args->f_id.dst_ip = 0;
 		args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
 	} else if (pktlen >= sizeof(struct ip) &&
 	    (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
 	    	is_ipv4 = 1;
 		hlen = ip->ip_hl << 2;
 		args->f_id.addr_type = 4;
 
 		/*
 		 * Collect parameters into local variables for faster matching.
 		 */
 		proto = ip->ip_p;
 		src_ip = ip->ip_src;
 		dst_ip = ip->ip_dst;
 		if (args->eh != NULL) { /* layer 2 packets are as on the wire */
 			offset = ntohs(ip->ip_off) & IP_OFFMASK;
 			ip_len = ntohs(ip->ip_len);
 		} else {
 			offset = ip->ip_off & IP_OFFMASK;
 			ip_len = ip->ip_len;
 		}
 		pktlen = ip_len < pktlen ? ip_len : pktlen;
 
 		if (offset == 0) {
 			switch (proto) {
 			case IPPROTO_TCP:
 				PULLUP_TO(hlen, ulp, struct tcphdr);
 				dst_port = TCP(ulp)->th_dport;
 				src_port = TCP(ulp)->th_sport;
 				args->f_id.flags = TCP(ulp)->th_flags;
 				break;
 
 			case IPPROTO_UDP:
 				PULLUP_TO(hlen, ulp, struct udphdr);
 				dst_port = UDP(ulp)->uh_dport;
 				src_port = UDP(ulp)->uh_sport;
 				break;
 
 			case IPPROTO_ICMP:
 				PULLUP_TO(hlen, ulp, struct icmphdr);
 				args->f_id.flags = ICMP(ulp)->icmp_type;
 				break;
 
 			default:
 				break;
 			}
 		}
 
 		ip = mtod(m, struct ip *);
 		args->f_id.src_ip = ntohl(src_ip.s_addr);
 		args->f_id.dst_ip = ntohl(dst_ip.s_addr);
 	}
 #undef PULLUP_TO
 	if (proto) { /* we may have port numbers, store them */
 		args->f_id.proto = proto;
 		args->f_id.src_port = src_port = ntohs(src_port);
 		args->f_id.dst_port = dst_port = ntohs(dst_port);
 	}
 
 	IPFW_RLOCK(chain);
 	mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL);
 	if (args->rule) {
 		/*
 		 * Packet has already been tagged. Look for the next rule
 		 * to restart processing.
 		 *
 		 * If fw_one_pass != 0 then just accept it.
 		 * XXX should not happen here, but optimized out in
 		 * the caller.
 		 */
 		if (fw_one_pass) {
 			IPFW_RUNLOCK(chain);
 			return (IP_FW_PASS);
 		}
 
 		f = args->rule->next_rule;
 		if (f == NULL)
 			f = lookup_next_rule(args->rule);
 	} else {
 		/*
 		 * Find the starting rule. It can be either the first
 		 * one, or the one after divert_rule if asked so.
 		 */
 		int skipto = mtag ? divert_cookie(mtag) : 0;
 
 		f = chain->rules;
 		if (args->eh == NULL && skipto != 0) {
 			if (skipto >= IPFW_DEFAULT_RULE) {
 				IPFW_RUNLOCK(chain);
 				return (IP_FW_DENY); /* invalid */
 			}
 			while (f && f->rulenum <= skipto)
 				f = f->next;
 			if (f == NULL) {	/* drop packet */
 				IPFW_RUNLOCK(chain);
 				return (IP_FW_DENY);
 			}
 		}
 	}
 	/* reset divert rule to avoid confusion later */
 	if (mtag) {
 		divinput_flags = divert_info(mtag) &
 		    (IP_FW_DIVERT_OUTPUT_FLAG | IP_FW_DIVERT_LOOPBACK_FLAG);
 		m_tag_delete(m, mtag);
 	}
 
 	/*
 	 * Now scan the rules, and parse microinstructions for each rule.
 	 */
 	for (; f; f = f->next) {
 		ipfw_insn *cmd;
 		uint32_t tablearg = 0;
 		int l, cmdlen, skip_or; /* skip rest of OR block */
 
 again:
 		if (set_disable & (1 << f->set) )
 			continue;
 
 		skip_or = 0;
 		for (l = f->cmd_len, cmd = f->cmd ; l > 0 ;
 		    l -= cmdlen, cmd += cmdlen) {
 			int match;
 
 			/*
 			 * check_body is a jump target used when we find a
 			 * CHECK_STATE, and need to jump to the body of
 			 * the target rule.
 			 */
 
 check_body:
 			cmdlen = F_LEN(cmd);
 			/*
 			 * An OR block (insn_1 || .. || insn_n) has the
 			 * F_OR bit set in all but the last instruction.
 			 * The first match will set "skip_or", and cause
 			 * the following instructions to be skipped until
 			 * past the one with the F_OR bit clear.
 			 */
 			if (skip_or) {		/* skip this instruction */
 				if ((cmd->len & F_OR) == 0)
 					skip_or = 0;	/* next one is good */
 				continue;
 			}
 			match = 0; /* set to 1 if we succeed */
 
 			switch (cmd->opcode) {
 			/*
 			 * The first set of opcodes compares the packet's
 			 * fields with some pattern, setting 'match' if a
 			 * match is found. At the end of the loop there is
 			 * logic to deal with F_NOT and F_OR flags associated
 			 * with the opcode.
 			 */
 			case O_NOP:
 				match = 1;
 				break;
 
 			case O_FORWARD_MAC:
 				printf("ipfw: opcode %d unimplemented\n",
 				    cmd->opcode);
 				break;
 
 			case O_GID:
 			case O_UID:
 			case O_JAIL:
 				/*
 				 * We only check offset == 0 && proto != 0,
 				 * as this ensures that we have a
 				 * packet with the ports info.
 				 */
 				if (offset!=0)
 					break;
 				if (is_ipv6) /* XXX to be fixed later */
 					break;
 				if (proto == IPPROTO_TCP ||
 				    proto == IPPROTO_UDP)
 					match = check_uidgid(
 						    (ipfw_insn_u32 *)cmd,
 						    proto, oif,
 						    dst_ip, dst_port,
 						    src_ip, src_port, &fw_ugid_cache,
 						    &ugid_lookup, args->inp);
 				break;
 
 			case O_RECV:
 				match = iface_match(m->m_pkthdr.rcvif,
 				    (ipfw_insn_if *)cmd);
 				break;
 
 			case O_XMIT:
 				match = iface_match(oif, (ipfw_insn_if *)cmd);
 				break;
 
 			case O_VIA:
 				match = iface_match(oif ? oif :
 				    m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd);
 				break;
 
 			case O_MACADDR2:
 				if (args->eh != NULL) {	/* have MAC header */
 					u_int32_t *want = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->addr;
 					u_int32_t *mask = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->mask;
 					u_int32_t *hdr = (u_int32_t *)args->eh;
 
 					match =
 					    ( want[0] == (hdr[0] & mask[0]) &&
 					      want[1] == (hdr[1] & mask[1]) &&
 					      want[2] == (hdr[2] & mask[2]) );
 				}
 				break;
 
 			case O_MAC_TYPE:
 				if (args->eh != NULL) {
 					u_int16_t *p =
 					    ((ipfw_insn_u16 *)cmd)->ports;
 					int i;
 
 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
 						match = (etype >= p[0] &&
 						    etype <= p[1]);
 				}
 				break;
 
 			case O_FRAG:
 				match = (offset != 0);
 				break;
 
 			case O_IN:	/* "out" is "not in" */
 				match = (oif == NULL);
 				break;
 
 			case O_LAYER2:
 				match = (args->eh != NULL);
 				break;
 
 			case O_DIVERTED:
 				match = (cmd->arg1 & 1 && divinput_flags &
 				    IP_FW_DIVERT_LOOPBACK_FLAG) ||
 					(cmd->arg1 & 2 && divinput_flags &
 				    IP_FW_DIVERT_OUTPUT_FLAG);
 				break;
 
 			case O_PROTO:
 				/*
 				 * We do not allow an arg of 0 so the
 				 * check of "proto" only suffices.
 				 */
 				match = (proto == cmd->arg1);
 				break;
 
 			case O_IP_SRC:
 				match = is_ipv4 &&
 				    (((ipfw_insn_ip *)cmd)->addr.s_addr ==
 				    src_ip.s_addr);
 				break;
 
 			case O_IP_SRC_LOOKUP:
 			case O_IP_DST_LOOKUP:
 				if (is_ipv4) {
 				    uint32_t a =
 					(cmd->opcode == O_IP_DST_LOOKUP) ?
 					    dst_ip.s_addr : src_ip.s_addr;
 				    uint32_t v;
 
 				    match = lookup_table(chain, cmd->arg1, a,
 					&v);
 				    if (!match)
 					break;
 				    if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
 					match =
 					    ((ipfw_insn_u32 *)cmd)->d[0] == v;
 				    else
 					tablearg = v;
 				}
 				break;
 
 			case O_IP_SRC_MASK:
 			case O_IP_DST_MASK:
 				if (is_ipv4) {
 				    uint32_t a =
 					(cmd->opcode == O_IP_DST_MASK) ?
 					    dst_ip.s_addr : src_ip.s_addr;
 				    uint32_t *p = ((ipfw_insn_u32 *)cmd)->d;
 				    int i = cmdlen-1;
 
 				    for (; !match && i>0; i-= 2, p+= 2)
 					match = (p[0] == (a & p[1]));
 				}
 				break;
 
 			case O_IP_SRC_ME:
 				if (is_ipv4) {
 					struct ifnet *tif;
 
 					INADDR_TO_IFP(src_ip, tif);
 					match = (tif != NULL);
 				}
 				break;
 
 			case O_IP_DST_SET:
 			case O_IP_SRC_SET:
 				if (is_ipv4) {
 					u_int32_t *d = (u_int32_t *)(cmd+1);
 					u_int32_t addr =
 					    cmd->opcode == O_IP_DST_SET ?
 						args->f_id.dst_ip :
 						args->f_id.src_ip;
 
 					    if (addr < d[0])
 						    break;
 					    addr -= d[0]; /* subtract base */
 					    match = (addr < cmd->arg1) &&
 						( d[ 1 + (addr>>5)] &
 						  (1<<(addr & 0x1f)) );
 				}
 				break;
 
 			case O_IP_DST:
 				match = is_ipv4 &&
 				    (((ipfw_insn_ip *)cmd)->addr.s_addr ==
 				    dst_ip.s_addr);
 				break;
 
 			case O_IP_DST_ME:
 				if (is_ipv4) {
 					struct ifnet *tif;
 
 					INADDR_TO_IFP(dst_ip, tif);
 					match = (tif != NULL);
 				}
 				break;
 
 			case O_IP_SRCPORT:
 			case O_IP_DSTPORT:
 				/*
 				 * offset == 0 && proto != 0 is enough
 				 * to guarantee that we have a
 				 * packet with port info.
 				 */
 				if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP)
 				    && offset == 0) {
 					u_int16_t x =
 					    (cmd->opcode == O_IP_SRCPORT) ?
 						src_port : dst_port ;
 					u_int16_t *p =
 					    ((ipfw_insn_u16 *)cmd)->ports;
 					int i;
 
 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
 						match = (x>=p[0] && x<=p[1]);
 				}
 				break;
 
 			case O_ICMPTYPE:
 				match = (offset == 0 && proto==IPPROTO_ICMP &&
 				    icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) );
 				break;
 
 #ifdef INET6
 			case O_ICMP6TYPE:
 				match = is_ipv6 && offset == 0 &&
 				    proto==IPPROTO_ICMPV6 &&
 				    icmp6type_match(
 					ICMP6(ulp)->icmp6_type,
 					(ipfw_insn_u32 *)cmd);
 				break;
 #endif /* INET6 */
 
 			case O_IPOPT:
 				match = (is_ipv4 &&
 				    ipopts_match(ip, cmd) );
 				break;
 
 			case O_IPVER:
 				match = (is_ipv4 &&
 				    cmd->arg1 == ip->ip_v);
 				break;
 
 			case O_IPID:
 			case O_IPLEN:
 			case O_IPTTL:
 				if (is_ipv4) {	/* only for IP packets */
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 
 				    if (cmd->opcode == O_IPLEN)
 					x = ip_len;
 				    else if (cmd->opcode == O_IPTTL)
 					x = ip->ip_ttl;
 				    else /* must be IPID */
 					x = ntohs(ip->ip_id);
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* otherwise we have ranges */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i>0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_IPPRECEDENCE:
 				match = (is_ipv4 &&
 				    (cmd->arg1 == (ip->ip_tos & 0xe0)) );
 				break;
 
 			case O_IPTOS:
 				match = (is_ipv4 &&
 				    flags_match(cmd, ip->ip_tos));
 				break;
 
 			case O_TCPDATALEN:
 				if (proto == IPPROTO_TCP && offset == 0) {
 				    struct tcphdr *tcp;
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 
 				    tcp = TCP(ulp);
 				    x = ip_len -
 					((ip->ip_hl + tcp->th_off) << 2);
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* otherwise we have ranges */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i>0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_TCPFLAGS:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    flags_match(cmd, TCP(ulp)->th_flags));
 				break;
 
 			case O_TCPOPTS:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    tcpopts_match(TCP(ulp), cmd));
 				break;
 
 			case O_TCPSEQ:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    ((ipfw_insn_u32 *)cmd)->d[0] ==
 					TCP(ulp)->th_seq);
 				break;
 
 			case O_TCPACK:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    ((ipfw_insn_u32 *)cmd)->d[0] ==
 					TCP(ulp)->th_ack);
 				break;
 
 			case O_TCPWIN:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    cmd->arg1 == TCP(ulp)->th_win);
 				break;
 
 			case O_ESTAB:
 				/* reject packets which have SYN only */
 				/* XXX should i also check for TH_ACK ? */
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    (TCP(ulp)->th_flags &
 				     (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
 				break;
 
 			case O_ALTQ: {
 				struct pf_mtag *at;
 				ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
 
 				match = 1;
 				at = pf_find_mtag(m);
 				if (at != NULL && at->qid != 0)
 					break;
 				at = pf_get_mtag(m);
 				if (at == NULL) {
 					/*
 					 * Let the packet fall back to the
 					 * default ALTQ.
 					 */
 					break;
 				}
 				at->qid = altq->qid;
 				if (is_ipv4)
 					at->af = AF_INET;
 				else
 					at->af = AF_LINK;
 				at->hdr = ip;
 				break;
 			}
 
 			case O_LOG:
 				if (fw_verbose)
 					ipfw_log(f, hlen, args, m,
 					    oif, offset, tablearg, ip);
 				match = 1;
 				break;
 
 			case O_PROB:
 				match = (random()<((ipfw_insn_u32 *)cmd)->d[0]);
 				break;
 
 			case O_VERREVPATH:
 				/* Outgoing packets automatically pass/match */
 				match = ((oif != NULL) ||
 				    (m->m_pkthdr.rcvif == NULL) ||
 				    (
 #ifdef INET6
 				    is_ipv6 ?
 					verify_path6(&(args->f_id.src_ip6),
 					    m->m_pkthdr.rcvif) :
 #endif
-				    verify_path(src_ip, m->m_pkthdr.rcvif)));
+				    verify_path(src_ip, m->m_pkthdr.rcvif,
+				        args->f_id.fib)));
 				break;
 
 			case O_VERSRCREACH:
 				/* Outgoing packets automatically pass/match */
 				match = (hlen > 0 && ((oif != NULL) ||
 #ifdef INET6
 				    is_ipv6 ?
 				        verify_path6(&(args->f_id.src_ip6),
 				            NULL) :
 #endif
-				    verify_path(src_ip, NULL)));
+				    verify_path(src_ip, NULL, args->f_id.fib)));
 				break;
 
 			case O_ANTISPOOF:
 				/* Outgoing packets automatically pass/match */
 				if (oif == NULL && hlen > 0 &&
 				    (  (is_ipv4 && in_localaddr(src_ip))
 #ifdef INET6
 				    || (is_ipv6 &&
 				        in6_localaddr(&(args->f_id.src_ip6)))
 #endif
 				    ))
 					match =
 #ifdef INET6
 					    is_ipv6 ? verify_path6(
 					        &(args->f_id.src_ip6),
 					        m->m_pkthdr.rcvif) :
 #endif
 					    verify_path(src_ip,
-					        m->m_pkthdr.rcvif);
+					    	m->m_pkthdr.rcvif,
+					        args->f_id.fib);
 				else
 					match = 1;
 				break;
 
 			case O_IPSEC:
 #ifdef IPSEC
 				match = (m_tag_find(m,
 				    PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
 #endif
 				/* otherwise no match */
 				break;
 
 #ifdef INET6
 			case O_IP6_SRC:
 				match = is_ipv6 &&
 				    IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6,
 				    &((ipfw_insn_ip6 *)cmd)->addr6);
 				break;
 
 			case O_IP6_DST:
 				match = is_ipv6 &&
 				IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6,
 				    &((ipfw_insn_ip6 *)cmd)->addr6);
 				break;
 			case O_IP6_SRC_MASK:
 			case O_IP6_DST_MASK:
 				if (is_ipv6) {
 					int i = cmdlen - 1;
 					struct in6_addr p;
 					struct in6_addr *d =
 					    &((ipfw_insn_ip6 *)cmd)->addr6;
 
 					for (; !match && i > 0; d += 2,
 					    i -= F_INSN_SIZE(struct in6_addr)
 					    * 2) {
 						p = (cmd->opcode ==
 						    O_IP6_SRC_MASK) ?
 						    args->f_id.src_ip6:
 						    args->f_id.dst_ip6;
 						APPLY_MASK(&p, &d[1]);
 						match =
 						    IN6_ARE_ADDR_EQUAL(&d[0],
 						    &p);
 					}
 				}
 				break;
 
 			case O_IP6_SRC_ME:
 				match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6);
 				break;
 
 			case O_IP6_DST_ME:
 				match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6);
 				break;
 
 			case O_FLOW6ID:
 				match = is_ipv6 &&
 				    flow6id_match(args->f_id.flow_id6,
 				    (ipfw_insn_u32 *) cmd);
 				break;
 
 			case O_EXT_HDR:
 				match = is_ipv6 &&
 				    (ext_hd & ((ipfw_insn *) cmd)->arg1);
 				break;
 
 			case O_IP6:
 				match = is_ipv6;
 				break;
 #endif
 
 			case O_IP4:
 				match = is_ipv4;
 				break;
 
 			case O_TAG: {
 				uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
 				    tablearg : cmd->arg1;
 
 				/* Packet is already tagged with this tag? */
 				mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL);
 
 				/* We have `untag' action when F_NOT flag is
 				 * present. And we must remove this mtag from
 				 * mbuf and reset `match' to zero (`match' will
 				 * be inversed later).
 				 * Otherwise we should allocate new mtag and
 				 * push it into mbuf.
 				 */
 				if (cmd->len & F_NOT) { /* `untag' action */
 					if (mtag != NULL)
 						m_tag_delete(m, mtag);
 				} else if (mtag == NULL) {
 					if ((mtag = m_tag_alloc(MTAG_IPFW,
 					    tag, 0, M_NOWAIT)) != NULL)
 						m_tag_prepend(m, mtag);
 				}
 				match = (cmd->len & F_NOT) ? 0: 1;
 				break;
 			}
 
+			case O_FIB: /* try match the specified fib */
+				if (args->f_id.fib == cmd->arg1)
+					match = 1;
+				break;
+
 			case O_TAGGED: {
 				uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
 				    tablearg : cmd->arg1;
 
 				if (cmdlen == 1) {
 					match = m_tag_locate(m, MTAG_IPFW,
 					    tag, NULL) != NULL;
 					break;
 				}
 
 				/* we have ranges */
 				for (mtag = m_tag_first(m);
 				    mtag != NULL && !match;
 				    mtag = m_tag_next(m, mtag)) {
 					uint16_t *p;
 					int i;
 
 					if (mtag->m_tag_cookie != MTAG_IPFW)
 						continue;
 
 					p = ((ipfw_insn_u16 *)cmd)->ports;
 					i = cmdlen - 1;
 					for(; !match && i > 0; i--, p += 2)
 						match =
 						    mtag->m_tag_id >= p[0] &&
 						    mtag->m_tag_id <= p[1];
 				}
 				break;
 			}
 				
 			/*
 			 * The second set of opcodes represents 'actions',
 			 * i.e. the terminal part of a rule once the packet
 			 * matches all previous patterns.
 			 * Typically there is only one action for each rule,
 			 * and the opcode is stored at the end of the rule
 			 * (but there are exceptions -- see below).
 			 *
 			 * In general, here we set retval and terminate the
 			 * outer loop (would be a 'break 3' in some language,
 			 * but we need to do a 'goto done').
 			 *
 			 * Exceptions:
 			 * O_COUNT and O_SKIPTO actions:
 			 *   instead of terminating, we jump to the next rule
 			 *   ('goto next_rule', equivalent to a 'break 2'),
 			 *   or to the SKIPTO target ('goto again' after
 			 *   having set f, cmd and l), respectively.
 			 *
 			 * O_TAG, O_LOG and O_ALTQ action parameters:
 			 *   perform some action and set match = 1;
 			 *
 			 * O_LIMIT and O_KEEP_STATE: these opcodes are
 			 *   not real 'actions', and are stored right
 			 *   before the 'action' part of the rule.
 			 *   These opcodes try to install an entry in the
 			 *   state tables; if successful, we continue with
 			 *   the next opcode (match=1; break;), otherwise
 			 *   the packet must be dropped
 			 *   ('goto done' after setting retval);
 			 *
 			 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
 			 *   cause a lookup of the state table, and a jump
 			 *   to the 'action' part of the parent rule
 			 *   ('goto check_body') if an entry is found, or
 			 *   (CHECK_STATE only) a jump to the next rule if
 			 *   the entry is not found ('goto next_rule').
 			 *   The result of the lookup is cached so that
 			 *   further instances of these opcodes are
 			 *   effectively NOPs.
 			 */
 			case O_LIMIT:
 			case O_KEEP_STATE:
 				if (install_state(f,
 				    (ipfw_insn_limit *)cmd, args, tablearg)) {
 					retval = IP_FW_DENY;
 					goto done; /* error/limit violation */
 				}
 				match = 1;
 				break;
 
 			case O_PROBE_STATE:
 			case O_CHECK_STATE:
 				/*
 				 * dynamic rules are checked at the first
 				 * keep-state or check-state occurrence,
 				 * with the result being stored in dyn_dir.
 				 * The compiler introduces a PROBE_STATE
 				 * instruction for us when we have a
 				 * KEEP_STATE (because PROBE_STATE needs
 				 * to be run first).
 				 */
 				if (dyn_dir == MATCH_UNKNOWN &&
 				    (q = lookup_dyn_rule(&args->f_id,
 				     &dyn_dir, proto == IPPROTO_TCP ?
 					TCP(ulp) : NULL))
 					!= NULL) {
 					/*
 					 * Found dynamic entry, update stats
 					 * and jump to the 'action' part of
 					 * the parent rule.
 					 */
 					q->pcnt++;
 					q->bcnt += pktlen;
 					f = q->rule;
 					cmd = ACTION_PTR(f);
 					l = f->cmd_len - f->act_ofs;
 					IPFW_DYN_UNLOCK();
 					goto check_body;
 				}
 				/*
 				 * Dynamic entry not found. If CHECK_STATE,
 				 * skip to next rule, if PROBE_STATE just
 				 * ignore and continue with next opcode.
 				 */
 				if (cmd->opcode == O_CHECK_STATE)
 					goto next_rule;
 				match = 1;
 				break;
 
 			case O_ACCEPT:
 				retval = 0;	/* accept */
 				goto done;
 
 			case O_PIPE:
 			case O_QUEUE:
 				args->rule = f; /* report matching rule */
 				if (cmd->arg1 == IP_FW_TABLEARG)
 					args->cookie = tablearg;
 				else
 					args->cookie = cmd->arg1;
 				retval = IP_FW_DUMMYNET;
 				goto done;
 
 			case O_DIVERT:
 			case O_TEE: {
 				struct divert_tag *dt;
 
 				if (args->eh) /* not on layer 2 */
 					break;
 				mtag = m_tag_get(PACKET_TAG_DIVERT,
 						sizeof(struct divert_tag),
 						M_NOWAIT);
 				if (mtag == NULL) {
 					/* XXX statistic */
 					/* drop packet */
 					IPFW_RUNLOCK(chain);
 					return (IP_FW_DENY);
 				}
 				dt = (struct divert_tag *)(mtag+1);
 				dt->cookie = f->rulenum;
 				if (cmd->arg1 == IP_FW_TABLEARG)
 					dt->info = tablearg;
 				else
 					dt->info = cmd->arg1;
 				m_tag_prepend(m, mtag);
 				retval = (cmd->opcode == O_DIVERT) ?
 				    IP_FW_DIVERT : IP_FW_TEE;
 				goto done;
 			}
-
 			case O_COUNT:
 			case O_SKIPTO:
 				f->pcnt++;	/* update stats */
 				f->bcnt += pktlen;
 				f->timestamp = time_uptime;
 				if (cmd->opcode == O_COUNT)
 					goto next_rule;
 				/* handle skipto */
 				if (f->next_rule == NULL)
 					lookup_next_rule(f);
 				f = f->next_rule;
 				goto again;
 
 			case O_REJECT:
 				/*
 				 * Drop the packet and send a reject notice
 				 * if the packet is not ICMP (or is an ICMP
 				 * query), and it is not multicast/broadcast.
 				 */
 				if (hlen > 0 && is_ipv4 && offset == 0 &&
 				    (proto != IPPROTO_ICMP ||
 				     is_icmp_query(ICMP(ulp))) &&
 				    !(m->m_flags & (M_BCAST|M_MCAST)) &&
 				    !IN_MULTICAST(ntohl(dst_ip.s_addr))) {
 					send_reject(args, cmd->arg1, ip_len, ip);
 					m = args->m;
 				}
 				/* FALLTHROUGH */
 #ifdef INET6
 			case O_UNREACH6:
 				if (hlen > 0 && is_ipv6 &&
 				    ((offset & IP6F_OFF_MASK) == 0) &&
 				    (proto != IPPROTO_ICMPV6 ||
 				     (is_icmp6_query(args->f_id.flags) == 1)) &&
 				    !(m->m_flags & (M_BCAST|M_MCAST)) &&
 				    !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) {
 					send_reject6(
 					    args, cmd->arg1, hlen,
 					    (struct ip6_hdr *)ip);
 					m = args->m;
 				}
 				/* FALLTHROUGH */
 #endif
 			case O_DENY:
 				retval = IP_FW_DENY;
 				goto done;
 
 			case O_FORWARD_IP: {
 				struct sockaddr_in *sa;
 				sa = &(((ipfw_insn_sa *)cmd)->sa);
 				if (args->eh)	/* not valid on layer2 pkts */
 					break;
 				if (!q || dyn_dir == MATCH_FORWARD) {
 					if (sa->sin_addr.s_addr == INADDR_ANY) {
 						bcopy(sa, &args->hopstore,
 							sizeof(*sa));
 						args->hopstore.sin_addr.s_addr =
 						    htonl(tablearg);
 						args->next_hop =
 						    &args->hopstore;
 					} else {
 						args->next_hop = sa;
 					}
 				}
 				retval = IP_FW_PASS;
 			    }
 			    goto done;
 
 			case O_NETGRAPH:
 			case O_NGTEE:
 				args->rule = f;	/* report matching rule */
 				if (cmd->arg1 == IP_FW_TABLEARG)
 					args->cookie = tablearg;
 				else
 					args->cookie = cmd->arg1;
 				retval = (cmd->opcode == O_NETGRAPH) ?
 				    IP_FW_NETGRAPH : IP_FW_NGTEE;
 				goto done;
 
+			case O_SETFIB:
+				f->pcnt++;	/* update stats */
+				f->bcnt += pktlen;
+				f->timestamp = time_uptime;
+				M_SETFIB(m, cmd->arg1);
+				args->f_id.fib = cmd->arg1;
+				goto next_rule;
+
 			case O_NAT: {
                         	struct cfg_nat *t;
                         	int nat_id;
 
  				if (IPFW_NAT_LOADED) {
 					args->rule = f; /* Report matching rule. */
 					t = ((ipfw_insn_nat *)cmd)->nat;
 					if (t == NULL) {
 						nat_id = (cmd->arg1 == IP_FW_TABLEARG) ?
 						    tablearg : cmd->arg1;
 						LOOKUP_NAT(layer3_chain, nat_id, t);
 						if (t == NULL) {
 							retval = IP_FW_DENY;
 							goto done;
 						}
 						if (cmd->arg1 != IP_FW_TABLEARG)
 							((ipfw_insn_nat *)cmd)->nat = t;
 					}
 					retval = ipfw_nat_ptr(args, t, m);
 				} else
 					retval = IP_FW_DENY;
 				goto done;
 			}
 
 			default:
 				panic("-- unknown opcode %d\n", cmd->opcode);
 			} /* end of switch() on opcodes */
 
 			if (cmd->len & F_NOT)
 				match = !match;
 
 			if (match) {
 				if (cmd->len & F_OR)
 					skip_or = 1;
 			} else {
 				if (!(cmd->len & F_OR)) /* not an OR block, */
 					break;		/* try next rule    */
 			}
 
 		}	/* end of inner for, scan opcodes */
 
 next_rule:;		/* try next rule		*/
 
 	}		/* end of outer for, scan rules */
 	printf("ipfw: ouch!, skip past end of rules, denying packet\n");
 	IPFW_RUNLOCK(chain);
 	return (IP_FW_DENY);
 
 done:
 	/* Update statistics */
 	f->pcnt++;
 	f->bcnt += pktlen;
 	f->timestamp = time_uptime;
 	IPFW_RUNLOCK(chain);
 	return (retval);
 
 pullup_failed:
 	if (fw_verbose)
 		printf("ipfw: pullup failed\n");
 	return (IP_FW_DENY);
 }
 
 /*
  * Invalidate the cached next_rule pointer of every rule in the chain.
  *
  * When a rule is added/deleted, clear the next_rule pointers in all rules.
  * These will be reconstructed on the fly as packets are matched (the
  * O_SKIPTO handling calls lookup_next_rule() when next_rule is NULL).
  *
  * chain: rule chain to walk.  IPFW_WLOCK_ASSERT() is applied to it first,
  *	so the caller is presumably expected to hold the chain write lock.
  */
 static void
 flush_rule_ptrs(struct ip_fw_chain *chain)
 {
 	struct ip_fw *rule;
 
 	IPFW_WLOCK_ASSERT(chain);
 
 	/* Visit every rule, following the ->next links until NULL. */
 	for (rule = chain->rules; rule; rule = rule->next)
 		rule->next_rule = NULL;
 }
 
 /*
  * Add a new rule to the list. Copy the rule into a malloc'ed area, then
  * possibly create a rule number and add the rule to the list.
  * Update the rule_number in the input struct so the caller knows it as well.
  *
  * chain:	rule chain to insert into; it is write-locked here while the
  *		list is modified.
  * input_rule:	rule to copy (RULESIZE(input_rule) bytes).  Its rulenum is
  *		overwritten when it was 0 and a number had to be chosen.
  *
  * Returns 0 on success, EINVAL if the chain is empty and the rule is not
  * numbered IPFW_DEFAULT_RULE, or ENOSPC if the M_NOWAIT allocation fails.
  */
 static int
 add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
 {
 	struct ip_fw *rule, *f, *prev;
 	int l = RULESIZE(input_rule);
 
 	/*
 	 * The very first rule installed must be the default rule.
 	 * NOTE(review): this emptiness test runs before IPFW_WLOCK is taken.
 	 */
 	if (chain->rules == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE)
 		return (EINVAL);
 
 	rule = malloc(l, M_IPFW, M_NOWAIT | M_ZERO);
 	if (rule == NULL)
 		return (ENOSPC);
 
 	bcopy(input_rule, rule, l);
 
 	/* The private copy starts unlinked ... */
 	rule->next = NULL;
 	rule->next_rule = NULL;
 
 	/* ... and with fresh statistics, whatever the caller passed in. */
 	rule->pcnt = 0;
 	rule->bcnt = 0;
 	rule->timestamp = 0;
 
 	IPFW_WLOCK(chain);
 
 	if (chain->rules == NULL) {	/* default rule */
 		chain->rules = rule;
 		goto done;
         }
 
 	/*
 	 * If rulenum is 0, find highest numbered rule before the
 	 * default rule, and add autoinc_step
 	 */
 	/* autoinc_step is clamped to the range 1..1000 before use. */
 	if (autoinc_step < 1)
 		autoinc_step = 1;
 	else if (autoinc_step > 1000)
 		autoinc_step = 1000;
 	if (rule->rulenum == 0) {
 		/*
 		 * locate the highest numbered rule before default
 		 */
 		for (f = chain->rules; f; f = f->next) {
 			if (f->rulenum == IPFW_DEFAULT_RULE)
 				break;
 			rule->rulenum = f->rulenum;
 		}
 		/*
 		 * Step past the last rule only if the result stays below
 		 * the default rule number; otherwise the number found
 		 * above is used unchanged.
 		 */
 		if (rule->rulenum < IPFW_DEFAULT_RULE - autoinc_step)
 			rule->rulenum += autoinc_step;
 		/* Report the chosen number back to the caller. */
 		input_rule->rulenum = rule->rulenum;
 	}
 
 	/*
 	 * Now insert the new rule in the right place in the sorted list.
 	 */
 	/*
 	 * The strict '>' places the new rule after any existing rules
 	 * carrying the same number.  If no rule with a larger number
 	 * exists the new rule is not linked at all; presumably the default
 	 * rule always terminates this search -- TODO confirm.
 	 */
 	for (prev = NULL, f = chain->rules; f; prev = f, f = f->next) {
 		if (f->rulenum > rule->rulenum) { /* found the location */
 			if (prev) {
 				rule->next = f;
 				prev->next = rule;
 			} else { /* head insert */
 				rule->next = chain->rules;
 				chain->rules = rule;
 			}
 			break;
 		}
 	}
 	/* Cached next_rule pointers may now be stale; drop them all. */
 	flush_rule_ptrs(chain);
 done:
 	/* Account for the new static rule and its size. */
 	static_count++;
 	static_len += l;
 	IPFW_WUNLOCK(chain);
 	DEB(printf("ipfw: installed rule %d, static count now %d\n",
 		rule->rulenum, static_count);)
 	return (0);
 }
 
 /**
  * Remove a static rule (including derived dynamic rules)
  * and place it on the ``reap list'' for later reclamation.
  * The caller is in charge of clearing rule pointers to avoid
  * dangling pointers.
  * @param chain	chain the rule is linked on; the rule is pushed onto
  *		chain->reap.  IPFW_WLOCK_ASSERT() is applied to it.
  * @param rule	rule to unlink.
  * @param prev	rule preceding `rule' in the list, or NULL when `rule'
  *		is the head of chain->rules.
  * @return a pointer to the next entry.
  * Arguments are not checked, so they better be correct.
  */
 static struct ip_fw *
 remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule,
     struct ip_fw *prev)
 {
 	struct ip_fw *n;
 	int l = RULESIZE(rule);
 
 	IPFW_WLOCK_ASSERT(chain);
 
 	/* Remember the successor: rule->next is reused for the reap list. */
 	n = rule->next;
 	IPFW_DYN_LOCK();
 	remove_dyn_rule(rule, NULL /* force removal */);
 	IPFW_DYN_UNLOCK();
 	/* Unlink from the active list. */
 	if (prev == NULL)
 		chain->rules = n;
 	else
 		prev->next = n;
 	static_count--;
 	static_len -= l;
 
 	/* Push onto the head of the reap list; nothing is freed here. */
 	rule->next = chain->reap;
 	chain->reap = rule;
 
 	return n;
 }
 
 /**
  * Reclaim storage associated with a list of rules.  This is
  * typically the list created using remove_rule.
  *
  * @param head	first rule of a list linked through ->next; a NULL head
  *		makes the loop body run zero times.
  */
 static void
 reap_rules(struct ip_fw *head)
 {
 	struct ip_fw *rule;
 
 	while ((rule = head) != NULL) {
 		/* Advance first so the freed rule is never dereferenced. */
 		head = head->next;
 		/*
 		 * NOTE(review): presumably lets dummynet drop its references
 		 * to the rule before it is freed -- confirm against dummynet.
 		 */
 		if (DUMMYNET_LOADED)
 			ip_dn_ruledel_ptr(rule);
 		free(rule, M_IPFW);
 	}
 }
 
 /*
  * Remove all rules from a chain (except rules in set RESVD_SET
  * unless kill_default = 1).  The caller is responsible for
  * reclaiming storage for the rules left in chain->reap.
  *
  * chain:	chain to empty; IPFW_WLOCK_ASSERT() is applied to it.
  * kill_default: when nonzero, rules in RESVD_SET are removed as well.
  */
 static void
 free_chain(struct ip_fw_chain *chain, int kill_default)
 {
 	struct ip_fw *prev, *rule;
 
 	IPFW_WLOCK_ASSERT(chain);
 
 	flush_rule_ptrs(chain); /* more efficient to do outside the loop */
 	/*
 	 * remove_rule() returns the successor, so `prev' only advances
 	 * when a rule is kept.
 	 */
 	for (prev = NULL, rule = chain->rules; rule ; )
 		if (kill_default || rule->set != RESVD_SET)
 			rule = remove_rule(chain, rule, prev);
 		else {
 			prev = rule;
 			rule = rule->next;
 		}
 }
 
 /**
  * Remove all rules with given number, and also do set manipulation.
  * Assumes chain != NULL && *chain != NULL.
  *
  * The argument is an u_int32_t. The low 16 bit are the rule or set number,
  * the next 8 bits are the new set, the top 8 bits are the command:
  *
  *	0	delete rules with given number
  *	1	delete rules with given set number
  *	2	move rules with given number to new set
  *	3	move rules with given set number to new set
  *	4	swap sets with given numbers
  *	5	delete rules with given number and with given set number
  *
  * @return 0 on success; EINVAL if the command is above 5, a set number is
  * above RESVD_SET, a rule number is >= IPFW_DEFAULT_RULE, or (commands 0
  * and 5) no rule with the given number exists.
  *
  * The list walks below do not test for NULL; they stop on rule numbers
  * only, presumably relying on the default rule (IPFW_DEFAULT_RULE) always
  * being the last entry -- TODO confirm.
  */
 static int
 del_entry(struct ip_fw_chain *chain, u_int32_t arg)
 {
 	struct ip_fw *prev = NULL, *rule;
 	u_int16_t rulenum;	/* rule or old_set */
 	u_int8_t cmd, new_set;
 
 	/* Unpack the three fields described above. */
 	rulenum = arg & 0xffff;
 	cmd = (arg >> 24) & 0xff;
 	new_set = (arg >> 16) & 0xff;
 
 	/* Validate before taking the lock. */
 	if (cmd > 5 || new_set > RESVD_SET)
 		return EINVAL;
 	if (cmd == 0 || cmd == 2 || cmd == 5) {
 		/* Low 16 bits are a rule number; the default rule is off limits. */
 		if (rulenum >= IPFW_DEFAULT_RULE)
 			return EINVAL;
 	} else {
 		if (rulenum > RESVD_SET)	/* old_set */
 			return EINVAL;
 	}
 
 	IPFW_WLOCK(chain);
 	rule = chain->rules;
 	/* Start with an empty reap list; remove_rule() fills it. */
 	chain->reap = NULL;
 	switch (cmd) {
 	case 0:	/* delete rules with given number */
 		/*
 		 * locate first rule to delete
 		 */
 		for (; rule->rulenum < rulenum; prev = rule, rule = rule->next)
 			;
 		if (rule->rulenum != rulenum) {
 			IPFW_WUNLOCK(chain);
 			return EINVAL;
 		}
 
 		/*
 		 * flush pointers outside the loop, then delete all matching
 		 * rules. prev remains the same throughout the cycle.
 		 */
 		flush_rule_ptrs(chain);
 		while (rule->rulenum == rulenum)
 			rule = remove_rule(chain, rule, prev);
 		break;
 
 	case 1:	/* delete all rules with given set number */
 		flush_rule_ptrs(chain);
 		rule = chain->rules;
 		/* Here `rulenum' holds the set number to match. */
 		while (rule->rulenum < IPFW_DEFAULT_RULE)
 			if (rule->set == rulenum)
 				rule = remove_rule(chain, rule, prev);
 			else {
 				prev = rule;
 				rule = rule->next;
 			}
 		break;
 
 	case 2:	/* move rules with given number to new set */
 		rule = chain->rules;
 		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
 			if (rule->rulenum == rulenum)
 				rule->set = new_set;
 		break;
 
 	case 3: /* move rules with given set number to new set */
 		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
 			if (rule->set == rulenum)
 				rule->set = new_set;
 		break;
 
 	case 4: /* swap two sets */
 		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
 			if (rule->set == rulenum)
 				rule->set = new_set;
 			else if (rule->set == new_set)
 				rule->set = rulenum;
 		break;
 	case 5: /* delete rules with given number and with given set number.
 		 * rulenum - given rule number;
 		 * new_set - given set number.
 		 */
 		for (; rule->rulenum < rulenum; prev = rule, rule = rule->next)
 			;
 		if (rule->rulenum != rulenum) {
 			IPFW_WUNLOCK(chain);
 			return (EINVAL);
 		}
 		flush_rule_ptrs(chain);
 		/* Unlike case 0, rules in other sets are kept, so prev moves. */
 		while (rule->rulenum == rulenum) {
 			if (rule->set == new_set)
 				rule = remove_rule(chain, rule, prev);
 			else {
 				prev = rule;
 				rule = rule->next;
 			}
 		}
 	}
 	/*
 	 * Look for rules to reclaim.  We grab the list before
 	 * releasing the lock then reclaim them w/o the lock to
 	 * avoid a LOR with dummynet.
 	 */
 	rule = chain->reap;
 	chain->reap = NULL;
 	IPFW_WUNLOCK(chain);
 	if (rule)
 		reap_rules(rule);
 	return 0;
 }
 
 /*
  * Clear counters for a specific rule.
  * The enclosing "table" is assumed locked.
  *
  * rule:	rule whose counters are reset.
  * log_only:	when 0, the byte/packet counters and the timestamp are
  *		zeroed too; when nonzero only the log budget is touched.
  */
 static void
 clear_counters(struct ip_fw *rule, int log_only)
 {
 	/*
 	 * View the instruction at ACTION_PTR(rule) as a log instruction;
 	 * it is only used as one after the opcode test below.
 	 */
 	ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);
 
 	if (log_only == 0) {
 		rule->bcnt = rule->pcnt = 0;
 		rule->timestamp = 0;
 	}
 	/* Refill the remaining-log budget from the configured maximum. */
 	if (l->o.opcode == O_LOG)
 		l->log_left = l->max_log;
 }
 
 /**
  * Reset some or all counters on firewall rules.
  * The argument `arg' is an u_int32_t. The low 16 bit are the rule number,
  * the next 8 bits are the set number, the top 8 bits are the command:
  *	0	work with rules from all set's;
  *	1	work with rules only from specified set.
  * Specified rule number is zero if we want to clear all entries.
  * log_only is 1 if we only want to reset logs, zero otherwise.
  *
  * @return 0 on success; EINVAL if the command is above 1, if command 1
  * names a set above RESVD_SET, or if a nonzero rule number matches no rule.
  */
 static int
 zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
 {
 	struct ip_fw *rule;
 	char *msg;
 
 	uint16_t rulenum = arg & 0xffff;
 	uint8_t set = (arg >> 16) & 0xff;
 	uint8_t cmd = (arg >> 24) & 0xff;
 
 	if (cmd > 1)
 		return (EINVAL);
 	if (cmd == 1 && set > RESVD_SET)
 		return (EINVAL);
 
 	IPFW_WLOCK(chain);
 	if (rulenum == 0) {
 		/* Reset for every "clear all" request, whatever log_only/cmd. */
 		norule_counter = 0;
 		for (rule = chain->rules; rule; rule = rule->next) {
 			/* Skip rules from another set. */
 			if (cmd == 1 && rule->set != set)
 				continue;
 			clear_counters(rule, log_only);
 		}
 		msg = log_only ? "ipfw: All logging counts reset.\n" :
 		    "ipfw: Accounting cleared.\n";
 	} else {
 		int cleared = 0;
 		/*
 		 * We can have multiple rules with the same number, so we
 		 * need to clear them all.
 		 */
 		for (rule = chain->rules; rule; rule = rule->next)
 			if (rule->rulenum == rulenum) {
 				/*
 				 * Walk the run of rules sharing this number.
 				 * `cleared' is set once the number was found,
 				 * even if the set filter skipped every rule.
 				 */
 				while (rule && rule->rulenum == rulenum) {
 					if (cmd == 0 || rule->set == set)
 						clear_counters(rule, log_only);
 					rule = rule->next;
 				}
 				cleared = 1;
 				break;
 			}
 		if (!cleared) {	/* we did not find any matching rules */
 			IPFW_WUNLOCK(chain);
 			return (EINVAL);
 		}
 		msg = log_only ? "ipfw: Entry %d logging count reset.\n" :
 		    "ipfw: Entry %d cleared.\n";
 	}
 	IPFW_WUNLOCK(chain);
 
 	/*
 	 * msg is used as the format string; only the per-rule variants
 	 * contain a %d for rulenum.
 	 */
 	if (fw_verbose)
 		log(LOG_SECURITY | LOG_NOTICE, msg, rulenum);
 	return (0);
 }
 
 /*
  * Check validity of the structure before insert.
  * Fortunately rules are simple, so this mostly need to check rule sizes.
  *
  * rule:	candidate rule to validate.
  * size:	number of bytes supplied for the rule; must equal
  *		RULESIZE(rule).
  *
  * Returns 0 if the rule is acceptable, EINVAL for any size, ordering or
  * opcode problem, or EPROTONOSUPPORT for an IPv6 opcode when the kernel
  * is built without INET6.  A diagnostic is printed for most failures.
  *
  * Side effect: for O_LOG instructions, log_left is reset to max_log.
  */
 static int
 check_ipfw_struct(struct ip_fw *rule, int size)
 {
 	int l, cmdlen = 0;
 	int have_action=0;
 	ipfw_insn *cmd;
 
 	if (size < sizeof(*rule)) {
 		printf("ipfw: rule too short\n");
 		return (EINVAL);
 	}
 	/* first, check for valid size */
 	l = RULESIZE(rule);
 	if (l != size) {
 		printf("ipfw: size mismatch (have %d want %d)\n", size, l);
 		return (EINVAL);
 	}
 	if (rule->act_ofs >= rule->cmd_len) {
 		printf("ipfw: bogus action offset (%u > %u)\n",
 		    rule->act_ofs, rule->cmd_len - 1);
 		return (EINVAL);
 	}
 	/*
 	 * Now go for the individual checks. Very simple ones, basically only
 	 * instruction sizes.
 	 */
 	/*
 	 * `l' counts the instruction length still to be scanned and `cmd'
 	 * advances by each instruction's own length (F_LEN).
 	 */
 	for (l = rule->cmd_len, cmd = rule->cmd ;
 			l > 0 ; l -= cmdlen, cmd += cmdlen) {
 		cmdlen = F_LEN(cmd);
 		if (cmdlen > l) {
 			printf("ipfw: opcode %d size truncated\n",
 			    cmd->opcode);
 			return EINVAL;
 		}
 		DEB(printf("ipfw: opcode %d\n", cmd->opcode);)
 		switch (cmd->opcode) {
 		/* Opcodes that must have exactly the size of a bare ipfw_insn. */
 		case O_PROBE_STATE:
 		case O_KEEP_STATE:
 		case O_PROTO:
 		case O_IP_SRC_ME:
 		case O_IP_DST_ME:
 		case O_LAYER2:
 		case O_IN:
 		case O_FRAG:
 		case O_DIVERTED:
 		case O_IPOPT:
 		case O_IPTOS:
 		case O_IPPRECEDENCE:
 		case O_IPVER:
 		case O_TCPWIN:
 		case O_TCPFLAGS:
 		case O_TCPOPTS:
 		case O_ESTAB:
 		case O_VERREVPATH:
 		case O_VERSRCREACH:
 		case O_ANTISPOOF:
 		case O_IPSEC:
 #ifdef INET6
 		case O_IP6_SRC_ME:
 		case O_IP6_DST_ME:
 		case O_EXT_HDR:
 		case O_IP6:
 #endif
 		case O_IP4:
 		case O_TAG:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 			break;
+
+		case O_FIB:
+			if (cmdlen != F_INSN_SIZE(ipfw_insn))
+				goto bad_size;
+			if (cmd->arg1 >= rt_numfibs) {
+				printf("ipfw: invalid fib number %d\n",
+					cmd->arg1);
+				return EINVAL;
+			}
+			break;
+
+		case O_SETFIB:
+			if (cmdlen != F_INSN_SIZE(ipfw_insn))
+				goto bad_size;
+			if (cmd->arg1 >= rt_numfibs) {
+				printf("ipfw: invalid fib number %d\n",
+					cmd->arg1);
+				return EINVAL;
+			}
+			goto check_action;
 
 		/* Opcodes that must have exactly the size of an ipfw_insn_u32. */
 		case O_UID:
 		case O_GID:
 		case O_JAIL:
 		case O_IP_SRC:
 		case O_IP_DST:
 		case O_TCPSEQ:
 		case O_TCPACK:
 		case O_PROB:
 		case O_ICMPTYPE:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
 				goto bad_size;
 			break;
 
 		case O_LIMIT:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
 				goto bad_size;
 			break;
 
 		case O_LOG:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
 				goto bad_size;
 
 			/* Not just a check: start with a full log budget. */
 			((ipfw_insn_log *)cmd)->log_left =
 			    ((ipfw_insn_log *)cmd)->max_log;
 
 			break;
 
 		case O_IP_SRC_MASK:
 		case O_IP_DST_MASK:
 			/* only odd command lengths */
 			if ( !(cmdlen & 1) || cmdlen > 31)
 				goto bad_size;
 			break;
 
 		case O_IP_SRC_SET:
 		case O_IP_DST_SET:
 			if (cmd->arg1 == 0 || cmd->arg1 > 256) {
 				printf("ipfw: invalid set size %d\n",
 					cmd->arg1);
 				return EINVAL;
 			}
 			/* One 32-bit bitmap word per 32 addresses, rounded up. */
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
 			    (cmd->arg1+31)/32 )
 				goto bad_size;
 			break;
 
 		case O_IP_SRC_LOOKUP:
 		case O_IP_DST_LOOKUP:
 			if (cmd->arg1 >= IPFW_TABLES_MAX) {
 				printf("ipfw: invalid table number %d\n",
 				    cmd->arg1);
 				return (EINVAL);
 			}
 			/* Two sizes are legal: bare insn, or insn plus a u32. */
 			if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
 			    cmdlen != F_INSN_SIZE(ipfw_insn_u32))
 				goto bad_size;
 			break;
 
 		case O_MACADDR2:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
 				goto bad_size;
 			break;
 
 		/* Variable-length opcodes: only the length range is checked. */
 		case O_NOP:
 		case O_IPID:
 		case O_IPTTL:
 		case O_IPLEN:
 		case O_TCPDATALEN:
 		case O_TAGGED:
 			if (cmdlen < 1 || cmdlen > 31)
 				goto bad_size;
 			break;
 
 		case O_MAC_TYPE:
 		case O_IP_SRCPORT:
 		case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
 			if (cmdlen < 2 || cmdlen > 31)
 				goto bad_size;
 			break;
 
 		case O_RECV:
 		case O_XMIT:
 		case O_VIA:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
 				goto bad_size;
 			break;
 
 		case O_ALTQ:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_altq))
 				goto bad_size;
 			break;
 
 		/*
 		 * From here on the opcodes are actions: after their own size
 		 * test they all end up at check_action below.
 		 */
 		case O_PIPE:
 		case O_QUEUE:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 			goto check_action;
 
 		case O_FORWARD_IP:
 #ifdef	IPFIREWALL_FORWARD
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_sa))
 				goto bad_size;
 			goto check_action;
 #else
 			return EINVAL;
 #endif
 
 		/* These actions are rejected unless their backend is available. */
 		case O_DIVERT:
 		case O_TEE:
 			if (ip_divert_ptr == NULL)
 				return EINVAL;
 			else
 				goto check_size;
 		case O_NETGRAPH:
 		case O_NGTEE:
 			if (!NG_IPFW_LOADED)
 				return EINVAL;
 			else
 				goto check_size;
 		case O_NAT:
 			if (!IPFW_NAT_LOADED)
 				return EINVAL;
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_nat))
  				goto bad_size;		
  			goto check_action;
 		case O_FORWARD_MAC: /* XXX not implemented yet */
 		case O_CHECK_STATE:
 		case O_COUNT:
 		case O_ACCEPT:
 		case O_DENY:
 		case O_REJECT:
 #ifdef INET6
 		case O_UNREACH6:
 #endif
 		case O_SKIPTO:
 check_size:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 check_action:
 			/* A rule carries exactly one action ... */
 			if (have_action) {
 				printf("ipfw: opcode %d, multiple actions"
 					" not allowed\n",
 					cmd->opcode);
 				return EINVAL;
 			}
 			have_action = 1;
 			/* ... and it must consume all remaining length. */
 			if (l != cmdlen) {
 				printf("ipfw: opcode %d, action must be"
 					" last opcode\n",
 					cmd->opcode);
 				return EINVAL;
 			}
 			break;
 #ifdef INET6
 		case O_IP6_SRC:
 		case O_IP6_DST:
 			if (cmdlen != F_INSN_SIZE(struct in6_addr) +
 			    F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 			break;
 
 		case O_FLOW6ID:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
 			    ((ipfw_insn_u32 *)cmd)->o.arg1)
 				goto bad_size;
 			break;
 
 		case O_IP6_SRC_MASK:
 		case O_IP6_DST_MASK:
 			if ( !(cmdlen & 1) || cmdlen > 127)
 				goto bad_size;
 			break;
 		case O_ICMP6TYPE:
 			if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) )
 				goto bad_size;
 			break;
 #endif
 
 		default:
 			/*
 			 * Without INET6 the IPv6 opcodes land here and get a
 			 * dedicated error; anything else is unknown.
 			 */
 			switch (cmd->opcode) {
 #ifndef INET6
 			case O_IP6_SRC_ME:
 			case O_IP6_DST_ME:
 			case O_EXT_HDR:
 			case O_IP6:
 			case O_UNREACH6:
 			case O_IP6_SRC:
 			case O_IP6_DST:
 			case O_FLOW6ID:
 			case O_IP6_SRC_MASK:
 			case O_IP6_DST_MASK:
 			case O_ICMP6TYPE:
 				printf("ipfw: no IPv6 support in kernel\n");
 				return EPROTONOSUPPORT;
 #endif
 			default:
 				printf("ipfw: opcode %d, unknown opcode\n",
 					cmd->opcode);
 				return EINVAL;
 			}
 		}
 	}
 	if (have_action == 0) {
 		printf("ipfw: missing action\n");
 		return EINVAL;
 	}
 	return 0;
 
 	/* Common exit for every instruction-length mismatch above. */
 bad_size:
 	printf("ipfw: opcode %d size %d wrong\n",
 		cmd->opcode, cmdlen);
 	return EINVAL;
 }
 
 /*
  * Serialize the rule set into a caller-supplied buffer.
  *
  * Static rules from chain are copied first, followed by every dynamic
  * rule found in the ipfw_dyn_v hash buckets.  Entries that no longer fit
  * in the remaining space are silently left out.
  *
  * chain  rule chain whose static rules are exported
  * buf    destination buffer
  * space  size of buf in bytes
  *
  * Returns the number of bytes actually written to buf.
  */
 static size_t
 ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
 {
 	char *cursor = buf;
 	char *limit = cursor + space;
 	struct ip_fw *rule, *copy;
 	ipfw_dyn_rule *dyn, *out, *last;
 	int len, bucket;
 	time_t	boot_seconds;
 
 	boot_seconds = boottime.tv_sec;
 
 	/* XXX a long walk under the read lock stalls packet processing */
 	IPFW_RLOCK(chain);
 	for (rule = chain->rules; rule != NULL; rule = rule->next) {
 		/*
 		 * The rule set may have grown since the caller sized the
 		 * buffer, so re-check every entry against the space left.
 		 * A generation number would be a cleaner solution.
 		 */
 		len = RULESIZE(rule);
 		if (cursor + len > limit)
 			continue;
 		copy = (struct ip_fw *)cursor;
 		bcopy(rule, copy, len);
 		/*
 		 * XXX HACK: the exported "next_rule" pointer slot carries
 		 * the set-disable mask so that the ABI stays unchanged.
 		 * This is repeated on every single rule.
 		 */
 		bcopy(&set_disable, &copy->next_rule, sizeof(set_disable));
 		/* Non-zero timestamps are offset by the boot time. */
 		if (copy->timestamp)
 			copy->timestamp += boot_seconds;
 		cursor += len;
 	}
 	IPFW_RUNLOCK(chain);
 
 	if (ipfw_dyn_v == NULL)
 		return (cursor - (char *)buf);
 
 	last = NULL;
 	IPFW_DYN_LOCK();
 	for (bucket = 0; bucket < curr_dyn_buckets; bucket++) {
 		for (dyn = ipfw_dyn_v[bucket]; dyn != NULL; dyn = dyn->next) {
 			if (cursor + sizeof(*dyn) > limit)
 				continue;
 			out = (ipfw_dyn_rule *)cursor;
 			bcopy(dyn, out, sizeof(*dyn));
 			/*
 			 * Overlay the exported "rule" pointer with the
 			 * parent rule number, immediately followed by the
 			 * parent set number.
 			 */
 			bcopy(&dyn->rule->rulenum, &out->rule,
 			    sizeof(dyn->rule->rulenum));
 			bcopy(&dyn->rule->set,
 			    (char *)&out->rule + sizeof(dyn->rule->rulenum),
 			    sizeof(dyn->rule->set));
 			/*
 			 * Userland treats a NULL "next" as the end-of-list
 			 * marker, so put any non-NULL value there for now.
 			 */
 			bcopy(&out, &out->next, sizeof(out));
 			last = out;
 			/* Export the remaining lifetime, not the deadline. */
 			if (TIME_LEQ(out->expire, time_uptime))
 				out->expire = 0;
 			else
 				out->expire = out->expire - time_uptime;
 			cursor += sizeof(ipfw_dyn_rule);
 		}
 	}
 	IPFW_DYN_UNLOCK();
 
 	/* Terminate the exported dynamic list on its final entry. */
 	if (last != NULL)
 		bzero(&last->next, sizeof(last));
 
 	return (cursor - (char *)buf);
 }
 
 
 /**
  * {set|get}sockopt parser.
  */
 static int
 ipfw_ctl(struct sockopt *sopt)
 {
 #define	RULE_MAXSIZE	(256*sizeof(u_int32_t))
 	int error;
 	size_t size;
 	struct ip_fw *buf, *rule;
 	u_int32_t rulenum[2];
 
 	error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
 	if (error)
 		return (error);
 
 	/*
 	 * Disallow modifications in really-really secure mode, but still allow
 	 * the logging counters to be reset.
 	 */
 	if (sopt->sopt_name == IP_FW_ADD ||
 	    (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) {
 		error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
 		if (error)
 			return (error);
 	}
 
 	error = 0;
 
 	switch (sopt->sopt_name) {
 	case IP_FW_GET:
 		/*
 		 * pass up a copy of the current rules. Static rules
 		 * come first (the last of which has number IPFW_DEFAULT_RULE),
 		 * followed by a possibly empty list of dynamic rule.
 		 * The last dynamic rule has NULL in the "next" field.
 		 *
 		 * Note that the calculated size is used to bound the
 		 * amount of data returned to the user.  The rule set may
 		 * change between calculating the size and returning the
 		 * data in which case we'll just return what fits.
 		 */
 		size = static_len;	/* size of static rules */
 		if (ipfw_dyn_v)		/* add size of dyn.rules */
 			size += (dyn_count * sizeof(ipfw_dyn_rule));
 
 		/*
 		 * XXX todo: if the user passes a short length just to know
 		 * how much room is needed, do not bother filling up the
 		 * buffer, just jump to the sooptcopyout.
 		 */
 		buf = malloc(size, M_TEMP, M_WAITOK);
 		error = sooptcopyout(sopt, buf,
 				ipfw_getrules(&layer3_chain, buf, size));
 		free(buf, M_TEMP);
 		break;
 
 	case IP_FW_FLUSH:
 		/*
 		 * Normally we cannot release the lock on each iteration.
 		 * We could do it here only because we start from the head all
 		 * the times so there is no risk of missing some entries.
 		 * On the other hand, the risk is that we end up with
 		 * a very inconsistent ruleset, so better keep the lock
 		 * around the whole cycle.
 		 *
 		 * XXX this code can be improved by resetting the head of
 		 * the list to point to the default rule, and then freeing
 		 * the old list without the need for a lock.
 		 */
 
 		IPFW_WLOCK(&layer3_chain);
 		layer3_chain.reap = NULL;
 		free_chain(&layer3_chain, 0 /* keep default rule */);
 		rule = layer3_chain.reap;
 		layer3_chain.reap = NULL;
 		IPFW_WUNLOCK(&layer3_chain);
 		if (rule != NULL)
 			reap_rules(rule);
 		break;
 
 	case IP_FW_ADD:
 		rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK);
 		error = sooptcopyin(sopt, rule, RULE_MAXSIZE,
 			sizeof(struct ip_fw) );
 		if (error == 0)
 			error = check_ipfw_struct(rule, sopt->sopt_valsize);
 		if (error == 0) {
 			error = add_rule(&layer3_chain, rule);
 			size = RULESIZE(rule);
 			if (!error && sopt->sopt_dir == SOPT_GET)
 				error = sooptcopyout(sopt, rule, size);
 		}
 		free(rule, M_TEMP);
 		break;
 
 	case IP_FW_DEL:
 		/*
 		 * IP_FW_DEL is used for deleting single rules or sets,
 		 * and (ab)used to atomically manipulate sets. Argument size
 		 * is used to distinguish between the two:
 		 *    sizeof(u_int32_t)
 		 *	delete single rule or set of rules,
 		 *	or reassign rules (or sets) to a different set.
 		 *    2*sizeof(u_int32_t)
 		 *	atomic disable/enable sets.
 		 *	first u_int32_t contains sets to be disabled,
 		 *	second u_int32_t contains sets to be enabled.
 		 */
 		error = sooptcopyin(sopt, rulenum,
 			2*sizeof(u_int32_t), sizeof(u_int32_t));
 		if (error)
 			break;
 		size = sopt->sopt_valsize;
 		if (size == sizeof(u_int32_t))	/* delete or reassign */
 			error = del_entry(&layer3_chain, rulenum[0]);
 		else if (size == 2*sizeof(u_int32_t)) /* set enable/disable */
 			set_disable =
 			    (set_disable | rulenum[0]) & ~rulenum[1] &
 			    ~(1<<RESVD_SET); /* set RESVD_SET always enabled */
 		else
 			error = EINVAL;
 		break;
 
 	case IP_FW_ZERO:
 	case IP_FW_RESETLOG: /* argument is an u_int_32, the rule number */
 		rulenum[0] = 0;
 		if (sopt->sopt_val != 0) {
 		    error = sooptcopyin(sopt, rulenum,
 			    sizeof(u_int32_t), sizeof(u_int32_t));
 		    if (error)
 			break;
 		}
 		error = zero_entry(&layer3_chain, rulenum[0],
 			sopt->sopt_name == IP_FW_RESETLOG);
 		break;
 
 	case IP_FW_TABLE_ADD:
 		{
 			ipfw_table_entry ent;
 
 			error = sooptcopyin(sopt, &ent,
 			    sizeof(ent), sizeof(ent));
 			if (error)
 				break;
 			error = add_table_entry(&layer3_chain, ent.tbl,
 			    ent.addr, ent.masklen, ent.value);
 		}
 		break;
 
 	case IP_FW_TABLE_DEL:
 		{
 			ipfw_table_entry ent;
 
 			error = sooptcopyin(sopt, &ent,
 			    sizeof(ent), sizeof(ent));
 			if (error)
 				break;
 			error = del_table_entry(&layer3_chain, ent.tbl,
 			    ent.addr, ent.masklen);
 		}
 		break;
 
 	case IP_FW_TABLE_FLUSH:
 		{
 			u_int16_t tbl;
 
 			error = sooptcopyin(sopt, &tbl,
 			    sizeof(tbl), sizeof(tbl));
 			if (error)
 				break;
 			IPFW_WLOCK(&layer3_chain);
 			error = flush_table(&layer3_chain, tbl);
 			IPFW_WUNLOCK(&layer3_chain);
 		}
 		break;
 
 	case IP_FW_TABLE_GETSIZE:
 		{
 			u_int32_t tbl, cnt;
 
 			if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl),
 			    sizeof(tbl))))
 				break;
 			IPFW_RLOCK(&layer3_chain);
 			error = count_table(&layer3_chain, tbl, &cnt);
 			IPFW_RUNLOCK(&layer3_chain);
 			if (error)
 				break;
 			error = sooptcopyout(sopt, &cnt, sizeof(cnt));
 		}
 		break;
 
 	case IP_FW_TABLE_LIST:
 		{
 			ipfw_table *tbl;
 
 			if (sopt->sopt_valsize < sizeof(*tbl)) {
 				error = EINVAL;
 				break;
 			}
 			size = sopt->sopt_valsize;
 			tbl = malloc(size, M_TEMP, M_WAITOK);
 			error = sooptcopyin(sopt, tbl, size, sizeof(*tbl));
 			if (error) {
 				free(tbl, M_TEMP);
 				break;
 			}
 			tbl->size = (size - sizeof(*tbl)) /
 			    sizeof(ipfw_table_entry);
 			IPFW_RLOCK(&layer3_chain);
 			error = dump_table(&layer3_chain, tbl);
 			IPFW_RUNLOCK(&layer3_chain);
 			if (error) {
 				free(tbl, M_TEMP);
 				break;
 			}
 			error = sooptcopyout(sopt, tbl, size);
 			free(tbl, M_TEMP);
 		}
 		break;
 
 	case IP_FW_NAT_CFG:
 	{
 		if (IPFW_NAT_LOADED)
 			error = ipfw_nat_cfg_ptr(sopt);
 		else {
 			printf("IP_FW_NAT_CFG: ipfw_nat not present, please load it.\n");
 			error = EINVAL;
 		}
 	}
 	break;
 
 	case IP_FW_NAT_DEL:
 	{
 		if (IPFW_NAT_LOADED)
 			error = ipfw_nat_del_ptr(sopt);
 		else {
 			printf("IP_FW_NAT_DEL: ipfw_nat not present, please load it.\n");
 			printf("ipfw_nat not loaded: %d\n", sopt->sopt_name);
 			error = EINVAL;
 		}
 	}
 	break;
 
 	case IP_FW_NAT_GET_CONFIG:
 	{
 		if (IPFW_NAT_LOADED)
 			error = ipfw_nat_get_cfg_ptr(sopt);
 		else {
 			printf("IP_FW_NAT_GET_CFG: ipfw_nat not present, please load it.\n");
 			error = EINVAL;
 		}
 	}
 	break;
 
 	case IP_FW_NAT_GET_LOG:
 	{
 		if (IPFW_NAT_LOADED)
 			error = ipfw_nat_get_log_ptr(sopt);
 		else {
 			printf("IP_FW_NAT_GET_LOG: ipfw_nat not present, please load it.\n");
 			error = EINVAL;
 		}
 	}
 	break;
 
 	default:
 		printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name);
 		error = EINVAL;
 	}
 
 	return (error);
 #undef RULE_MAXSIZE
 }
 
 /**
  * dummynet needs a reference to the default rule, because rules can be
  * deleted while packets hold a reference to them. When this happens,
  * dummynet changes the reference to the default rule (it could well be a
  * NULL pointer, but this way we do not need to check for the special
  * case, plus here we have info on the default behaviour).
  */
 struct ip_fw *ip_fw_default_rule;
 
 /*
  * Periodic callout that emits keepalives for dynamic rules.
  *
  * For each TCP dynamic rule that has seen SYNs in both directions and
  * will expire within dyn_keepalive_interval, one keepalive is built for
  * each direction.  The callout always re-arms itself to run again after
  * dyn_keepalive_period seconds.
  */
 static void
 ipfw_tick(void * __unused unused)
 {
 	struct mbuf *head, *pkt, *following, **tail;
 	ipfw_dyn_rule *dyn;
 	int bucket;
 
 	if (dyn_keepalive != 0 && ipfw_dyn_v != NULL && dyn_count != 0) {
 		/*
 		 * Packets are only queued while the dynamic rule lock is
 		 * held and transmitted after it is released; sending
 		 * under the lock would create a lock order reversal with
 		 * the normal packet input -> ipfw call stack.
 		 */
 		head = NULL;
 		tail = &head;
 		IPFW_DYN_LOCK();
 		for (bucket = 0; bucket < curr_dyn_buckets; bucket++) {
 			dyn = ipfw_dyn_v[bucket];
 			for (; dyn != NULL; dyn = dyn->next) {
 				/* Only established TCP flows qualify. */
 				if (dyn->dyn_type == O_LIMIT_PARENT ||
 				    dyn->id.proto != IPPROTO_TCP ||
 				    (dyn->state & BOTH_SYN) != BOTH_SYN)
 					continue;
 				/* too early */
 				if (TIME_LEQ(time_uptime +
 				    dyn_keepalive_interval, dyn->expire))
 					continue;
 				/* too late, rule expired */
 				if (TIME_LEQ(dyn->expire, time_uptime))
 					continue;
 
 				/* Append one keepalive per direction. */
 				*tail = send_pkt(NULL, &(dyn->id),
 				    dyn->ack_rev - 1, dyn->ack_fwd, TH_SYN);
 				if (*tail != NULL)
 					tail = &(*tail)->m_nextpkt;
 				*tail = send_pkt(NULL, &(dyn->id),
 				    dyn->ack_fwd - 1, dyn->ack_rev, 0);
 				if (*tail != NULL)
 					tail = &(*tail)->m_nextpkt;
 			}
 		}
 		IPFW_DYN_UNLOCK();
 
 		/* Detach each queued packet and hand it to ip_output(). */
 		pkt = head;
 		while (pkt != NULL) {
 			following = pkt->m_nextpkt;
 			pkt->m_nextpkt = NULL;
 			ip_output(pkt, NULL, NULL, 0, NULL, NULL);
 			pkt = following;
 		}
 	}
 
 	callout_reset(&ipfw_timeout, dyn_keepalive_period*hz, ipfw_tick, NULL);
 }
 
 int
 ipfw_init(void)
 {
 	struct ip_fw default_rule;
 	int error;
 
 #ifdef INET6
 	/* Setup IPv6 fw sysctl tree. */
 	sysctl_ctx_init(&ip6_fw_sysctl_ctx);
 	ip6_fw_sysctl_tree = SYSCTL_ADD_NODE(&ip6_fw_sysctl_ctx,
 	    SYSCTL_STATIC_CHILDREN(_net_inet6_ip6), OID_AUTO, "fw",
 	    CTLFLAG_RW | CTLFLAG_SECURE, 0, "Firewall");
 	SYSCTL_ADD_PROC(&ip6_fw_sysctl_ctx, SYSCTL_CHILDREN(ip6_fw_sysctl_tree),
 	    OID_AUTO, "enable", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3,
 	    &fw6_enable, 0, ipfw_chg_hook, "I", "Enable ipfw+6");
 	SYSCTL_ADD_INT(&ip6_fw_sysctl_ctx, SYSCTL_CHILDREN(ip6_fw_sysctl_tree),
 	    OID_AUTO, "deny_unknown_exthdrs", CTLFLAG_RW | CTLFLAG_SECURE,
 	    &fw_deny_unknown_exthdrs, 0,
 	    "Deny packets with unknown IPv6 Extension Headers");
 #endif
 
 	layer3_chain.rules = NULL;
 	IPFW_LOCK_INIT(&layer3_chain);
 	ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule",
 	    sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 	IPFW_DYN_LOCK_INIT();
 	callout_init(&ipfw_timeout, CALLOUT_MPSAFE);
 
 	bzero(&default_rule, sizeof default_rule);
 
 	default_rule.act_ofs = 0;
 	default_rule.rulenum = IPFW_DEFAULT_RULE;
 	default_rule.cmd_len = 1;
 	default_rule.set = RESVD_SET;
 
 	default_rule.cmd[0].len = 1;
 	default_rule.cmd[0].opcode =
 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
 				1 ? O_ACCEPT :
 #endif
 				O_DENY;
 
 	error = add_rule(&layer3_chain, &default_rule);
 	if (error != 0) {
 		printf("ipfw2: error %u initializing default rule "
 			"(support disabled)\n", error);
 		IPFW_DYN_LOCK_DESTROY();
 		IPFW_LOCK_DESTROY(&layer3_chain);
 		uma_zdestroy(ipfw_dyn_rule_zone);
 		return (error);
 	}
 
 	ip_fw_default_rule = layer3_chain.rules;
 	printf("ipfw2 "
 #ifdef INET6
 		"(+ipv6) "
 #endif
 		"initialized, divert %s, nat %s, "
 		"rule-based forwarding "
 #ifdef IPFIREWALL_FORWARD
 		"enabled, "
 #else
 		"disabled, "
 #endif
 		"default to %s, logging ",
 #ifdef IPDIVERT
 		"enabled",
 #else
 		"loadable",
 #endif
 #ifdef IPFIREWALL_NAT
 		"enabled",
 #else
 		"loadable",
 #endif
 
 		default_rule.cmd[0].opcode == O_ACCEPT ? "accept" : "deny");
 
 #ifdef IPFIREWALL_VERBOSE
 	fw_verbose = 1;
 #endif
 #ifdef IPFIREWALL_VERBOSE_LIMIT
 	verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
 #endif
 	if (fw_verbose == 0)
 		printf("disabled\n");
 	else if (verbose_limit == 0)
 		printf("unlimited\n");
 	else
 		printf("limited to %d packets/entry by default\n",
 		    verbose_limit);
 
 	error = init_tables(&layer3_chain);
 	if (error) {
 		IPFW_DYN_LOCK_DESTROY();
 		IPFW_LOCK_DESTROY(&layer3_chain);
 		uma_zdestroy(ipfw_dyn_rule_zone);
 		return (error);
 	}
 	ip_fw_ctl_ptr = ipfw_ctl;
 	ip_fw_chk_ptr = ipfw_chk;
 	callout_reset(&ipfw_timeout, hz, ipfw_tick, NULL);	
 	LIST_INIT(&layer3_chain.nat);
 	return (0);
 }
 
 /*
  * Tear the firewall down: unhook it, stop the keepalive callout, flush
  * tables and rules (default rule included) and release the locks, the
  * dynamic rule zone and hash array, and the IPv6 sysctl tree.
  */
 void
 ipfw_destroy(void)
 {
 	struct ip_fw *doomed;
 
 	/* Unpublish the hooks first. */
 	ip_fw_chk_ptr = NULL;
 	ip_fw_ctl_ptr = NULL;
 	callout_drain(&ipfw_timeout);
 
 	/* Unlink everything under the write lock, free it afterwards. */
 	IPFW_WLOCK(&layer3_chain);
 	flush_tables(&layer3_chain);
 	layer3_chain.reap = NULL;
 	free_chain(&layer3_chain, 1 /* kill default rule */);
 	doomed = layer3_chain.reap;
 	layer3_chain.reap = NULL;
 	IPFW_WUNLOCK(&layer3_chain);
 	if (doomed != NULL)
 		reap_rules(doomed);
 
 	IPFW_DYN_LOCK_DESTROY();
 	uma_zdestroy(ipfw_dyn_rule_zone);
 	if (ipfw_dyn_v != NULL)
 		free(ipfw_dyn_v, M_IPFW);
 	IPFW_LOCK_DESTROY(&layer3_chain);
 
 #ifdef INET6
 	/* Free IPv6 fw sysctl tree. */
 	sysctl_ctx_free(&ip6_fw_sysctl_ctx);
 #endif
 
 	printf("IP firewall unloaded\n");
 }
Index: head/sys/netinet/ip_icmp.c
===================================================================
--- head/sys/netinet/ip_icmp.c	(revision 178887)
+++ head/sys/netinet/ip_icmp.c	(revision 178888)
@@ -1,919 +1,926 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipsec.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcpip.h>
 #include <netinet/icmp_var.h>
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/key.h>
 #endif
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * ICMP routines: error generation, receive packet processing, and
  * routines to turnaround packets back to the originator, and
  * host table maintenance routines.
  */
 
 /* ICMP statistics counters, exported read/write through sysctl. */
 struct	icmpstat icmpstat;
 SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW,
 	&icmpstat, icmpstat, "");
 
 /* When zero, icmp_input() does not answer ICMP_MASKREQ. */
 static int	icmpmaskrepl = 0;
 SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
 	&icmpmaskrepl, 0, "Reply to ICMP Address Mask Request packets.");
 
 /*
  * When non-zero, this value is returned as the mask in address mask
  * replies instead of the interface's real netmask.
  */
 static u_int	icmpmaskfake = 0;
 SYSCTL_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_RW,
 	&icmpmaskfake, 0, "Fake reply to ICMP Address Mask Request packets.");
 
 /* When non-zero, icmp_input() does not act on ICMP_REDIRECT. */
 static int	drop_redirect = 0;
 SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW,
 	&drop_redirect, 0, "Ignore ICMP redirects");
 
 /* When non-zero, icmp_input() prints every received redirect. */
 static int	log_redirect = 0;
 SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW,
 	&log_redirect, 0, "Log ICMP redirects to the console");
 
 static int      icmplim = 200;
 SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
 	&icmplim, 0, "Maximum number of ICMP responses per second");
 
 static int	icmplim_output = 1;
 SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
 	&icmplim_output, 0, "Enable rate limiting of ICMP responses");
 
 static char	reply_src[IFNAMSIZ];
 SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW,
 	&reply_src, IFNAMSIZ, "icmp reply source for non-local packets.");
 
 static int	icmp_rfi = 0;
 SYSCTL_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_RW,
 	&icmp_rfi, 0, "ICMP reply from incoming interface for "
 	"non-local packets");
 
 /*
  * Requested quote length used by icmp_error(); the amount actually
  * quoted is never less than 8 bytes (or the full TCP header for TCP).
  */
 static int	icmp_quotelen = 8;
 SYSCTL_INT(_net_inet_icmp, OID_AUTO, quotelen, CTLFLAG_RW,
 	&icmp_quotelen, 0, "Number of bytes from original packet to "
 	"quote in ICMP reply");
 
 /*
  * ICMP broadcast echo sysctl
  *
  * When zero, icmp_input() does not answer echo or timestamp requests
  * that arrived as broadcast or multicast.
  */
 
 static int	icmpbmcastecho = 0;
 SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW,
 	&icmpbmcastecho, 0, "");
 
 
 #ifdef ICMPPRINTFS
 /* Run-time switch for the debugging printf()s in this file. */
 int	icmpprintfs = 0;
 #endif
 
 static void	icmp_reflect(struct mbuf *);
 static void	icmp_send(struct mbuf *, struct mbuf *);
 
 extern	struct protosw inetsw[];
 
 /*
  * Generate an ICMP error packet of the given type and code in response
  * to the offending IP packet in mbuf chain n.
  *
  * n     offending packet; always freed before returning
  * type  ICMP type, must not exceed ICMP_MAXTYPE
  * code  ICMP code (for ICMP_PARAMPROB it is stored as the problem
  *       pointer and the code is sent as 0)
  * dest  gateway address, used only for ICMP_REDIRECT
  * mtu   next-hop MTU, used only for ICMP_UNREACH_NEEDFRAG when non-zero
  *
  * The reply quotes the original IP header plus a part of its payload
  * and is handed to icmp_reflect() for transmission.
  */
 void
 icmp_error(struct mbuf *n, int type, int code, n_long dest, int mtu)
 {
 	register struct ip *oip = mtod(n, struct ip *), *nip;
 	register unsigned oiphlen = oip->ip_hl << 2;
 	register struct icmp *icp;
 	register struct mbuf *m;
 	unsigned icmplen, icmpelen, nlen;
 
 	KASSERT((u_int)type <= ICMP_MAXTYPE, ("%s: illegal ICMP type", __func__));
 #ifdef ICMPPRINTFS
 	if (icmpprintfs)
 		printf("icmp_error(%p, %x, %d)\n", oip, type, code);
 #endif
 	if (type != ICMP_REDIRECT)
 		icmpstat.icps_error++;
 	/*
 	 * Don't send error:
 	 *  if the original packet was encrypted.
 	 *  if not the first fragment of message.
 	 *  in response to a multicast or broadcast packet.
 	 *  if the old packet protocol was an ICMP error message.
 	 */
 	if (n->m_flags & M_DECRYPTED)
 		goto freeit;
 	if (oip->ip_off & ~(IP_MF|IP_DF))
 		goto freeit;
 	if (n->m_flags & (M_BCAST|M_MCAST))
 		goto freeit;
 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
 	  n->m_len >= oiphlen + ICMP_MINLEN &&
 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiphlen))->icmp_type)) {
 		icmpstat.icps_oldicmp++;
 		goto freeit;
 	}
 	/* Drop if IP header plus 8 bytes is not contiguous in first mbuf. */
 	if (oiphlen + 8 > n->m_len)
 		goto freeit;
 	/*
 	 * Calculate length to quote from original packet and
 	 * prevent the ICMP mbuf from overflowing.
 	 * Unfortunately this is non-trivial since ip_forward()
 	 * sends us truncated packets.
 	 */
 	nlen = m_length(n, NULL);
 	if (oip->ip_p == IPPROTO_TCP) {
 		struct tcphdr *th;
 		int tcphlen;
 
 		if (oiphlen + sizeof(struct tcphdr) > n->m_len &&
 		    n->m_next == NULL)
 			goto stdreply;
 		if (n->m_len < oiphlen + sizeof(struct tcphdr) &&
 		    ((n = m_pullup(n, oiphlen + sizeof(struct tcphdr))) == NULL))
 			goto freeit;
 		/*
 		 * m_pullup() may hand back a different head mbuf, in which
 		 * case the old header pointer is stale; reload it.
 		 */
 		oip = mtod(n, struct ip *);
 		th = (struct tcphdr *)((caddr_t)oip + oiphlen);
 		tcphlen = th->th_off << 2;
 		if (tcphlen < sizeof(struct tcphdr))
 			goto freeit;
 		if (oip->ip_len < oiphlen + tcphlen)
 			goto freeit;
 		if (oiphlen + tcphlen > n->m_len && n->m_next == NULL)
 			goto stdreply;
 		if (n->m_len < oiphlen + tcphlen &&
 		    ((n = m_pullup(n, oiphlen + tcphlen)) == NULL))
 			goto freeit;
 		/* Same as above: the second pullup may move the header. */
 		oip = mtod(n, struct ip *);
 		/* For TCP always quote at least the complete TCP header. */
 		icmpelen = max(tcphlen, min(icmp_quotelen, oip->ip_len - oiphlen));
 	} else
 stdreply:	icmpelen = max(8, min(icmp_quotelen, oip->ip_len - oiphlen));
 
 	icmplen = min(oiphlen + icmpelen, nlen);
 	if (icmplen < sizeof(struct ip))
 		goto freeit;
 
 	/* A plain header mbuf is used when the reply fits, else a cluster. */
 	if (MHLEN > sizeof(struct ip) + ICMP_MINLEN + icmplen)
 		m = m_gethdr(M_DONTWAIT, MT_DATA);
 	else
 		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
 	if (m == NULL)
 		goto freeit;
 #ifdef MAC
 	mac_netinet_icmp_reply(n, m);
 #endif
 	icmplen = min(icmplen, M_TRAILINGSPACE(m) - sizeof(struct ip) - ICMP_MINLEN);
 	m_align(m, ICMP_MINLEN + icmplen);
 	m->m_len = ICMP_MINLEN + icmplen;
 
+	/* XXX MRT  make the outgoing packet use the same FIB
+	 * that was associated with the incoming packet
+	 */
+	M_SETFIB(m, M_GETFIB(n));
 	icp = mtod(m, struct icmp *);
 	icmpstat.icps_outhist[type]++;
 	icp->icmp_type = type;
 	if (type == ICMP_REDIRECT)
 		icp->icmp_gwaddr.s_addr = dest;
 	else {
 		icp->icmp_void = 0;
 		/*
 		 * The following assignments assume an overlay with the
 		 * just zeroed icmp_void field.
 		 */
 		if (type == ICMP_PARAMPROB) {
 			icp->icmp_pptr = code;
 			code = 0;
 		} else if (type == ICMP_UNREACH &&
 			code == ICMP_UNREACH_NEEDFRAG && mtu) {
 			icp->icmp_nextmtu = htons(mtu);
 		}
 	}
 	icp->icmp_code = code;
 
 	/*
 	 * Copy the quotation into ICMP message and
 	 * convert quoted IP header back to network representation.
 	 */
 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
 	nip = &icp->icmp_ip;
 	nip->ip_len = htons(nip->ip_len);
 	nip->ip_off = htons(nip->ip_off);
 
 	/*
 	 * Set up ICMP message mbuf and copy old IP header (without options)
 	 * in front of ICMP message.
 	 * If the original mbuf was meant to bypass the firewall, the error
 	 * reply should bypass as well.
 	 */
 	m->m_flags |= n->m_flags & M_SKIP_FIREWALL;
 	m->m_data -= sizeof(struct ip);
 	m->m_len += sizeof(struct ip);
 	m->m_pkthdr.len = m->m_len;
 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
 	nip = mtod(m, struct ip *);
 	bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
 	nip->ip_len = m->m_len;
 	nip->ip_v = IPVERSION;
 	nip->ip_hl = 5;
 	nip->ip_p = IPPROTO_ICMP;
 	nip->ip_tos = 0;
 	icmp_reflect(m);
 
 	/* Reached on success as well: the offending packet is consumed. */
 freeit:
 	m_freem(n);
 }
 
 /*
  * Process a received ICMP message.
  *
  * m    mbuf chain holding the IP packet; consumed on every path (freed,
  *      reflected back via icmp_reflect(), or passed to rip_input())
  * off  length of the IP header, i.e. offset of the ICMP header in m
  *
  * Error-type messages are translated to PRC_* codes and reported to the
  * affected protocol's pr_ctlinput; echo, timestamp and mask requests are
  * answered in place; redirects update the routing tables.  Whatever is
  * not reflected or dropped is finally handed to the raw IP input.
  */
 void
 icmp_input(struct mbuf *m, int off)
 {
 	struct icmp *icp;
 	struct in_ifaddr *ia;
 	struct ip *ip = mtod(m, struct ip *);
 	struct sockaddr_in icmpsrc, icmpdst, icmpgw;
 	int hlen = off;
 	int icmplen = ip->ip_len;
 	int i, code;
 	void (*ctlfunc)(int, struct sockaddr *, void *);
+	int fibnum;
 
 	/*
 	 * Locate icmp structure in mbuf, and check
 	 * that not corrupted and of at least minimum length.
 	 */
 #ifdef ICMPPRINTFS
 	if (icmpprintfs) {
 		char buf[4 * sizeof "123"];
 		strcpy(buf, inet_ntoa(ip->ip_src));
 		printf("icmp_input from %s to %s, len %d\n",
 		       buf, inet_ntoa(ip->ip_dst), icmplen);
 	}
 #endif
 	if (icmplen < ICMP_MINLEN) {
 		icmpstat.icps_tooshort++;
 		goto freeit;
 	}
 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
 	if (m->m_len < i && (m = m_pullup(m, i)) == 0)  {
 		icmpstat.icps_tooshort++;
 		return;
 	}
 	/* The pullup may have replaced the head mbuf; reload the header. */
 	ip = mtod(m, struct ip *);
 	/* Temporarily hide the IP header so the checksum covers ICMP only. */
 	m->m_len -= hlen;
 	m->m_data += hlen;
 	icp = mtod(m, struct icmp *);
 	if (in_cksum(m, icmplen)) {
 		icmpstat.icps_checksum++;
 		goto freeit;
 	}
 	m->m_len += hlen;
 	m->m_data -= hlen;
 
 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
 		/*
 		 * Deliver very specific ICMP type only.
 		 */
 		switch (icp->icmp_type) {
 		case ICMP_UNREACH:
 		case ICMP_TIMXCEED:
 			break;
 		default:
 			goto freeit;
 		}
 	}
 
 #ifdef ICMPPRINTFS
 	if (icmpprintfs)
 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
 		    icp->icmp_code);
 #endif
 
 	/*
 	 * Message type specific processing.
 	 */
 	if (icp->icmp_type > ICMP_MAXTYPE)
 		goto raw;
 
 	/* Initialize */
 	bzero(&icmpsrc, sizeof(icmpsrc));
 	icmpsrc.sin_len = sizeof(struct sockaddr_in);
 	icmpsrc.sin_family = AF_INET;
 	bzero(&icmpdst, sizeof(icmpdst));
 	icmpdst.sin_len = sizeof(struct sockaddr_in);
 	icmpdst.sin_family = AF_INET;
 	bzero(&icmpgw, sizeof(icmpgw));
 	icmpgw.sin_len = sizeof(struct sockaddr_in);
 	icmpgw.sin_family = AF_INET;
 
 	icmpstat.icps_inhist[icp->icmp_type]++;
 	code = icp->icmp_code;
 	switch (icp->icmp_type) {
 
 	case ICMP_UNREACH:
 		/* Map the ICMP unreachable code onto a PRC_* code. */
 		switch (code) {
 			case ICMP_UNREACH_NET:
 			case ICMP_UNREACH_HOST:
 			case ICMP_UNREACH_SRCFAIL:
 			case ICMP_UNREACH_NET_UNKNOWN:
 			case ICMP_UNREACH_HOST_UNKNOWN:
 			case ICMP_UNREACH_ISOLATED:
 			case ICMP_UNREACH_TOSNET:
 			case ICMP_UNREACH_TOSHOST:
 			case ICMP_UNREACH_HOST_PRECEDENCE:
 			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
 				code = PRC_UNREACH_NET;
 				break;
 
 			case ICMP_UNREACH_NEEDFRAG:
 				code = PRC_MSGSIZE;
 				break;
 
 			/*
 			 * RFC 1122, Sections 3.2.2.1 and 4.2.3.9.
 			 * Treat subcodes 2,3 as immediate RST
 			 */
 			case ICMP_UNREACH_PROTOCOL:
 			case ICMP_UNREACH_PORT:
 				code = PRC_UNREACH_PORT;
 				break;
 
 			case ICMP_UNREACH_NET_PROHIB:
 			case ICMP_UNREACH_HOST_PROHIB:
 			case ICMP_UNREACH_FILTER_PROHIB:
 				code = PRC_UNREACH_ADMIN_PROHIB;
 				break;
 
 			default:
 				goto badcode;
 		}
 		goto deliver;
 
 	case ICMP_TIMXCEED:
 		if (code > 1)
 			goto badcode;
 		code += PRC_TIMXCEED_INTRANS;
 		goto deliver;
 
 	case ICMP_PARAMPROB:
 		if (code > 1)
 			goto badcode;
 		code = PRC_PARAMPROB;
 		goto deliver;
 
 	case ICMP_SOURCEQUENCH:
 		if (code)
 			goto badcode;
 		code = PRC_QUENCH;
 	deliver:
 		/*
 		 * Problem with datagram; advise higher level routines.
 		 */
 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
 			icmpstat.icps_badlen++;
 			goto freeit;
 		}
 		icp->icmp_ip.ip_len = ntohs(icp->icmp_ip.ip_len);
 		/* Discard ICMP's in response to multicast packets */
 		if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
 			goto badcode;
 #ifdef ICMPPRINTFS
 		if (icmpprintfs)
 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
 #endif
 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
 		/*
 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
 		 * notification to TCP layer.
 		 */
 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
 		if (ctlfunc)
 			(*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
 				   (void *)&icp->icmp_ip);
 		break;
 
 	badcode:
 		icmpstat.icps_badcode++;
 		break;
 
 	case ICMP_ECHO:
 		if (!icmpbmcastecho
 		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
 			icmpstat.icps_bmcastecho++;
 			break;
 		}
 		/* Turn the request into a reply in place, subject to rate limit. */
 		icp->icmp_type = ICMP_ECHOREPLY;
 		if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0)
 			goto freeit;
 		else
 			goto reflect;
 
 	case ICMP_TSTAMP:
 		if (!icmpbmcastecho
 		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
 			icmpstat.icps_bmcasttstamp++;
 			break;
 		}
 		if (icmplen < ICMP_TSLEN) {
 			icmpstat.icps_badlen++;
 			break;
 		}
 		icp->icmp_type = ICMP_TSTAMPREPLY;
 		icp->icmp_rtime = iptime();
 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
 		if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0)
 			goto freeit;
 		else
 			goto reflect;
 
 	case ICMP_MASKREQ:
 		if (icmpmaskrepl == 0)
 			break;
 		/*
 		 * We are not able to respond with all ones broadcast
 		 * unless we receive it over a point-to-point interface.
 		 */
 		if (icmplen < ICMP_MASKLEN)
 			break;
 		switch (ip->ip_dst.s_addr) {
 
 		case INADDR_BROADCAST:
 		case INADDR_ANY:
 			icmpdst.sin_addr = ip->ip_src;
 			break;
 
 		default:
 			icmpdst.sin_addr = ip->ip_dst;
 		}
 		/* Find the address on the receiving interface to answer for. */
 		ia = (struct in_ifaddr *)ifaof_ifpforaddr(
 			    (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
 		if (ia == 0)
 			break;
 		if (ia->ia_ifp == 0)
 			break;
 		icp->icmp_type = ICMP_MASKREPLY;
 		if (icmpmaskfake == 0)
 			icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
 		else
 			icp->icmp_mask = icmpmaskfake;
 		if (ip->ip_src.s_addr == 0) {
 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
 			    ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr;
 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
 			    ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
 		}
 		/* Shared tail for the echo, timestamp and mask replies. */
 reflect:
 		ip->ip_len += hlen;	/* since ip_input deducts this */
 		icmpstat.icps_reflect++;
 		icmpstat.icps_outhist[icp->icmp_type]++;
 		icmp_reflect(m);
 		return;
 
 	case ICMP_REDIRECT:
 		if (log_redirect) {
 			u_long src, dst, gw;
 
 			src = ntohl(ip->ip_src.s_addr);
 			dst = ntohl(icp->icmp_ip.ip_dst.s_addr);
 			gw = ntohl(icp->icmp_gwaddr.s_addr);
 			printf("icmp redirect from %d.%d.%d.%d: "
 			       "%d.%d.%d.%d => %d.%d.%d.%d\n",
 			       (int)(src >> 24), (int)((src >> 16) & 0xff),
 			       (int)((src >> 8) & 0xff), (int)(src & 0xff),
 			       (int)(dst >> 24), (int)((dst >> 16) & 0xff),
 			       (int)((dst >> 8) & 0xff), (int)(dst & 0xff),
 			       (int)(gw >> 24), (int)((gw >> 16) & 0xff),
 			       (int)((gw >> 8) & 0xff), (int)(gw & 0xff));
 		}
 		/*
 		 * RFC1812 says we must ignore ICMP redirects if we
 		 * are acting as router.
 		 */
 		if (drop_redirect || ipforwarding)
 			break;
 		if (code > 3)
 			goto badcode;
 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
 			icmpstat.icps_badlen++;
 			break;
 		}
 		/*
 		 * Short circuit routing redirects to force
 		 * immediate change in the kernel's routing
 		 * tables.  The message is also handed to anyone
 		 * listening on a raw socket (e.g. the routing
 		 * daemon for use in updating its tables).
 		 */
 		icmpgw.sin_addr = ip->ip_src;
 		icmpdst.sin_addr = icp->icmp_gwaddr;
 #ifdef	ICMPPRINTFS
 		if (icmpprintfs) {
 			char buf[4 * sizeof "123"];
 			strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst));
 
 			printf("redirect dst %s to %s\n",
 			       buf, inet_ntoa(icp->icmp_gwaddr));
 		}
 #endif
 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
-		rtredirect((struct sockaddr *)&icmpsrc,
-		  (struct sockaddr *)&icmpdst,
-		  (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
-		  (struct sockaddr *)&icmpgw);
+		for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {
+			in_rtredirect((struct sockaddr *)&icmpsrc,
+			  (struct sockaddr *)&icmpdst,
+			  (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
+			  (struct sockaddr *)&icmpgw, fibnum);
+		}
 		pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
 #ifdef IPSEC
 		key_sa_routechange((struct sockaddr *)&icmpsrc);
 #endif
 		break;
 
 	/*
 	 * No kernel processing for the following;
 	 * just fall through to send to raw listener.
 	 */
 	case ICMP_ECHOREPLY:
 	case ICMP_ROUTERADVERT:
 	case ICMP_ROUTERSOLICIT:
 	case ICMP_TSTAMPREPLY:
 	case ICMP_IREQREPLY:
 	case ICMP_MASKREPLY:
 	default:
 		break;
 	}
 
 	/* Everything that was not reflected or dropped goes to raw IP input. */
 raw:
 	rip_input(m, off);
 	return;
 
 freeit:
 	m_freem(m);
 }
 
 /*
  * Reflect the ip packet back to the source
  *
  * The reply is built in place in mbuf `m': the original source becomes
  * the destination, a local source address is chosen by the cascade of
  * checks below, the TTL is reset to ip_defttl, and any IP options are
  * stripped from the packet (record-route, timestamp and security options
  * are copied into a separate `opts' mbuf that is handed to icmp_send()).
  *
  * The mbuf is consumed on every path: it is either freed here (bad
  * return address, no route back) or passed on to icmp_send().
  */
 static void
 icmp_reflect(struct mbuf *m)
 {
 	struct ip *ip = mtod(m, struct ip *);
 	struct ifaddr *ifa;
 	struct ifnet *ifn;
 	struct in_ifaddr *ia;
 	struct in_addr t;
 	struct mbuf *opts = 0;
 	/* Bytes of IP options present in the incoming header. */
 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
 
 	/* Never reply to a source address we could not legitimately reach. */
 	if (IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
 	    IN_EXPERIMENTAL(ntohl(ip->ip_src.s_addr)) ||
             IN_LOOPBACK(ntohl(ip->ip_src.s_addr)) ||
 	    IN_ZERONET(ntohl(ip->ip_src.s_addr)) ) {
 		m_freem(m);	/* Bad return address */
 		icmpstat.icps_badaddr++;
 		goto done;	/* Ip_output() will check for broadcast */
 	}
 
 	/* Remember the original destination; the old source is the new dst. */
 	t = ip->ip_dst;
 	ip->ip_dst = ip->ip_src;
 
 	/*
 	 * Source selection for ICMP replies:
 	 *
 	 * If the incoming packet was addressed directly to one of our
 	 * own addresses, use dst as the src for the reply.
 	 */
 	LIST_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash)
 		if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr)
 			goto match;
 	/*
 	 * If the incoming packet was addressed to one of our broadcast
 	 * addresses, use the first non-broadcast address which corresponds
 	 * to the incoming interface.
 	 */
 	if (m->m_pkthdr.rcvif != NULL &&
 	    m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
 		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			ia = ifatoia(ifa);
 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
 			    t.s_addr)
 				goto match;
 		}
 	}
 	/*
 	 * If the packet was transiting through us, use the address of
 	 * the interface the packet came through in.  If that interface
 	 * doesn't have a suitable IP address, the normal selection
 	 * criteria apply.
 	 */
 	if (icmp_rfi && m->m_pkthdr.rcvif != NULL) {
 		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			ia = ifatoia(ifa);
 			goto match;
 		}
 	}
 	/*
 	 * If the incoming packet was not addressed directly to us, use
 	 * designated interface for icmp replies specified by sysctl
 	 * net.inet.icmp.reply_src (default not set). Otherwise continue
 	 * with normal source selection.
 	 */
 	if (reply_src[0] != '\0' && (ifn = ifunit(reply_src))) {
 		TAILQ_FOREACH(ifa, &ifn->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			ia = ifatoia(ifa);
 			goto match;
 		}
 	}
 	/*
 	 * If the packet was transiting through us, use the address of
 	 * the interface that is the closest to the packet source.
 	 * When we don't have a route back to the packet source, stop here
 	 * and drop the packet.
 	 */
-	ia = ip_rtaddr(ip->ip_dst);
+	ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
 	if (ia == NULL) {
 		m_freem(m);
 		icmpstat.icps_noroute++;
 		goto done;
 	}
 match:
 #ifdef MAC
 	mac_netinet_icmp_replyinplace(m);
 #endif
 	/* Use the selected interface address as the reply's source. */
 	t = IA_SIN(ia)->sin_addr;
 	ip->ip_src = t;
 	ip->ip_ttl = ip_defttl;
 
 	if (optlen > 0) {
 		register u_char *cp;
 		int opt, cnt;
 		u_int len;
 
 		/*
 		 * Retrieve any source routing from the incoming packet;
 		 * add on any record-route or timestamp options.
 		 */
 		cp = (u_char *) (ip + 1);
 		/*
 		 * If ip_srcroute() returned nothing, start a fresh options
 		 * mbuf that holds only a zeroed in_addr.
 		 */
 		if ((opts = ip_srcroute(m)) == 0 &&
 		    (opts = m_gethdr(M_DONTWAIT, MT_DATA))) {
 			opts->m_len = sizeof(struct in_addr);
 			mtod(opts, struct in_addr *)->s_addr = 0;
 		}
 		if (opts) {
 #ifdef ICMPPRINTFS
 		    if (icmpprintfs)
 			    printf("icmp_reflect optlen %d rt %d => ",
 				optlen, opts->m_len);
 #endif
 		    /*
 		     * Walk the incoming options.  The walk stops at EOL or
 		     * at the first option whose length byte is missing,
 		     * too small, or larger than the bytes remaining.
 		     */
 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
 			    opt = cp[IPOPT_OPTVAL];
 			    if (opt == IPOPT_EOL)
 				    break;
 			    if (opt == IPOPT_NOP)
 				    len = 1;
 			    else {
 				    if (cnt < IPOPT_OLEN + sizeof(*cp))
 					    break;
 				    len = cp[IPOPT_OLEN];
 				    if (len < IPOPT_OLEN + sizeof(*cp) ||
 				        len > cnt)
 					    break;
 			    }
 			    /*
 			     * Should check for overflow, but it "can't happen"
 			     */
 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
 				opt == IPOPT_SECURITY) {
 				    bcopy((caddr_t)cp,
 					mtod(opts, caddr_t) + opts->m_len, len);
 				    opts->m_len += len;
 			    }
 		    }
 		    /* Terminate & pad, if necessary */
 		    cnt = opts->m_len % 4;
 		    if (cnt) {
 			    for (; cnt < 4; cnt++) {
 				    *(mtod(opts, caddr_t) + opts->m_len) =
 					IPOPT_EOL;
 				    opts->m_len++;
 			    }
 		    }
 #ifdef ICMPPRINTFS
 		    if (icmpprintfs)
 			    printf("%d\n", opts->m_len);
 #endif
 		}
 		/*
 		 * Now strip out original options by copying rest of first
 		 * mbuf's data back, and adjust the IP length.
 		 */
 		ip->ip_len -= optlen;
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = 5;
 		m->m_len -= optlen;
 		if (m->m_flags & M_PKTHDR)
 			m->m_pkthdr.len -= optlen;
 		/* optlen now is the offset of the payload in the old header. */
 		optlen += sizeof(struct ip);
 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
 			 (unsigned)(m->m_len - sizeof(struct ip)));
 	}
 	m_tag_delete_nonpersistent(m);
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 	icmp_send(m, opts);
 done:
 	/* The options mbuf, if any, is released here on every path. */
 	if (opts)
 		(void)m_free(opts);
 }
 
 /*
  * Fill in the ICMP checksum of the packet held in `m' and pass the
  * packet, together with the optional IP options mbuf `opts', down to
  * ip_output().
  */
 static void
 icmp_send(struct mbuf *m, struct mbuf *opts)
 {
 	struct ip *iph;
 	struct icmp *icmph;
 	int iphlen;
 
 	iph = mtod(m, struct ip *);
 	iphlen = iph->ip_hl << 2;
 
 	/*
 	 * Step the mbuf past the IP header so that the checksum is
 	 * computed over the ICMP message only, then step back again.
 	 */
 	m->m_data += iphlen;
 	m->m_len -= iphlen;
 	icmph = mtod(m, struct icmp *);
 	icmph->icmp_cksum = 0;
 	icmph->icmp_cksum = in_cksum(m, iph->ip_len - iphlen);
 	m->m_data -= iphlen;
 	m->m_len += iphlen;
 
 	/* The reply carries no receive interface. */
 	m->m_pkthdr.rcvif = NULL;
 #ifdef ICMPPRINTFS
 	if (icmpprintfs) {
 		char dstbuf[4 * sizeof "123"];
 
 		/* inet_ntoa() is called twice, so keep a copy of the first. */
 		strcpy(dstbuf, inet_ntoa(iph->ip_dst));
 		printf("icmp_send dst %s src %s\n",
 		       dstbuf, inet_ntoa(iph->ip_src));
 	}
 #endif
 	(void) ip_output(m, opts, NULL, 0, NULL, NULL);
 }
 
 n_time
 iptime(void)
 {
 	struct timeval atv;
 	u_long t;
 
 	getmicrotime(&atv);
 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
 	return (htonl(t));
 }
 
 /*
  * Step from MTU to the neighbouring plateau of the RFC 1191 table.
  *
  * A negative DIR asks for the next larger plateau, any other DIR for
  * the next smaller one.  Zero is returned when no such plateau exists,
  * except that asking for a larger value at the top of the table
  * returns the top entry itself.
  */
 int
 ip_next_mtu(int mtu, int dir)
 {
 	static int mtutab[] = {
 		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1280, 1006, 508,
 		296, 68, 0
 	};
 	int idx, nplateaus;
 
 	nplateaus = (sizeof mtutab) / (sizeof mtutab[0]);
 
 	if (dir < 0) {
 		/* Table is descending: scan from the small end upwards. */
 		idx = nplateaus;
 		while (idx-- > 0) {
 			if (mtutab[idx] > mtu)
 				return mtutab[idx];
 		}
 		/* Already at the largest plateau: stay there. */
 		if (mtu == mtutab[0])
 			return mtutab[0];
 		return 0;
 	}
 
 	/* Scan from the large end for the first strictly smaller entry. */
 	for (idx = 0; idx < nplateaus; idx++) {
 		if (mtutab[idx] < mtu)
 			return mtutab[idx];
 	}
 	return 0;
 }
 
 
 /*
  * badport_bandlim() - rate limiter for generated ICMP/RST responses
  *
  *	Returns 0 when the response may be sent and -1 when the packet
  *	rate for the given class has exceeded icmplim and the response
  *	must be discarded.
  *
  *	The limiter is off (0 is always returned) when icmplim is <= 0,
  *	and 'which' values outside the table are likewise let through.
  *
  *	Each 'which' class keeps its own counter, so the TCP and UDP
  *	subsystems are limited independently of each other.
  *
  *	The log message is printed late, on a later call that is no
  *	longer limited, so that it can report the rate that was actually
  *	being attempted (i.e. 22000/100 pps, etc...).  The 'final' message
  *	may therefore show up with a long delay.
  */
 
 int
 badport_bandlim(int which)
 {
 	static struct rate {
 		const char	*type;
 		struct timeval	lasttime;
 		int		curpps;
 	} rates[BANDLIM_MAX+1] = {
 		{ "icmp unreach response" },
 		{ "icmp ping response" },
 		{ "icmp tstamp response" },
 		{ "closed port RST response" },
 		{ "open port RST response" },
 		{ "icmp6 unreach response" }
 	};
 	struct rate *lim;
 	int prev_pps;
 
 	/* Feature disabled: everything may be sent. */
 	if (icmplim <= 0)
 		return 0;
 	/* Unknown class (including negative values): do not limit. */
 	if ((u_int) which >= sizeof (rates) / sizeof (rates[0]))
 		return 0;
 
 	lim = &rates[which];
 	/* Sample the rate before ppsratecheck() updates it. */
 	prev_pps = lim->curpps;
 
 	if (!ppsratecheck(&lim->lasttime, &lim->curpps, icmplim))
 		return -1;	/* discard packet */
 
 	/*
 	 * We are allowed to send again; if the previously recorded rate
 	 * was above the limit, report it now.
 	 */
 	if (icmplim_output && prev_pps > icmplim)
 		printf("Limiting %s from %d to %d packets/sec\n",
 			lim->type, prev_pps, icmplim);
 
 	return 0;			/* okay to send packet */
 }
Index: head/sys/netinet/ip_input.c
===================================================================
--- head/sys/netinet/ip_input.c	(revision 178887)
+++ head/sys/netinet/ip_input.c	(revision 178888)
@@ -1,1621 +1,1621 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_bootp.h"
 #include "opt_ipfw.h"
 #include "opt_ipstealth.h"
 #include "opt_ipsec.h"
 #include "opt_mac.h"
 #include "opt_carp.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/callout.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 
 #include <net/pfil.h>
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/netisr.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_options.h>
 #include <machine/in_cksum.h>
 #ifdef DEV_CARP
 #include <netinet/ip_carp.h>
 #endif
 #ifdef IPSEC
 #include <netinet/ip_ipsec.h>
 #endif /* IPSEC */
 
 #include <sys/socketvar.h>
 
 /* XXX: Temporary until ipfw_ether and ipfw_bridge are converted. */
 #include <netinet/ip_fw.h>
 #include <netinet/ip_dummynet.h>
 
 #include <security/mac/mac_framework.h>
 
 /* When non-zero, ip_input() treats every IPPROTO_RSVP packet as local. */
 int rsvp_on = 0;
 
 /* Non-zero lets ip_input() hand packets not for us to ip_forward(). */
 int	ipforwarding = 0;
 SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
     &ipforwarding, 0, "Enable IP forwarding between interfaces");
 
 static int	ipsendredirects = 1; /* XXX */
 SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
     &ipsendredirects, 0, "Enable sending IP redirects");
 
 int	ip_defttl = IPDEFTTL;
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
     &ip_defttl, 0, "Maximum TTL on IP packets");
 
 /* Consulted by ip_input() for packets arriving on an IFT_FAITH interface. */
 static int	ip_keepfaith = 0;
 SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
     &ip_keepfaith,	0,
     "Enable packet capture for FAITH IPv4->IPv6 translater daemon");
 
 static int	ip_sendsourcequench = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
     &ip_sendsourcequench, 0,
     "Enable the transmission of source quench packets");
 
 int	ip_do_randomid = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW,
     &ip_do_randomid, 0,
     "Assign random ip_id values");
 
 /*
  * XXX - Setting ip_checkinterface mostly implements the receive side of
  * the Strong ES model described in RFC 1122, but since the routing table
  * and transmit implementation do not implement the Strong ES model,
  * setting this to 1 results in an odd hybrid.
  *
  * XXX - ip_checkinterface currently must be disabled if you use ipnat
  * to translate the destination address to another local interface.
  *
  * XXX - ip_checkinterface must be disabled if you add IP aliases
  * to the loopback interface instead of the interface where the
  * packets for those addresses are received.
  */
 static int	ip_checkinterface = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
     &ip_checkinterface, 0, "Verify packet arrives on correct interface");
 
 struct pfil_head inet_pfil_hook;	/* Packet filter hooks */
 
 /* IP input queue; registered with netisr in ip_init(). */
 static struct	ifqueue ipintrq;
 /* Initial value for ipintrq.ifq_maxlen, applied in ip_init(). */
 static int	ipqmaxlen = IFQ_MAXLEN;
 
 extern	struct domain inetdomain;
 extern	struct protosw inetsw[];
 /* Maps an IP protocol number to an index into inetsw[]; see ip_init(). */
 u_char	ip_protox[IPPROTO_MAX];
 struct	in_ifaddrhead in_ifaddrhead; 		/* first inet address */
 struct	in_ifaddrhashhead *in_ifaddrhashtbl;	/* inet addr hash table  */
 u_long 	in_ifaddrhmask;				/* mask for hash table */
 
 SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
     &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
 SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
     &ipintrq.ifq_drops, 0,
     "Number of packets dropped from the IP input queue");
 
 struct ipstat ipstat;
 SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
     &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
 
 /*
  * IP datagram reassembly.
  *
  * Reassembly queues live in IPREASS_NHASH hash buckets.  IPREASS_HASH
  * mixes bits of its first argument with the second and masks the result
  * to a bucket index; ip_reass() calls it with the source address and
  * the IP id of the fragment.
  */
 #define IPREASS_NHASH_LOG2      6
 #define IPREASS_NHASH           (1 << IPREASS_NHASH_LOG2)
 #define IPREASS_HMASK           (IPREASS_NHASH - 1)
 #define IPREASS_HASH(x,y) \
 	(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
 
 static uma_zone_t ipq_zone;
 static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
 static struct mtx ipqlock;
 
 /* Wrappers around the mutex that guards the reassembly queues. */
 #define	IPQ_LOCK()	mtx_lock(&ipqlock)
 #define	IPQ_UNLOCK()	mtx_unlock(&ipqlock)
 #define	IPQ_LOCK_INIT()	mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF)
 #define	IPQ_LOCK_ASSERT()	mtx_assert(&ipqlock, MA_OWNED)
 
 static void	maxnipq_update(void);
 static void	ipq_zone_change(void *);
 
 static int	maxnipq;	/* Administrative limit on # reass queues. */
 static int	nipq = 0;	/* Total # of reass queues */
 SYSCTL_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD,
     &nipq, 0, "Current number of IPv4 fragment reassembly queue entries");
 
 static int	maxfragsperpacket;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
     &maxfragsperpacket, 0,
     "Maximum number of IPv4 fragments allowed per packet");
 
 /* Callout initialized in ip_init() and stopped in ip_fini(). */
 struct callout	ipport_tick_callout;
 
 #ifdef IPCTL_DEFMTU
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
     &ip_mtu, 0, "Default MTU");
 #endif
 
 #ifdef IPSTEALTH
 int	ipstealth = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
     &ipstealth, 0, "IP stealth mode, no TTL decrementation on forwarding");
 #endif
 
 /*
  * ipfw_ether and ipfw_bridge hooks.
  * XXX: Temporary until those are converted to pfil_hooks as well.
  */
 ip_fw_chk_t *ip_fw_chk_ptr = NULL;
 ip_dn_io_t *ip_dn_io_ptr = NULL;
 int fw_one_pass = 1;
 
 static void	ip_freef(struct ipqhead *, struct ipq *);
 
 /*
  * IP initialization: fill in IP protocol switch table.
  * All protocols not implemented in kernel go to raw IP protocol handler.
  *
  * Besides the protocol table this also sets up the interface address
  * list and hash, the pfil hook head, the fragment reassembly queues and
  * their UMA zone, the ipport_tick callout, two event handlers, the
  * initial ip_id, and the IP input queue which is registered with netisr.
  */
 void
 ip_init(void)
 {
 	struct protosw *pr;
 	int i;
 
 	TAILQ_INIT(&in_ifaddrhead);
 	in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask);
 	/* The raw IP protosw entry is the default for every protocol. */
 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (pr == NULL)
 		panic("ip_init: PF_INET not found");
 
 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
 	for (i = 0; i < IPPROTO_MAX; i++)
 		ip_protox[i] = pr - inetsw;
 	/*
 	 * Cycle through IP protocols and put them into the appropriate place
 	 * in ip_protox[].
 	 */
 	for (pr = inetdomain.dom_protosw;
 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
 		if (pr->pr_domain->dom_family == PF_INET &&
 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
 			/* Be careful to only index valid IP protocols. */
 			if (pr->pr_protocol < IPPROTO_MAX)
 				ip_protox[pr->pr_protocol] = pr - inetsw;
 		}
 
 	/* Initialize packet filter hooks. */
 	inet_pfil_hook.ph_type = PFIL_TYPE_AF;
 	inet_pfil_hook.ph_af = AF_INET;
 	/* A registration failure is only logged; initialization continues. */
 	if ((i = pfil_head_register(&inet_pfil_hook)) != 0)
 		printf("%s: WARNING: unable to register pfil hook, "
 			"error %d\n", __func__, i);
 
 	/* Initialize IP reassembly queue. */
 	IPQ_LOCK_INIT();
 	for (i = 0; i < IPREASS_NHASH; i++)
 	    TAILQ_INIT(&ipq[i]);
 	maxnipq = nmbclusters / 32;
 	maxfragsperpacket = 16;
 	ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
 	    NULL, UMA_ALIGN_PTR, 0);
 	/* Needs both maxnipq and ipq_zone to be set up already. */
 	maxnipq_update();
 
 	/* Start ipport_tick. */
 	callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
 	ipport_tick(NULL);
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
 		SHUTDOWN_PRI_DEFAULT);
 	EVENTHANDLER_REGISTER(nmbclusters_change, ipq_zone_change,
 		NULL, EVENTHANDLER_PRI_ANY);
 
 	/* Initialize various other remaining things. */
 	ip_id = time_second & 0xffff;
 	ipintrq.ifq_maxlen = ipqmaxlen;
 	mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF);
 	netisr_register(NETISR_IP, ip_input, &ipintrq, NETISR_MPSAFE);
 }
 
 /*
  * Shutdown hook, registered by ip_init() for the shutdown_pre_sync
  * event: stops the ipport_tick callout.  The argument is not used.
  */
 void
 ip_fini(void *xtp)
 {
 
 	callout_stop(&ipport_tick_callout);
 }
 
 /*
  * Ip input routine.  Checksum and byte swap header.  If fragmented
  * try to reassemble.  Process options.  Pass to next level.
  *
  * Outline of the processing order in this function:
  *   1. validate version, header length, addresses and header checksum;
  *   2. convert ip_len/ip_off to host order and trim or reject the mbuf
  *      chain according to ip_len;
  *   3. run the pfil input hooks (which may consume the packet or change
  *      its destination);
  *   4. decide whether the packet is for this host ("ours") or should be
  *      forwarded or dropped;
  *   5. for local packets: reassemble fragments, subtract the header
  *      length from ip_len and call the protocol's pr_input routine.
  *
  * The mbuf is consumed on every path.
  */
 void
 ip_input(struct mbuf *m)
 {
 	struct ip *ip = NULL;
 	struct in_ifaddr *ia = NULL;
 	struct ifaddr *ifa;
 	int    checkif, hlen = 0;
 	u_short sum;
 	int dchg = 0;				/* dest changed after fw */
 	struct in_addr odst;			/* original dst address */
 
 	M_ASSERTPKTHDR(m);
 
 	if (m->m_flags & M_FASTFWD_OURS) {
 		/*
 		 * Firewall or NAT changed destination to local.
 		 * We expect ip_len and ip_off to be in host byte order.
 		 */
 		m->m_flags &= ~M_FASTFWD_OURS;
 		/* Set up some basics that will be used later. */
 		ip = mtod(m, struct ip *);
 		hlen = ip->ip_hl << 2;
 		goto ours;
 	}
 
 	ipstat.ips_total++;
 
 	if (m->m_pkthdr.len < sizeof(struct ip))
 		goto tooshort;
 
 	/* Make the fixed part of the header contiguous in the first mbuf. */
 	if (m->m_len < sizeof (struct ip) &&
 	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
 		ipstat.ips_toosmall++;
 		return;
 	}
 	ip = mtod(m, struct ip *);
 
 	if (ip->ip_v != IPVERSION) {
 		ipstat.ips_badvers++;
 		goto bad;
 	}
 
 	hlen = ip->ip_hl << 2;
 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
 		ipstat.ips_badhlen++;
 		goto bad;
 	}
 	/* Options present: pull the whole header into the first mbuf. */
 	if (hlen > m->m_len) {
 		if ((m = m_pullup(m, hlen)) == NULL) {
 			ipstat.ips_badhlen++;
 			return;
 		}
 		ip = mtod(m, struct ip *);
 	}
 
 	/* 127/8 must not appear on wire - RFC1122 */
 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
 		if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
 			ipstat.ips_badaddr++;
 			goto bad;
 		}
 	}
 
 	/*
 	 * `sum' ends up non-zero when the header checksum is bad.  If the
 	 * mbuf is flagged CSUM_IP_CHECKED the CSUM_IP_VALID flag is
 	 * trusted, otherwise the checksum is computed here.
 	 */
 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
 	} else {
 		if (hlen == sizeof(struct ip)) {
 			sum = in_cksum_hdr(ip);
 		} else {
 			sum = in_cksum(m, hlen);
 		}
 	}
 	if (sum) {
 		ipstat.ips_badsum++;
 		goto bad;
 	}
 
 #ifdef ALTQ
 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
 		/* packet is dropped by traffic conditioner */
 		return;
 #endif
 
 	/*
 	 * Convert fields to host representation.
 	 */
 	ip->ip_len = ntohs(ip->ip_len);
 	if (ip->ip_len < hlen) {
 		ipstat.ips_badlen++;
 		goto bad;
 	}
 	ip->ip_off = ntohs(ip->ip_off);
 
 	/*
 	 * Check that the amount of data in the buffers
 	 * is as at least much as the IP header would have us expect.
 	 * Trim mbufs if longer than we expect.
 	 * Drop packet if shorter than we expect.
 	 */
 	if (m->m_pkthdr.len < ip->ip_len) {
 tooshort:
 		ipstat.ips_tooshort++;
 		goto bad;
 	}
 	if (m->m_pkthdr.len > ip->ip_len) {
 		if (m->m_len == m->m_pkthdr.len) {
 			m->m_len = ip->ip_len;
 			m->m_pkthdr.len = ip->ip_len;
 		} else
 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
 	}
 #ifdef IPSEC
 	/*
 	 * Bypass packet filtering for packets from a tunnel (gif).
 	 */
 	if (ip_ipsec_filtertunnel(m))
 		goto passin;
 #endif /* IPSEC */
 
 	/*
 	 * Run through list of hooks for input packets.
 	 *
 	 * NB: Beware of the destination address changing (e.g.
 	 *     by NAT rewriting).  When this happens, tell
 	 *     ip_forward to do the right thing.
 	 */
 
 	/* Jump over all PFIL processing if hooks are not active. */
 	if (!PFIL_HOOKED(&inet_pfil_hook))
 		goto passin;
 
 	odst = ip->ip_dst;
 	if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif,
 	    PFIL_IN, NULL) != 0)
 		return;
 	if (m == NULL)			/* consumed by filter */
 		return;
 
 	/* The hooks may have replaced the mbuf; reload the header pointer. */
 	ip = mtod(m, struct ip *);
 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
 
 #ifdef IPFIREWALL_FORWARD
 	if (m->m_flags & M_FASTFWD_OURS) {
 		m->m_flags &= ~M_FASTFWD_OURS;
 		goto ours;
 	}
 	if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) {
 		/*
 		 * Directly ship on the packet.  This allows to forward packets
 		 * that were destined for us to some other directly connected
 		 * host.
 		 */
 		ip_forward(m, dchg);
 		return;
 	}
 #endif /* IPFIREWALL_FORWARD */
 
 passin:
 	/*
 	 * Process options and, if not destined for us,
 	 * ship it on.  ip_dooptions returns 1 when an
 	 * error was detected (causing an icmp message
 	 * to be sent and the original packet to be freed).
 	 */
 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
 		return;
 
         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
          * matter if it is destined to another node, or whether it is 
          * a multicast one, RSVP wants it! and prevents it from being forwarded
          * anywhere else. Also checks if the rsvp daemon is running before
 	 * grabbing the packet.
          */
 	if (rsvp_on && ip->ip_p==IPPROTO_RSVP) 
 		goto ours;
 
 	/*
 	 * Check our list of addresses, to see if the packet is for us.
 	 * If we don't have any addresses, assume any unicast packet
 	 * we receive might be for us (and let the upper layers deal
 	 * with it).
 	 */
 	if (TAILQ_EMPTY(&in_ifaddrhead) &&
 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
 		goto ours;
 
 	/*
 	 * Enable a consistency check between the destination address
 	 * and the arrival interface for a unicast packet (the RFC 1122
 	 * strong ES model) if IP forwarding is disabled and the packet
 	 * is not locally generated and the packet is not subject to
 	 * 'ipfw fwd'.
 	 *
 	 * XXX - Checking also should be disabled if the destination
 	 * address is ipnat'ed to a different interface.
 	 *
 	 * XXX - Checking is incompatible with IP aliases added
 	 * to the loopback interface instead of the interface where
 	 * the packets are received.
 	 *
 	 * XXX - This is the case for carp vhost IPs as well so we
 	 * insert a workaround. If the packet got here, we already
 	 * checked with carp_iamatch() and carp_forus().
 	 */
 	checkif = ip_checkinterface && (ipforwarding == 0) && 
 	    m->m_pkthdr.rcvif != NULL &&
 	    ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
 #ifdef DEV_CARP
 	    !m->m_pkthdr.rcvif->if_carp &&
 #endif
 	    (dchg == 0);
 
 	/*
 	 * Check for exact addresses in the hash bucket.
 	 */
 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
 		/*
 		 * If the address matches, verify that the packet
 		 * arrived via the correct interface if checking is
 		 * enabled.
 		 */
 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 
 		    (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif))
 			goto ours;
 	}
 	/*
 	 * Check for broadcast addresses.
 	 *
 	 * Only accept broadcast packets that arrive via the matching
 	 * interface.  Reception of forwarded directed broadcasts would
 	 * be handled via ip_forward() and ether_output() with the loopback
 	 * into the stack for SIMPLEX interfaces handled by ether_output().
 	 */
 	if (m->m_pkthdr.rcvif != NULL &&
 	    m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
 	        TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			ia = ifatoia(ifa);
 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
 			    ip->ip_dst.s_addr)
 				goto ours;
 			if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr)
 				goto ours;
 #ifdef BOOTP_COMPAT
 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
 				goto ours;
 #endif
 		}
 		/*
 		 * NOTE(review): when the loop above ends without a match,
 		 * `ia' still points at the last AF_INET address examined.
 		 * The later "goto ours" paths (multicast, INADDR_BROADCAST,
 		 * INADDR_ANY, FAITH) then charge the per-address counters
 		 * at "ours:" to that address -- confirm this is intended.
 		 */
 	}
 	/* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
 		ipstat.ips_cantforward++;
 		m_freem(m);
 		return;
 	}
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 		struct in_multi *inm;
 		if (ip_mrouter) {
 			/*
 			 * If we are acting as a multicast router, all
 			 * incoming multicast packets are passed to the
 			 * kernel-level multicast forwarding function.
 			 * The packet is returned (relatively) intact; if
 			 * ip_mforward() returns a non-zero value, the packet
 			 * must be discarded, else it may be accepted below.
 			 */
 			if (ip_mforward &&
 			    ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
 				ipstat.ips_cantforward++;
 				m_freem(m);
 				return;
 			}
 
 			/*
 			 * The process-level routing daemon needs to receive
 			 * all multicast IGMP packets, whether or not this
 			 * host belongs to their destination groups.
 			 */
 			if (ip->ip_p == IPPROTO_IGMP)
 				goto ours;
 			ipstat.ips_forward++;
 		}
 		/*
 		 * See if we belong to the destination multicast group on the
 		 * arrival interface.
 		 */
 		IN_MULTI_LOCK();
 		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
 		IN_MULTI_UNLOCK();
 		if (inm == NULL) {
 			ipstat.ips_notmember++;
 			m_freem(m);
 			return;
 		}
 		goto ours;
 	}
 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
 		goto ours;
 	if (ip->ip_dst.s_addr == INADDR_ANY)
 		goto ours;
 
 	/*
 	 * FAITH(Firewall Aided Internet Translator)
 	 */
 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
 		if (ip_keepfaith) {
 			if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP) 
 				goto ours;
 		}
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Not for us; forward if possible and desirable.
 	 */
 	if (ipforwarding == 0) {
 		ipstat.ips_cantforward++;
 		m_freem(m);
 	} else {
 #ifdef IPSEC
 		if (ip_ipsec_fwd(m))
 			goto bad;
 #endif /* IPSEC */
 		ip_forward(m, dchg);
 	}
 	return;
 
 ours:
 #ifdef IPSTEALTH
 	/*
 	 * IPSTEALTH: Process non-routing options only
 	 * if the packet is destined for us.
 	 */
 	if (ipstealth && hlen > sizeof (struct ip) &&
 	    ip_dooptions(m, 1))
 		return;
 #endif /* IPSTEALTH */
 
 	/* Count the packet in the ip address stats */
 	if (ia != NULL) {
 		ia->ia_ifa.if_ipackets++;
 		ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
 	}
 
 	/*
 	 * Attempt reassembly; if it succeeds, proceed.
 	 * ip_reass() will return a different mbuf.
 	 */
 	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
 		m = ip_reass(m);
 		if (m == NULL)
 			return;
 		ip = mtod(m, struct ip *);
 		/* Get the header length of the reassembled packet */
 		hlen = ip->ip_hl << 2;
 	}
 
 	/*
 	 * Further protocols expect the packet length to be w/o the
 	 * IP header.
 	 */
 	ip->ip_len -= hlen;
 
 #ifdef IPSEC
 	/*
 	 * enforce IPsec policy checking if we are seeing last header.
 	 * note that we do not visit this with protocols with pcb layer
 	 * code - like udp/tcp/raw ip.
 	 */
 	if (ip_ipsec_input(m))
 		goto bad;
 #endif /* IPSEC */
 
 	/*
 	 * Switch out to protocol's input routine.
 	 */
 	ipstat.ips_delivered++;
 
 	/* ip_protox[] maps the protocol number to its inetsw[] entry. */
 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
 	return;
 bad:
 	m_freem(m);
 }
 
 /*
  * Push the current value of maxnipq into the UMA zone that backs the
  * reassembly queues.  The zone limit does not use the same encoding as
  * the sysctl value (historical reasons), so translate here.
  */
 static void
 maxnipq_update(void)
 {
 
 	if (maxnipq > 0) {
 		/* Positive number for specific bound. */
 		uma_zone_set_max(ipq_zone, maxnipq);
 	} else if (maxnipq < 0) {
 		/* -1 for unlimited allocation. */
 		uma_zone_set_max(ipq_zone, 0);
 	} else {
 		/*
 		 * Zero specifies no further fragment queue allocation --
 		 * set the bound very low, but rely on implementation
 		 * elsewhere to actually prevent allocation and reclaim
 		 * current queues.
 		 */
 		uma_zone_set_max(ipq_zone, 1);
 	}
 }
 
 /*
  * Event handler (registered for nmbclusters_change in ip_init()): when
  * a positive maxnipq is below nmbclusters / 32, raise it to that value
  * and propagate the new limit.  The tag argument is not used.
  */
 static void
 ipq_zone_change(void *tag)
 {
 
 	/* Unlimited (< 0), disabled (== 0) or already large enough. */
 	if (maxnipq <= 0 || maxnipq >= (nmbclusters / 32))
 		return;
 
 	maxnipq = nmbclusters / 32;
 	maxnipq_update();
 }
 
 static int
 sysctl_maxnipq(SYSCTL_HANDLER_ARGS)
 {
 	int error, i;
 
 	i = maxnipq;
 	error = sysctl_handle_int(oidp, &i, 0, req);
 	if (error || !req->newptr)
 		return (error);
 
 	/*
 	 * XXXRW: Might be a good idea to sanity check the argument and place
 	 * an extreme upper bound.
 	 */
 	if (i < -1)
 		return (EINVAL);
 	maxnipq = i;
 	maxnipq_update();
 	return (0);
 }
 
 SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLTYPE_INT|CTLFLAG_RW,
     NULL, 0, sysctl_maxnipq, "I",
     "Maximum number of IPv4 fragment reassembly queue entries");
 
 /*
  * Take incoming datagram fragment and try to reassemble it into
  * whole datagram.  If the argument is the first fragment or one
  * in between the function will return NULL and store the mbuf
  * in the fragment chain.  If the argument is the last fragment
  * the packet will be reassembled and the pointer to the new
  * mbuf returned for further processing.  Only m_tags attached
  * to the first packet/fragment are preserved.
  * The IP header is *NOT* adjusted out of iplen.
  *
  * NULL is also returned when the fragment, or the whole queue it
  * belongs to, is dropped.  Whenever NULL is returned the caller no
  * longer owns m: it has been freed or linked into a reassembly queue.
  * The IPQ lock is acquired and released inside this function.
  */
 struct mbuf *
 ip_reass(struct mbuf *m)
 {
 	struct ip *ip;
 	struct mbuf *p, *q, *nq, *t;
 	struct ipq *fp = NULL;
 	struct ipqhead *head;
 	int i, hlen, next;
 	u_int8_t ecn, ecn0;
 	u_short hash;
 
 	/* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
 	if (maxnipq == 0 || maxfragsperpacket == 0) {
 		ipstat.ips_fragments++;
 		ipstat.ips_fragdropped++;
 		m_freem(m);
 		return (NULL);
 	}
 
 	ip = mtod(m, struct ip *);
 	hlen = ip->ip_hl << 2;
 
 	/* The hash bucket is selected from source address and IP id. */
 	hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
 	head = &ipq[hash];
 	IPQ_LOCK();
 
 	/*
 	 * Look for queue of fragments
 	 * of this datagram.
 	 */
 	TAILQ_FOREACH(fp, head, ipq_list)
 		if (ip->ip_id == fp->ipq_id &&
 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
 #ifdef MAC
 		    mac_ipq_match(m, fp) &&
 #endif
 		    ip->ip_p == fp->ipq_p)
 			goto found;
 
 	/* No matching queue; fp == NULL marks that case below. */
 	fp = NULL;
 
 	/*
 	 * Attempt to trim the number of allocated fragment queues if it
 	 * exceeds the administrative limit.
 	 */
 	if ((nipq > maxnipq) && (maxnipq > 0)) {
 		/*
 		 * drop something from the tail of the current queue
 		 * before proceeding further
 		 *
 		 * Note: this q is a struct ipq and shadows the mbuf
 		 * pointer q declared at function scope.
 		 */
 		struct ipq *q = TAILQ_LAST(head, ipqhead);
 		if (q == NULL) {   /* gak */
 			/*
 			 * Our own bucket is empty; free the tail of the
 			 * first non-empty bucket instead.
 			 */
 			for (i = 0; i < IPREASS_NHASH; i++) {
 				struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
 				if (r) {
 					ipstat.ips_fragtimeout += r->ipq_nfrags;
 					ip_freef(&ipq[i], r);
 					break;
 				}
 			}
 		} else {
 			ipstat.ips_fragtimeout += q->ipq_nfrags;
 			ip_freef(head, q);
 		}
 	}
 
 	/*
 	 * Reached by the goto above with fp set to the matching queue, or
 	 * by falling through with fp == NULL when no queue exists yet.
 	 */
 found:
 	/*
 	 * Adjust ip_len to not reflect header,
 	 * convert offset of this to bytes.
 	 */
 	ip->ip_len -= hlen;
 	if (ip->ip_off & IP_MF) {
 		/*
 		 * Make sure that fragments have a data length
 		 * that's a non-zero multiple of 8 bytes.
 		 */
 		if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
 			ipstat.ips_toosmall++; /* XXX */
 			goto dropfrag;
 		}
 		m->m_flags |= M_FRAG;
 	} else
 		m->m_flags &= ~M_FRAG;
 	ip->ip_off <<= 3;
 
 
 	/*
 	 * Attempt reassembly; if it succeeds, proceed.
 	 * ip_reass() will return a different mbuf.
 	 */
 	ipstat.ips_fragments++;
 	/* Remember where the IP header is; GETIP() below relies on this. */
 	m->m_pkthdr.header = ip;
 
 	/* Previous ip_reass() started here. */
 	/*
 	 * Presence of header sizes in mbufs
 	 * would confuse code below.
 	 */
 	m->m_data += hlen;
 	m->m_len -= hlen;
 
 	/*
 	 * If first fragment to arrive, create a reassembly queue.
 	 */
 	if (fp == NULL) {
 		fp = uma_zalloc(ipq_zone, M_NOWAIT);
 		if (fp == NULL)
 			goto dropfrag;
 #ifdef MAC
 		if (mac_ipq_init(fp, M_NOWAIT) != 0) {
 			uma_zfree(ipq_zone, fp);
 			fp = NULL;
 			goto dropfrag;
 		}
 		mac_ipq_create(m, fp);
 #endif
 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
 		nipq++;
 		fp->ipq_nfrags = 1;
 		fp->ipq_ttl = IPFRAGTTL;
 		fp->ipq_p = ip->ip_p;
 		fp->ipq_id = ip->ip_id;
 		fp->ipq_src = ip->ip_src;
 		fp->ipq_dst = ip->ip_dst;
 		fp->ipq_frags = m;
 		m->m_nextpkt = NULL;
 		goto done;
 	} else {
 		fp->ipq_nfrags++;
 #ifdef MAC
 		mac_ipq_update(m, fp);
 #endif
 	}
 
 #define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))
 
 	/*
 	 * Handle ECN by comparing this segment with the first one;
 	 * if CE is set, do not lose CE.
 	 * drop if CE and not-ECT are mixed for the same packet.
 	 */
 	ecn = ip->ip_tos & IPTOS_ECN_MASK;
 	ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
 	if (ecn == IPTOS_ECN_CE) {
 		if (ecn0 == IPTOS_ECN_NOTECT)
 			goto dropfrag;
 		if (ecn0 != IPTOS_ECN_CE)
 			GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
 	}
 	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
 		goto dropfrag;
 
 	/*
 	 * Find a segment which begins after this one does.
 	 * On exit p is the segment preceding the insertion point (or
 	 * NULL) and q is the segment following it (or NULL).
 	 */
 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
 		if (GETIP(q)->ip_off > ip->ip_off)
 			break;
 
 	/*
 	 * If there is a preceding segment, it may provide some of
 	 * our data already.  If so, drop the data from the incoming
 	 * segment.  If it provides all of our data, drop us, otherwise
 	 * stick new segment in the proper place.
 	 *
 	 * If some of the data is dropped from the preceding
 	 * segment, then its checksum is invalidated.
 	 */
 	if (p) {
 		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
 		if (i > 0) {
 			if (i >= ip->ip_len)
 				goto dropfrag;
 			m_adj(m, i);
 			m->m_pkthdr.csum_flags = 0;
 			ip->ip_off += i;
 			ip->ip_len -= i;
 		}
 		m->m_nextpkt = p->m_nextpkt;
 		p->m_nextpkt = m;
 	} else {
 		m->m_nextpkt = fp->ipq_frags;
 		fp->ipq_frags = m;
 	}
 
 	/*
 	 * While we overlap succeeding segments trim them or,
 	 * if they are completely covered, dequeue them.
 	 */
 	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
 	     q = nq) {
 		i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
 		if (i < GETIP(q)->ip_len) {
 			GETIP(q)->ip_len -= i;
 			GETIP(q)->ip_off += i;
 			m_adj(q, i);
 			q->m_pkthdr.csum_flags = 0;
 			break;
 		}
 		nq = q->m_nextpkt;
 		m->m_nextpkt = nq;
 		ipstat.ips_fragdropped++;
 		fp->ipq_nfrags--;
 		m_freem(q);
 	}
 
 	/*
 	 * Check for complete reassembly and perform frag per packet
 	 * limiting.
 	 *
 	 * Frag limiting is performed here so that the nth frag has
 	 * a chance to complete the packet before we drop the packet.
 	 * As a result, n+1 frags are actually allowed per packet, but
 	 * only n will ever be stored. (n = maxfragsperpacket.)
 	 *
 	 */
 	next = 0;
 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
 		/* A gap in the offsets means the datagram is incomplete. */
 		if (GETIP(q)->ip_off != next) {
 			if (fp->ipq_nfrags > maxfragsperpacket) {
 				ipstat.ips_fragdropped += fp->ipq_nfrags;
 				ip_freef(head, fp);
 			}
 			goto done;
 		}
 		next += GETIP(q)->ip_len;
 	}
 	/* Make sure the last packet didn't have the IP_MF flag */
 	if (p->m_flags & M_FRAG) {
 		if (fp->ipq_nfrags > maxfragsperpacket) {
 			ipstat.ips_fragdropped += fp->ipq_nfrags;
 			ip_freef(head, fp);
 		}
 		goto done;
 	}
 
 	/*
 	 * Reassembly is complete.  Make sure the packet is a sane size.
 	 */
 	q = fp->ipq_frags;
 	ip = GETIP(q);
 	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
 		ipstat.ips_toolong++;
 		ipstat.ips_fragdropped += fp->ipq_nfrags;
 		ip_freef(head, fp);
 		goto done;
 	}
 
 	/*
 	 * Concatenate fragments.
 	 */
 	m = q;
 	t = m->m_next;
 	m->m_next = NULL;
 	m_cat(m, t);
 	nq = q->m_nextpkt;
 	q->m_nextpkt = NULL;
 	for (q = nq; q != NULL; q = nq) {
 		nq = q->m_nextpkt;
 		q->m_nextpkt = NULL;
 		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
 		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
 		m_cat(m, q);
 	}
 	/*
 	 * In order to do checksumming faster we do 'end-around carry' here
 	 * (and not in for{} loop), though it implies we are not going to
 	 * reassemble more than 64k fragments.
 	 */
 	m->m_pkthdr.csum_data =
 	    (m->m_pkthdr.csum_data & 0xffff) + (m->m_pkthdr.csum_data >> 16);
 #ifdef MAC
 	mac_ipq_reassemble(fp, m);
 	mac_ipq_destroy(fp);
 #endif
 
 	/*
 	 * Create header for new ip packet by modifying header of first
 	 * packet;  dequeue and discard fragment reassembly header.
 	 * Make header visible.
 	 */
 	ip->ip_len = (ip->ip_hl << 2) + next;
 	ip->ip_src = fp->ipq_src;
 	ip->ip_dst = fp->ipq_dst;
 	TAILQ_REMOVE(head, fp, ipq_list);
 	nipq--;
 	uma_zfree(ipq_zone, fp);
 	m->m_len += (ip->ip_hl << 2);
 	m->m_data -= (ip->ip_hl << 2);
 	/* some debugging cruft by sklower, below, will go away soon */
 	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
 		m_fixhdr(m);
 	ipstat.ips_reassembled++;
 	IPQ_UNLOCK();
 	return (m);
 
 	/*
 	 * Drop only the incoming fragment; the queue (if any) stays.
 	 *
 	 * NOTE(review): the IP_MF length check near "found:" jumps here
 	 * before ipq_nfrags has been incremented for this fragment, so
 	 * when fp is an existing queue the decrement below has no
 	 * matching increment -- confirm whether that is intended.
 	 */
 dropfrag:
 	ipstat.ips_fragdropped++;
 	if (fp != NULL)
 		fp->ipq_nfrags--;
 	m_freem(m);
 done:
 	IPQ_UNLOCK();
 	return (NULL);
 
 #undef GETIP
 }
 
 /*
  * Free a fragment reassembly header and all
  * associated datagrams.
  *
  * Every mbuf on fp's fragment list is freed, fp is unlinked from the
  * bucket fhp and returned to ipq_zone, and nipq is decremented.  The IPQ
  * lock must be held (asserted below).  No statistics are updated here;
  * the callers visible in this file account for the dropped fragments
  * before calling.
  */
 static void
 ip_freef(struct ipqhead *fhp, struct ipq *fp)
 {
 	struct mbuf *q;
 
 	IPQ_LOCK_ASSERT();
 
 	/* Pop and free fragments one at a time until the list is empty. */
 	while (fp->ipq_frags) {
 		q = fp->ipq_frags;
 		fp->ipq_frags = q->m_nextpkt;
 		m_freem(q);
 	}
 	TAILQ_REMOVE(fhp, fp, ipq_list);
 	uma_zfree(ipq_zone, fp);
 	nipq--;
 }
 
 /*
  * IP timer processing;
  * if a timer expires on a reassembly
  * queue, discard it.
  *
  * Each call decrements ipq_ttl of every reassembly queue once and frees
  * the queues whose ttl reaches zero (counted in ips_fragtimeout).  After
  * that, if a non-negative maxnipq is exceeded, queues are freed from the
  * head of each bucket in turn (counted in ips_fragdropped) until nipq is
  * back within the limit.  The whole pass runs under the IPQ lock.
  */
 void
 ip_slowtimo(void)
 {
 	struct ipq *fp;
 	int i;
 
 	IPQ_LOCK();
 	for (i = 0; i < IPREASS_NHASH; i++) {
 		for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
 			struct ipq *fpp;
 
 			/*
 			 * Step fp to the next entry first, because the
 			 * current one (fpp) may be freed below.
 			 */
 			fpp = fp;
 			fp = TAILQ_NEXT(fp, ipq_list);
 			if(--fpp->ipq_ttl == 0) {
 				ipstat.ips_fragtimeout += fpp->ipq_nfrags;
 				ip_freef(&ipq[i], fpp);
 			}
 		}
 	}
 	/*
 	 * If we are over the maximum number of fragments
 	 * (due to the limit being lowered), drain off
 	 * enough to get down to the new limit.
 	 */
 	if (maxnipq >= 0 && nipq > maxnipq) {
 		for (i = 0; i < IPREASS_NHASH; i++) {
 			while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i])) {
 				ipstat.ips_fragdropped +=
 				    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
 				ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
 			}
 		}
 	}
 	IPQ_UNLOCK();
 }
 
 /*
  * Drain off all datagram fragments.
  *
  * Under the IPQ lock, every reassembly queue in every hash bucket is
  * freed and its fragments are counted in ips_fragdropped.  After the
  * lock is released, in_rtqdrain() is called.
  */
 void
 ip_drain(void)
 {
 	int     i;
 
 	IPQ_LOCK();
 	for (i = 0; i < IPREASS_NHASH; i++) {
 		/* ip_freef() unlinks the head, so this loop terminates. */
 		while(!TAILQ_EMPTY(&ipq[i])) {
 			ipstat.ips_fragdropped +=
 			    TAILQ_FIRST(&ipq[i])->ipq_nfrags;
 			ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
 		}
 	}
 	IPQ_UNLOCK();
 	in_rtqdrain();
 }
 
 /*
  * The protocol to be inserted into ip_protox[] must be already registered
  * in inetsw[], either statically or through pf_proto_register().
  *
  * Returns:
  *	0		ip_protox[ipproto] now indexes the protocol's
  *			inetsw[] entry
  *	EPROTONOSUPPORT	ipproto is 0, or no PF_INET entry with that
  *			protocol number exists in the inetdomain protosw
  *	EPFNOSUPPORT	the raw IP (IPPROTO_RAW, SOCK_RAW) entry could not
  *			be found
  *	EEXIST		the slot already points at something other than
  *			the raw IP entry
  *	EINVAL		the matching entry's protocol number is not below
  *			IPPROTO_MAX
  */
 int
 ipproto_register(u_char ipproto)
 {
 	struct protosw *pr;
 
 	/* Sanity checks. */
 	if (ipproto == 0)
 		return (EPROTONOSUPPORT);
 
 	/*
 	 * The protocol slot must not be occupied by another protocol
 	 * already.  An index pointing to IPPROTO_RAW is unused.
 	 */
 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (pr == NULL)
 		return (EPFNOSUPPORT);
 	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
 		return (EEXIST);
 
 	/* Find the protocol position in inetsw[] and set the index. */
 	for (pr = inetdomain.dom_protosw;
 	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
 		if (pr->pr_domain->dom_family == PF_INET &&
 		    pr->pr_protocol && pr->pr_protocol == ipproto) {
 			/* Be careful to only index valid IP protocols. */
 			if (pr->pr_protocol < IPPROTO_MAX) {
 				ip_protox[pr->pr_protocol] = pr - inetsw;
 				return (0);
 			} else
 				return (EINVAL);
 		}
 	}
 	return (EPROTONOSUPPORT);
 }
 
 /*
  * Undo ipproto_register(): point ip_protox[ipproto] back at the raw IP
  * (IPPROTO_RAW) entry of inetsw[], which marks the slot as unused.
  *
  * Returns:
  *	0		the slot was reset
  *	EPROTONOSUPPORT	ipproto is 0
  *	EPFNOSUPPORT	the raw IP (IPPROTO_RAW, SOCK_RAW) entry could not
  *			be found
  *	ENOENT		the slot already points at the raw IP entry, i.e.
  *			nothing was registered
  */
 int
 ipproto_unregister(u_char ipproto)
 {
 	struct protosw *pr;
 
 	/* Sanity checks. */
 	if (ipproto == 0)
 		return (EPROTONOSUPPORT);
 
 	/* Check if the protocol was indeed registered. */
 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (pr == NULL)
 		return (EPFNOSUPPORT);
 	if (ip_protox[ipproto] == pr - inetsw)  /* IPPROTO_RAW */
 		return (ENOENT);
 
 	/* Reset the protocol slot to IPPROTO_RAW. */
 	ip_protox[ipproto] = pr - inetsw;
 	return (0);
 }
 
 /*
  * Given address of next destination (final or next hop),
  * return internet address info of interface to be used to get there.
  *
  * A temporary struct route is filled in with dst and looked up with
  * RTF_CLONING passed as the flags argument of the lookup routine.  In
  * the new version of this function the lookup goes through
  * in_rtalloc_ign() and takes the additional fibnum argument.
  *
  * Returns NULL if no route was found.  Otherwise the route's rt_ifa is
  * returned as a struct in_ifaddr; the route itself is released with
  * RTFREE() before returning.
  */
 struct in_ifaddr *
-ip_rtaddr(struct in_addr dst)
+ip_rtaddr(struct in_addr dst, u_int fibnum)
 {
 	struct route sro;
 	struct sockaddr_in *sin;
 	struct in_ifaddr *ifa;
 
 	bzero(&sro, sizeof(sro));
 	sin = (struct sockaddr_in *)&sro.ro_dst;
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(*sin);
 	sin->sin_addr = dst;
-	rtalloc_ign(&sro, RTF_CLONING);
+	in_rtalloc_ign(&sro, RTF_CLONING, fibnum);
 
 	if (sro.ro_rt == NULL)
 		return (NULL);
 
 	ifa = ifatoia(sro.ro_rt->rt_ifa);
 	RTFREE(sro.ro_rt);
 	return (ifa);
 }
 
 /*
  * Table of errno values with PRC_NCMDS slots, laid out four per row.
  * A 0 entry carries no error.  Only 22 initializers are given, so any
  * remaining slots are implicitly 0.
  *
  * NOTE(review): presumably indexed by PRC_* command code to obtain the
  * errno reported for that event -- the users of this table are not
  * visible in this section.
  */
 u_char inetctlerrmap[PRC_NCMDS] = {
 	0,		0,		0,		0,
 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
 	0,		0,		EHOSTUNREACH,	0,
 	ENOPROTOOPT,	ECONNREFUSED
 };
 
 /*
  * Forward a packet.  If some error occurs return the sender
  * an icmp packet.  Note we can't always generate a meaningful
  * icmp message because icmp doesn't have a large enough repertoire
  * of codes and types.
  *
  * If not forwarding, just drop the packet.  This could be confusing
  * if ipforwarding was zero but some routing protocol was advancing
  * us as a gateway to somewhere.  However, we must let the routing
  * protocol deal with that.
  *
  * The srcrt parameter indicates whether the packet is being forwarded
  * via a source route.
  *
  * The mbuf m is given up on every path through this function: it is
  * freed here, handed to icmp_error(), or handed to ip_output().
  */
 void
 ip_forward(struct mbuf *m, int srcrt)
 {
 	struct ip *ip = mtod(m, struct ip *);
 	struct in_ifaddr *ia = NULL;
 	struct mbuf *mcopy;
 	struct in_addr dest;
 	struct route ro;
 	int error, type = 0, code = 0, mtu = 0;
 
 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
 		ipstat.ips_cantforward++;
 		m_freem(m);
 		return;
 	}
 #ifdef IPSTEALTH
 	if (!ipstealth) {
 #endif
 		if (ip->ip_ttl <= IPTTLDEC) {
 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
 			    0, 0);
 			return;
 		}
 #ifdef IPSTEALTH
 	}
 #endif
 
 	/*
 	 * Find the outgoing interface address.  A missing route is only
 	 * fatal when the packet is not source routed, so ia may still be
 	 * NULL below when srcrt is set.
 	 */
-	ia = ip_rtaddr(ip->ip_dst);
+	ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
 	if (!srcrt && ia == NULL) {
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
 		return;
 	}
 
 	/*
 	 * Save the IP header and at most 8 bytes of the payload,
 	 * in case we need to generate an ICMP message to the src.
 	 *
 	 * XXX this can be optimized a lot by saving the data in a local
 	 * buffer on the stack (72 bytes at most), and only allocating the
 	 * mbuf if really necessary. The vast majority of the packets
 	 * are forwarded without having to send an ICMP back (either
 	 * because unnecessary, or because rate limited), so we are
 	 * really wasting a lot of work here.
 	 *
 	 * We don't use m_copy() because it might return a reference
 	 * to a shared cluster. Both this function and ip_output()
 	 * assume exclusive access to the IP header in `m', so any
 	 * data in a cluster may change before we reach icmp_error().
 	 */
 	MGETHDR(mcopy, M_DONTWAIT, m->m_type);
 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
 		/*
 		 * It's probably ok if the pkthdr dup fails (because
 		 * the deep copy of the tag chain failed), but for now
 		 * be conservative and just discard the copy since
 		 * code below may some day want the tags.
 		 */
 		m_free(mcopy);
 		mcopy = NULL;
 	}
 	if (mcopy != NULL) {
 		mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy));
 		mcopy->m_pkthdr.len = mcopy->m_len;
 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
 	}
 
 #ifdef IPSTEALTH
 	if (!ipstealth) {
 #endif
 		ip->ip_ttl -= IPTTLDEC;
 #ifdef IPSTEALTH
 	}
 #endif
 
 	/*
 	 * If forwarding packet using same interface that it came in on,
 	 * perhaps should send a redirect to sender to shortcut a hop.
 	 * Only send redirect if source is sending directly to us,
 	 * and if packet was not source routed (or has any options).
 	 * Also, don't send redirect if forwarding using a default route
 	 * or a route modified by a redirect.
 	 */
 	dest.s_addr = 0;
 	if (!srcrt && ipsendredirects && ia->ia_ifp == m->m_pkthdr.rcvif) {
 		struct sockaddr_in *sin;
 		struct rtentry *rt;
 
 		bzero(&ro, sizeof(ro));
 		sin = (struct sockaddr_in *)&ro.ro_dst;
 		sin->sin_family = AF_INET;
 		sin->sin_len = sizeof(*sin);
 		sin->sin_addr = ip->ip_dst;
-		rtalloc_ign(&ro, RTF_CLONING);
+		in_rtalloc_ign(&ro, RTF_CLONING, M_GETFIB(m));
 
 		rt = ro.ro_rt;
 
 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
 		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
 #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
 			u_long src = ntohl(ip->ip_src.s_addr);
 
 			if (RTA(rt) &&
 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
 				if (rt->rt_flags & RTF_GATEWAY)
 					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
 				else
 					dest.s_addr = ip->ip_dst.s_addr;
 				/* Router requirements says to only send host redirects */
 				type = ICMP_REDIRECT;
 				code = ICMP_REDIRECT_HOST;
 			}
 		}
 		if (rt)
 			RTFREE(rt);
 	}
 
 	/*
 	 * Try to cache the route MTU from ip_output so we can consider it for
 	 * the ICMP_UNREACH_NEEDFRAG "Next-Hop MTU" field described in RFC1191.
 	 *
 	 * NOTE(review): ro has just been zeroed, so ro_dst carries no
 	 * destination when the lookup below is issued -- confirm that the
 	 * lookup is useful here rather than leaving it all to ip_output().
 	 * The new (+) line also calls rtalloc_ign_fib() directly, whereas
 	 * the other lookups in this file use in_rtalloc_ign().
 	 */
 	bzero(&ro, sizeof(ro));
-	rtalloc_ign(&ro, RTF_CLONING);
+	rtalloc_ign_fib(&ro, RTF_CLONING, M_GETFIB(m));
 
 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
 
 	if (error == EMSGSIZE && ro.ro_rt)
 		mtu = ro.ro_rt->rt_rmx.rmx_mtu;
 	if (ro.ro_rt)
 		RTFREE(ro.ro_rt);
 
 	if (error)
 		ipstat.ips_cantforward++;
 	else {
 		ipstat.ips_forward++;
 		if (type)
 			ipstat.ips_redirectsent++;
 		else {
 			/* Forwarded and no redirect wanted: all done. */
 			if (mcopy)
 				m_freem(mcopy);
 			return;
 		}
 	}
 	/* Without the saved copy there is nothing to build an ICMP from. */
 	if (mcopy == NULL)
 		return;
 
 	/* Pick the ICMP type/code that matches the ip_output() result. */
 	switch (error) {
 
 	case 0:				/* forwarded, but need redirect */
 		/* type, code set above */
 		break;
 
 	case ENETUNREACH:		/* shouldn't happen, checked above */
 	case EHOSTUNREACH:
 	case ENETDOWN:
 	case EHOSTDOWN:
 	default:
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_HOST;
 		break;
 
 	case EMSGSIZE:
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_NEEDFRAG;
 
 		/*
 		 * NOTE(review): m was handed to ip_output() above, yet m
 		 * (in the IPSEC case) and ip, which points into m, are
 		 * used again in this case -- confirm they are still valid.
 		 */
 #ifdef IPSEC
 		/* 
 		 * If IPsec is configured for this path,
 		 * override any possibly mtu value set by ip_output.
 		 */ 
 		mtu = ip_ipsec_mtu(m, mtu);
 #endif /* IPSEC */
 		/*
 		 * If the MTU was set before make sure we are below the
 		 * interface MTU.
 		 * If the MTU wasn't set before use the interface mtu or
 		 * fall back to the next smaller mtu step compared to the
 		 * current packet size.
 		 */
 		if (mtu != 0) {
 			if (ia != NULL)
 				mtu = min(mtu, ia->ia_ifp->if_mtu);
 		} else {
 			if (ia != NULL)
 				mtu = ia->ia_ifp->if_mtu;
 			else
 				mtu = ip_next_mtu(ip->ip_len, 0);
 		}
 		ipstat.ips_cantfrag++;
 		break;
 
 	case ENOBUFS:
 		/*
 		 * A router should not generate ICMP_SOURCEQUENCH as
 		 * required in RFC1812 Requirements for IP Version 4 Routers.
 		 * Source quench could be a big problem under DoS attacks,
 		 * or if the underlying interface is rate-limited.
 		 * Those who need source quench packets may re-enable them
 		 * via the net.inet.ip.sendsourcequench sysctl.
 		 */
 		if (ip_sendsourcequench == 0) {
 			m_freem(mcopy);
 			return;
 		} else {
 			type = ICMP_SOURCEQUENCH;
 			code = 0;
 		}
 		break;
 
 	case EACCES:			/* ipfw denied packet */
 		m_freem(mcopy);
 		return;
 	}
 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
 }
 
 /*
  * Build the chain of control mbufs for a received packet according to
  * the options enabled on the socket and the pcb.  Each enabled item is
  * created with sbcreatecontrol() and stored at *mp; mp is advanced to the
  * new mbuf's m_next only when the allocation succeeded, so an item whose
  * allocation fails is simply left out of the chain.
  *
  * Items, in the order they are appended:
  *	SO_BINTIME		SCM_BINTIME	(struct bintime)
  *	SO_TIMESTAMP		SCM_TIMESTAMP	(struct timeval)
  *	INP_RECVDSTADDR		IP_RECVDSTADDR	(ip->ip_dst)
  *	INP_RECVTTL		IP_RECVTTL	(ip->ip_ttl)
  *	INP_RECVIF		IP_RECVIF	(struct sockaddr_dl)
  */
 void
 ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
     struct mbuf *m)
 {
 	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
 		struct bintime bt;
 
 		/* One clock read serves both timestamp formats. */
 		bintime(&bt);
 		if (inp->inp_socket->so_options & SO_BINTIME) {
 			*mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt),
 			SCM_BINTIME, SOL_SOCKET);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
 		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
 			struct timeval tv;
 
 			bintime2timeval(&bt, &tv);
 			*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
 				SCM_TIMESTAMP, SOL_SOCKET);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
 	}
 	if (inp->inp_flags & INP_RECVDSTADDR) {
 		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	if (inp->inp_flags & INP_RECVTTL) {
 		*mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 #ifdef notyet
 	/* XXX
 	 * Moving these out of udp_input() made them even more broken
 	 * than they already were.
 	 */
 	/* options were tossed already */
 	if (inp->inp_flags & INP_RECVOPTS) {
 		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	/* ip_srcroute doesn't do what we want here, need to fix */
 	if (inp->inp_flags & INP_RECVRETOPTS) {
 		*mp = sbcreatecontrol((caddr_t) ip_srcroute(m),
 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 #endif
 	if (inp->inp_flags & INP_RECVIF) {
 		struct ifnet *ifp;
 		/* Local buffer: a sockaddr_dl plus 32 bytes of padding. */
 		struct sdlbuf {
 			struct sockaddr_dl sdl;
 			u_char	pad[32];
 		} sdlbuf;
 		struct sockaddr_dl *sdp;
 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
 
 		/*
 		 * Copy the receive interface's link-level address when the
 		 * interface is set and its index is in range; otherwise
 		 * (or if the address is not AF_LINK or does not fit in
 		 * sdlbuf) report an empty AF_LINK address with index 0.
 		 */
 		if (((ifp = m->m_pkthdr.rcvif)) 
 		&& ( ifp->if_index && (ifp->if_index <= if_index))) {
 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
 			/*
 			 * Change our mind and don't try copy.
 			 */
 			if ((sdp->sdl_family != AF_LINK)
 			|| (sdp->sdl_len > sizeof(sdlbuf))) {
 				goto makedummy;
 			}
 			bcopy(sdp, sdl2, sdp->sdl_len);
 		} else {
 makedummy:	
 			sdl2->sdl_len
 				= offsetof(struct sockaddr_dl, sdl_data[0]);
 			sdl2->sdl_family = AF_LINK;
 			sdl2->sdl_index = 0;
 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
 		}
 		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
 			IP_RECVIF, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 }
 
 /*
  * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
  * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on
  * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
  * compiled.
  */
 /* Non-zero while this file's contribution to rsvp_on is in place. */
 static int ip_rsvp_on;
 /* Socket registered through ip_rsvp_init(), or NULL if none. */
 struct socket *ip_rsvpd;
 /*
  * Register so as the RSVP socket.
  *
  * Returns EOPNOTSUPP unless so is a SOCK_RAW socket for IPPROTO_RSVP,
  * EADDRINUSE if a socket is already registered, and 0 on success.
  */
 int
 ip_rsvp_init(struct socket *so)
 {
 	if (so->so_type != SOCK_RAW ||
 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
 		return EOPNOTSUPP;
 
 	if (ip_rsvpd != NULL)
 		return EADDRINUSE;
 
 	ip_rsvpd = so;
 	/*
 	 * This may seem silly, but we need to be sure we don't over-increment
 	 * the RSVP counter, in case something slips up.
 	 */
 	if (!ip_rsvp_on) {
 		ip_rsvp_on = 1;
 		rsvp_on++;
 	}
 
 	return 0;
 }
 
 /*
  * Unregister the RSVP socket: clear ip_rsvpd and, if ip_rsvp_on is set,
  * clear it and decrement rsvp_on.  Always returns 0.
  */
 int
 ip_rsvp_done(void)
 {
 	ip_rsvpd = NULL;
 	/*
 	 * This may seem silly, but we need to be sure we don't over-decrement
 	 * the RSVP counter, in case something slips up.
 	 */
 	if (ip_rsvp_on) {
 		ip_rsvp_on = 0;
 		rsvp_on--;
 	}
 	return 0;
 }
 
 /*
  * Input routine for RSVP packets.  Decisions are taken in this order:
  *   1. if the rsvp_input_p hook is set, the packet is passed to it;
  *   2. if rsvp_on is zero, the packet is freed;
  *   3. if an RSVP socket is registered (ip_rsvpd), the packet is passed
  *      to rip_input();
  *   4. otherwise the packet is freed.
  * The mbuf is consumed on every path.
  */
 void
 rsvp_input(struct mbuf *m, int off)	/* XXX must fixup manually */
 {
 	if (rsvp_input_p) { /* call the real one if loaded */
 		rsvp_input_p(m, off);
 		return;
 	}
 
 	/* Can still get packets with rsvp_on = 0 if there is a local member
 	 * of the group to which the RSVP packet is addressed.  But in this
 	 * case we want to throw the packet away.
 	 */
 	
 	if (!rsvp_on) {
 		m_freem(m);
 		return;
 	}
 
 	if (ip_rsvpd != NULL) { 
 		rip_input(m, off);
 		return;
 	}
 	/* Drop the packet */
 	m_freem(m);
 }
Index: head/sys/netinet/ip_mroute.c
===================================================================
--- head/sys/netinet/ip_mroute.c	(revision 178887)
+++ head/sys/netinet/ip_mroute.c	(revision 178888)
@@ -1,3151 +1,3151 @@
 /*-
  * Copyright (c) 1989 Stephen Deering
  * Copyright (c) 1992, 1993
  *      The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Stephen Deering of Stanford University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *      @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
  */
 
 /*
  * IP multicast forwarding procedures
  *
  * Written by David Waitzman, BBN Labs, August 1988.
  * Modified by Steve Deering, Stanford, February 1989.
  * Modified by Mark J. Steiglitz, Stanford, May, 1991
  * Modified by Van Jacobson, LBL, January 1993
  * Modified by Ajit Thyagarajan, PARC, August 1993
  * Modified by Bill Fenner, PARC, April 1995
  * Modified by Ahmed Helmy, SGI, June 1996
  * Modified by George Edmond Eddy (Rusty), ISI, February 1998
  * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000
  * Modified by Hitoshi Asaeda, WIDE, August 2000
  * Modified by Pavlin Radoslavov, ICSI, October 2002
  *
  * MROUTING Revision: 3.5
  * and PIM-SMv2 and PIM-DM support, advanced API support,
  * bandwidth metering and signaling
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_mac.h"
 #include "opt_mrouting.h"
 
 #define _PIM_VT 1
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/time.h>
 #include <net/if.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/igmp.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_encap.h>
 #include <netinet/ip_mroute.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/pim.h>
 #include <netinet/pim_var.h>
 #include <netinet/udp.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/ip6_mroute.h>
 #include <netinet6/ip6_var.h>
 #endif
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Control debugging code for rsvp and multicast routing code.
  * Can only set them with the debugger.
  */
 static u_int    rsvpdebug;		/* non-zero enables debugging	*/
 
 static u_int	mrtdebug;		/* any set of the flags below	*/
 #define		DEBUG_MFC	0x02
 #define		DEBUG_FORWARD	0x04
 #define		DEBUG_EXPIRE	0x08
 #define		DEBUG_XMIT	0x10
 #define		DEBUG_PIM	0x20
 
 #define		VIFI_INVALID	((vifi_t) -1)
 
 #define M_HASCL(m)	((m)->m_flags & M_EXT)
 
 static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast routing tables");
 
 /*
  * Locking.  We use two locks: one for the virtual interface table and
  * one for the forwarding table.  These locks may be nested in which case
  * the VIF lock must always be taken first.  Note that each lock is used
  * to cover not only the specific data structure but also related data
  * structures.  It may be better to add more fine-grained locking later;
  * it's not clear how performance-critical this code is.
  *
  * XXX: This module could particularly benefit from being cleaned
  *      up to use the <sys/queue.h> macros.
  *
  */
 
 static struct mrtstat	mrtstat;
 SYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW,
     &mrtstat, mrtstat,
     "Multicast Routing Statistics (struct mrtstat, netinet/ip_mroute.h)");
 
 static struct mfc	*mfctable[MFCTBLSIZ];
 SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD,
     &mfctable, sizeof(mfctable), "S,*mfc[MFCTBLSIZ]",
     "Multicast Forwarding Table (struct *mfc[MFCTBLSIZ], netinet/ip_mroute.h)");
 
 static struct mtx mrouter_mtx;
 #define	MROUTER_LOCK()		mtx_lock(&mrouter_mtx)
 #define	MROUTER_UNLOCK()	mtx_unlock(&mrouter_mtx)
 #define	MROUTER_LOCK_ASSERT()	mtx_assert(&mrouter_mtx, MA_OWNED)
 #define	MROUTER_LOCK_INIT()	\
 	mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF)
 #define	MROUTER_LOCK_DESTROY()	mtx_destroy(&mrouter_mtx)
 
 static struct mtx mfc_mtx;
 #define	MFC_LOCK()	mtx_lock(&mfc_mtx)
 #define	MFC_UNLOCK()	mtx_unlock(&mfc_mtx)
 #define	MFC_LOCK_ASSERT()	mtx_assert(&mfc_mtx, MA_OWNED)
 #define	MFC_LOCK_INIT()	mtx_init(&mfc_mtx, "mroute mfc table", NULL, MTX_DEF)
 #define	MFC_LOCK_DESTROY()	mtx_destroy(&mfc_mtx)
 
 static struct vif	viftable[MAXVIFS];
 SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_RD,
     &viftable, sizeof(viftable), "S,vif[MAXVIFS]",
     "Multicast Virtual Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)");
 
 static struct mtx vif_mtx;
 #define	VIF_LOCK()	mtx_lock(&vif_mtx)
 #define	VIF_UNLOCK()	mtx_unlock(&vif_mtx)
 #define	VIF_LOCK_ASSERT()	mtx_assert(&vif_mtx, MA_OWNED)
 #define	VIF_LOCK_INIT()	mtx_init(&vif_mtx, "mroute vif table", NULL, MTX_DEF)
 #define	VIF_LOCK_DESTROY()	mtx_destroy(&vif_mtx)
 
 static u_char		nexpire[MFCTBLSIZ];
 
 static eventhandler_tag if_detach_event_tag = NULL;
 
 static struct callout expire_upcalls_ch;
 
 #define		EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second		*/
 #define		UPCALL_EXPIRE	6		/* number of timeouts	*/
 
 #define ENCAP_TTL 64
 
 /*
  * Bandwidth meter variables and constants
  */
 static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters");
 /*
  * Pending timeouts are stored in a hash table, the key being the
  * expiration time. Periodically, the entries are analysed and processed.
  */
 #define BW_METER_BUCKETS	1024
 static struct bw_meter *bw_meter_timers[BW_METER_BUCKETS];
 static struct callout bw_meter_ch;
 #define BW_METER_PERIOD (hz)		/* periodical handling of bw meters */
 
 /*
  * Pending upcalls are stored in a vector which is flushed when
  * full, or periodically
  */
 static struct bw_upcall	bw_upcalls[BW_UPCALLS_MAX];
 static u_int	bw_upcalls_n; /* # of pending upcalls */
 static struct callout bw_upcalls_ch;
 #define BW_UPCALLS_PERIOD (hz)		/* periodical flush of bw upcalls */
 
 static struct pimstat pimstat;
 
 SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
 SYSCTL_STRUCT(_net_inet_pim, PIMCTL_STATS, stats, CTLFLAG_RD,
     &pimstat, pimstat,
     "PIM Statistics (struct pimstat, netinet/pim_var.h)");
 
 static u_long	pim_squelch_wholepkt = 0;
 SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW,
     &pim_squelch_wholepkt, 0,
     "Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified");
 
 extern  struct domain inetdomain;
 /*
  * Protocol switch entry for PIM in inetdomain.  It is a SOCK_RAW entry
  * whose input routine is pim_input(); output, ctloutput and the user
  * request table are the raw IP ones (rip_output, rip_ctloutput,
  * rip_usrreqs).
  */
 struct protosw in_pim_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_PIM,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_LASTHDR,
 	.pr_input =		pim_input,
 	.pr_output =		(pr_output_t*)rip_output,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 };
 /*
  * NOTE(review): presumably the handle obtained when the entry above is
  * attached through the encap framework -- the attach code is not visible
  * in this section.
  */
 static const struct encaptab *pim_encap_cookie;
 
 #ifdef INET6
 /* ip6_mroute.c glue */
 extern struct in6_protosw in6_pim_protosw;
 static const struct encaptab *pim6_encap_cookie;
 
 extern int X_ip6_mrouter_set(struct socket *, struct sockopt *);
 extern int X_ip6_mrouter_get(struct socket *, struct sockopt *);
 extern int X_ip6_mrouter_done(void);
 extern int X_ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *);
 extern int X_mrt6_ioctl(int, caddr_t);
 #endif
 
 static int pim_encapcheck(const struct mbuf *, int, int, void *);
 
 /*
  * Note: the PIM Register encapsulation adds the following in front of a
  * data packet:
  *
  * struct pim_encap_hdr {
  *    struct ip ip;
  *    struct pim_encap_pimhdr  pim;
  * }
  *
  * struct pim_encap_hdr itself is only described here, not declared.
  * struct pim_encap_pimhdr, declared below, is its PIM part: a struct pim
  * followed by a 32-bit flags word.
  */
 
 struct pim_encap_pimhdr {
 	struct pim pim;
 	uint32_t   flags;
 };
 
 /*
  * Pre-built IP header for the PIM Register encapsulation described
  * above.  The initializer is positional; the order of the first two
  * values (header length in 32-bit words, IP version) depends on the byte
  * order.  TTL is ENCAP_TTL and the protocol is IPPROTO_PIM.  Members
  * after the checksum are not listed and are therefore zero.
  *
  * NOTE(review): presumably copied per packet with the remaining fields
  * (lengths, addresses, checksum) filled in by the sender -- that code is
  * not visible in this section.
  */
 static struct ip pim_encap_iphdr = {
 #if BYTE_ORDER == LITTLE_ENDIAN
 	sizeof(struct ip) >> 2,
 	IPVERSION,
 #else
 	IPVERSION,
 	sizeof(struct ip) >> 2,
 #endif
 	0,			/* tos */
 	sizeof(struct ip),	/* total length */
 	0,			/* id */
 	0,			/* frag offset */
 	ENCAP_TTL,
 	IPPROTO_PIM,
 	0,			/* checksum */
 };
 
 /*
  * Template for the PIM part of the Register encapsulation: version and
  * message type set to PIM_REGISTER, everything else zero.
  */
 static struct pim_encap_pimhdr pim_encap_pimhdr = {
     {
 	PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */
 	0,			/* reserved */
 	0,			/* checksum */
     },
     0				/* flags */
 };
 
 static struct ifnet multicast_register_if;
 static vifi_t reg_vif_num = VIFI_INVALID;
 
 /*
  * Private variables.
  */
 static vifi_t	   numvifs;
 
 static u_long	X_ip_mcast_src(int vifi);
 static int	X_ip_mforward(struct ip *ip, struct ifnet *ifp,
 			struct mbuf *m, struct ip_moptions *imo);
 static int	X_ip_mrouter_done(void);
 static int	X_ip_mrouter_get(struct socket *so, struct sockopt *m);
 static int	X_ip_mrouter_set(struct socket *so, struct sockopt *m);
 static int	X_legal_vif_num(int vif);
-static int	X_mrt_ioctl(int cmd, caddr_t data);
+static int	X_mrt_ioctl(int cmd, caddr_t data, int fibnum);
 
 static int get_sg_cnt(struct sioc_sg_req *);
 static int get_vif_cnt(struct sioc_vif_req *);
 static void if_detached_event(void *arg __unused, struct ifnet *);
 static int ip_mrouter_init(struct socket *, int);
 static int add_vif(struct vifctl *);
 static int del_vif_locked(vifi_t);
 static int del_vif(vifi_t);
 static int add_mfc(struct mfcctl2 *);
 static int del_mfc(struct mfcctl2 *);
 static int set_api_config(uint32_t *); /* chose API capabilities */
 static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *);
 static int set_assert(int);
 static void expire_upcalls(void *);
 static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t);
 static void phyint_send(struct ip *, struct vif *, struct mbuf *);
 static void send_packet(struct vif *, struct mbuf *);
 
 /*
  * Bandwidth monitoring
  */
 static void free_bw_list(struct bw_meter *list);
 static int add_bw_upcall(struct bw_upcall *);
 static int del_bw_upcall(struct bw_upcall *);
 static void bw_meter_receive_packet(struct bw_meter *x, int plen,
 		struct timeval *nowp);
 static void bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp);
 static void bw_upcalls_send(void);
 static void schedule_bw_meter(struct bw_meter *x, struct timeval *nowp);
 static void unschedule_bw_meter(struct bw_meter *x);
 static void bw_meter_process(void);
 static void expire_bw_upcalls_send(void *);
 static void expire_bw_meter_process(void *);
 
 static int pim_register_send(struct ip *, struct vif *,
 		struct mbuf *, struct mfc *);
 static int pim_register_send_rp(struct ip *, struct vif *,
 		struct mbuf *, struct mfc *);
 static int pim_register_send_upcall(struct ip *, struct vif *,
 		struct mbuf *, struct mfc *);
 static struct mbuf *pim_register_prepare(struct ip *, struct mbuf *);
 
 /*
  * whether or not special PIM assert processing is enabled.
  */
 static int pim_assert;
 /*
  * Rate limit for assert notification messages, in usec
  */
 #define ASSERT_MSG_TIME		3000000
 
 /*
  * Kernel multicast routing API capabilities and setup.
  * If more API capabilities are added to the kernel, they should be
  * recorded in `mrt_api_support'.
  */
 static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
 					 MRT_MFC_FLAGS_BORDER_VIF |
 					 MRT_MFC_RP |
 					 MRT_MFC_BW_UPCALL);
 /*
  * Capabilities currently in effect.  Written by set_api_config() (masked
  * with mrt_api_support) and cleared by ip_mrouter_reset() and
  * X_ip_mrouter_done().
  */
 static uint32_t mrt_api_config = 0;
 
 /*
  * Hash function for a source, group entry
  *
  * XORs each address with itself shifted right by 10 and by 20 bits and
  * passes the combined value to MFCHASHMOD().  Both arguments are
  * evaluated three times, so they must be free of side effects.
  */
 #define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
 			((g) >> 20) ^ ((g) >> 10) ^ (g))
 
 /*
  * Look up the forwarding cache entry for origin address `o' and multicast
  * group address `g'.  Entries that still have packets queued on mfc_stall
  * are skipped.  Returns the matching entry, or NULL if there is none.
  * The lookup statistics (mrtstat.mrts_mfc_lookups and
  * mrtstat.mrts_mfc_misses) are left for the caller to update.
  */
 static struct mfc *
 mfc_find(in_addr_t o, in_addr_t g)
 {
     struct mfc *ent;
 
     MFC_LOCK_ASSERT();
 
     ent = mfctable[MFCHASH(o,g)];
     while (ent != NULL) {
 	if (ent->mfc_origin.s_addr == o &&
 		ent->mfc_mcastgrp.s_addr == g && ent->mfc_stall == NULL)
 	    return ent;
 	ent = ent->mfc_next;
     }
     return NULL;
 }
 
 /*
  * Macros to compute elapsed time efficiently
  * Borrowed from Van Jacobson's scheduling code
  *
  * TV_DELTA stores (a - b), expressed in microseconds, into `delta'.
  * Differences of one or two whole seconds are handled without a multiply.
  */
 #define TV_DELTA(a, b, delta) {					\
 	int xxs;						\
 	delta = (a).tv_usec - (b).tv_usec;			\
 	xxs = (a).tv_sec - (b).tv_sec;				\
 	if (xxs == 1)						\
 		delta += 1000000;				\
 	else if (xxs == 2)					\
 		delta += 2 * 1000000;				\
 	else if (xxs != 0)					\
 		delta += (1000000 * xxs);			\
 }
 
 /* True when timeval `a' is strictly earlier than timeval `b'. */
 #define TV_LT(a, b) ((a).tv_sec < (b).tv_sec || \
 	      ((a).tv_sec <= (b).tv_sec && (a).tv_usec < (b).tv_usec))
 
 /*
  * Handle MRT setsockopt commands to modify the multicast routing tables.
  *
  * Every command except MRT_INIT is refused with EPERM unless `so' is the
  * socket currently recorded in ip_mrouter.  The option payload is copied
  * in with sooptcopyin() and handed to the matching helper.  Returns 0 or
  * an errno value; unknown option names yield EOPNOTSUPP.
  */
 static int
 X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
 {
     int	error, optval;
     vifi_t	vifi;
     struct	vifctl vifc;
     struct	mfcctl2 mfc;
     struct	bw_upcall bw_upcall;
     uint32_t	i;
 
     if (so != ip_mrouter && sopt->sopt_name != MRT_INIT)
 	return EPERM;
 
     error = 0;
     switch (sopt->sopt_name) {
     case MRT_INIT:
 	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 	if (error)
 	    break;
 	error = ip_mrouter_init(so, optval);
 	break;
 
     case MRT_DONE:
 	error = ip_mrouter_done();
 	break;
 
     case MRT_ADD_VIF:
 	error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc);
 	if (error)
 	    break;
 	error = add_vif(&vifc);
 	break;
 
     case MRT_DEL_VIF:
 	error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
 	if (error)
 	    break;
 	error = del_vif(vifi);
 	break;
 
     case MRT_ADD_MFC:
     case MRT_DEL_MFC:
 	/*
 	 * select data size depending on API version.
 	 */
 	if (sopt->sopt_name == MRT_ADD_MFC &&
 		mrt_api_config & MRT_API_FLAGS_ALL) {
 	    error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl2),
 				sizeof(struct mfcctl2));
 	} else {
 	    error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl),
 				sizeof(struct mfcctl));
 	    /* clear the part of mfc that lies beyond a struct mfcctl */
 	    bzero((caddr_t)&mfc + sizeof(struct mfcctl),
 			sizeof(mfc) - sizeof(struct mfcctl));
 	}
 	if (error)
 	    break;
 	if (sopt->sopt_name == MRT_ADD_MFC)
 	    error = add_mfc(&mfc);
 	else
 	    error = del_mfc(&mfc);
 	break;
 
     case MRT_ASSERT:
 	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 	if (error)
 	    break;
 	/*
 	 * NOTE(review): the return value of set_assert() (EINVAL for
 	 * values other than 0 and 1) is discarded here -- confirm that
 	 * this is intended.
 	 */
 	set_assert(optval);
 	break;
 
     case MRT_API_CONFIG:
 	error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
 	if (!error)
 	    error = set_api_config(&i);
 	/* on success, return the value left in i by set_api_config() */
 	if (!error)
 	    error = sooptcopyout(sopt, &i, sizeof i);
 	break;
 
     case MRT_ADD_BW_UPCALL:
     case MRT_DEL_BW_UPCALL:
 	error = sooptcopyin(sopt, &bw_upcall, sizeof bw_upcall,
 				sizeof bw_upcall);
 	if (error)
 	    break;
 	if (sopt->sopt_name == MRT_ADD_BW_UPCALL)
 	    error = add_bw_upcall(&bw_upcall);
 	else
 	    error = del_bw_upcall(&bw_upcall);
 	break;
 
     default:
 	error = EOPNOTSUPP;
 	break;
     }
     return error;
 }
 
 /*
  * Handle MRT getsockopt commands
  */
 static int
 X_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
 {
     int error;
     static int version = 0x0305; /* !!! why is this here? XXX */
 
     switch (sopt->sopt_name) {
     case MRT_VERSION:
 	error = sooptcopyout(sopt, &version, sizeof version);
 	break;
 
     case MRT_ASSERT:
 	error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert);
 	break;
 
     case MRT_API_SUPPORT:
 	error = sooptcopyout(sopt, &mrt_api_support, sizeof mrt_api_support);
 	break;
 
     case MRT_API_CONFIG:
 	error = sooptcopyout(sopt, &mrt_api_config, sizeof mrt_api_config);
 	break;
 
     default:
 	error = EOPNOTSUPP;
 	break;
     }
     return error;
 }
 
 /*
  * Handle ioctl commands to obtain information from the cache
  *
  * `data' is interpreted according to `cmd': a struct sioc_vif_req for
  * SIOCGETVIFCNT, a struct sioc_sg_req for SIOCGETSGCNT.  Any other command
  * yields EINVAL.  The `fibnum' argument is not referenced in this function.
  * Returns 0 or an errno value, including a failure from priv_check().
  */
 static int
-X_mrt_ioctl(int cmd, caddr_t data)
+X_mrt_ioctl(int cmd, caddr_t data, int fibnum)
 {
     int error = 0;
 
     /*
      * Currently the only function calling this ioctl routine is rtioctl().
      * Typically, only root can create the raw socket in order to execute
      * this ioctl method, however the request might be coming from a prison
      */
     error = priv_check(curthread, PRIV_NETINET_MROUTE);
     if (error)
 	return (error);
     switch (cmd) {
     case (SIOCGETVIFCNT):
 	error = get_vif_cnt((struct sioc_vif_req *)data);
 	break;
 
     case (SIOCGETSGCNT):
 	error = get_sg_cnt((struct sioc_sg_req *)data);
 	break;
 
     default:
 	error = EINVAL;
 	break;
     }
     return error;
 }
 
 /*
  * returns the packet, byte, rpf-failure count for the source group provided
  */
 static int
 get_sg_cnt(struct sioc_sg_req *req)
 {
     struct mfc *rt;
 
     MFC_LOCK();
     rt = mfc_find(req->src.s_addr, req->grp.s_addr);
     if (rt == NULL) {
 	MFC_UNLOCK();
 	req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
 	return EADDRNOTAVAIL;
     }
     req->pktcnt = rt->mfc_pkt_cnt;
     req->bytecnt = rt->mfc_byte_cnt;
     req->wrong_if = rt->mfc_wrong_if;
     MFC_UNLOCK();
     return 0;
 }
 
 /*
  * returns the input and output packet and byte counts on the vif provided
  */
 static int
 get_vif_cnt(struct sioc_vif_req *req)
 {
     vifi_t vifi = req->vifi;
 
     VIF_LOCK();
     if (vifi >= numvifs) {
 	VIF_UNLOCK();
 	return EINVAL;
     }
 
     req->icount = viftable[vifi].v_pkt_in;
     req->ocount = viftable[vifi].v_pkt_out;
     req->ibytes = viftable[vifi].v_bytes_in;
     req->obytes = viftable[vifi].v_bytes_out;
     VIF_UNLOCK();
 
     return 0;
 }
 
 static void
 ip_mrouter_reset(void)
 {
     bzero((caddr_t)mfctable, sizeof(mfctable));
     bzero((caddr_t)nexpire, sizeof(nexpire));
 
     pim_assert = 0;
     mrt_api_config = 0;
 
     callout_init(&expire_upcalls_ch, CALLOUT_MPSAFE);
 
     bw_upcalls_n = 0;
     bzero((caddr_t)bw_meter_timers, sizeof(bw_meter_timers));
     callout_init(&bw_upcalls_ch, CALLOUT_MPSAFE);
     callout_init(&bw_meter_ch, CALLOUT_MPSAFE);
 }
 
 /*
  * Handler registered by ip_mrouter_init() for ifnet_departure_event.
  * Removes every vif that is bound to the departing interface `ifp',
  * together with the forwarding cache entries whose parent is that vif.
  * Does nothing when no multicast routing socket is registered.
  */
 static void
 if_detached_event(void *arg __unused, struct ifnet *ifp)
 {
     vifi_t vifi;
     int i;
     struct mfc *mfc;
     struct mfc *nmfc;
     struct mfc **ppmfc;	/* Pointer to previous node's next-pointer */
     struct rtdetq *pq;
     struct rtdetq *npq;
 
     MROUTER_LOCK();
     if (ip_mrouter == NULL) {
 	/*
 	 * Multicast routing is not active.  Return here: falling through
 	 * would walk the tables and release the router lock a second time
 	 * at the end of the function.
 	 */
 	MROUTER_UNLOCK();
 	return;
     }
 
     /*
      * Tear down multicast forwarder state associated with this ifnet.
      * 1. Walk the vif list, matching vifs against this ifnet.
      * 2. Walk the multicast forwarding cache (mfc) looking for
      *    inner matches with this vif's index.
      * 3. Free any pending mbufs for this mfc.
      * 4. Free the associated mfc entry and state associated with this vif.
      *    Be very careful about unlinking from a singly-linked list whose
      *    "head node" is a pointer in a simple array.
      * 5. Free vif state. This should disable ALLMULTI on the interface.
      */
     VIF_LOCK();
     MFC_LOCK();
     for (vifi = 0; vifi < numvifs; vifi++) {
 	if (viftable[vifi].v_ifp != ifp)
 		continue;
 	for (i = 0; i < MFCTBLSIZ; i++) {
 	    ppmfc = &mfctable[i];
 	    for (mfc = mfctable[i]; mfc != NULL; ) {
 		/* remember the successor before mfc is possibly freed */
 		nmfc = mfc->mfc_next;
 		if (mfc->mfc_parent == vifi) {
 		    for (pq = mfc->mfc_stall; pq != NULL; ) {
 			npq = pq->next;
 			m_freem(pq->m);
 			free(pq, M_MRTABLE);
 			pq = npq;
 		    }
 		    free_bw_list(mfc->mfc_bw_meter);
 		    free(mfc, M_MRTABLE);
 		    *ppmfc = nmfc;
 		} else {
 		    ppmfc = &mfc->mfc_next;
 		}
 		mfc = nmfc;
 	    }
 	}
 	del_vif_locked(vifi);
     }
     MFC_UNLOCK();
     VIF_UNLOCK();
 
     MROUTER_UNLOCK();
 }
                         
 /*
  * Enable multicast routing
  */
 static int
 ip_mrouter_init(struct socket *so, int version)
 {
     if (mrtdebug)
 	log(LOG_DEBUG, "ip_mrouter_init: so_type = %d, pr_protocol = %d\n",
 	    so->so_type, so->so_proto->pr_protocol);
 
     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP)
 	return EOPNOTSUPP;
 
     if (version != 1)
 	return ENOPROTOOPT;
 
     MROUTER_LOCK();
 
     if (ip_mrouter != NULL) {
 	MROUTER_UNLOCK();
 	return EADDRINUSE;
     }
 
     if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 
         if_detached_event, NULL, EVENTHANDLER_PRI_ANY);
     if (if_detach_event_tag == NULL) {
 	MROUTER_UNLOCK();
 	return (ENOMEM);
     }
 
     callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL);
 
     callout_reset(&bw_upcalls_ch, BW_UPCALLS_PERIOD,
 	expire_bw_upcalls_send, NULL);
     callout_reset(&bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process, NULL);
 
     ip_mrouter = so;
 
     MROUTER_UNLOCK();
 
     if (mrtdebug)
 	log(LOG_DEBUG, "ip_mrouter_init\n");
 
     return 0;
 }
 
 /*
  * Disable multicast routing
  */
 static int
 X_ip_mrouter_done(void)
 {
     vifi_t vifi;
     int i;
     struct ifnet *ifp;
     struct ifreq ifr;
     struct mfc *rt;
     struct rtdetq *rte;
 
     MROUTER_LOCK();
 
     if (ip_mrouter == NULL) {
 	MROUTER_UNLOCK();
 	return EINVAL;
     }
 
     /*
      * Detach/disable hooks to the reset of the system.
      */
     ip_mrouter = NULL;
     mrt_api_config = 0;
 
     VIF_LOCK();
     /*
      * For each phyint in use, disable promiscuous reception of all IP
      * multicasts.
      */
     for (vifi = 0; vifi < numvifs; vifi++) {
 	if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
 		!(viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
 	    struct sockaddr_in *so = (struct sockaddr_in *)&(ifr.ifr_addr);
 
 	    so->sin_len = sizeof(struct sockaddr_in);
 	    so->sin_family = AF_INET;
 	    so->sin_addr.s_addr = INADDR_ANY;
 	    ifp = viftable[vifi].v_ifp;
 	    if_allmulti(ifp, 0);
 	}
     }
     bzero((caddr_t)viftable, sizeof(viftable));
     numvifs = 0;
     pim_assert = 0;
     VIF_UNLOCK();
     EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
 
     /*
      * Free all multicast forwarding cache entries.
      */
     callout_stop(&expire_upcalls_ch);
     callout_stop(&bw_upcalls_ch);
     callout_stop(&bw_meter_ch);
 
     MFC_LOCK();
     for (i = 0; i < MFCTBLSIZ; i++) {
 	for (rt = mfctable[i]; rt != NULL; ) {
 	    struct mfc *nr = rt->mfc_next;
 
 	    for (rte = rt->mfc_stall; rte != NULL; ) {
 		struct rtdetq *n = rte->next;
 
 		m_freem(rte->m);
 		free(rte, M_MRTABLE);
 		rte = n;
 	    }
 	    free_bw_list(rt->mfc_bw_meter);
 	    free(rt, M_MRTABLE);
 	    rt = nr;
 	}
     }
     bzero((caddr_t)mfctable, sizeof(mfctable));
     bzero((caddr_t)nexpire, sizeof(nexpire));
     bw_upcalls_n = 0;
     bzero(bw_meter_timers, sizeof(bw_meter_timers));
     MFC_UNLOCK();
 
     reg_vif_num = VIFI_INVALID;
 
     MROUTER_UNLOCK();
 
     if (mrtdebug)
 	log(LOG_DEBUG, "ip_mrouter_done\n");
 
     return 0;
 }
 
 /*
  * Set PIM assert processing global
  *
  * Only the values 0 and 1 are accepted; anything else returns EINVAL and
  * leaves pim_assert unchanged.
  */
 static int
 set_assert(int i)
 {
     if (i < 0 || i > 1)
 	return EINVAL;
 
     pim_assert = i;
     return 0;
 }
 
 /*
  * Configure API capabilities
  *
  * On entry *apival holds the requested capability bits; on success it is
  * replaced by the subset that is also present in mrt_api_support, and
  * that subset becomes mrt_api_config.  On failure *apival is set to 0
  * and EPERM is returned.
  */
 int
 set_api_config(uint32_t *apival)
 {
     int bucket;
     int in_use;
 
     /*
      * We can set the API capabilities only if it is the first operation
      * after MRT_INIT. I.e.:
      *  - there are no vifs installed
      *  - pim_assert is not enabled
      *  - the MFC table is empty
      */
     in_use = (numvifs > 0) || pim_assert;
     for (bucket = 0; !in_use && bucket < MFCTBLSIZ; bucket++)
 	in_use = (mfctable[bucket] != NULL);
 
     if (in_use) {
 	*apival = 0;
 	return EPERM;
     }
 
     mrt_api_config = *apival & mrt_api_support;
     *apival = mrt_api_config;
 
     return 0;
 }
 
 /*
  * Add a vif to the vif table
  *
  * Installs the virtual interface described by `vifcp' in slot
  * vifcp->vifc_vifi.  Returns:
  *   EINVAL         index not below MAXVIFS, or a rate limit was requested
  *   EADDRINUSE     the slot already has a local address
  *   EADDRNOTAVAIL  local address is INADDR_ANY, or (for non-register
  *                  vifs) no interface has that address
  *   EOPNOTSUPP     tunnel vif requested, or interface lacks IFF_MULTICAST
  *   other          error returned by if_allmulti()
  *   0              success
  */
 static int
 add_vif(struct vifctl *vifcp)
 {
     struct vif *vifp;
     struct sockaddr_in sin = {sizeof sin, AF_INET};
     struct ifaddr *ifa;
     struct ifnet *ifp;
     int error;
 
     VIF_LOCK();
     if (vifcp->vifc_vifi >= MAXVIFS) {
 	VIF_UNLOCK();
 	return EINVAL;
     }
     /* Form the slot pointer only after the index has been validated. */
     vifp = &viftable[vifcp->vifc_vifi];
 
     /* rate limiting is no longer supported by this code */
     if (vifcp->vifc_rate_limit != 0) {
 	log(LOG_ERR, "rate limiting is no longer supported\n");
 	VIF_UNLOCK();
 	return EINVAL;
     }
     if (vifp->v_lcl_addr.s_addr != INADDR_ANY) {
 	VIF_UNLOCK();
 	return EADDRINUSE;
     }
     if (vifcp->vifc_lcl_addr.s_addr == INADDR_ANY) {
 	VIF_UNLOCK();
 	return EADDRNOTAVAIL;
     }
 
     /* Find the interface with an address in AF_INET family */
     if (vifcp->vifc_flags & VIFF_REGISTER) {
 	/*
 	 * XXX: Because VIFF_REGISTER does not really need a valid
 	 * local interface (e.g. it could be 127.0.0.2), we don't
 	 * check its address.
 	 */
 	ifp = NULL;
     } else {
 	sin.sin_addr = vifcp->vifc_lcl_addr;
 	ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
 	if (ifa == NULL) {
 	    VIF_UNLOCK();
 	    return EADDRNOTAVAIL;
 	}
 	ifp = ifa->ifa_ifp;
     }
 
     if ((vifcp->vifc_flags & VIFF_TUNNEL) != 0) {
 	log(LOG_ERR, "tunnels are no longer supported\n");
 	VIF_UNLOCK();
 	return EOPNOTSUPP;
     } else if (vifcp->vifc_flags & VIFF_REGISTER) {
 	/* register vifs are bound to the file-local pseudo interface */
 	ifp = &multicast_register_if;
 	if (mrtdebug)
 	    log(LOG_DEBUG, "Adding a register vif, ifp: %p\n",
 		    (void *)&multicast_register_if);
 	if (reg_vif_num == VIFI_INVALID) {
 	    if_initname(&multicast_register_if, "register_vif", 0);
 	    multicast_register_if.if_flags = IFF_LOOPBACK;
 	    reg_vif_num = vifcp->vifc_vifi;
 	}
     } else {		/* Make sure the interface supports multicast */
 	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 	    VIF_UNLOCK();
 	    return EOPNOTSUPP;
 	}
 
 	/* Enable promiscuous reception of all IP multicasts from the if */
 	error = if_allmulti(ifp, 1);
 	if (error) {
 	    VIF_UNLOCK();
 	    return error;
 	}
     }
 
     vifp->v_flags     = vifcp->vifc_flags;
     vifp->v_threshold = vifcp->vifc_threshold;
     vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
     vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
     vifp->v_ifp       = ifp;
     vifp->v_rsvp_on   = 0;
     vifp->v_rsvpd     = NULL;
     /* initialize per vif pkt counters */
     vifp->v_pkt_in    = 0;
     vifp->v_pkt_out   = 0;
     vifp->v_bytes_in  = 0;
     vifp->v_bytes_out = 0;
     bzero(&vifp->v_route, sizeof(vifp->v_route));
 
     /* Adjust numvifs up if the vifi is higher than numvifs */
     if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
 
     VIF_UNLOCK();
 
     if (mrtdebug)
 	log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x\n",
 	    vifcp->vifc_vifi,
 	    (u_long)ntohl(vifcp->vifc_lcl_addr.s_addr),
 	    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
 	    (u_long)ntohl(vifcp->vifc_rmt_addr.s_addr),
 	    vifcp->vifc_threshold);
 
     return 0;
 }
 
 /*
  * Delete a vif from the vif table
  *
  * Returns EINVAL when `vifi' is not below numvifs and EADDRNOTAVAIL when
  * the slot has no local address.  Otherwise all-multicast reception is
  * turned off for vifs that are neither tunnel nor register vifs, the slot
  * is zeroed, and numvifs is lowered to one past the highest slot that
  * still has a local address.
  */
 static int
 del_vif_locked(vifi_t vifi)
 {
     struct vif *victim;
     vifi_t top;
 
     VIF_LOCK_ASSERT();
 
     if (vifi >= numvifs)
 	return EINVAL;
 
     victim = viftable + vifi;
     if (victim->v_lcl_addr.s_addr == INADDR_ANY)
 	return EADDRNOTAVAIL;
 
     if ((victim->v_flags & (VIFF_TUNNEL | VIFF_REGISTER)) == 0)
 	if_allmulti(victim->v_ifp, 0);
 
     if (victim->v_flags & VIFF_REGISTER)
 	reg_vif_num = VIFI_INVALID;
 
     bzero(victim, sizeof(*victim));
 
     if (mrtdebug)
 	log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs);
 
     /* Adjust numvifs down */
     top = numvifs;
     while (top > 0 && viftable[top - 1].v_lcl_addr.s_addr == INADDR_ANY)
 	top--;
     numvifs = top;
 
     return 0;
 }
 
 /*
  * Wrapper around del_vif_locked() that takes and releases the vif lock.
  * Returns whatever del_vif_locked() returns.
  */
 static int
 del_vif(vifi_t vifi)
 {
     int error;
 
     VIF_LOCK();
     error = del_vif_locked(vifi);
     VIF_UNLOCK();
 
     return error;
 }
 
 /*
  * Refresh the forwarding parameters of `rt' from `mfccp': the parent vif,
  * the per-vif TTLs and flags for the first numvifs vifs, and the RP
  * address.  Counters and the S,G addresses are left untouched.
  */
 static void
 update_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp)
 {
     int v;
 
     rt->mfc_parent = mfccp->mfcc_parent;
 
     v = 0;
     while (v < numvifs) {
 	rt->mfc_ttls[v] = mfccp->mfcc_ttls[v];
 	/* keep only flag bits that are both known and currently enabled */
 	rt->mfc_flags[v] = mfccp->mfcc_flags[v] & mrt_api_config &
 	    MRT_MFC_FLAGS_ALL;
 	v++;
     }
 
     /* the RP address is honoured only when MRT_MFC_RP is enabled */
     if ((mrt_api_config & MRT_MFC_RP) == 0)
 	rt->mfc_rp.s_addr = INADDR_ANY;
     else
 	rt->mfc_rp = mfccp->mfcc_rp;
 }
 
 /*
  * Initialize `rt' completely from `mfccp': S,G addresses, forwarding
  * parameters (via update_mfc_params()), and zeroed counters and
  * last-assert timestamp.
  */
 static void
 init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp)
 {
     /* identity of the entry */
     rt->mfc_origin = mfccp->mfcc_origin;
     rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
 
     /* parent vif, per-vif ttls/flags and RP address */
     update_mfc_params(rt, mfccp);
 
     /* per src-grp counters start from zero */
     rt->mfc_pkt_cnt = 0;
     rt->mfc_byte_cnt = 0;
     rt->mfc_wrong_if = 0;
     rt->mfc_last_assert.tv_usec = 0;
     rt->mfc_last_assert.tv_sec = 0;
 }
 
 
 /*
  * Add an mfc entry
  *
  * Three cases are handled, in this order:
  *  1. An installed entry (found by mfc_find()) exists: only its
  *     forwarding parameters are updated.
  *  2. One or more entries with queued packets (mfc_stall != NULL) exist
  *     for this S,G: each is initialized from `mfccp', its queued packets
  *     are passed to ip_mdq() and freed, and it stops being subject to
  *     expiry.
  *  3. Otherwise any remaining entry for this S,G in the hash chain is
  *     re-initialized, or a new entry is allocated and inserted at the head
  *     of the chain.
  * Returns 0, or ENOBUFS when a new entry cannot be allocated.
  */
 static int
 add_mfc(struct mfcctl2 *mfccp)
 {
     struct mfc *rt;
     u_long hash;
     struct rtdetq *rte;
     u_short nstl;	/* number of stalled entries found for this S,G */
 
     VIF_LOCK();
     MFC_LOCK();
 
     rt = mfc_find(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
 
     /* If an entry already exists, just update the fields */
     if (rt) {
 	if (mrtdebug & DEBUG_MFC)
 	    log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n",
 		(u_long)ntohl(mfccp->mfcc_origin.s_addr),
 		(u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 		mfccp->mfcc_parent);
 
 	update_mfc_params(rt, mfccp);
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return 0;
     }
 
     /*
      * Find the entry for which the upcall was made and update
      */
     hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
     for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) {
 
 	if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
 		(rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
 		(rt->mfc_stall != NULL)) {
 
 	    /* more than one stalled entry for the same S,G is unexpected */
 	    if (nstl++)
 		log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n",
 		    "multiple kernel entries",
 		    (u_long)ntohl(mfccp->mfcc_origin.s_addr),
 		    (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 		    mfccp->mfcc_parent, (void *)rt->mfc_stall);
 
 	    if (mrtdebug & DEBUG_MFC)
 		log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n",
 		    (u_long)ntohl(mfccp->mfcc_origin.s_addr),
 		    (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 		    mfccp->mfcc_parent, (void *)rt->mfc_stall);
 
 	    init_mfc_params(rt, mfccp);
 
 	    rt->mfc_expire = 0;	/* Don't clean this guy up */
 	    nexpire[hash]--;
 
 	    /* free packets Qed at the end of this entry */
 	    for (rte = rt->mfc_stall; rte != NULL; ) {
 		struct rtdetq *n = rte->next;
 
 		/* forward the queued packet using the new parameters */
 		ip_mdq(rte->m, rte->ifp, rt, -1);
 		m_freem(rte->m);
 		free(rte, M_MRTABLE);
 		rte = n;
 	    }
 	    rt->mfc_stall = NULL;
 	}
     }
 
     /*
      * It is possible that an entry is being inserted without an upcall
      */
     if (nstl == 0) {
 	if (mrtdebug & DEBUG_MFC)
 	    log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n",
 		hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr),
 		(u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 		mfccp->mfcc_parent);
 
 	for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) {
 	    if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
 		    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
 		init_mfc_params(rt, mfccp);
 		/* keep nexpire[] consistent if the entry was due to expire */
 		if (rt->mfc_expire)
 		    nexpire[hash]--;
 		rt->mfc_expire = 0;
 		break; /* XXX */
 	    }
 	}
 	if (rt == NULL) {		/* no upcall, so make a new entry */
 	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
 	    if (rt == NULL) {
 		MFC_UNLOCK();
 		VIF_UNLOCK();
 		return ENOBUFS;
 	    }
 
 	    init_mfc_params(rt, mfccp);
 	    rt->mfc_expire     = 0;
 	    rt->mfc_stall      = NULL;
 
 	    rt->mfc_bw_meter = NULL;
 	    /* insert new entry at head of hash chain */
 	    rt->mfc_next = mfctable[hash];
 	    mfctable[hash] = rt;
 	}
     }
     MFC_UNLOCK();
     VIF_UNLOCK();
     return 0;
 }
 
 /*
  * Delete an mfc entry
  */
 static int
 del_mfc(struct mfcctl2 *mfccp)
 {
     struct in_addr	origin;
     struct in_addr	mcastgrp;
     struct mfc		*rt;
     struct mfc		**nptr;
     u_long		hash;
     struct bw_meter	*list;
 
     origin = mfccp->mfcc_origin;
     mcastgrp = mfccp->mfcc_mcastgrp;
 
     if (mrtdebug & DEBUG_MFC)
 	log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n",
 	    (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr));
 
     MFC_LOCK();
 
     hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
     for (nptr = &mfctable[hash]; (rt = *nptr) != NULL; nptr = &rt->mfc_next)
 	if (origin.s_addr == rt->mfc_origin.s_addr &&
 		mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
 		rt->mfc_stall == NULL)
 	    break;
     if (rt == NULL) {
 	MFC_UNLOCK();
 	return EADDRNOTAVAIL;
     }
 
     *nptr = rt->mfc_next;
 
     /*
      * free the bw_meter entries
      */
     list = rt->mfc_bw_meter;
     rt->mfc_bw_meter = NULL;
 
     free(rt, M_MRTABLE);
 
     free_bw_list(list);
 
     MFC_UNLOCK();
 
     return 0;
 }
 
 /*
  * Send a message to the routing daemon on the multicast routing socket
  *
  * Appends `mm', tagged with source address `src', to the receive buffer
  * of `s' and wakes up the reader.  Returns 0 on success.  When `s' is NULL
  * or the append fails, `mm' is freed and -1 is returned.
  */
 static int
 socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
 {
     if (s == NULL) {
 	m_freem(mm);
 	return -1;
     }
 
     SOCKBUF_LOCK(&s->so_rcv);
     if (sbappendaddr_locked(&s->so_rcv, (struct sockaddr *)src, mm,
 	NULL) == 0) {
 	SOCKBUF_UNLOCK(&s->so_rcv);
 	m_freem(mm);
 	return -1;
     }
 
     /* NOTE(review): no explicit unlock here -- presumably done by sorwakeup_locked(); confirm. */
     sorwakeup_locked(s);
     return 0;
 }
 
 /*
  * IP multicast forwarding function. This function assumes that the packet
  * pointed to by "ip" has arrived on (or is about to be sent to) the interface
  * pointed to by "ifp", and the packet is to be relayed to other networks
  * that have members of the packet's destination IP multicast group.
  *
  * The packet is returned unscathed to the caller, unless it is
  * erroneous, in which case a non-zero return value tells the caller to
  * discard it.
  */
 
 #define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
 
 /*
  * Return values, as visible in the body below:
  *   1        the packet carries an LSRR option in the source-route tunnel
  *            position; such packets are no longer supported
  *   0        nothing to forward (ttl <= 1 or local-only group), the
  *            packet was queued awaiting a route, or it was silently
  *            dropped (no matching vif, or the queue is full)
  *   ENOBUFS  an allocation failed or the upcall could not be queued on
  *            the routing socket
  *   other    whatever ip_mdq() returns
  * The vif and MFC locks are taken here and released on every return path
  * after that point.
  */
 static int
 X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
     struct ip_moptions *imo)
 {
     struct mfc *rt;
     int error;
     vifi_t vifi;
 
     if (mrtdebug & DEBUG_FORWARD)
 	log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n",
 	    (u_long)ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr),
 	    (void *)ifp);
 
     if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 ||
 		((u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
 	/*
 	 * Packet arrived via a physical interface or
 	 * an encapsulated tunnel or a register_vif.
 	 */
     } else {
 	/*
 	 * Packet arrived through a source-route tunnel.
 	 * Source-route tunnels are no longer supported.
 	 */
 	static int last_log;
 	/* log at most once per distinct time_uptime value */
 	if (last_log != time_uptime) {
 	    last_log = time_uptime;
 	    log(LOG_ERR,
 		"ip_mforward: received source-routed packet from %lx\n",
 		(u_long)ntohl(ip->ip_src.s_addr));
 	}
 	return 1;
     }
 
     VIF_LOCK();
     MFC_LOCK();
     /* The sender selected an explicit vif: transmit on that vif only. */
     if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) {
 	if (ip->ip_ttl < MAXTTL)
 	    ip->ip_ttl++;	/* compensate for -1 in *_send routines */
 	if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
 	    struct vif *vifp = viftable + vifi;
 
 	    printf("Sending IPPROTO_RSVP from %lx to %lx on vif %d (%s%s)\n",
 		(long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr),
 		vifi,
 		(vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
 		vifp->v_ifp->if_xname);
 	}
 	error = ip_mdq(m, ifp, NULL, vifi);
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return error;
     }
     if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
 	printf("Warning: IPPROTO_RSVP from %lx to %lx without vif option\n",
 	    (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr));
 	if (!imo)
 	    printf("In fact, no options were specified at all\n");
     }
 
     /*
      * Don't forward a packet with time-to-live of zero or one,
      * or a packet destined to a local-only group.
      */
     if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ntohl(ip->ip_dst.s_addr))) {
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return 0;
     }
 
     /*
      * Determine forwarding vifs from the forwarding cache table
      */
     ++mrtstat.mrts_mfc_lookups;
     rt = mfc_find(ip->ip_src.s_addr, ip->ip_dst.s_addr);
 
     /* Entry exists, so forward if necessary */
     if (rt != NULL) {
 	error = ip_mdq(m, ifp, rt, -1);
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return error;
     } else {
 	/*
 	 * If we don't have a route for packet's origin,
 	 * Make a copy of the packet & send message to routing daemon
 	 */
 
 	struct mbuf *mb0;
 	struct rtdetq *rte;
 	u_long hash;
 	int hlen = ip->ip_hl << 2;
 
 	++mrtstat.mrts_mfc_misses;
 
 	mrtstat.mrts_no_route++;
 	if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
 	    log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n",
 		(u_long)ntohl(ip->ip_src.s_addr),
 		(u_long)ntohl(ip->ip_dst.s_addr));
 
 	/*
 	 * Allocate mbufs early so that we don't do extra work if we are
 	 * just going to fail anyway.  Make sure to pullup the header so
 	 * that other people can't step on it.
 	 */
 	rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT);
 	if (rte == NULL) {
 	    MFC_UNLOCK();
 	    VIF_UNLOCK();
 	    return ENOBUFS;
 	}
 	mb0 = m_copypacket(m, M_DONTWAIT);
 	if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen))
 	    mb0 = m_pullup(mb0, hlen);
 	if (mb0 == NULL) {
 	    free(rte, M_MRTABLE);
 	    MFC_UNLOCK();
 	    VIF_UNLOCK();
 	    return ENOBUFS;
 	}
 
 	/* is there an upcall waiting for this flow ? */
 	hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
 	for (rt = mfctable[hash]; rt; rt = rt->mfc_next) {
 	    if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) &&
 		    (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
 		    (rt->mfc_stall != NULL))
 		break;
 	}
 
 	if (rt == NULL) {
 	    int i;
 	    struct igmpmsg *im;
 	    struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
 	    struct mbuf *mm;
 
 	    /*
 	     * Locate the vifi for the incoming interface for this packet.
 	     * If none found, drop packet.
 	     */
 	    for (vifi=0; vifi < numvifs && viftable[vifi].v_ifp != ifp; vifi++)
 		;
 	    if (vifi >= numvifs)	/* vif not found, drop packet */
 		goto non_fatal;
 
 	    /* no upcall, so make a new entry */
 	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
 	    if (rt == NULL)
 		goto fail;
 	    /* Make a copy of the header to send to the user level process */
 	    mm = m_copy(mb0, 0, hlen);
 	    if (mm == NULL)
 		goto fail1;
 
 	    /*
 	     * Send message to routing daemon to install
 	     * a route into the kernel table
 	     */
 
 	    im = mtod(mm, struct igmpmsg *);
 	    im->im_msgtype = IGMPMSG_NOCACHE;
 	    im->im_mbz = 0;
 	    im->im_vif = vifi;
 
 	    mrtstat.mrts_upcalls++;
 
 	    k_igmpsrc.sin_addr = ip->ip_src;
 	    if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) {
 		log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n");
 		++mrtstat.mrts_upq_sockfull;
 		/*
 		 * Shared cleanup: fail1 is also entered by goto when the
 		 * header copy failed, fail when the mfc allocation failed.
 		 */
 fail1:
 		free(rt, M_MRTABLE);
 fail:
 		free(rte, M_MRTABLE);
 		m_freem(mb0);
 		MFC_UNLOCK();
 		VIF_UNLOCK();
 		return ENOBUFS;
 	    }
 
 	    /* insert new entry at head of hash chain */
 	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
 	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
 	    rt->mfc_expire	      = UPCALL_EXPIRE;
 	    nexpire[hash]++;
 	    for (i = 0; i < numvifs; i++) {
 		rt->mfc_ttls[i] = 0;
 		rt->mfc_flags[i] = 0;
 	    }
 	    rt->mfc_parent = -1;
 
 	    rt->mfc_rp.s_addr = INADDR_ANY; /* clear the RP address */
 
 	    rt->mfc_bw_meter = NULL;
 
 	    /* link into table */
 	    rt->mfc_next   = mfctable[hash];
 	    mfctable[hash] = rt;
 	    rt->mfc_stall = rte;
 
 	} else {
 	    /* determine if q has overflowed */
 	    int npkts = 0;
 	    struct rtdetq **p;
 
 	    /*
 	     * XXX ouch! we need to append to the list, but we
 	     * only have a pointer to the front, so we have to
 	     * scan the entire list every time.
 	     */
 	    for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next)
 		npkts++;
 
 	    if (npkts > MAX_UPQ) {
 		mrtstat.mrts_upq_ovflw++;
 		/* also the target of the "vif not found" goto above */
 non_fatal:
 		free(rte, M_MRTABLE);
 		m_freem(mb0);
 		MFC_UNLOCK();
 		VIF_UNLOCK();
 		return 0;
 	    }
 
 	    /* Add this entry to the end of the queue */
 	    *p = rte;
 	}
 
 	/* fill in the queue element that was linked in above */
 	rte->m			= mb0;
 	rte->ifp		= ifp;
 	rte->next		= NULL;
 
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 
 	return 0;
     }
 }
 
 /*
  * Periodic callout: clean up cache entries whose upcall was not serviced.
  *
  * Every hash chain of mfctable[] for which nexpire[] is non-zero is walked
  * with the MFC lock held.  An entry that still has stalled packets queued
  * (mfc_stall != NULL) and whose mfc_expire counter reaches zero on this
  * tick is torn down: its queued packets, its bw_meter entries and the
  * entry itself are released.  The callout is re-armed before returning.
  *
  * 'unused' is the callout argument; NULL is passed when it is re-armed.
  */
 static void
 expire_upcalls(void *unused)
 {
     struct rtdetq *rte;
     struct mfc *mfc, **nptr;
     int i;
 
     MFC_LOCK();
     for (i = 0; i < MFCTBLSIZ; i++) {
 	if (nexpire[i] == 0)
 	    continue;		/* no pending upcalls in this chain */
 	nptr = &mfctable[i];
 	for (mfc = *nptr; mfc != NULL; mfc = *nptr) {
 	    struct bw_meter *list;
 
 	    /*
 	     * Keep real cache entries (nothing stalled), entries marked
 	     * to not expire (shouldn't happen), and entries whose timer
 	     * has not run out yet.
 	     */
 	    if (mfc->mfc_stall == NULL || mfc->mfc_expire == 0 ||
 		    --mfc->mfc_expire != 0) {
 		nptr = &mfc->mfc_next;
 		continue;
 	    }
 
 	    if (mrtdebug & DEBUG_EXPIRE)
 		log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n",
 		    (u_long)ntohl(mfc->mfc_origin.s_addr),
 		    (u_long)ntohl(mfc->mfc_mcastgrp.s_addr));
 
 	    /*
 	     * drop all the packets
 	     * free the mbuf with the pkt, if, timing info
 	     */
 	    for (rte = mfc->mfc_stall; rte; ) {
 		struct rtdetq *n = rte->next;
 
 		m_freem(rte->m);
 		free(rte, M_MRTABLE);
 		rte = n;
 	    }
 	    ++mrtstat.mrts_cache_cleanups;
 	    nexpire[i]--;
 
 	    /*
 	     * Free the bw_meter entries.  Go through free_bw_list() so
 	     * that each meter is taken off its bw_meter_timers[] bucket
 	     * (via unschedule_bw_meter()) before its memory is released;
 	     * freeing it directly would leave a scheduled "<=" meter
 	     * linked in the timer hash.
 	     */
 	    list = mfc->mfc_bw_meter;
 	    mfc->mfc_bw_meter = NULL;
 	    free_bw_list(list);
 
 	    /* Unlink the entry from the chain; *nptr now names its successor */
 	    *nptr = mfc->mfc_next;
 	    free(mfc, M_MRTABLE);
 	}
     }
     MFC_UNLOCK();
 
     callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL);
 }
 
 /*
  * Packet forwarding routine once entry in the cache is made.
  *
  *	m	 the packet; its data begins with the IP header
  *	ifp	 interface the packet arrived on
  *	rt	 forwarding cache entry that matched the packet
  *	xmt_vif	 when this is a valid vif index the packet is sent on that
  *		 vif only and no further checks are made
  *
  * Returns 1 when the packet was handed to the explicitly requested vif,
  * ENOBUFS when a WRONGVIF upcall could not be built or queued, and 0 in
  * every other case.  The VIF lock must be held; the MFC lock is asserted
  * as well when the entry has bw_meter entries attached.
  */
 static int
 ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
 {
     struct ip  *ip = mtod(m, struct ip *);
     vifi_t vifi;
     int plen = ip->ip_len;
 
     VIF_LOCK_ASSERT();
 
     /*
      * If xmt_vif is not -1, send on only the requested vif.
      *
      * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
      */
     if (xmt_vif < numvifs) {
 	if (viftable[xmt_vif].v_flags & VIFF_REGISTER)
 		pim_register_send(ip, viftable + xmt_vif, m, rt);
 	else
 		phyint_send(ip, viftable + xmt_vif, m);
 	return 1;
     }
 
     /*
      * Don't forward if it didn't arrive from the parent vif for its origin.
      */
     vifi = rt->mfc_parent;
     if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
 	/* came in the wrong interface */
 	if (mrtdebug & DEBUG_FORWARD)
 	    /*
 	     * This branch is also taken when vifi is not a valid index,
 	     * so only look into viftable[] when it is in range.
 	     */
 	    log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n",
 		(void *)ifp, vifi,
 		(vifi < numvifs) ? (void *)viftable[vifi].v_ifp :
 		(void *)NULL);
 	++mrtstat.mrts_wrong_if;
 	++rt->mfc_wrong_if;
 	/*
 	 * If we are doing PIM assert processing, send a message
 	 * to the routing daemon.
 	 *
 	 * XXX: A PIM-SM router needs the WRONGVIF detection so it
 	 * can complete the SPT switch, regardless of the type
 	 * of the iif (broadcast media, GRE tunnel, etc).
 	 */
 	if (pim_assert && (vifi < numvifs) && viftable[vifi].v_ifp) {
 	    struct timeval now;
 	    u_long delta;
 
 	    if (ifp == &multicast_register_if)
 		pimstat.pims_rcv_registers_wrongiif++;
 
 	    /* Get vifi for the incoming packet */
 	    for (vifi=0; vifi < numvifs && viftable[vifi].v_ifp != ifp; vifi++)
 		;
 	    if (vifi >= numvifs)
 		return 0;	/* The iif is not found: ignore the packet. */
 
 	    if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF)
 		return 0;	/* WRONGVIF disabled: ignore the packet */
 
 	    GET_TIME(now);
 
 	    TV_DELTA(now, rt->mfc_last_assert, delta);
 
 	    /* Rate-limit: at most one WRONGVIF upcall per ASSERT_MSG_TIME */
 	    if (delta > ASSERT_MSG_TIME) {
 		struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
 		struct igmpmsg *im;
 		int hlen = ip->ip_hl << 2;
 		struct mbuf *mm = m_copy(m, 0, hlen);
 
 		/* The header is overwritten below: make it private, contiguous */
 		if (mm && (M_HASCL(mm) || mm->m_len < hlen))
 		    mm = m_pullup(mm, hlen);
 		if (mm == NULL)
 		    return ENOBUFS;
 
 		rt->mfc_last_assert = now;
 
 		im = mtod(mm, struct igmpmsg *);
 		im->im_msgtype	= IGMPMSG_WRONGVIF;
 		im->im_mbz		= 0;
 		im->im_vif		= vifi;
 
 		mrtstat.mrts_upcalls++;
 
 		k_igmpsrc.sin_addr = im->im_src;
 		if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) {
 		    log(LOG_WARNING,
 			"ip_mforward: ip_mrouter socket queue full\n");
 		    ++mrtstat.mrts_upq_sockfull;
 		    return ENOBUFS;
 		}
 	    }
 	}
 	return 0;
     }
 
     /* If I sourced this packet, it counts as output, else it was input. */
     if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
 	viftable[vifi].v_pkt_out++;
 	viftable[vifi].v_bytes_out += plen;
     } else {
 	viftable[vifi].v_pkt_in++;
 	viftable[vifi].v_bytes_in += plen;
     }
     rt->mfc_pkt_cnt++;
     rt->mfc_byte_cnt += plen;
 
     /*
      * For each vif, decide if a copy of the packet should be forwarded.
      * Forward if:
      *		- the ttl exceeds the vif's threshold
      *		- there are group members downstream on interface
      */
     for (vifi = 0; vifi < numvifs; vifi++)
 	if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) {
 	    viftable[vifi].v_pkt_out++;
 	    viftable[vifi].v_bytes_out += plen;
 	    if (viftable[vifi].v_flags & VIFF_REGISTER)
 		pim_register_send(ip, viftable + vifi, m, rt);
 	    else
 		phyint_send(ip, viftable + vifi, m);
 	}
 
     /*
      * Perform upcall-related bw measuring.
      */
     if (rt->mfc_bw_meter != NULL) {
 	struct bw_meter *x;
 	struct timeval now;
 
 	GET_TIME(now);
 	MFC_LOCK_ASSERT();
 	for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next)
 	    bw_meter_receive_packet(x, plen, &now);
     }
 
     return 0;
 }
 
 /*
  * Tell whether a vif number is usable, i.e. lies in [0, numvifs).
  * This is used by ip_output.  Returns 1 if it is, 0 otherwise.
  */
 static int
 X_legal_vif_num(int vif)
 {
     /* XXX unlocked, matter? */
     if (vif < 0)
 	return 0;
     return (vif < numvifs);
 }
 
 /*
  * Look up the local address configured on a vif.
  * Returns INADDR_ANY when the index is outside [0, numvifs).
  */
 static u_long
 X_ip_mcast_src(int vifi)
 {
     /* XXX unlocked, matter? */
     if (vifi < 0 || vifi >= numvifs)
 	return INADDR_ANY;
 
     return viftable[vifi].v_lcl_addr.s_addr;
 }
 
 /*
  * Send a copy of packet 'm' out of the vif 'vifp'.
  * 'ip' points at the packet's IP header and is only used for its header
  * length.  The packet is silently dropped if no copy can be obtained.
  */
 static void
 phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
 {
     struct mbuf *pkt;
     int iphlen = ip->ip_hl << 2;
 
     VIF_LOCK_ASSERT();
 
     /*
      * Take a new reference to the packet.  ip_output() scribbles on the
      * IP header, so the header must really be copied, not just
      * referenced: pull it up whenever the first mbuf uses a cluster or
      * does not hold the whole header.
      */
     pkt = m_copypacket(m, M_DONTWAIT);
     if (pkt == NULL)
 	return;
     if (M_HASCL(pkt) || pkt->m_len < iphlen) {
 	pkt = m_pullup(pkt, iphlen);
 	if (pkt == NULL)
 	    return;
     }
 
     send_packet(vifp, pkt);
 }
 
 static void
 send_packet(struct vif *vifp, struct mbuf *m)
 {
 	struct ip_moptions imo;
 	struct in_multi *imm[2];
 	int error;
 
 	VIF_LOCK_ASSERT();
 
 	imo.imo_multicast_ifp  = vifp->v_ifp;
 	imo.imo_multicast_ttl  = mtod(m, struct ip *)->ip_ttl - 1;
 	imo.imo_multicast_loop = 1;
 	imo.imo_multicast_vif  = -1;
 	imo.imo_num_memberships = 0;
 	imo.imo_max_memberships = 2;
 	imo.imo_membership  = &imm[0];
 
 	/*
 	 * Re-entrancy should not be a problem here, because
 	 * the packets that we send out and are looped back at us
 	 * should get rejected because they appear to come from
 	 * the loopback interface, thus preventing looping.
 	 */
 	error = ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, &imo, NULL);
 	if (mrtdebug & DEBUG_XMIT) {
 	    log(LOG_DEBUG, "phyint_send on vif %td err %d\n",
 		vifp - viftable, error);
 	}
 }
 
 static int
 X_ip_rsvp_vif(struct socket *so, struct sockopt *sopt)
 {
     int error, vifi;
 
     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
 	return EOPNOTSUPP;
 
     error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
     if (error)
 	return error;
 
     VIF_LOCK();
 
     if (vifi < 0 || vifi >= numvifs) {	/* Error if vif is invalid */
 	VIF_UNLOCK();
 	return EADDRNOTAVAIL;
     }
 
     if (sopt->sopt_name == IP_RSVP_VIF_ON) {
 	/* Check if socket is available. */
 	if (viftable[vifi].v_rsvpd != NULL) {
 	    VIF_UNLOCK();
 	    return EADDRINUSE;
 	}
 
 	viftable[vifi].v_rsvpd = so;
 	/* This may seem silly, but we need to be sure we don't over-increment
 	 * the RSVP counter, in case something slips up.
 	 */
 	if (!viftable[vifi].v_rsvp_on) {
 	    viftable[vifi].v_rsvp_on = 1;
 	    rsvp_on++;
 	}
     } else { /* must be VIF_OFF */
 	/*
 	 * XXX as an additional consistency check, one could make sure
 	 * that viftable[vifi].v_rsvpd == so, otherwise passing so as
 	 * first parameter is pretty useless.
 	 */
 	viftable[vifi].v_rsvpd = NULL;
 	/*
 	 * This may seem silly, but we need to be sure we don't over-decrement
 	 * the RSVP counter, in case something slips up.
 	 */
 	if (viftable[vifi].v_rsvp_on) {
 	    viftable[vifi].v_rsvp_on = 0;
 	    rsvp_on--;
 	}
     }
     VIF_UNLOCK();
     return 0;
 }
 
 static void
 X_ip_rsvp_force_done(struct socket *so)
 {
     int vifi;
 
     /* Don't bother if it is not the right type of socket. */
     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
 	return;
 
     VIF_LOCK();
 
     /* The socket may be attached to more than one vif...this
      * is perfectly legal.
      */
     for (vifi = 0; vifi < numvifs; vifi++) {
 	if (viftable[vifi].v_rsvpd == so) {
 	    viftable[vifi].v_rsvpd = NULL;
 	    /* This may seem silly, but we need to be sure we don't
 	     * over-decrement the RSVP counter, in case something slips up.
 	     */
 	    if (viftable[vifi].v_rsvp_on) {
 		viftable[vifi].v_rsvp_on = 0;
 		rsvp_on--;
 	    }
 	}
     }
 
     VIF_UNLOCK();
 }
 
 /*
  * Input routine for RSVP packets.
  *
  * The packet is dropped when RSVP is off everywhere.  Otherwise the vif
  * whose interface matches the packet's receive interface is looked up:
  * if that vif has an RSVP socket the packet is queued on it, else the
  * packet goes to rip_input() when the old-style socket (ip_rsvpd) is set,
  * and is dropped when it is not.
  */
 static void
 X_rsvp_input(struct mbuf *m, int off)
 {
     struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET };
     struct ip *ip = mtod(m, struct ip *);
     struct ifnet *rcvif;
     int idx;
 
     if (rsvpdebug)
 	printf("rsvp_input: rsvp_on %d\n",rsvp_on);
 
     /*
      * Can still get packets with rsvp_on = 0 if there is a local member
      * of the group to which the RSVP packet is addressed.  But in this
      * case we want to throw the packet away.
      */
     if (!rsvp_on) {
 	m_freem(m);
 	return;
     }
 
     if (rsvpdebug)
 	printf("rsvp_input: check vifs\n");
 
 #ifdef DIAGNOSTIC
     M_ASSERTPKTHDR(m);
 #endif
 
     rcvif = m->m_pkthdr.rcvif;
 
     VIF_LOCK();
     /* Find which vif the packet arrived on; idx == numvifs means none. */
     idx = 0;
     while (idx < numvifs && viftable[idx].v_ifp != rcvif)
 	idx++;
 
     if (idx != numvifs && viftable[idx].v_rsvpd != NULL) {
 	/* The vif has its own RSVP socket: deliver the packet there. */
 	rsvp_src.sin_addr = ip->ip_src;
 
 	if (rsvpdebug && m)
 	    printf("rsvp_input: m->m_len = %d, sbspace() = %ld\n",
 		   m->m_len,sbspace(&(viftable[idx].v_rsvpd->so_rcv)));
 
 	if (socket_send(viftable[idx].v_rsvpd, m, &rsvp_src) < 0) {
 	    if (rsvpdebug)
 		printf("rsvp_input: Failed to append to socket\n");
 	} else {
 	    if (rsvpdebug)
 		printf("rsvp_input: send packet up\n");
 	}
 	VIF_UNLOCK();
 	return;
     }
 
     /*
      * Drop the lock here to avoid holding it across rip_input.
      * This could make rsvpdebug printfs wrong.  If you care,
      * record the state of stuff before dropping the lock.
      */
     VIF_UNLOCK();
 
     /*
      * If the old-style non-vif-associated socket is set,
      * then use it.  Otherwise, drop packet since there
      * is no specific socket for this vif.
      */
     if (ip_rsvpd != NULL) {
 	if (rsvpdebug)
 	    printf("rsvp_input: Sending packet up old-style socket\n");
 	rip_input(m, off);  /* xxx */
 	return;
     }
 
     if (rsvpdebug) {
 	if (idx == numvifs)
 	    printf("rsvp_input: Can't find vif for packet.\n");
 	else if (viftable[idx].v_rsvpd == NULL)
 	    printf("rsvp_input: No socket defined for vif %d\n",idx);
     }
     m_freem(m);
 }
 
 /*
  * Code for bandwidth monitors
  */
 
 /*
  * Define common interface for timeval-related methods.
  *
  * Each wrapper forwards its arguments unchanged to timevalcmp(),
  * timevalsub() and timevaladd() respectively; for BW_TIMEVALCMP the third
  * argument is the comparison operator itself (e.g. <, ==, >).
  */
 #define	BW_TIMEVALCMP(tvp, uvp, cmp) timevalcmp((tvp), (uvp), cmp)
 #define	BW_TIMEVALDECR(vvp, uvp) timevalsub((vvp), (uvp))
 #define	BW_TIMEVALADD(vvp, uvp) timevaladd((vvp), (uvp))
 
 /*
  * Translate the BW_UPCALL_* unit and comparison bits found in
  * req->bu_flags into the corresponding BW_METER_* bits.
  */
 static uint32_t
 compute_bw_meter_flags(struct bw_upcall *req)
 {
     uint32_t meter_flags;
 
     meter_flags  = (req->bu_flags & BW_UPCALL_UNIT_PACKETS) ?
 	BW_METER_UNIT_PACKETS : 0;
     meter_flags |= (req->bu_flags & BW_UPCALL_UNIT_BYTES) ?
 	BW_METER_UNIT_BYTES : 0;
     meter_flags |= (req->bu_flags & BW_UPCALL_GEQ) ?
 	BW_METER_GEQ : 0;
     meter_flags |= (req->bu_flags & BW_UPCALL_LEQ) ?
 	BW_METER_LEQ : 0;
 
     return meter_flags;
 }
 
 /*
  * Add a bw_meter entry
  */
 static int
 add_bw_upcall(struct bw_upcall *req)
 {
     struct mfc *mfc;
     struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC,
 		BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC };
     struct timeval now;
     struct bw_meter *x;
     uint32_t flags;
 
     if (!(mrt_api_config & MRT_MFC_BW_UPCALL))
 	return EOPNOTSUPP;
 
     /* Test if the flags are valid */
     if (!(req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES)))
 	return EINVAL;
     if (!(req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)))
 	return EINVAL;
     if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))
 	    == (BW_UPCALL_GEQ | BW_UPCALL_LEQ))
 	return EINVAL;
 
     /* Test if the threshold time interval is valid */
     if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &delta, <))
 	return EINVAL;
 
     flags = compute_bw_meter_flags(req);
 
     /*
      * Find if we have already same bw_meter entry
      */
     MFC_LOCK();
     mfc = mfc_find(req->bu_src.s_addr, req->bu_dst.s_addr);
     if (mfc == NULL) {
 	MFC_UNLOCK();
 	return EADDRNOTAVAIL;
     }
     for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) {
 	if ((BW_TIMEVALCMP(&x->bm_threshold.b_time,
 			   &req->bu_threshold.b_time, ==)) &&
 	    (x->bm_threshold.b_packets == req->bu_threshold.b_packets) &&
 	    (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) &&
 	    (x->bm_flags & BW_METER_USER_FLAGS) == flags)  {
 	    MFC_UNLOCK();
 	    return 0;		/* XXX Already installed */
 	}
     }
 
     /* Allocate the new bw_meter entry */
     x = (struct bw_meter *)malloc(sizeof(*x), M_BWMETER, M_NOWAIT);
     if (x == NULL) {
 	MFC_UNLOCK();
 	return ENOBUFS;
     }
 
     /* Set the new bw_meter entry */
     x->bm_threshold.b_time = req->bu_threshold.b_time;
     GET_TIME(now);
     x->bm_start_time = now;
     x->bm_threshold.b_packets = req->bu_threshold.b_packets;
     x->bm_threshold.b_bytes = req->bu_threshold.b_bytes;
     x->bm_measured.b_packets = 0;
     x->bm_measured.b_bytes = 0;
     x->bm_flags = flags;
     x->bm_time_next = NULL;
     x->bm_time_hash = BW_METER_BUCKETS;
 
     /* Add the new bw_meter entry to the front of entries for this MFC */
     x->bm_mfc = mfc;
     x->bm_mfc_next = mfc->mfc_bw_meter;
     mfc->mfc_bw_meter = x;
     schedule_bw_meter(x, &now);
     MFC_UNLOCK();
 
     return 0;
 }
 
 /*
  * Release every bw_meter entry on a list linked through bm_mfc_next.
  * Each entry is unscheduled before its memory is freed.
  */
 static void
 free_bw_list(struct bw_meter *list)
 {
     struct bw_meter *cur, *next;
 
     for (cur = list; cur != NULL; cur = next) {
 	/* Remember the successor: 'cur' is gone after free() */
 	next = cur->bm_mfc_next;
 	unschedule_bw_meter(cur);
 	free(cur, M_BWMETER);
     }
 }
 
 /*
  * Delete one or multiple bw_meter entries of the MFC entry selected by
  * req->bu_src/bu_dst.
  *
  * With BW_UPCALL_DELETE_ALL set in req->bu_flags every meter of the entry
  * is removed; otherwise only the meter whose thresholds and flags match
  * the request.  Returns 0 on success, EOPNOTSUPP when the bandwidth
  * upcall API is not enabled, EADDRNOTAVAIL when no MFC entry matches and
  * EINVAL when no meter matches.
  */
 static int
 del_bw_upcall(struct bw_upcall *req)
 {
     struct bw_meter *bm, *prev, *list;
     struct mfc *mfc;
     uint32_t flags;
 
     if ((mrt_api_config & MRT_MFC_BW_UPCALL) == 0)
 	return EOPNOTSUPP;
 
     MFC_LOCK();
 
     /* Find the corresponding MFC entry */
     mfc = mfc_find(req->bu_src.s_addr, req->bu_dst.s_addr);
     if (mfc == NULL) {
 	MFC_UNLOCK();
 	return EADDRNOTAVAIL;
     }
 
     if (req->bu_flags & BW_UPCALL_DELETE_ALL) {
 	/* Detach the whole list from the mfc first, then release it */
 	list = mfc->mfc_bw_meter;
 	mfc->mfc_bw_meter = NULL;
 	free_bw_list(list);
 	MFC_UNLOCK();
 	return 0;
     }
 
     /* Delete a single bw_meter entry: look for the one that matches */
     flags = compute_bw_meter_flags(req);
     prev = NULL;
     for (bm = mfc->mfc_bw_meter; bm != NULL;
 	 prev = bm, bm = bm->bm_mfc_next) {
 	if ((BW_TIMEVALCMP(&bm->bm_threshold.b_time,
 			   &req->bu_threshold.b_time, ==)) &&
 	    (bm->bm_threshold.b_packets == req->bu_threshold.b_packets) &&
 	    (bm->bm_threshold.b_bytes == req->bu_threshold.b_bytes) &&
 	    (bm->bm_flags & BW_METER_USER_FLAGS) == flags)
 	    break;
     }
     if (bm == NULL) {
 	MFC_UNLOCK();
 	return EINVAL;
     }
 
     /* Take the entry off the list for this MFC */
     if (prev == NULL)
 	bm->bm_mfc->mfc_bw_meter = bm->bm_mfc_next;	/* new head of list */
     else
 	prev->bm_mfc_next = bm->bm_mfc_next;		/* remove from middle */
 
     unschedule_bw_meter(bm);
     MFC_UNLOCK();
 
     /* Free the bw_meter entry */
     free(bm, M_BWMETER);
     return 0;
 }
 
 /*
  * Perform bandwidth measurement processing that may result in an upcall.
  *
  * Accounts one packet of 'plen' bytes, seen at time '*nowp', against the
  * bw_meter entry 'x'.  Depending on the entry's type (">=" or "<=") this
  * may restart its measuring interval and/or prepare an upcall.
  * The MFC lock must be held.
  */
 static void
 bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp)
 {
     struct timeval delta;
 
     MFC_LOCK_ASSERT();
 
     /* delta = time elapsed since the current measuring interval began */
     delta = *nowp;
     BW_TIMEVALDECR(&delta, &x->bm_start_time);
 
     if (x->bm_flags & BW_METER_GEQ) {
 	/*
 	 * Processing for ">=" type of bw_meter entry
 	 */
 	if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) {
 	    /* Reset the bw_meter entry */
 	    x->bm_start_time = *nowp;
 	    x->bm_measured.b_packets = 0;
 	    x->bm_measured.b_bytes = 0;
 	    x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
 	}
 
 	/* Record that a packet is received */
 	x->bm_measured.b_packets++;
 	x->bm_measured.b_bytes += plen;
 
 	/*
 	 * Test if we should deliver an upcall
 	 * (at most one per measuring interval, tracked through the
 	 * BW_METER_UPCALL_DELIVERED flag)
 	 */
 	if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) {
 	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
 		 (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) ||
 		((x->bm_flags & BW_METER_UNIT_BYTES) &&
 		 (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) {
 		/* Prepare an upcall for delivery */
 		bw_meter_prepare_upcall(x, nowp);
 		x->bm_flags |= BW_METER_UPCALL_DELIVERED;
 	    }
 	}
     } else if (x->bm_flags & BW_METER_LEQ) {
 	/*
 	 * Processing for "<=" type of bw_meter entry
 	 */
 	if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) {
 	    /*
 	     * We are behind time with the multicast forwarding table
 	     * scanning for "<=" type of bw_meter entries, so test now
 	     * if we should deliver an upcall.
 	     */
 	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
 		 (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
 		((x->bm_flags & BW_METER_UNIT_BYTES) &&
 		 (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
 		/* Prepare an upcall for delivery */
 		bw_meter_prepare_upcall(x, nowp);
 	    }
 	    /* Reschedule the bw_meter entry (this also resets its counters) */
 	    unschedule_bw_meter(x);
 	    schedule_bw_meter(x, nowp);
 	}
 
 	/* Record that a packet is received */
 	x->bm_measured.b_packets++;
 	x->bm_measured.b_bytes += plen;
 
 	/*
 	 * Test if we should restart the measuring interval
 	 */
 	if ((x->bm_flags & BW_METER_UNIT_PACKETS &&
 	     x->bm_measured.b_packets <= x->bm_threshold.b_packets) ||
 	    (x->bm_flags & BW_METER_UNIT_BYTES &&
 	     x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) {
 	    /* Don't restart the measuring interval */
 	} else {
 	    /* Do restart the measuring interval */
 	    /*
 	     * XXX: note that we don't unschedule and schedule, because this
 	     * might be too much overhead per packet. Instead, when we process
 	     * all entries for a given timer hash bin, we check whether it is
 	     * really a timeout. If not, we reschedule at that time.
 	     */
 	    x->bm_start_time = *nowp;
 	    x->bm_measured.b_packets = 0;
 	    x->bm_measured.b_bytes = 0;
 	    x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
 	}
     }
 }
 
 /*
  * Prepare a bandwidth-related upcall
  */
 static void
 bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp)
 {
     struct timeval delta;
     struct bw_upcall *u;
 
     MFC_LOCK_ASSERT();
 
     /*
      * Compute the measured time interval
      */
     delta = *nowp;
     BW_TIMEVALDECR(&delta, &x->bm_start_time);
 
     /*
      * If there are too many pending upcalls, deliver them now
      */
     if (bw_upcalls_n >= BW_UPCALLS_MAX)
 	bw_upcalls_send();
 
     /*
      * Set the bw_upcall entry
      */
     u = &bw_upcalls[bw_upcalls_n++];
     u->bu_src = x->bm_mfc->mfc_origin;
     u->bu_dst = x->bm_mfc->mfc_mcastgrp;
     u->bu_threshold.b_time = x->bm_threshold.b_time;
     u->bu_threshold.b_packets = x->bm_threshold.b_packets;
     u->bu_threshold.b_bytes = x->bm_threshold.b_bytes;
     u->bu_measured.b_time = delta;
     u->bu_measured.b_packets = x->bm_measured.b_packets;
     u->bu_measured.b_bytes = x->bm_measured.b_bytes;
     u->bu_flags = 0;
     if (x->bm_flags & BW_METER_UNIT_PACKETS)
 	u->bu_flags |= BW_UPCALL_UNIT_PACKETS;
     if (x->bm_flags & BW_METER_UNIT_BYTES)
 	u->bu_flags |= BW_UPCALL_UNIT_BYTES;
     if (x->bm_flags & BW_METER_GEQ)
 	u->bu_flags |= BW_UPCALL_GEQ;
     if (x->bm_flags & BW_METER_LEQ)
 	u->bu_flags |= BW_UPCALL_LEQ;
 }
 
 /*
  * Send the pending bandwidth-related upcalls
  */
 static void
 bw_upcalls_send(void)
 {
     struct mbuf *m;
     int len = bw_upcalls_n * sizeof(bw_upcalls[0]);
     struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
     static struct igmpmsg igmpmsg = { 0,		/* unused1 */
 				      0,		/* unused2 */
 				      IGMPMSG_BW_UPCALL,/* im_msgtype */
 				      0,		/* im_mbz  */
 				      0,		/* im_vif  */
 				      0,		/* unused3 */
 				      { 0 },		/* im_src  */
 				      { 0 } };		/* im_dst  */
 
     MFC_LOCK_ASSERT();
 
     if (bw_upcalls_n == 0)
 	return;			/* No pending upcalls */
 
     bw_upcalls_n = 0;
 
     /*
      * Allocate a new mbuf, initialize it with the header and
      * the payload for the pending calls.
      */
     MGETHDR(m, M_DONTWAIT, MT_DATA);
     if (m == NULL) {
 	log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n");
 	return;
     }
 
     m->m_len = m->m_pkthdr.len = 0;
     m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg);
     m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&bw_upcalls[0]);
 
     /*
      * Send the upcalls
      * XXX do we need to set the address in k_igmpsrc ?
      */
     mrtstat.mrts_upcalls++;
     if (socket_send(ip_mrouter, m, &k_igmpsrc) < 0) {
 	log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n");
 	++mrtstat.mrts_upq_sockfull;
     }
 }
 
 /*
  * Compute the timeout hash value for the bw_meter entries.
  *
  * The end of the measuring interval is bm_start_time + bm_threshold.b_time.
  * 'hash' receives the seconds part of that time, rounded up by one when
  * there is a microseconds remainder, reduced modulo BW_METER_BUCKETS.
  * 'hash' must be an lvalue; 'bw_meter' is evaluated more than once.
  */
 #define	BW_METER_TIMEHASH(bw_meter, hash)				\
     do {								\
 	struct timeval next_timeval = (bw_meter)->bm_start_time;	\
 									\
 	BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \
 	(hash) = next_timeval.tv_sec;					\
 	if (next_timeval.tv_usec)					\
 	    (hash)++; /* XXX: make sure we don't timeout early */	\
 	(hash) %= BW_METER_BUCKETS;					\
     } while (0)
 
 /*
  * Schedule the periodic processing of a "<=" bw_meter entry.
  *
  * The entry's measurement is restarted at '*nowp' and the entry is put at
  * the head of the bw_meter_timers[] bucket that corresponds to the end of
  * its measuring interval.  Entries of any other type are left untouched.
  * The MFC lock must be held.
  */
 static void
 schedule_bw_meter(struct bw_meter *x, struct timeval *nowp)
 {
     int bucket;
 
     MFC_LOCK_ASSERT();
 
     /* XXX: we schedule timers only for "<=" entries */
     if ((x->bm_flags & BW_METER_LEQ) == 0)
 	return;
 
     /* Start a fresh measuring interval */
     x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
     x->bm_measured.b_bytes = 0;
     x->bm_measured.b_packets = 0;
     x->bm_start_time = *nowp;
 
     /* The bucket follows from the start time that was just set */
     BW_METER_TIMEHASH(x, bucket);
 
     /* Link the entry in at the head of that bucket */
     x->bm_time_hash = bucket;
     x->bm_time_next = bw_meter_timers[bucket];
     bw_meter_timers[bucket] = x;
 }
 
 /*
  * Undo schedule_bw_meter(): take a "<=" bw_meter entry out of the
  * bw_meter_timers[] bucket recorded in its bm_time_hash.
  *
  * Nothing is done for entries of another type or for entries that are not
  * scheduled (bm_time_hash >= BW_METER_BUCKETS).  Panics if the entry is
  * not found in the bucket it claims to be in.  The MFC lock must be held.
  */
 static void
 unschedule_bw_meter(struct bw_meter *x)
 {
     struct bw_meter **linkp;
     int bucket;
 
     MFC_LOCK_ASSERT();
 
     /* XXX: we schedule timers only for "<=" entries */
     if ((x->bm_flags & BW_METER_LEQ) == 0)
 	return;
 
     bucket = x->bm_time_hash;
     if (bucket >= BW_METER_BUCKETS)
 	return;		/* Entry was not scheduled */
 
     /* Walk the links of the bucket until the one pointing at x is found */
     for (linkp = &bw_meter_timers[bucket]; *linkp != NULL;
 	     linkp = &(*linkp)->bm_time_next)
 	if (*linkp == x)
 	    break;
 
     if (*linkp == NULL)
 	panic("unschedule_bw_meter: bw_meter entry not found");
 
     /* Bypass x, then mark it as unscheduled */
     *linkp = x->bm_time_next;
 
     x->bm_time_next = NULL;
     x->bm_time_hash = BW_METER_BUCKETS;
 }
 
 
 /*
  * Process all "<=" type of bw_meter that should be processed now,
  * and for each entry prepare an upcall if necessary. Each processed
  * entry is rescheduled again for the (periodic) processing.
  *
  * This is run periodically (once per second normally). On each round,
  * all the potentially matching entries are in the hash slot that we are
  * looking at.
  *
  * The function returns at once when it is called again within the same
  * second (last_tv_sec remembers the second of the previous run).
  * Otherwise one bucket is visited for every second that went by since
  * then, capped at BW_METER_BUCKETS, and all pending upcalls are sent
  * at the end.  The MFC lock is taken and released here.
  */
 static void
 bw_meter_process()
 {
     static uint32_t last_tv_sec;	/* last time we processed this */
 
     uint32_t loops;
     int i;
     struct timeval now, process_endtime;
 
     GET_TIME(now);
     if (last_tv_sec == now.tv_sec)
 	return;		/* nothing to do */
 
     /* Number of buckets to visit: one per elapsed second, at most all */
     loops = now.tv_sec - last_tv_sec;
     last_tv_sec = now.tv_sec;
     if (loops > BW_METER_BUCKETS)
 	loops = BW_METER_BUCKETS;
 
     MFC_LOCK();
     /*
      * Process all bins of bw_meter entries from the one after the last
      * processed to the current one. On entry, i points to the last bucket
      * visited, so we need to increment i at the beginning of the loop.
      */
     for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) {
 	struct bw_meter *x, *tmp_list;
 
 	if (++i >= BW_METER_BUCKETS)
 	    i = 0;
 
 	/* Disconnect the list of bw_meter entries from the bin */
 	tmp_list = bw_meter_timers[i];
 	bw_meter_timers[i] = NULL;
 
 	/* Process the list of bw_meter entries */
 	while (tmp_list != NULL) {
 	    x = tmp_list;
 	    tmp_list = tmp_list->bm_time_next;
 
 	    /* Test if the time interval is over */
 	    process_endtime = x->bm_start_time;
 	    BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time);
 	    if (BW_TIMEVALCMP(&process_endtime, &now, >)) {
 		/* Not yet: reschedule, but don't reset */
 		int time_hash;
 
 		BW_METER_TIMEHASH(x, time_hash);
 		if (time_hash == i && process_endtime.tv_sec == now.tv_sec) {
 		    /*
 		     * XXX: somehow the bin processing is a bit ahead of time.
 		     * Put the entry in the next bin.
 		     */
 		    if (++time_hash >= BW_METER_BUCKETS)
 			time_hash = 0;
 		}
 		/* Relink by hand: schedule_bw_meter() would reset the entry */
 		x->bm_time_next = bw_meter_timers[time_hash];
 		bw_meter_timers[time_hash] = x;
 		x->bm_time_hash = time_hash;
 
 		continue;
 	    }
 
 	    /*
 	     * Test if we should deliver an upcall
 	     */
 	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
 		 (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
 		((x->bm_flags & BW_METER_UNIT_BYTES) &&
 		 (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
 		/* Prepare an upcall for delivery */
 		bw_meter_prepare_upcall(x, &now);
 	    }
 
 	    /*
 	     * Reschedule for next processing
 	     */
 	    schedule_bw_meter(x, &now);
 	}
     }
 
     /* Send all upcalls that are pending delivery */
     bw_upcalls_send();
 
     MFC_UNLOCK();
 }
 
 /*
  * A periodic function for sending all upcalls that are pending delivery.
  *
  * Sends the pending bandwidth upcalls with the MFC lock held, then
  * re-arms the bw_upcalls_ch callout with a timeout of BW_UPCALLS_PERIOD
  * so that this function is called again.  'unused' is not looked at.
  */
 static void
 expire_bw_upcalls_send(void *unused)
 {
     MFC_LOCK();
     bw_upcalls_send();
     MFC_UNLOCK();
 
     callout_reset(&bw_upcalls_ch, BW_UPCALLS_PERIOD,
 	expire_bw_upcalls_send, NULL);
 }
 
 /*
  * A periodic function for periodic scanning of the multicast forwarding
  * table for processing all "<=" bw_meter entries.
  *
  * bw_meter_process() is only called while MRT_MFC_BW_UPCALL is set in
  * mrt_api_config; the bw_meter_ch callout is re-armed with a timeout of
  * BW_METER_PERIOD in either case.  'unused' is not looked at.
  */
 static void
 expire_bw_meter_process(void *unused)
 {
     if (mrt_api_config & MRT_MFC_BW_UPCALL)
 	bw_meter_process();
 
     callout_reset(&bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process, NULL);
 }
 
 /*
  * End of bandwidth monitoring code
  */
 
 /*
  * Send the packet up to the user daemon, or eventually do kernel
  * encapsulation.
  *
  * A Register-ready copy of packet 'm' is made (possibly as a chain of
  * fragments).  Each piece goes to pim_register_send_rp() when the
  * MRT_MFC_RP API is enabled and the cache entry 'rt' has a rendezvous
  * point address set, and to pim_register_send_upcall() otherwise.
  *
  * Returns ENOBUFS when the copy cannot be made and 0 in all other cases;
  * the results of the individual sends are not propagated.
  */
 static int
 pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m,
     struct mfc *rt)
 {
     struct mbuf *pending, *frag;
 
     if (mrtdebug & DEBUG_PIM)
 	log(LOG_DEBUG, "pim_register_send: ");
 
     /*
      * Do not send IGMP_WHOLEPKT notifications to userland, if the
      * rendezvous point was unspecified, and we were told not to.
      */
     if (pim_squelch_wholepkt != 0 && (mrt_api_config & MRT_MFC_RP) &&
 	(rt->mfc_rp.s_addr == INADDR_ANY))
 	return 0;
 
     pending = pim_register_prepare(ip, m);
     if (pending == NULL)
 	return ENOBUFS;
 
     /*
      * Send all the fragments. Note that the mbuf for each fragment
      * is freed by the sending machinery.
      */
     while (pending != NULL) {
 	/* Detach the first packet from the m_nextpkt chain */
 	frag = pending;
 	pending = frag->m_nextpkt;
 	frag->m_nextpkt = 0;
 
 	frag = m_pullup(frag, sizeof(struct ip));
 	if (frag == NULL)
 	    continue;		/* nothing left to send for this one */
 
 	ip = mtod(frag, struct ip *);
 	if ((mrt_api_config & MRT_MFC_RP) &&
 	    (rt->mfc_rp.s_addr != INADDR_ANY))
 	    pim_register_send_rp(ip, vifp, frag, rt);
 	else
 	    pim_register_send_upcall(ip, vifp, frag, rt);
     }
 
     return 0;
 }
 
 /*
  * Return a copy of the data packet that is ready for PIM Register
  * encapsulation, or NULL if the copy could not be made.
  *
  * The TTL of the copy is decremented.  If the packet still fits once the
  * encapsulating headers are accounted for, its IP header is finalized in
  * place (ip_len/ip_off through htons(), checksum filled in); if not, the
  * copy is handed to ip_fragment().
  * XXX: Note that in the returned copy the IP header is a valid one.
  */
 static struct mbuf *
 pim_register_prepare(struct ip *ip, struct mbuf *m)
 {
     struct mbuf *copy;
     int mtu;
 
     /* Take care of delayed checksums */
     if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 	in_delayed_cksum(m);
 	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
     }
 
     /*
      * Copy the old packet & pullup its IP header into the
      * new mbuf so we can modify it.
      */
     copy = m_copypacket(m, M_DONTWAIT);
     if (copy != NULL)
 	copy = m_pullup(copy, ip->ip_hl << 2);
     if (copy == NULL)
 	return NULL;
 
     /* From here on work on the header of the copy; take care of the TTL */
     ip = mtod(copy, struct ip *);
     --ip->ip_ttl;
 
     /* Compute the MTU after the PIM Register encapsulation */
     mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr);
 
     if (ip->ip_len > mtu) {
 	/* Too big: fragment the packet */
 	if (ip_fragment(ip, &copy, mtu, 0, CSUM_DELAY_IP) != 0) {
 	    m_freem(copy);
 	    return NULL;
 	}
 	return copy;
     }
 
     /* Turn the IP header into a valid one */
     ip->ip_len = htons(ip->ip_len);
     ip->ip_off = htons(ip->ip_off);
     ip->ip_sum = 0;
     ip->ip_sum = in_cksum(copy, ip->ip_hl << 2);
 
     return copy;
 }
 
 /*
  * Send an upcall with the data packet to the user-level process.
  *
  *	ip	  IP header of the packet in mb_copy; ip_len is read through
  *		  ntohs()
  *	vifp	  vif the Register is sent for; its index goes into im_vif
  *	mb_copy	  the packet; freed here if no header mbuf can be allocated,
  *		  otherwise chained behind the upcall header and handed to
  *		  socket_send()
  *	rt	  not used by this function
  *
  * Returns 0 on success and ENOBUFS when the header mbuf cannot be
  * allocated or socket_send() fails.  The VIF lock must be held.
  */
 static int
 pim_register_send_upcall(struct ip *ip, struct vif *vifp,
     struct mbuf *mb_copy, struct mfc *rt)
 {
     struct mbuf *mb_first;
     int len = ntohs(ip->ip_len);
     struct igmpmsg *im;
     struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
 
     VIF_LOCK_ASSERT();
 
     /*
      * Add a new mbuf with an upcall header
      */
     MGETHDR(mb_first, M_DONTWAIT, MT_DATA);
     if (mb_first == NULL) {
 	m_freem(mb_copy);
 	return ENOBUFS;
     }
     mb_first->m_data += max_linkhdr;
     mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg);
     mb_first->m_len = sizeof(struct igmpmsg);
     mb_first->m_next = mb_copy;
 
     /* Send message to routing daemon */
     im = mtod(mb_first, struct igmpmsg *);
     im->im_msgtype	= IGMPMSG_WHOLEPKT;
     im->im_mbz		= 0;
     im->im_vif		= vifp - viftable;
     im->im_src		= ip->ip_src;
     im->im_dst		= ip->ip_dst;
 
     k_igmpsrc.sin_addr	= ip->ip_src;
 
     mrtstat.mrts_upcalls++;
 
     if (socket_send(ip_mrouter, mb_first, &k_igmpsrc) < 0) {
 	/*
 	 * The message is newline-terminated like the other "socket
 	 * queue full" warnings in this file, so that whatever is
 	 * logged next does not end up on the same line.
 	 */
 	if (mrtdebug & DEBUG_PIM)
 	    log(LOG_WARNING,
 		"mcast: pim_register_send_upcall: ip_mrouter socket queue full\n");
 	++mrtstat.mrts_upq_sockfull;
 	return ENOBUFS;
     }
 
     /* Keep statistics */
     pimstat.pims_snd_registers_msgs++;
     pimstat.pims_snd_registers_bytes += len;
 
     return 0;
 }
 
 /*
  * Encapsulate the data packet in PIM Register message and send it to the RP.
  */
 static int
 pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
     struct mfc *rt)
 {
     struct mbuf *mb_first;
     struct ip *ip_outer;
     struct pim_encap_pimhdr *pimhdr;
     int len = ntohs(ip->ip_len);
     vifi_t vifi = rt->mfc_parent;
 
     VIF_LOCK_ASSERT();
 
     if ((vifi >= numvifs) || (viftable[vifi].v_lcl_addr.s_addr == 0)) {
 	m_freem(mb_copy);
 	return EADDRNOTAVAIL;		/* The iif vif is invalid */
     }
 
     /*
      * Add a new mbuf with the encapsulating header
      */
     MGETHDR(mb_first, M_DONTWAIT, MT_DATA);
     if (mb_first == NULL) {
 	m_freem(mb_copy);
 	return ENOBUFS;
     }
     mb_first->m_data += max_linkhdr;
     mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr);
     mb_first->m_next = mb_copy;
 
     mb_first->m_pkthdr.len = len + mb_first->m_len;
 
     /*
      * Fill in the encapsulating IP and PIM header
      */
     ip_outer = mtod(mb_first, struct ip *);
     *ip_outer = pim_encap_iphdr;
     ip_outer->ip_id = ip_newid();
     ip_outer->ip_len = len + sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr);
     ip_outer->ip_src = viftable[vifi].v_lcl_addr;
     ip_outer->ip_dst = rt->mfc_rp;
     /*
      * Copy the inner header TOS to the outer header, and take care of the
      * IP_DF bit.
      */
     ip_outer->ip_tos = ip->ip_tos;
     if (ntohs(ip->ip_off) & IP_DF)
 	ip_outer->ip_off |= IP_DF;
     pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer
 					 + sizeof(pim_encap_iphdr));
     *pimhdr = pim_encap_pimhdr;
     /* If the iif crosses a border, set the Border-bit */
     if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & mrt_api_config)
 	pimhdr->flags |= htonl(PIM_BORDER_REGISTER);
 
     mb_first->m_data += sizeof(pim_encap_iphdr);
     pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr));
     mb_first->m_data -= sizeof(pim_encap_iphdr);
 
     send_packet(vifp, mb_first);
 
     /* Keep statistics */
     pimstat.pims_snd_registers_msgs++;
     pimstat.pims_snd_registers_bytes += len;
 
     return 0;
 }
 
 /*
  * Match callback handed to encap_attach_func() for the PIM protocol.
  * The encap[46]_input() path invokes it at runtime to decide whether a
  * datagram belongs to PIM, which lets PIM be loaded dynamically.
  *
  * Only `proto' is examined: IPPROTO_PIM yields 64 (claim the datagram),
  * anything else yields 0 (not for us; reject it).
  */
 static int
 pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 
 #ifdef DIAGNOSTIC
     KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
 #endif
     return (proto == IPPROTO_PIM) ? 64 : 0;
 }
 
 /*
  * PIM-SMv2 and PIM-DM messages processing.
  * Receives and verifies the PIM control messages, and passes them
  * up to the listening socket, using rip_input().
  * The only message with special processing is the PIM_REGISTER message
  * (used by PIM-SM): the PIM header is stripped off, and the inner packet
  * is passed to if_simloop().
  *
  * m   - received packet; m_data points at the outer IP header.
  * off - length of the outer IP header (used below as iphlen).
  *
  * ip_len of the outer header is used as-is (no byte swap) as the PIM
  * message length.  Every return path below either frees m or hands it on
  * to if_simloop()/rip_input(); on m_pullup() failure m is already 0 and
  * nothing further is done.
  */
 void
 pim_input(struct mbuf *m, int off)
 {
     struct ip *ip = mtod(m, struct ip *);
     struct pim *pim;
     int minlen;
     int datalen = ip->ip_len;
     int ip_tos;
     int iphlen = off;
 
     /* Keep statistics */
     pimstat.pims_rcv_total_msgs++;
     pimstat.pims_rcv_total_bytes += datalen;
 
     /*
      * Validate lengths
      */
     if (datalen < PIM_MINLEN) {
 	pimstat.pims_rcv_tooshort++;
 	log(LOG_ERR, "pim_input: packet size too small %d from %lx\n",
 	    datalen, (u_long)ip->ip_src.s_addr);
 	m_freem(m);
 	return;
     }
 
     /*
      * If the packet is at least as big as a REGISTER, go ahead
      * and grab the PIM REGISTER header size, to avoid another
      * possible m_pullup() later.
      *
      * PIM_MINLEN       == pimhdr + u_int32_t == 4 + 4 = 8
      * PIM_REG_MINLEN   == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28
      */
     minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN);
     /*
      * Get the IP and PIM headers in contiguous memory, and
      * possibly the PIM REGISTER header.
      */
     if ((m->m_flags & M_EXT || m->m_len < minlen) &&
 	(m = m_pullup(m, minlen)) == 0) {
 	log(LOG_ERR, "pim_input: m_pullup failure\n");
 	return;
     }
     /* m_pullup() may have given us a new mbuf so reset ip. */
     ip = mtod(m, struct ip *);
     ip_tos = ip->ip_tos;
 
     /* adjust mbuf to point to the PIM header */
     m->m_data += iphlen;
     m->m_len  -= iphlen;
     pim = mtod(m, struct pim *);
 
     /*
      * Validate checksum. If PIM REGISTER, exclude the data packet.
      *
      * XXX: some older PIMv2 implementations don't make this distinction,
      * so for compatibility reason perform the checksum over part of the
      * message, and if error, then over the whole message.
      */
     if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) {
 	/* do nothing, checksum okay */
     } else if (in_cksum(m, datalen)) {
 	pimstat.pims_rcv_badsum++;
 	if (mrtdebug & DEBUG_PIM)
 	    log(LOG_DEBUG, "pim_input: invalid checksum\n");
 	m_freem(m);
 	return;
     }
 
     /* PIM version check */
     if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) {
 	pimstat.pims_rcv_badversion++;
 	log(LOG_ERR, "pim_input: incorrect version %d, expecting %d\n",
 	    PIM_VT_V(pim->pim_vt), PIM_VERSION);
 	m_freem(m);
 	return;
     }
 
     /* restore mbuf back to the outer IP */
     m->m_data -= iphlen;
     m->m_len  += iphlen;
 
     if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) {
 	/*
 	 * Since this is a REGISTER, we'll make a copy of the register
 	 * headers ip + pim + u_int32 + encap_ip, to be passed up to the
 	 * routing daemon.
 	 */
 	struct sockaddr_in dst = { sizeof(dst), AF_INET };
 	struct mbuf *mcp;
 	struct ip *encap_ip;
 	u_int32_t *reghdr;
 	struct ifnet *vifp;
 
 	VIF_LOCK();
 	if ((reg_vif_num >= numvifs) || (reg_vif_num == VIFI_INVALID)) {
 	    VIF_UNLOCK();
 	    if (mrtdebug & DEBUG_PIM)
 		log(LOG_DEBUG,
 		    "pim_input: register vif not set: %d\n", reg_vif_num);
 	    m_freem(m);
 	    return;
 	}
 	/* XXX need refcnt? */
 	vifp = viftable[reg_vif_num].v_ifp;
 	VIF_UNLOCK();
 
 	/*
 	 * Validate length
 	 */
 	if (datalen < PIM_REG_MINLEN) {
 	    pimstat.pims_rcv_tooshort++;
 	    pimstat.pims_rcv_badregisters++;
 	    log(LOG_ERR,
 		"pim_input: register packet size too small %d from %lx\n",
 		datalen, (u_long)ip->ip_src.s_addr);
 	    m_freem(m);
 	    return;
 	}
 
 	/* The register header follows the PIM header; the inner IP follows it. */
 	reghdr = (u_int32_t *)(pim + 1);
 	encap_ip = (struct ip *)(reghdr + 1);
 
 	if (mrtdebug & DEBUG_PIM) {
 	    log(LOG_DEBUG,
 		"pim_input[register], encap_ip: %lx -> %lx, encap_ip len %d\n",
 		(u_long)ntohl(encap_ip->ip_src.s_addr),
 		(u_long)ntohl(encap_ip->ip_dst.s_addr),
 		ntohs(encap_ip->ip_len));
 	}
 
 	/* verify the version number of the inner packet */
 	if (encap_ip->ip_v != IPVERSION) {
 	    pimstat.pims_rcv_badregisters++;
 	    if (mrtdebug & DEBUG_PIM) {
 		log(LOG_DEBUG, "pim_input: invalid IP version (%d) "
 		    "of the inner packet\n", encap_ip->ip_v);
 	    }
 	    m_freem(m);
 	    return;
 	}
 
 	/* verify the inner packet is destined to a mcast group */
 	if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) {
 	    pimstat.pims_rcv_badregisters++;
 	    if (mrtdebug & DEBUG_PIM)
 		log(LOG_DEBUG,
 		    "pim_input: inner packet of register is not "
 		    "multicast %lx\n",
 		    (u_long)ntohl(encap_ip->ip_dst.s_addr));
 	    m_freem(m);
 	    return;
 	}
 
 	/* If a NULL_REGISTER, pass it to the daemon */
 	if ((ntohl(*reghdr) & PIM_NULL_REGISTER))
 	    goto pim_input_to_daemon;
 
 	/*
 	 * Copy the TOS from the outer IP header to the inner IP header.
 	 */
 	if (encap_ip->ip_tos != ip_tos) {
 	    /* Outer TOS -> inner TOS */
 	    encap_ip->ip_tos = ip_tos;
 	    /* Recompute the inner header checksum. Sigh... */
 
 	    /* adjust mbuf to point to the inner IP header */
 	    m->m_data += (iphlen + PIM_MINLEN);
 	    m->m_len  -= (iphlen + PIM_MINLEN);
 
 	    encap_ip->ip_sum = 0;
 	    encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2);
 
 	    /* restore mbuf to point back to the outer IP header */
 	    m->m_data -= (iphlen + PIM_MINLEN);
 	    m->m_len  += (iphlen + PIM_MINLEN);
 	}
 
 	/*
 	 * Decapsulate the inner IP packet and loopback to forward it
 	 * as a normal multicast packet. Also, make a copy of the
 	 *     outer_iphdr + pimhdr + reghdr + encap_iphdr
 	 * to pass to the daemon later, so it can take the appropriate
 	 * actions (e.g., send back PIM_REGISTER_STOP).
 	 * XXX: here m->m_data points to the outer IP header.
 	 */
 	mcp = m_copy(m, 0, iphlen + PIM_REG_MINLEN);
 	if (mcp == NULL) {
 	    log(LOG_ERR,
 		"pim_input: pim register: could not copy register head\n");
 	    m_freem(m);
 	    return;
 	}
 
 	/* Keep statistics */
 	/* XXX: registers_bytes include only the encap. mcast pkt */
 	pimstat.pims_rcv_registers_msgs++;
 	pimstat.pims_rcv_registers_bytes += ntohs(encap_ip->ip_len);
 
 	/*
 	 * forward the inner ip packet; point m_data at the inner ip.
 	 */
 	m_adj(m, iphlen + PIM_MINLEN);
 
 	if (mrtdebug & DEBUG_PIM) {
 	    log(LOG_DEBUG,
 		"pim_input: forwarding decapsulated register: "
 		"src %lx, dst %lx, vif %d\n",
 		(u_long)ntohl(encap_ip->ip_src.s_addr),
 		(u_long)ntohl(encap_ip->ip_dst.s_addr),
 		reg_vif_num);
 	}
 	/* NB: vifp was collected above; can it change on us? */
 	if_simloop(vifp, m, dst.sin_family, 0);
 
 	/* prepare the register head to send to the mrouting daemon */
 	m = mcp;
     }
 
 pim_input_to_daemon:
     /*
      * Pass the PIM message up to the daemon; if it is a Register message,
      * pass the 'head' only up to the daemon. This includes the
      * outer IP header, PIM header, PIM-Register header and the
      * inner IP header.
      * XXX: the outer IP header pkt size of a Register is not adjusted to
      * reflect the fact that the inner multicast data is truncated.
      */
     rip_input(m, iphlen);
 
     return;
 }
 
 /*
  * Module event handler for ip_mroute.
  *
  * XXX: This is common code for dealing with initialization for both
  * the IPv4 and IPv6 multicast forwarding paths. It could do with cleanup.
  *
  * MOD_LOAD:   initialise the locks, reset router state, fetch the
  *             net.inet.pim.squelch_wholepkt tunable, attach the PIM encap
  *             handler(s) and install the X_* entry points in the global
  *             hook pointers.  Returns EINVAL (after undoing the lock
  *             setup and any encap attach) if an encap attach fails.
  * MOD_UNLOAD: refused with EINVAL while ip_mrouter (or ip6_mrouter) is
  *             set; otherwise detaches the encap handlers, runs the
  *             X_*_mrouter_done() routines, clears the hook pointers and
  *             destroys the locks.
  * Any other event type returns EOPNOTSUPP.  Returns 0 on success.
  */
 static int
 ip_mroute_modevent(module_t mod, int type, void *unused)
 {
     switch (type) {
     case MOD_LOAD:
 	MROUTER_LOCK_INIT();
 	MFC_LOCK_INIT();
 	VIF_LOCK_INIT();
 	ip_mrouter_reset();
 	TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt",
 	    &pim_squelch_wholepkt);
 
 	/* Attach the IPv4 PIM encap handler; undo lock setup on failure. */
 	pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM,
 	    pim_encapcheck, &in_pim_protosw, NULL);
 	if (pim_encap_cookie == NULL) {
 		printf("ip_mroute: unable to attach pim encap\n");
 		VIF_LOCK_DESTROY();
 		MFC_LOCK_DESTROY();
 		MROUTER_LOCK_DESTROY();
 		return (EINVAL);
 	}
 
 #ifdef INET6
 	/* Same for IPv6; on failure also detach the IPv4 handler. */
 	pim6_encap_cookie = encap_attach_func(AF_INET6, IPPROTO_PIM,
 	    pim_encapcheck, (struct protosw *)&in6_pim_protosw, NULL);
 	if (pim6_encap_cookie == NULL) {
 		printf("ip_mroute: unable to attach pim6 encap\n");
 		if (pim_encap_cookie) {
 		    encap_detach(pim_encap_cookie);
 		    pim_encap_cookie = NULL;
 		}
 		VIF_LOCK_DESTROY();
 		MFC_LOCK_DESTROY();
 		MROUTER_LOCK_DESTROY();
 		return (EINVAL);
 	}
 #endif
 
 	/* Install this module's implementations in the global hook pointers. */
 	ip_mcast_src = X_ip_mcast_src;
 	ip_mforward = X_ip_mforward;
 	ip_mrouter_done = X_ip_mrouter_done;
 	ip_mrouter_get = X_ip_mrouter_get;
 	ip_mrouter_set = X_ip_mrouter_set;
 
 #ifdef INET6
 	ip6_mforward = X_ip6_mforward;
 	ip6_mrouter_done = X_ip6_mrouter_done;
 	ip6_mrouter_get = X_ip6_mrouter_get;
 	ip6_mrouter_set = X_ip6_mrouter_set;
 	mrt6_ioctl = X_mrt6_ioctl;
 #endif
 
 	ip_rsvp_force_done = X_ip_rsvp_force_done;
 	ip_rsvp_vif = X_ip_rsvp_vif;
 
 	legal_vif_num = X_legal_vif_num;
 	mrt_ioctl = X_mrt_ioctl;
 	rsvp_input_p = X_rsvp_input;
 	break;
 
     case MOD_UNLOAD:
 	/*
 	 * Typically module unload happens after the user-level
 	 * process has shutdown the kernel services (the check
 	 * below ensures someone can't just yank the module out
 	 * from under a running process).  But if the module is
 	 * just loaded and then unloaded w/o starting up a user
 	 * process we still need to cleanup.
 	 */
 	if (ip_mrouter
 #ifdef INET6
 	    || ip6_mrouter
 #endif
 	)
 	    return EINVAL;
 
 #ifdef INET6
 	/* IPv6 side: detach encap, run shutdown, clear the hook pointers. */
 	if (pim6_encap_cookie) {
 	    encap_detach(pim6_encap_cookie);
 	    pim6_encap_cookie = NULL;
 	}
 	X_ip6_mrouter_done();
 	ip6_mforward = NULL;
 	ip6_mrouter_done = NULL;
 	ip6_mrouter_get = NULL;
 	ip6_mrouter_set = NULL;
 	mrt6_ioctl = NULL;
 #endif
 
 	/* IPv4 side: detach encap, run shutdown, clear the hook pointers. */
 	if (pim_encap_cookie) {
 	    encap_detach(pim_encap_cookie);
 	    pim_encap_cookie = NULL;
 	}
 	X_ip_mrouter_done();
 	ip_mcast_src = NULL;
 	ip_mforward = NULL;
 	ip_mrouter_done = NULL;
 	ip_mrouter_get = NULL;
 	ip_mrouter_set = NULL;
 
 	ip_rsvp_force_done = NULL;
 	ip_rsvp_vif = NULL;
 
 	legal_vif_num = NULL;
 	mrt_ioctl = NULL;
 	rsvp_input_p = NULL;
 
 	/* Locks are destroyed in the reverse of the order they were set up. */
 	VIF_LOCK_DESTROY();
 	MFC_LOCK_DESTROY();
 	MROUTER_LOCK_DESTROY();
 	break;
 
     default:
 	return EOPNOTSUPP;
     }
     return 0;
 }
 
 /*
  * Module descriptor: module name, the event handler defined above, and a
  * zero third member.  DECLARE_MODULE registers it under SI_SUB_PSEUDO
  * with SI_ORDER_ANY.
  */
 static moduledata_t ip_mroutemod = {
     "ip_mroute",
     ip_mroute_modevent,
     0
 };
 DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY);
Index: head/sys/netinet/ip_mroute.h
===================================================================
--- head/sys/netinet/ip_mroute.h	(revision 178887)
+++ head/sys/netinet/ip_mroute.h	(revision 178888)
@@ -1,366 +1,366 @@
 /*-
  * Copyright (c) 1989 Stephen Deering.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Stephen Deering of Stanford University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_mroute.h	8.1 (Berkeley) 6/10/93
  * $FreeBSD$
  */
 
 #ifndef _NETINET_IP_MROUTE_H_
 #define _NETINET_IP_MROUTE_H_
 
 /*
  * Definitions for IP multicast forwarding.
  *
  * Written by David Waitzman, BBN Labs, August 1988.
  * Modified by Steve Deering, Stanford, February 1989.
  * Modified by Ajit Thyagarajan, PARC, August 1993.
  * Modified by Ajit Thyagarajan, PARC, August 1994.
  * Modified by Ahmed Helmy, SGI, June 1996.
  * Modified by Pavlin Radoslavov, ICSI, October 2002.
  *
  * MROUTING Revision: 3.3.1.3
  * and PIM-SMv2 and PIM-DM support, advanced API support,
  * bandwidth metering and signaling.
  */
 
 
 /*
  * Multicast Routing set/getsockopt commands.
  */
 #define	MRT_INIT	100	/* initialize forwarder */
 #define	MRT_DONE	101	/* shut down forwarder */
 #define	MRT_ADD_VIF	102	/* create virtual interface */
 #define	MRT_DEL_VIF	103	/* delete virtual interface */
 #define MRT_ADD_MFC	104	/* insert forwarding cache entry */
 #define MRT_DEL_MFC	105	/* delete forwarding cache entry */
 #define MRT_VERSION	106	/* get kernel version number */
 #define MRT_ASSERT      107     /* enable assert processing */
 #define MRT_PIM		MRT_ASSERT /* enable PIM processing */
 #define MRT_API_SUPPORT	109	/* supported MRT API */
 #define MRT_API_CONFIG	110	/* config MRT API */
 #define MRT_ADD_BW_UPCALL 111	/* create bandwidth monitor */
 #define MRT_DEL_BW_UPCALL 112	/* delete bandwidth monitor */
 
 
 #define GET_TIME(t)	microtime(&t)
 
 /*
  * Types and macros for handling bitmaps with one bit per virtual interface.
  */
 #define	MAXVIFS 32
 typedef u_long vifbitmap_t;
 typedef u_short vifi_t;		/* type of a vif index */
 /* All-ones vifi_t value; parenthesized so it is safe inside any expression. */
 #define ALL_VIFS ((vifi_t)-1)
 
 /*
  * Bit n of a vifbitmap_t stands for vif n.  The shifted constant is cast to
  * vifbitmap_t so that bit 31 (MAXVIFS is 32) is not produced by shifting a
  * signed int, which is undefined and would sign-extend into the upper half
  * of a 64-bit bitmap.
  */
 #define	VIFM_SET(n, m)		((m) |= ((vifbitmap_t)1 << (n)))
 #define	VIFM_CLR(n, m)		((m) &= ~((vifbitmap_t)1 << (n)))
 #define	VIFM_ISSET(n, m)	((m) & ((vifbitmap_t)1 << (n)))
 #define	VIFM_CLRALL(m)		((m) = 0x00000000)
 #define	VIFM_COPY(mfrom, mto)	((mto) = (mfrom))
 #define	VIFM_SAME(m1, m2)	((m1) == (m2))
 
 
 /*
  * Argument structure for MRT_ADD_VIF.
  * (MRT_DEL_VIF takes a single vifi_t argument.)
  */
 struct vifctl {
 	vifi_t	vifc_vifi;		/* the index of the vif to be added */
 	u_char	vifc_flags;		/* VIFF_ flags defined below */
 	u_char	vifc_threshold;		/* min ttl required to forward on vif */
 	u_int	vifc_rate_limit;	/* max rate */
 	struct	in_addr vifc_lcl_addr;	/* local interface address */
 	struct	in_addr vifc_rmt_addr;	/* remote address (tunnels only) */
 };
 
 #define	VIFF_TUNNEL	0x1		/* no-op; retained for old source */
 #define VIFF_SRCRT	0x2		/* no-op; retained for old source */
 #define VIFF_REGISTER	0x4		/* used for PIM Register encap/decap */
 
 /*
  * Argument structure for MRT_ADD_MFC and MRT_DEL_MFC
  * XXX if you change this, make sure to change struct mfcctl2 as well.
  */
 struct mfcctl {
     struct in_addr  mfcc_origin;		/* ip origin of mcasts       */
     struct in_addr  mfcc_mcastgrp;		/* multicast group associated*/
     vifi_t	    mfcc_parent;		/* incoming vif              */
     u_char	    mfcc_ttls[MAXVIFS];		/* forwarding ttls on vifs   */
 };
 
 /*
  * The new argument structure for MRT_ADD_MFC and MRT_DEL_MFC overlays
  * and extends the old struct mfcctl.
  */
 struct mfcctl2 {
 	/* the mfcctl fields */
 	struct in_addr	mfcc_origin;		/* ip origin of mcasts	     */
 	struct in_addr	mfcc_mcastgrp;		/* multicast group associated*/
 	vifi_t		mfcc_parent;		/* incoming vif		     */
 	u_char		mfcc_ttls[MAXVIFS];	/* forwarding ttls on vifs   */
 
 	/* extension fields */
 	uint8_t		mfcc_flags[MAXVIFS];	/* the MRT_MFC_FLAGS_* flags */
 	struct in_addr	mfcc_rp;		/* the RP address            */
 };
 /*
  * The advanced-API flags.
  *
  * The MRT_MFC_FLAGS_XXX API flags are also used as flags
  * for the mfcc_flags field.
  */
 #define	MRT_MFC_FLAGS_DISABLE_WRONGVIF	(1 << 0) /* disable WRONGVIF signals */
 #define	MRT_MFC_FLAGS_BORDER_VIF	(1 << 1) /* border vif		     */
 #define MRT_MFC_RP			(1 << 8) /* enable RP address	     */
 #define MRT_MFC_BW_UPCALL		(1 << 9) /* enable bw upcalls	     */
 #define MRT_MFC_FLAGS_ALL		(MRT_MFC_FLAGS_DISABLE_WRONGVIF |    \
 					 MRT_MFC_FLAGS_BORDER_VIF)
 #define MRT_API_FLAGS_ALL		(MRT_MFC_FLAGS_ALL |		     \
 					 MRT_MFC_RP |			     \
 					 MRT_MFC_BW_UPCALL)
 
 /*
  * Structure for installing or delivering an upcall if the
  * measured bandwidth is above or below a threshold.
  *
  * User programs (e.g. daemons) may have a need to know when the
  * bandwidth used by some data flow is above or below some threshold.
  * This interface allows the userland to specify the threshold (in
  * bytes and/or packets) and the measurement interval. A flow is
  * all packets with the same source and destination IP address.
  * At the moment the code is only used for multicast destinations
  * but there is nothing that prevents its use for unicast.
  *
  * The measurement interval cannot be shorter than some Tmin (currently, 3s).
  * The threshold is set in packets and/or bytes per_interval.
  *
  * Measurement works as follows:
  *
  * For >= measurements:
  * The first packet marks the start of a measurement interval.
  * During an interval we count packets and bytes, and when we
  * pass the threshold we deliver an upcall and we are done.
  * The first packet after the end of the interval resets the
  * count and restarts the measurement.
  *
  * For <= measurement:
  * We start a timer to fire at the end of the interval, and
  * then for each incoming packet we count packets and bytes.
  * When the timer fires, we compare the value with the threshold,
  * schedule an upcall if we are below, and restart the measurement
  * (reschedule timer and zero counters).
  */
 
 /*
  * A (time, packets, bytes) triple.  struct bw_upcall and struct bw_meter
  * each hold one instance for the threshold and one for the measured value.
  */
 struct bw_data {
 	struct timeval	b_time;		/* presumably the interval -- TODO confirm */
 	uint64_t	b_packets;	/* packet count */
 	uint64_t	b_bytes;	/* byte count */
 };
 
 struct bw_upcall {
 	struct in_addr	bu_src;			/* source address            */
 	struct in_addr	bu_dst;			/* destination address       */
 	uint32_t	bu_flags;		/* misc flags (see below)    */
 #define BW_UPCALL_UNIT_PACKETS   (1 << 0)	/* threshold (in packets)    */
 #define BW_UPCALL_UNIT_BYTES     (1 << 1)	/* threshold (in bytes)      */
 #define BW_UPCALL_GEQ            (1 << 2)	/* upcall if bw >= threshold */
 #define BW_UPCALL_LEQ            (1 << 3)	/* upcall if bw <= threshold */
 #define BW_UPCALL_DELETE_ALL     (1 << 4)	/* delete all upcalls for s,d*/
 	struct bw_data	bu_threshold;		/* the bw threshold	     */
 	struct bw_data	bu_measured;		/* the measured bw	     */
 };
 
 /* max. number of upcalls to deliver together */
 #define BW_UPCALLS_MAX				128
 /* min. threshold time interval for bandwidth measurement */
 #define BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC	3
 #define BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC	0
 
 /*
  * The kernel's multicast routing statistics.
  */
 struct mrtstat {
     u_long	mrts_mfc_lookups;	/* # forw. cache hash table hits   */
     u_long	mrts_mfc_misses;	/* # forw. cache hash table misses */
     u_long	mrts_upcalls;		/* # calls to multicast routing daemon */
     u_long	mrts_no_route;		/* no route for packet's origin    */
     u_long	mrts_bad_tunnel;	/* malformed tunnel options        */
     u_long	mrts_cant_tunnel;	/* no room for tunnel options      */
     u_long	mrts_wrong_if;		/* arrived on wrong interface	   */
     u_long	mrts_upq_ovflw;		/* upcall Q overflow		   */
     u_long	mrts_cache_cleanups;	/* # entries with no upcalls	   */
     u_long	mrts_drop_sel;		/* pkts dropped selectively        */
     u_long	mrts_q_overflow;	/* pkts dropped - Q overflow       */
     u_long	mrts_pkt2large;		/* pkts dropped - size > BKT SIZE  */
     u_long	mrts_upq_sockfull;	/* upcalls dropped - socket full */
 };
 
 /*
  * Argument structure used by mrouted to get src-grp pkt counts
  */
 struct sioc_sg_req {
     struct in_addr src;		/* source of the (src, grp) pair queried */
     struct in_addr grp;		/* group of the (src, grp) pair queried  */
     u_long pktcnt;		/* packet count for src-grp		 */
     u_long bytecnt;		/* byte count for src-grp		 */
     u_long wrong_if;		/* presumably mirrors mfc_wrong_if -- confirm */
 };
 
 /*
  * Argument structure used by mrouted to get vif pkt counts
  */
 struct sioc_vif_req {
     vifi_t vifi;		/* vif number				*/
     u_long icount;		/* Input packet count on vif		*/
     u_long ocount;		/* Output packet count on vif		*/
     u_long ibytes;		/* Input byte count on vif		*/
     u_long obytes;		/* Output byte count on vif		*/
 };
 
 
 /*
  * The kernel's virtual-interface structure.
  */
 struct vif {
     u_char		v_flags;	/* VIFF_ flags defined above         */
     u_char		v_threshold;	/* min ttl required to forward on vif*/
     u_int		v_rate_limit;	/* ignored; kept for compatibility */
     struct tbf         *v_tbf;		/* ignored; kept for compatibility */
     struct in_addr	v_lcl_addr;	/* local interface address           */
     struct in_addr	v_rmt_addr;	/* remote address (tunnels only)     */
     struct ifnet       *v_ifp;		/* pointer to interface              */
     u_long		v_pkt_in;	/* # pkts in on interface            */
     u_long		v_pkt_out;	/* # pkts out on interface           */
     u_long		v_bytes_in;	/* # bytes in on interface	     */
     u_long		v_bytes_out;	/* # bytes out on interface	     */
     struct route	v_route;	/* cached route */
     u_int		v_rsvp_on;	/* RSVP listening on this vif */
     struct socket      *v_rsvpd;	/* RSVP daemon socket */
 };
 
 /*
  * The kernel's multicast forwarding cache entry structure
  * (A field for the type of service (mfc_tos) is to be added
  * at a future point)
  */
 struct mfc {
 	struct in_addr	mfc_origin;		/* IP origin of mcasts	     */
 	struct in_addr  mfc_mcastgrp;		/* multicast group associated*/
 	vifi_t		mfc_parent;		/* incoming vif              */
 	u_char		mfc_ttls[MAXVIFS];	/* forwarding ttls on vifs   */
 	u_long		mfc_pkt_cnt;		/* pkt count for src-grp     */
 	u_long		mfc_byte_cnt;		/* byte count for src-grp    */
 	u_long		mfc_wrong_if;		/* wrong if for src-grp	     */
 	int		mfc_expire;		/* time to clean entry up    */
 	struct timeval	mfc_last_assert;	/* last time I sent an assert*/
 	struct rtdetq	*mfc_stall;		/* q of packets awaiting mfc */
 	struct mfc	*mfc_next;		/* next mfc entry            */
 	uint8_t		mfc_flags[MAXVIFS];	/* the MRT_MFC_FLAGS_* flags */
 	struct in_addr	mfc_rp;			/* the RP address	     */
 	struct bw_meter	*mfc_bw_meter;		/* list of bandwidth meters  */
 };
 
 /*
  * Struct used to communicate from kernel to multicast router
  * note the convenient similarity to an IP packet
  */
 struct igmpmsg {
     uint32_t	    unused1;
     uint32_t	    unused2;
     u_char	    im_msgtype;			/* what type of message	    */
 #define IGMPMSG_NOCACHE		1	/* no MFC in the kernel		    */
 #define IGMPMSG_WRONGVIF	2	/* packet came from wrong interface */
 #define	IGMPMSG_WHOLEPKT	3	/* PIM pkt for user level encap.    */
 #define	IGMPMSG_BW_UPCALL	4	/* BW monitoring upcall		    */
     u_char	    im_mbz;			/* must be zero		    */
     u_char	    im_vif;			/* vif rec'd on		    */
     u_char	    unused3;
     struct in_addr  im_src, im_dst;
 };
 
 /*
  * Argument structure used for pkt info. while upcall is made
  */
 struct rtdetq {
     struct mbuf		*m;		/* A copy of the packet		    */
     struct ifnet	*ifp;		/* Interface pkt came in on	    */
     vifi_t		xmt_vif;	/* Saved copy of imo_multicast_vif  */
     struct rtdetq	*next;		/* Next in list of packets          */
 };
 
 #define MFCTBLSIZ	256
 #if (MFCTBLSIZ & (MFCTBLSIZ - 1)) == 0	  /* from sys:route.h */
 #define MFCHASHMOD(h)	((h) & (MFCTBLSIZ - 1))
 #else
 #define MFCHASHMOD(h)	((h) % MFCTBLSIZ)
 #endif
 
 #define MAX_UPQ	4		/* max. no of pkts in upcall Q */
 
 /*
  * Structure for measuring the bandwidth and sending an upcall if the
  * measured bandwidth is above or below a threshold.
  */
 struct bw_meter {
 	struct bw_meter	*bm_mfc_next;		/* next bw meter (same mfc)  */
 	struct bw_meter	*bm_time_next;		/* next bw meter (same time) */
 	uint32_t	bm_time_hash;		/* the time hash value       */
 	struct mfc	*bm_mfc;		/* the corresponding mfc     */
 	uint32_t	bm_flags;		/* misc flags (see below)    */
 #define BW_METER_UNIT_PACKETS	(1 << 0)	/* threshold (in packets)    */
 #define BW_METER_UNIT_BYTES	(1 << 1)	/* threshold (in bytes)      */
 #define BW_METER_GEQ		(1 << 2)	/* upcall if bw >= threshold */
 #define BW_METER_LEQ		(1 << 3)	/* upcall if bw <= threshold */
 #define BW_METER_USER_FLAGS	(BW_METER_UNIT_PACKETS |		\
 				 BW_METER_UNIT_BYTES |			\
 				 BW_METER_GEQ |				\
 				 BW_METER_LEQ)
 
 #define BW_METER_UPCALL_DELIVERED (1 << 24)	/* upcall was delivered      */
 
 	struct bw_data	bm_threshold;		/* the upcall threshold	     */
 	struct bw_data	bm_measured;		/* the measured bw	     */
 	struct timeval	bm_start_time;		/* abs. time		     */
 };
 
 #ifdef _KERNEL
 
 struct sockopt;
 
 extern int	(*ip_mrouter_set)(struct socket *, struct sockopt *);
 extern int	(*ip_mrouter_get)(struct socket *, struct sockopt *);
 extern int	(*ip_mrouter_done)(void);
-extern int	(*mrt_ioctl)(int, caddr_t);
+extern int	(*mrt_ioctl)(int, caddr_t, int);
 
 #endif /* _KERNEL */
 
 #endif /* _NETINET_IP_MROUTE_H_ */
Index: head/sys/netinet/ip_options.c
===================================================================
--- head/sys/netinet/ip_options.c	(revision 178887)
+++ head/sys/netinet/ip_options.c	(revision 178888)
@@ -1,684 +1,685 @@
 /*
  * Copyright (c) 1982, 1986, 1988, 1993
  *      The Regents of the University of California.
  * Copyright (c) 2005 Andre Oppermann, Internet Business Solutions AG.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipstealth.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/netisr.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/ip_icmp.h>
 #include <machine/in_cksum.h>
 
 #include <sys/socketvar.h>
 
 #include <security/mac/mac_framework.h>
 
 static int	ip_dosourceroute = 0;
 SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
     &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
 
 static int	ip_acceptsourceroute = 0;
 SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute, 
     CTLFLAG_RW, &ip_acceptsourceroute, 0, 
     "Enable accepting source routed IP packets");
 
 int		ip_doopts = 1;	/* 0 = ignore, 1 = process, 2 = reject */
 SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_RW,
     &ip_doopts, 0, "Enable IP options processing ([LS]SRR, RR, TS)");
 
 static void	save_rte(struct mbuf *m, u_char *, struct in_addr);
 
 /*
  * Do option processing on a datagram, possibly discarding it if bad options
  * are encountered, or forwarding it if source-routed.
  *
  * The pass argument is used when operating in the IPSTEALTH mode to tell
  * what options to process: [LS]SRR (pass 0) or the others (pass 1).  The
  * reason for as many as two passes is that when doing IPSTEALTH, non-routing
  * options should be processed only if the packet is for us.
  *
  * Returns 1 if packet has been forwarded/freed, 0 if the packet should be
  * processed further.
  */
 int
 ip_dooptions(struct mbuf *m, int pass)
 {
 	struct ip *ip = mtod(m, struct ip *);
 	u_char *cp;
 	struct in_ifaddr *ia;
 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
 	struct in_addr *sin, dst;
 	n_time ntime;
 	struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
 
 	/* Ignore or reject packets with IP options. */
 	if (ip_doopts == 0)
 		return 0;
 	else if (ip_doopts == 2) {
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_FILTER_PROHIB;
 		goto bad;
 	}
 
 	dst = ip->ip_dst;
 	cp = (u_char *)(ip + 1);
 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
 		opt = cp[IPOPT_OPTVAL];
 		if (opt == IPOPT_EOL)
 			break;
 		if (opt == IPOPT_NOP)
 			optlen = 1;
 		else {
 			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
 				goto bad;
 			}
 			optlen = cp[IPOPT_OLEN];
 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
 				goto bad;
 			}
 		}
 		switch (opt) {
 
 		default:
 			break;
 
 		/*
 		 * Source routing with record.  Find interface with current
 		 * destination address.  If none on this machine then drop if
 		 * strictly routed, or do nothing if loosely routed.  Record
 		 * interface address and bring up next address component.  If
 		 * strictly routed make sure next address is on directly
 		 * accessible net.
 		 */
 		case IPOPT_LSRR:
 		case IPOPT_SSRR:
 #ifdef IPSTEALTH
 			if (ipstealth && pass > 0)
 				break;
 #endif
 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
 				goto bad;
 			}
 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
 				goto bad;
 			}
 			ipaddr.sin_addr = ip->ip_dst;
 			ia = (struct in_ifaddr *)
 				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
 			if (ia == NULL) {
 				if (opt == IPOPT_SSRR) {
 					type = ICMP_UNREACH;
 					code = ICMP_UNREACH_SRCFAIL;
 					goto bad;
 				}
 				if (!ip_dosourceroute)
 					goto nosourcerouting;
 				/*
 				 * Loose routing, and not at next destination
 				 * yet; nothing to do except forward.
 				 */
 				break;
 			}
 			off--;			/* 0 origin */
 			if (off > optlen - (int)sizeof(struct in_addr)) {
 				/*
 				 * End of source route.  Should be for us.
 				 */
 				if (!ip_acceptsourceroute)
 					goto nosourcerouting;
 				save_rte(m, cp, ip->ip_src);
 				break;
 			}
 #ifdef IPSTEALTH
 			if (ipstealth)
 				goto dropit;
 #endif
 			if (!ip_dosourceroute) {
 				if (ipforwarding) {
 					char buf[16]; /* aaa.bbb.ccc.ddd\0 */
 					/*
 					 * Acting as a router, so generate
 					 * ICMP
 					 */
 nosourcerouting:
 					strcpy(buf, inet_ntoa(ip->ip_dst));
 					log(LOG_WARNING, 
 					    "attempted source route from %s to %s\n",
 					    inet_ntoa(ip->ip_src), buf);
 					type = ICMP_UNREACH;
 					code = ICMP_UNREACH_SRCFAIL;
 					goto bad;
 				} else {
 					/*
 					 * Not acting as a router, so
 					 * silently drop.
 					 */
 #ifdef IPSTEALTH
 dropit:
 #endif
 					ipstat.ips_cantforward++;
 					m_freem(m);
 					return (1);
 				}
 			}
 
 			/*
 			 * locate outgoing interface
 			 */
 			(void)memcpy(&ipaddr.sin_addr, cp + off,
 			    sizeof(ipaddr.sin_addr));
 
 			if (opt == IPOPT_SSRR) {
 #define	INA	struct in_ifaddr *
 #define	SA	struct sockaddr *
 			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == NULL)
 				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
 			} else
-				ia = ip_rtaddr(ipaddr.sin_addr);
+/* XXX MRT: "0 for routing", yet M_GETFIB(m) is passed below -- confirm. */
+				ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m));
 			if (ia == NULL) {
 				type = ICMP_UNREACH;
 				code = ICMP_UNREACH_SRCFAIL;
 				goto bad;
 			}
 			ip->ip_dst = ipaddr.sin_addr;
 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
 			    sizeof(struct in_addr));
 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
 			/*
 			 * Let ip_intr's mcast routing check handle mcast pkts
 			 */
 			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
 			break;
 
 		case IPOPT_RR:
 #ifdef IPSTEALTH
 			if (ipstealth && pass == 0)
 				break;
 #endif
 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
 				goto bad;
 			}
 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
 				goto bad;
 			}
 			/*
 			 * If no space remains, ignore.
 			 */
 			off--;			/* 0 origin */
 			if (off > optlen - (int)sizeof(struct in_addr))
 				break;
 			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
 			    sizeof(ipaddr.sin_addr));
 			/*
 			 * Locate outgoing interface; if we're the
 			 * destination, use the incoming interface (should be
 			 * same).
 			 */
 			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL &&
-			    (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) {
+			    (ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m))) == NULL) {
 				type = ICMP_UNREACH;
 				code = ICMP_UNREACH_HOST;
 				goto bad;
 			}
 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
 			    sizeof(struct in_addr));
 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
 			break;
 
 		case IPOPT_TS:
 #ifdef IPSTEALTH
 			if (ipstealth && pass == 0)
 				break;
 #endif
 			code = cp - (u_char *)ip;
 			if (optlen < 4 || optlen > 40) {
 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
 				goto bad;
 			}
 			if ((off = cp[IPOPT_OFFSET]) < 5) {
 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
 				goto bad;
 			}
 			if (off > optlen - (int)sizeof(int32_t)) {
 				cp[IPOPT_OFFSET + 1] += (1 << 4);
 				if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
 					goto bad;
 				}
 				break;
 			}
 			off--;				/* 0 origin */
 			sin = (struct in_addr *)(cp + off);
 			switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
 
 			case IPOPT_TS_TSONLY:
 				break;
 
 			case IPOPT_TS_TSANDADDR:
 				if (off + sizeof(n_time) +
 				    sizeof(struct in_addr) > optlen) {
 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
 					goto bad;
 				}
 				ipaddr.sin_addr = dst;
 				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
 							    m->m_pkthdr.rcvif);
 				if (ia == NULL)
 					continue;
 				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
 				    sizeof(struct in_addr));
 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
 				off += sizeof(struct in_addr);
 				break;
 
 			case IPOPT_TS_PRESPEC:
 				if (off + sizeof(n_time) +
 				    sizeof(struct in_addr) > optlen) {
 					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
 					goto bad;
 				}
 				(void)memcpy(&ipaddr.sin_addr, sin,
 				    sizeof(struct in_addr));
 				if (ifa_ifwithaddr((SA)&ipaddr) == NULL)
 					continue;
 				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
 				off += sizeof(struct in_addr);
 				break;
 
 			default:
 				code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
 				goto bad;
 			}
 			ntime = iptime();
 			(void)memcpy(cp + off, &ntime, sizeof(n_time));
 			cp[IPOPT_OFFSET] += sizeof(n_time);
 		}
 	}
 	if (forward && ipforwarding) {
 		ip_forward(m, 1);
 		return (1);
 	}
 	return (0);
 bad:
 	icmp_error(m, type, code, 0, 0);
 	ipstat.ips_badoptions++;
 	return (1);
 }
 
 /*
  * Save incoming source route for use in replies, to be picked up later by
  * ip_srcroute if the receiver is interested.
  */
 static void
 save_rte(struct mbuf *m, u_char *option, struct in_addr dst)
 {
 	unsigned olen;
 	struct ipopt_tag *opts;
 
 	opts = (struct ipopt_tag *)m_tag_get(PACKET_TAG_IPOPTIONS,
 	    sizeof(struct ipopt_tag), M_NOWAIT);
 	if (opts == NULL)
 		return;
 
 	olen = option[IPOPT_OLEN];
 	if (olen > sizeof(opts->ip_srcrt) - (1 + sizeof(dst))) {
 		m_tag_free((struct m_tag *)opts);
 		return;
 	}
 	bcopy(option, opts->ip_srcrt.srcopt, olen);
 	opts->ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
 	opts->ip_srcrt.dst = dst;
 	m_tag_prepend(m, (struct m_tag *)opts);
 }
 
 /*
  * Retrieve incoming source route for use in replies, in the same form used
  * by setsockopt.  The first hop is placed before the options, will be
  * removed later.
  */
 struct mbuf *
 ip_srcroute(struct mbuf *m0)
 {
 	struct in_addr *p, *q;
 	struct mbuf *m;
 	struct ipopt_tag *opts;
 
 	opts = (struct ipopt_tag *)m_tag_find(m0, PACKET_TAG_IPOPTIONS, NULL);
 	if (opts == NULL)
 		return (NULL);
 
 	if (opts->ip_nhops == 0)
 		return (NULL);
 	m = m_get(M_DONTWAIT, MT_DATA);
 	if (m == NULL)
 		return (NULL);
 
 #define OPTSIZ	(sizeof(opts->ip_srcrt.nop) + sizeof(opts->ip_srcrt.srcopt))
 
 	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
 	m->m_len = opts->ip_nhops * sizeof(struct in_addr) +
 	    sizeof(struct in_addr) + OPTSIZ;
 
 	/*
 	 * First, save first hop for return route.
 	 */
 	p = &(opts->ip_srcrt.route[opts->ip_nhops - 1]);
 	*(mtod(m, struct in_addr *)) = *p--;
 
 	/*
 	 * Copy option fields and padding (nop) to mbuf.
 	 */
 	opts->ip_srcrt.nop = IPOPT_NOP;
 	opts->ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
 	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
 	    &(opts->ip_srcrt.nop), OPTSIZ);
 	q = (struct in_addr *)(mtod(m, caddr_t) +
 	    sizeof(struct in_addr) + OPTSIZ);
 #undef OPTSIZ
 	/*
 	 * Record return path as an IP source route, reversing the path
 	 * (pointers are now aligned).
 	 */
 	while (p >= opts->ip_srcrt.route) {
 		*q++ = *p--;
 	}
 	/*
 	 * Last hop goes to final destination.
 	 */
 	*q = opts->ip_srcrt.dst;
 	m_tag_delete(m0, (struct m_tag *)opts);
 	return (m);
 }
 
 /*
  * Strip out IP options, at higher level protocol in the kernel.  Second
  * argument is buffer to which options will be moved, and return value is
  * their length.
  *
  * XXX should be deleted; last arg currently ignored.
  */
 void
 ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
 {
 	int i;
 	struct ip *ip = mtod(m, struct ip *);
 	caddr_t opts;
 	int olen;
 
 	olen = (ip->ip_hl << 2) - sizeof (struct ip);
 	opts = (caddr_t)(ip + 1);
 	i = m->m_len - (sizeof (struct ip) + olen);
 	bcopy(opts + olen, opts, (unsigned)i);
 	m->m_len -= olen;
 	if (m->m_flags & M_PKTHDR)
 		m->m_pkthdr.len -= olen;
 	ip->ip_v = IPVERSION;
 	ip->ip_hl = sizeof(struct ip) >> 2;
 }
 
 /*
  * Insert IP options into preformed packet.  Adjust IP destination as
  * required for IP source routing, as indicated by a non-zero in_addr at the
  * start of the options.
  *
  * XXX This routine assumes that the packet has no options in place.
  */
 struct mbuf *
 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
 {
 	struct ipoption *p = mtod(opt, struct ipoption *);
 	struct mbuf *n;
 	struct ip *ip = mtod(m, struct ip *);
 	unsigned optlen;
 
 	optlen = opt->m_len - sizeof(p->ipopt_dst);
 	if (optlen + ip->ip_len > IP_MAXPACKET) {
 		*phlen = 0;
 		return (m);		/* XXX should fail */
 	}
 	if (p->ipopt_dst.s_addr)
 		ip->ip_dst = p->ipopt_dst;
 	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
 		MGETHDR(n, M_DONTWAIT, MT_DATA);
 		if (n == NULL) {
 			*phlen = 0;
 			return (m);
 		}
 		M_MOVE_PKTHDR(n, m);
 		n->m_pkthdr.rcvif = NULL;
 #ifdef MAC
 		mac_mbuf_copy(m, n);
 #endif
 		n->m_pkthdr.len += optlen;
 		m->m_len -= sizeof(struct ip);
 		m->m_data += sizeof(struct ip);
 		n->m_next = m;
 		m = n;
 		m->m_len = optlen + sizeof(struct ip);
 		m->m_data += max_linkhdr;
 		bcopy(ip, mtod(m, void *), sizeof(struct ip));
 	} else {
 		m->m_data -= optlen;
 		m->m_len += optlen;
 		m->m_pkthdr.len += optlen;
 		bcopy(ip, mtod(m, void *), sizeof(struct ip));
 	}
 	ip = mtod(m, struct ip *);
 	bcopy(p->ipopt_list, ip + 1, optlen);
 	*phlen = sizeof(struct ip) + optlen;
 	ip->ip_v = IPVERSION;
 	ip->ip_hl = *phlen >> 2;
 	ip->ip_len += optlen;
 	return (m);
 }
 
 /*
  * Copy options from ip to jp, omitting those not copied during
  * fragmentation.
  */
 int
 ip_optcopy(struct ip *ip, struct ip *jp)
 {
 	u_char *cp, *dp;
 	int opt, optlen, cnt;
 
 	cp = (u_char *)(ip + 1);
 	dp = (u_char *)(jp + 1);
 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
 		opt = cp[0];
 		if (opt == IPOPT_EOL)
 			break;
 		if (opt == IPOPT_NOP) {
 			/* Preserve for IP mcast tunnel's LSRR alignment. */
 			*dp++ = IPOPT_NOP;
 			optlen = 1;
 			continue;
 		}
 
 		KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
 		    ("ip_optcopy: malformed ipv4 option"));
 		optlen = cp[IPOPT_OLEN];
 		KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
 		    ("ip_optcopy: malformed ipv4 option"));
 
 		/* Bogus lengths should have been caught by ip_dooptions. */
 		if (optlen > cnt)
 			optlen = cnt;
 		if (IPOPT_COPIED(opt)) {
 			bcopy(cp, dp, optlen);
 			dp += optlen;
 		}
 	}
 	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
 		*dp++ = IPOPT_EOL;
 	return (optlen);
 }
 
 /*
  * Set up IP options in pcb for insertion in output packets.  Store in mbuf
  * with pointer in pcbopt, adding pseudo-option with destination address if
  * source routed.
  */
 int
 ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
 {
 	int cnt, optlen;
 	u_char *cp;
 	struct mbuf **pcbopt;
 	u_char opt;
 
 	INP_WLOCK_ASSERT(inp);
 
 	pcbopt = &inp->inp_options;
 
 	/* turn off any old options */
 	if (*pcbopt)
 		(void)m_free(*pcbopt);
 	*pcbopt = 0;
 	if (m == NULL || m->m_len == 0) {
 		/*
 		 * Only turning off any previous options.
 		 */
 		if (m != NULL)
 			(void)m_free(m);
 		return (0);
 	}
 
 	if (m->m_len % sizeof(int32_t))
 		goto bad;
 	/*
 	 * IP first-hop destination address will be stored before actual
 	 * options; move other options back and clear it when none present.
 	 */
 	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
 		goto bad;
 	cnt = m->m_len;
 	m->m_len += sizeof(struct in_addr);
 	cp = mtod(m, u_char *) + sizeof(struct in_addr);
 	bcopy(mtod(m, void *), cp, (unsigned)cnt);
 	bzero(mtod(m, void *), sizeof(struct in_addr));
 
 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
 		opt = cp[IPOPT_OPTVAL];
 		if (opt == IPOPT_EOL)
 			break;
 		if (opt == IPOPT_NOP)
 			optlen = 1;
 		else {
 			if (cnt < IPOPT_OLEN + sizeof(*cp))
 				goto bad;
 			optlen = cp[IPOPT_OLEN];
 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
 				goto bad;
 		}
 		switch (opt) {
 
 		default:
 			break;
 
 		case IPOPT_LSRR:
 		case IPOPT_SSRR:
 			/*
 			 * User process specifies route as:
 			 *
 			 *	->A->B->C->D
 			 *
 			 * D must be our final destination (but we can't
 			 * check that since we may not have connected yet).
 			 * A is first hop destination, which doesn't appear
 			 * in actual IP option, but is stored before the
 			 * options.
 			 */
 			/* XXX-BZ PRIV_NETINET_SETHDROPTS? */
 			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
 				goto bad;
 			m->m_len -= sizeof(struct in_addr);
 			cnt -= sizeof(struct in_addr);
 			optlen -= sizeof(struct in_addr);
 			cp[IPOPT_OLEN] = optlen;
 			/*
 			 * Move first hop before start of options.
 			 */
 			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
 			    sizeof(struct in_addr));
 			/*
 			 * Then copy rest of options back
 			 * to close up the deleted entry.
 			 */
 			bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
 			    &cp[IPOPT_OFFSET+1],
 			    (unsigned)cnt - (IPOPT_MINOFF - 1));
 			break;
 		}
 	}
 	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
 		goto bad;
 	*pcbopt = m;
 	return (0);
 
 bad:
 	(void)m_free(m);
 	return (EINVAL);
 }
Index: head/sys/netinet/ip_output.c
===================================================================
--- head/sys/netinet/ip_output.c	(revision 178887)
+++ head/sys/netinet/ip_output.c	(revision 178888)
@@ -1,1193 +1,1195 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipfw.h"
 #include "opt_ipsec.h"
 #include "opt_mac.h"
 #include "opt_mbuf_stress_test.h"
 #include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/ucred.h>
 
 #include <net/if.h>
 #include <net/netisr.h>
 #include <net/pfil.h>
 #include <net/route.h>
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 
 #ifdef IPSEC
 #include <netinet/ip_ipsec.h>
 #include <netipsec/ipsec.h>
 #endif /* IPSEC*/
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 #define print_ip(x, a, y)	 printf("%s %d.%d.%d.%d%s",\
 				x, (ntohl(a.s_addr)>>24)&0xFF,\
 				  (ntohl(a.s_addr)>>16)&0xFF,\
 				  (ntohl(a.s_addr)>>8)&0xFF,\
 				  (ntohl(a.s_addr))&0xFF, y);
 
 u_short ip_id;
 
 #ifdef MBUF_STRESS_TEST
 int mbuf_frag_size = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
 	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
 #endif
 
 static void	ip_mloopback
 	(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
 
 
 extern	struct protosw inetsw[];
 
 /*
  * IP output.  The packet in mbuf chain m contains a skeletal IP
  * header (with len, off, ttl, proto, tos, src, dst).
  * The mbuf chain containing the packet will be freed.
  * The mbuf opt, if present, will not be freed.
  * In the IP forwarding case, the packet will arrive with options already
  * inserted, so must have a NULL opt pointer.
  */
 int
 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
     struct ip_moptions *imo, struct inpcb *inp)
 {
 	struct ip *ip;
 	struct ifnet *ifp = NULL;	/* keep compiler happy */
 	struct mbuf *m0;
 	int hlen = sizeof (struct ip);
 	int mtu;
 	int len, error = 0;
 	struct sockaddr_in *dst = NULL;	/* keep compiler happy */
 	struct in_ifaddr *ia = NULL;
 	int isbroadcast, sw_csum;
 	struct route iproute;
 	struct in_addr odst;
 #ifdef IPFIREWALL_FORWARD
 	struct m_tag *fwd_tag = NULL;
 #endif
 	M_ASSERTPKTHDR(m);
 
 	if (ro == NULL) {
 		ro = &iproute;
 		bzero(ro, sizeof (*ro));
 	}
 
 	if (inp != NULL)
 		INP_LOCK_ASSERT(inp);
 
 	if (opt) {
 		len = 0;
 		m = ip_insertoptions(m, opt, &len);
 		if (len != 0)
 			hlen = len;
 	}
 	ip = mtod(m, struct ip *);
 
 	/*
 	 * Fill in IP header.  If we are not allowing fragmentation,
 	 * then the ip_id field is meaningless, but we don't set it
 	 * to zero.  Doing so causes various problems when devices along
 	 * the path (routers, load balancers, firewalls, etc.) illegally
 	 * disable DF on our packet.  Note that a 16-bit counter
 	 * will wrap around in less than 10 seconds at 100 Mbit/s on a
 	 * medium with MTU 1500.  See Steven M. Bellovin, "A Technique
 	 * for Counting NATted Hosts", Proc. IMW'02, available at
 	 * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>.
 	 */
 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = hlen >> 2;
 		ip->ip_id = ip_newid();
 		ipstat.ips_localout++;
 	} else {
 		hlen = ip->ip_hl << 2;
 	}
 
 	dst = (struct sockaddr_in *)&ro->ro_dst;
 again:
 	/*
 	 * If there is a cached route,
 	 * check that it is to the same destination
 	 * and is still up.  If not, free it and try again.
 	 * The address family should also be checked in case of sharing the
 	 * cache with IPv6.
 	 */
 	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
 			  dst->sin_family != AF_INET ||
 			  dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
 		RTFREE(ro->ro_rt);
 		ro->ro_rt = (struct rtentry *)NULL;
 	}
 #ifdef IPFIREWALL_FORWARD
 	if (ro->ro_rt == NULL && fwd_tag == NULL) {
 #else
 	if (ro->ro_rt == NULL) {
 #endif
 		bzero(dst, sizeof(*dst));
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = ip->ip_dst;
 	}
 	/*
 	 * If routing to interface only, short circuit routing lookup.
 	 * The use of an all-ones broadcast address implies this; an
 	 * interface is specified by the broadcast address of an interface,
 	 * or the destination address of a ptp interface.
 	 */
 	if (flags & IP_SENDONES) {
 		if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL &&
 		    (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) {
 			ipstat.ips_noroute++;
 			error = ENETUNREACH;
 			goto bad;
 		}
 		ip->ip_dst.s_addr = INADDR_BROADCAST;
 		dst->sin_addr = ip->ip_dst;
 		ifp = ia->ia_ifp;
 		ip->ip_ttl = 1;
 		isbroadcast = 1;
 	} else if (flags & IP_ROUTETOIF) {
 		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
 		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) {
 			ipstat.ips_noroute++;
 			error = ENETUNREACH;
 			goto bad;
 		}
 		ifp = ia->ia_ifp;
 		ip->ip_ttl = 1;
 		isbroadcast = in_broadcast(dst->sin_addr, ifp);
 	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
 	    imo != NULL && imo->imo_multicast_ifp != NULL) {
 		/*
 		 * Bypass the normal routing lookup for multicast
 		 * packets if the interface is specified.
 		 */
 		ifp = imo->imo_multicast_ifp;
 		IFP_TO_IA(ifp, ia);
 		isbroadcast = 0;	/* fool gcc */
 	} else {
 		/*
 		 * We want to do any cloning requested by the link layer,
 		 * as this is probably required in all cases for correct
 		 * operation (as it is for ARP).
 		 */
 		if (ro->ro_rt == NULL)
 #ifdef RADIX_MPATH
-			rtalloc_mpath(ro,
-			    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr));
+			rtalloc_mpath_fib(ro,
+			    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
+			    inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
 #else
-			rtalloc_ign(ro, 0);
+			in_rtalloc_ign(ro, 0,
+			    inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
 #endif
 		if (ro->ro_rt == NULL) {
 			ipstat.ips_noroute++;
 			error = EHOSTUNREACH;
 			goto bad;
 		}
 		ia = ifatoia(ro->ro_rt->rt_ifa);
 		ifp = ro->ro_rt->rt_ifp;
 		ro->ro_rt->rt_rmx.rmx_pksent++;
 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
 			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
 		if (ro->ro_rt->rt_flags & RTF_HOST)
 			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
 		else
 			isbroadcast = in_broadcast(dst->sin_addr, ifp);
 	}
 	/*
 	 * Calculate MTU.  If we have a route that is up, use that,
 	 * otherwise use the interface's MTU.
 	 */
 	if (ro->ro_rt != NULL && (ro->ro_rt->rt_flags & (RTF_UP|RTF_HOST))) {
 		/*
 		 * This case can happen if the user changed the MTU
 		 * of an interface after enabling IP on it.  Because
 		 * most netifs don't keep track of routes pointing to
 		 * them, there is no way for one to update all its
 		 * routes when the MTU is changed.
 		 */
 		if (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)
 			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
 		mtu = ro->ro_rt->rt_rmx.rmx_mtu;
 	} else {
 		mtu = ifp->if_mtu;
 	}
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 		struct in_multi *inm;
 
 		m->m_flags |= M_MCAST;
 		/*
 		 * IP destination address is multicast.  Make sure "dst"
 		 * still points to the address in "ro".  (It may have been
 		 * changed to point to a gateway address, above.)
 		 */
 		dst = (struct sockaddr_in *)&ro->ro_dst;
 		/*
 		 * See if the caller provided any multicast options
 		 */
 		if (imo != NULL) {
 			ip->ip_ttl = imo->imo_multicast_ttl;
 			if (imo->imo_multicast_vif != -1)
 				ip->ip_src.s_addr =
 				    ip_mcast_src ?
 				    ip_mcast_src(imo->imo_multicast_vif) :
 				    INADDR_ANY;
 		} else
 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
 		/*
 		 * Confirm that the outgoing interface supports multicast.
 		 */
 		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 				ipstat.ips_noroute++;
 				error = ENETUNREACH;
 				goto bad;
 			}
 		}
 		/*
 		 * If source address not specified yet, use address
 		 * of outgoing interface.
 		 */
 		if (ip->ip_src.s_addr == INADDR_ANY) {
 			/* Interface may have no addresses. */
 			if (ia != NULL)
 				ip->ip_src = IA_SIN(ia)->sin_addr;
 		}
 
 		IN_MULTI_LOCK();
 		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
 		if (inm != NULL &&
 		   (imo == NULL || imo->imo_multicast_loop)) {
 			IN_MULTI_UNLOCK();
 			/*
 			 * If we belong to the destination multicast group
 			 * on the outgoing interface, and the caller did not
 			 * forbid loopback, loop back a copy.
 			 */
 			ip_mloopback(ifp, m, dst, hlen);
 		}
 		else {
 			IN_MULTI_UNLOCK();
 			/*
 			 * If we are acting as a multicast router, perform
 			 * multicast forwarding as if the packet had just
 			 * arrived on the interface to which we are about
 			 * to send.  The multicast forwarding function
 			 * recursively calls this function, using the
 			 * IP_FORWARDING flag to prevent infinite recursion.
 			 *
 			 * Multicasts that are looped back by ip_mloopback(),
 			 * above, will be forwarded by the ip_input() routine,
 			 * if necessary.
 			 */
 			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
 				/*
 				 * If rsvp daemon is not running, do not
 				 * set ip_moptions. This ensures that the packet
 				 * is multicast and not just sent down one link
 				 * as prescribed by rsvpd.
 				 */
 				if (!rsvp_on)
 					imo = NULL;
 				if (ip_mforward &&
 				    ip_mforward(ip, ifp, m, imo) != 0) {
 					m_freem(m);
 					goto done;
 				}
 			}
 		}
 
 		/*
 		 * Multicasts with a time-to-live of zero may be looped-
 		 * back, above, but must not be transmitted on a network.
 		 * Also, multicasts addressed to the loopback interface
 		 * are not sent -- the above call to ip_mloopback() will
 		 * loop back a copy if this host actually belongs to the
 		 * destination group on the loopback interface.
 		 */
 		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
 			m_freem(m);
 			goto done;
 		}
 
 		goto sendit;
 	}
 
 	/*
 	 * If the source address is not specified yet, use the address
 	 * of the outoing interface.
 	 */
 	if (ip->ip_src.s_addr == INADDR_ANY) {
 		/* Interface may have no addresses. */
 		if (ia != NULL) {
 			ip->ip_src = IA_SIN(ia)->sin_addr;
 		}
 	}
 
 	/*
 	 * Verify that we have any chance at all of being able to queue the
 	 * packet or packet fragments, unless ALTQ is enabled on the given
 	 * interface in which case packetdrop should be done by queueing.
 	 */
 #ifdef ALTQ
 	if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
 	    ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
 	    ifp->if_snd.ifq_maxlen))
 #else
 	if ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
 	    ifp->if_snd.ifq_maxlen)
 #endif /* ALTQ */
 	{
 		error = ENOBUFS;
 		ipstat.ips_odropped++;
 		ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1);
 		goto bad;
 	}
 
 	/*
 	 * Look for broadcast address and
 	 * verify user is allowed to send
 	 * such a packet.
 	 */
 	if (isbroadcast) {
 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 		if ((flags & IP_ALLOWBROADCAST) == 0) {
 			error = EACCES;
 			goto bad;
 		}
 		/* don't allow broadcast messages to be fragmented */
 		if (ip->ip_len > mtu) {
 			error = EMSGSIZE;
 			goto bad;
 		}
 		m->m_flags |= M_BCAST;
 	} else {
 		m->m_flags &= ~M_BCAST;
 	}
 
 sendit:
 #ifdef IPSEC
 	switch(ip_ipsec_output(&m, inp, &flags, &error, &ro, &iproute, &dst, &ia, &ifp)) {
 	case 1:
 		goto bad;
 	case -1:
 		goto done;
 	case 0:
 	default:
 		break;	/* Continue with packet processing. */
 	}
 	/* Update variables that are affected by ipsec4_output(). */
 	ip = mtod(m, struct ip *);
 	hlen = ip->ip_hl << 2;
 #endif /* IPSEC */
 
 	/* Jump over all PFIL processing if hooks are not active. */
 	if (!PFIL_HOOKED(&inet_pfil_hook))
 		goto passout;
 
 	/* Run through list of hooks for output packets. */
 	odst.s_addr = ip->ip_dst.s_addr;
 	error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
 	if (error != 0 || m == NULL)
 		goto done;
 
 	ip = mtod(m, struct ip *);
 
 	/* See if destination IP address was changed by packet filter. */
 	if (odst.s_addr != ip->ip_dst.s_addr) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		/* If destination is now ourself drop to ip_input(). */
 		if (in_localip(ip->ip_dst)) {
 			m->m_flags |= M_FASTFWD_OURS;
 			if (m->m_pkthdr.rcvif == NULL)
 				m->m_pkthdr.rcvif = loif;
 			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 				m->m_pkthdr.csum_flags |=
 				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 				m->m_pkthdr.csum_data = 0xffff;
 			}
 			m->m_pkthdr.csum_flags |=
 			    CSUM_IP_CHECKED | CSUM_IP_VALID;
 
 			error = netisr_queue(NETISR_IP, m);
 			goto done;
 		} else
 			goto again;	/* Redo the routing table lookup. */
 	}
 
 #ifdef IPFIREWALL_FORWARD
 	/* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
 	if (m->m_flags & M_FASTFWD_OURS) {
 		if (m->m_pkthdr.rcvif == NULL)
 			m->m_pkthdr.rcvif = loif;
 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 			m->m_pkthdr.csum_flags |=
 			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 			m->m_pkthdr.csum_data = 0xffff;
 		}
 		m->m_pkthdr.csum_flags |=
 			    CSUM_IP_CHECKED | CSUM_IP_VALID;
 
 		error = netisr_queue(NETISR_IP, m);
 		goto done;
 	}
 	/* Or forward to some other address? */
 	fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
 	if (fwd_tag) {
 		dst = (struct sockaddr_in *)&ro->ro_dst;
 		bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
 		m->m_flags |= M_SKIP_FIREWALL;
 		m_tag_delete(m, fwd_tag);
 		goto again;
 	}
 #endif /* IPFIREWALL_FORWARD */
 
 passout:
 	/* 127/8 must not appear on wire - RFC1122. */
 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
 			ipstat.ips_badaddr++;
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 	}
 
 	m->m_pkthdr.csum_flags |= CSUM_IP;
 	sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
 	if (sw_csum & CSUM_DELAY_DATA) {
 		in_delayed_cksum(m);
 		sw_csum &= ~CSUM_DELAY_DATA;
 	}
 	m->m_pkthdr.csum_flags &= ifp->if_hwassist;
 
 	/*
 	 * If small enough for interface, or the interface will take
 	 * care of the fragmentation for us, we can just send directly.
 	 */
 	if (ip->ip_len <= mtu ||
 	    (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
 	    ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
 		ip->ip_len = htons(ip->ip_len);
 		ip->ip_off = htons(ip->ip_off);
 		ip->ip_sum = 0;
 		if (sw_csum & CSUM_DELAY_IP)
 			ip->ip_sum = in_cksum(m, hlen);
 
 		/*
 		 * Record statistics for this interface address.
 		 * With CSUM_TSO the byte/packet count will be slightly
 		 * incorrect because we count the IP+TCP headers only
 		 * once instead of for every generated packet.
 		 */
 		if (!(flags & IP_FORWARDING) && ia) {
 			if (m->m_pkthdr.csum_flags & CSUM_TSO)
 				ia->ia_ifa.if_opackets +=
 				    m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
 			else
 				ia->ia_ifa.if_opackets++;
 			ia->ia_ifa.if_obytes += m->m_pkthdr.len;
 		}
 #ifdef MBUF_STRESS_TEST
 		if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
 			m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
 #endif
 		/*
 		 * Reset layer specific mbuf flags
 		 * to avoid confusing lower layers.
 		 */
 		m->m_flags &= ~(M_PROTOFLAGS);
 
 		error = (*ifp->if_output)(ifp, m,
 				(struct sockaddr *)dst, ro->ro_rt);
 		goto done;
 	}
 
 	/* Balk when DF bit is set or the interface didn't support TSO. */
 	if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
 		error = EMSGSIZE;
 		ipstat.ips_cantfrag++;
 		goto bad;
 	}
 
 	/*
 	 * Too large for interface; fragment if possible. If successful,
 	 * on return, m will point to a list of packets to be sent.
 	 */
 	error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum);
 	if (error)
 		goto bad;
 	for (; m; m = m0) {
 		m0 = m->m_nextpkt;
 		m->m_nextpkt = 0;
 		if (error == 0) {
 			/* Record statistics for this interface address. */
 			if (ia != NULL) {
 				ia->ia_ifa.if_opackets++;
 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
 			}
 			/*
 			 * Reset layer specific mbuf flags
 			 * to avoid confusing lower layers.
 			 */
 			m->m_flags &= ~(M_PROTOFLAGS);
 
 			error = (*ifp->if_output)(ifp, m,
 			    (struct sockaddr *)dst, ro->ro_rt);
 		} else
 			m_freem(m);
 	}
 
 	if (error == 0)
 		ipstat.ips_fragmented++;
 
 done:
 	if (ro == &iproute && ro->ro_rt) {
 		RTFREE(ro->ro_rt);
 	}
 	return (error);
 bad:
 	m_freem(m);
 	goto done;
 }
 
 /*
  * Create a chain of fragments which fit the given mtu. m_frag points to the
  * mbuf to be fragmented; on return it points to the chain with the fragments.
  * Return 0 if no error. If error, m_frag may contain a partially built
  * chain of fragments that should be freed by the caller.
  *
  * Parameters:
  *   ip                 IP header that is copied into every new fragment.
  *                      ip_len and ip_off are used as plain integers on
  *                      entry and are stored through htons() before a
  *                      successful return.
  *   m_frag             in: the packet to split; out: the same mbuf, which
  *                      on success has become the first fragment, with the
  *                      remaining fragments linked through m_nextpkt.
  *   mtu                upper bound on the size of each fragment.
  *   if_hwassist_flags  is the hw offload capabilities (see
  *                      if_data.ifi_hwassist).
  *   sw_csum            contains the delayed checksums flags (e.g.,
  *                      CSUM_DELAY_IP); when CSUM_DELAY_IP is set the header
  *                      checksum of every fragment is computed here.
  *
  * Returns:
  *   0         on success.
  *   EMSGSIZE  if IP_DF is set in ip_off, or if fewer than 8 payload bytes
  *             fit beside the header within mtu.
  *   ENOBUFS   if a fragment header mbuf or a payload copy could not be
  *             obtained.  In that case the original packet has not been
  *             trimmed and its ip_len/ip_off have not been converted.
  */
 int
 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
     u_long if_hwassist_flags, int sw_csum)
 {
 	int error = 0;
 	int hlen = ip->ip_hl << 2;
 	int len = (mtu - hlen) & ~7;	/* size of payload in each fragment */
 	int off;			/* offset into m0 of next payload byte */
 	struct mbuf *m0 = *m_frag;	/* the original packet		*/
 	int firstlen;			/* payload bytes kept in 1st fragment */
 	struct mbuf **mnext;		/* where to link the next fragment */
 	int nfrags;
 
 	if (ip->ip_off & IP_DF) {	/* Fragmentation not allowed */
 		ipstat.ips_cantfrag++;
 		return EMSGSIZE;
 	}
 
 	/*
 	 * Must be able to put at least 8 bytes per fragment.
 	 */
 	if (len < 8)
 		return EMSGSIZE;
 
 	/*
 	 * If the interface will not calculate checksums on
 	 * fragmented packets, then do it here.
 	 */
 	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
 	    (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
 		in_delayed_cksum(m0);
 		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 
 	if (len > PAGE_SIZE) {
 		/*
 		 * Fragment large datagrams such that each segment
 		 * contains a multiple of PAGE_SIZE amount of data,
 		 * plus headers. This enables a receiver to perform
 		 * page-flipping zero-copy optimizations.
 		 *
 		 * XXX When does this help given that sender and receiver
 		 * could have different page sizes, and also mtu could
 		 * be less than the receiver's page size ?
 		 */
 		int newlen;
 		struct mbuf *m;
 
 		/*
 		 * Sum the lengths of the leading mbufs that fit entirely
 		 * within mtu; that total becomes the candidate end of the
 		 * first fragment.
 		 */
 		for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
 			off += m->m_len;
 
 		/*
 		 * firstlen (off - hlen) must be aligned on an
 		 * 8-byte boundary
 		 */
 		if (off < hlen)
 			goto smart_frag_failure;
 		off = ((off - hlen) & ~7) + hlen;
 		newlen = (~PAGE_MASK) & mtu;
 		if ((newlen + sizeof (struct ip)) > mtu) {
 			/*
 			 * We failed; go back to the default split.  The label
 			 * below is also entered directly from the "off < hlen"
 			 * test above.
 			 */
 smart_frag_failure:
 			newlen = len;
 			off = hlen + len;
 		}
 		len = newlen;
 
 	} else {
 		off = hlen + len;
 	}
 
 	firstlen = off - hlen;
 	mnext = &m0->m_nextpkt;		/* pointer to next packet */
 
 	/*
 	 * Loop through length of segment after first fragment,
 	 * make new header and copy data of each part and link onto chain.
 	 * Here, m0 is the original packet, m is the fragment being created.
 	 * The fragments are linked off the m_nextpkt of the original
 	 * packet, which after processing serves as the first fragment.
 	 * nfrags starts at 1 to account for that first fragment.
 	 */
 	for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
 		struct ip *mhip;	/* ip header on the fragment */
 		struct mbuf *m;
 		int mhlen = sizeof (struct ip);
 
 		MGETHDR(m, M_DONTWAIT, MT_DATA);
 		if (m == NULL) {
 			error = ENOBUFS;
 			ipstat.ips_odropped++;
 			goto done;
 		}
 		m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
 		/*
 		 * In the first mbuf, leave room for the link header, then
 		 * copy the original IP header including options. The payload
 		 * goes into an additional mbuf chain returned by m_copy().
 		 */
 		m->m_data += max_linkhdr;
 		mhip = mtod(m, struct ip *);
 		*mhip = *ip;
 		if (hlen > sizeof (struct ip)) {
 			/*
 			 * The original carries options: let ip_optcopy()
 			 * fill in the options for this fragment and size
 			 * the fragment header from what it returns.
 			 */
 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
 			mhip->ip_v = IPVERSION;
 			mhip->ip_hl = mhlen >> 2;
 		}
 		m->m_len = mhlen;
 		/* XXX do we need to add ip->ip_off below ? */
 		/* The payload offset is expressed in units of 8 bytes. */
 		mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
 		if (off + len >= ip->ip_len) {	/* last fragment */
 			/*
 			 * Shrink len to what is left; the loop ends after
 			 * this pass because off + len now equals ip_len.
 			 */
 			len = ip->ip_len - off;
 			m->m_flags |= M_LASTFRAG;
 		} else
 			mhip->ip_off |= IP_MF;
 		mhip->ip_len = htons((u_short)(len + mhlen));
 		m->m_next = m_copy(m0, off, len);
 		if (m->m_next == NULL) {	/* copy failed */
 			/* m has not been linked onto the chain yet. */
 			m_free(m);
 			error = ENOBUFS;	/* ??? */
 			ipstat.ips_odropped++;
 			goto done;
 		}
 		m->m_pkthdr.len = mhlen + len;
 		m->m_pkthdr.rcvif = NULL;
 #ifdef MAC
 		mac_netinet_fragment(m0, m);
 #endif
 		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
 		mhip->ip_off = htons(mhip->ip_off);
 		mhip->ip_sum = 0;
 		if (sw_csum & CSUM_DELAY_IP)
 			mhip->ip_sum = in_cksum(m, mhlen);
 		*mnext = m;
 		mnext = &m->m_nextpkt;
 	}
 	ipstat.ips_ofragments += nfrags;
 
 	/* set first marker for fragment chain */
 	m0->m_flags |= M_FIRSTFRAG | M_FRAG;
 	/* The fragment count is stashed in the first fragment's csum_data. */
 	m0->m_pkthdr.csum_data = nfrags;
 
 	/*
 	 * Update first fragment by trimming what's been copied out
 	 * and updating header.  The count passed to m_adj() is
 	 * (hlen + firstlen) - ip_len, i.e. not positive.
 	 * NOTE(review): relies on m_adj() trimming from the tail for a
 	 * negative count -- see mbuf(9).
 	 */
 	m_adj(m0, hlen + firstlen - ip->ip_len);
 	m0->m_pkthdr.len = hlen + firstlen;
 	ip->ip_len = htons((u_short)m0->m_pkthdr.len);
 	ip->ip_off |= IP_MF;
 	ip->ip_off = htons(ip->ip_off);
 	ip->ip_sum = 0;
 	if (sw_csum & CSUM_DELAY_IP)
 		ip->ip_sum = in_cksum(m0, hlen);
 
 done:
 	/* Reached on success and on ENOBUFS alike; always hand m0 back. */
 	*m_frag = m0;
 	return error;
 }
 
 /*
  * Compute the checksum that was deferred for the packet in m and store it
  * in the packet.  The sum covers ip_len bytes of the packet, skipping the
  * IP header; it is written at (IP header length + m_pkthdr.csum_data)
  * bytes from the start of the mbuf data.  A computed value of zero is
  * replaced by 0xffff when CSUM_UDP is set.  If the destination does not
  * lie within the first mbuf, a diagnostic is printed and nothing is stored.
  */
 void
 in_delayed_cksum(struct mbuf *m)
 {
 	struct ip *hdr;
 	u_short sum, pos;
 
 	hdr = mtod(m, struct ip *);
 	pos = hdr->ip_hl << 2;
 
 	sum = in_cksum_skip(m, hdr->ip_len, pos);
 	if (sum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDP))
 		sum = 0xffff;
 
 	/* Advance from the end of the IP header to the checksum field. */
 	pos += m->m_pkthdr.csum_data;
 
 	if (pos + sizeof(u_short) <= m->m_len) {
 		*(u_short *)(m->m_data + pos) = sum;
 		return;
 	}
 
 	/*
 	 * XXX
 	 * The checksum field is beyond the first mbuf.  This is not
 	 * expected to happen; if it does, the right fix may be to
 	 * store the checksum in the appropriate later mbuf of the chain.
 	 */
 	printf("delayed m_pullup, m->len: %d  off: %d  p: %d\n",
 	    m->m_len, pos, hdr->ip_p);
 }
 
 /*
  * IP socket option processing.
  *
  * Handles get (SOPT_GET) and set (SOPT_SET) requests for options at level
  * IPPROTO_IP on socket so; the request itself is described by sopt.
  * Multicast options are passed on to inp_setmoptions()/inp_getmoptions(),
  * and IP_IPSEC_POLICY (only when built with IPSEC) to the ipsec4_*_policy()
  * routines.
  *
  * Returns 0 on success, otherwise an errno value:
  *   EINVAL       sopt_level is not IPPROTO_IP, or the value supplied for
  *                IP_MINTTL or IP_PORTRANGE is out of range;
  *   EMSGSIZE     IP_OPTIONS data larger than MLEN;
  *   ENOBUFS      no mbuf available for the IP_OPTIONS data;
  *   ENOPROTOOPT  option name not handled here;
  *   or whatever the copyin/copyout or delegated routine reported.
  *
  * A sopt_dir other than SOPT_SET or SOPT_GET matches no case and returns 0.
  */
 int
 ip_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct	inpcb *inp = sotoinpcb(so);
 	int	error, optval;
 
 	error = optval = 0;
 	if (sopt->sopt_level != IPPROTO_IP) {
 		return (EINVAL);
 	}
 
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case IP_OPTIONS:
 #ifdef notyet
 		case IP_RETOPTS:
 #endif
 		{
 			struct mbuf *m;
 			/* The option bytes must fit in a single plain mbuf. */
 			if (sopt->sopt_valsize > MLEN) {
 				error = EMSGSIZE;
 				break;
 			}
 			/* Only block for memory when a thread is attached. */
 			MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
 			if (m == NULL) {
 				error = ENOBUFS;
 				break;
 			}
 			m->m_len = sopt->sopt_valsize;
 			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
 					    m->m_len);
 			if (error) {
 				m_free(m);
 				break;
 			}
 			/*
 			 * NOTE(review): m is not freed here once the copyin
 			 * succeeded, so ip_pcbopts() presumably takes
 			 * ownership of it in every case -- confirm.
 			 */
 			INP_WLOCK(inp);
 			error = ip_pcbopts(inp, sopt->sopt_name, m);
 			INP_WUNLOCK(inp);
 			return (error);
 		}
 
 		/* Options whose argument is a single int. */
 		case IP_TOS:
 		case IP_TTL:
 		case IP_MINTTL:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_RECVDSTADDR:
 		case IP_RECVTTL:
 		case IP_RECVIF:
 		case IP_FAITH:
 		case IP_ONESBCAST:
 		case IP_DONTFRAG:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 
 			/*
 			 * NOTE(review): the three direct stores below are
 			 * made without INP_WLOCK, unlike the flag updates
 			 * done through OPTSET() -- confirm this is intended.
 			 */
 			switch (sopt->sopt_name) {
 			case IP_TOS:
 				inp->inp_ip_tos = optval;
 				break;
 
 			case IP_TTL:
 				inp->inp_ip_ttl = optval;
 				break;
 
 			case IP_MINTTL:
 				if (optval > 0 && optval <= MAXTTL)
 					inp->inp_ip_minttl = optval;
 				else
 					error = EINVAL;
 				break;
 
 /*
  * Set (optval non-zero) or clear (optval zero) one bit of inp_flags while
  * holding the inpcb write lock.
  */
 #define	OPTSET(bit) do {						\
 	INP_WLOCK(inp);							\
 	if (optval)							\
 		inp->inp_flags |= bit;					\
 	else								\
 		inp->inp_flags &= ~bit;					\
 	INP_WUNLOCK(inp);						\
 } while (0)
 
 			case IP_RECVOPTS:
 				OPTSET(INP_RECVOPTS);
 				break;
 
 			case IP_RECVRETOPTS:
 				OPTSET(INP_RECVRETOPTS);
 				break;
 
 			case IP_RECVDSTADDR:
 				OPTSET(INP_RECVDSTADDR);
 				break;
 
 			case IP_RECVTTL:
 				OPTSET(INP_RECVTTL);
 				break;
 
 			case IP_RECVIF:
 				OPTSET(INP_RECVIF);
 				break;
 
 			case IP_FAITH:
 				OPTSET(INP_FAITH);
 				break;
 
 			case IP_ONESBCAST:
 				OPTSET(INP_ONESBCAST);
 				break;
 			case IP_DONTFRAG:
 				OPTSET(INP_DONTFRAG);
 				break;
 			}
 			break;
 #undef OPTSET
 
 		/*
 		 * Multicast socket options are processed by the in_mcast
 		 * module.
 		 */
 		case IP_MULTICAST_IF:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_ADD_MEMBERSHIP:
 		case IP_DROP_MEMBERSHIP:
 		case IP_ADD_SOURCE_MEMBERSHIP:
 		case IP_DROP_SOURCE_MEMBERSHIP:
 		case IP_BLOCK_SOURCE:
 		case IP_UNBLOCK_SOURCE:
 		case IP_MSFILTER:
 		case MCAST_JOIN_GROUP:
 		case MCAST_LEAVE_GROUP:
 		case MCAST_JOIN_SOURCE_GROUP:
 		case MCAST_LEAVE_SOURCE_GROUP:
 		case MCAST_BLOCK_SOURCE:
 		case MCAST_UNBLOCK_SOURCE:
 			error = inp_setmoptions(inp, sopt);
 			break;
 
 		case IP_PORTRANGE:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 
 			/*
 			 * INP_LOWPORT and INP_HIGHPORT are kept mutually
 			 * exclusive; the default range clears both.
 			 */
 			INP_WLOCK(inp);
 			switch (optval) {
 			case IP_PORTRANGE_DEFAULT:
 				inp->inp_flags &= ~(INP_LOWPORT);
 				inp->inp_flags &= ~(INP_HIGHPORT);
 				break;
 
 			case IP_PORTRANGE_HIGH:
 				inp->inp_flags &= ~(INP_LOWPORT);
 				inp->inp_flags |= INP_HIGHPORT;
 				break;
 
 			case IP_PORTRANGE_LOW:
 				inp->inp_flags &= ~(INP_HIGHPORT);
 				inp->inp_flags |= INP_LOWPORT;
 				break;
 
 			default:
 				error = EINVAL;
 				break;
 			}
 			INP_WUNLOCK(inp);
 			break;
 
 #ifdef IPSEC
 		case IP_IPSEC_POLICY:
 		{
 			caddr_t req;
 			struct mbuf *m;
 
 			/*
 			 * NOTE(review): m is not freed here when
 			 * soopt_mcopyin() fails; presumably that routine
 			 * releases the chain itself on error -- confirm.
 			 */
 			if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
 				break;
 			if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
 				break;
 			req = mtod(m, caddr_t);
 			/* Pass the caller's credentials when a thread is known. */
 			error = ipsec4_set_policy(inp, sopt->sopt_name, req,
 			    m->m_len, (sopt->sopt_td != NULL) ?
 			    sopt->sopt_td->td_ucred : NULL);
 			m_freem(m);
 			break;
 		}
 #endif /* IPSEC */
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case IP_OPTIONS:
 		case IP_RETOPTS:
 			/* With no options stored, report a zero-length value. */
 			if (inp->inp_options)
 				error = sooptcopyout(sopt, 
 						     mtod(inp->inp_options,
 							  char *),
 						     inp->inp_options->m_len);
 			else
 				sopt->sopt_valsize = 0;
 			break;
 
 		/* Options reported as a single int, gathered in optval. */
 		case IP_TOS:
 		case IP_TTL:
 		case IP_MINTTL:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_RECVDSTADDR:
 		case IP_RECVTTL:
 		case IP_RECVIF:
 		case IP_PORTRANGE:
 		case IP_FAITH:
 		case IP_ONESBCAST:
 		case IP_DONTFRAG:
 			switch (sopt->sopt_name) {
 
 			case IP_TOS:
 				optval = inp->inp_ip_tos;
 				break;
 
 			case IP_TTL:
 				optval = inp->inp_ip_ttl;
 				break;
 
 			case IP_MINTTL:
 				optval = inp->inp_ip_minttl;
 				break;
 
 /* 1 if the given bit is set in inp_flags, else 0. */
 #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
 
 			case IP_RECVOPTS:
 				optval = OPTBIT(INP_RECVOPTS);
 				break;
 
 			case IP_RECVRETOPTS:
 				optval = OPTBIT(INP_RECVRETOPTS);
 				break;
 
 			case IP_RECVDSTADDR:
 				optval = OPTBIT(INP_RECVDSTADDR);
 				break;
 
 			case IP_RECVTTL:
 				optval = OPTBIT(INP_RECVTTL);
 				break;
 
 			case IP_RECVIF:
 				optval = OPTBIT(INP_RECVIF);
 				break;
 
 			case IP_PORTRANGE:
 				/* Neither flag set is reported as 0. */
 				if (inp->inp_flags & INP_HIGHPORT)
 					optval = IP_PORTRANGE_HIGH;
 				else if (inp->inp_flags & INP_LOWPORT)
 					optval = IP_PORTRANGE_LOW;
 				else
 					optval = 0;
 				break;
 
 			case IP_FAITH:
 				optval = OPTBIT(INP_FAITH);
 				break;
 
 			case IP_ONESBCAST:
 				optval = OPTBIT(INP_ONESBCAST);
 				break;
 			case IP_DONTFRAG:
 				optval = OPTBIT(INP_DONTFRAG);
 				break;
 			}
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		/*
 		 * Multicast socket options are processed by the in_mcast
 		 * module.
 		 */
 		case IP_MULTICAST_IF:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_MSFILTER:
 			error = inp_getmoptions(inp, sopt);
 			break;
 
 #ifdef IPSEC
 		case IP_IPSEC_POLICY:
 		{
 			struct mbuf *m = NULL;
 			caddr_t req = NULL;
 			size_t len = 0;
 
 			/*
 			 * m was just initialised to NULL, so this test is
 			 * never true here and req/len stay NULL/0.
 			 */
 			if (m != 0) {
 				req = mtod(m, caddr_t);
 				len = m->m_len;
 			}
 			error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
 			if (error == 0)
 				error = soopt_mcopyout(sopt, m); /* XXX */
 			/*
 			 * NOTE(review): m is freed only on success;
 			 * presumably the failing routine has already
 			 * disposed of it -- confirm.
 			 */
 			if (error == 0)
 				m_freem(m);
 			break;
 		}
 #endif /* IPSEC */
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	}
 	return (error);
 }
 
 /*
  * Called from ip_output() to feed a copy of an outgoing IP multicast packet
  * back to the input side of the given interface.  The copy is handed to the
  * loopback "driver" output path via if_simloop(), even though ifp may well
  * NOT be a loopback interface -- evil, but simpler than duplicating that
  * code here.  The caller's mbuf chain m is only copied, never consumed.
  */
 static void
 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,
     int hlen)
 {
 	struct mbuf *dup;
 	struct ip *hdr;
 
 	dup = m_copy(m, 0, M_COPYALL);
 	if (dup == NULL)
 		return;
 
 	/*
 	 * Pull the header up when the first mbuf uses external storage or
 	 * holds fewer than hlen bytes.
 	 */
 	if ((dup->m_flags & M_EXT) || dup->m_len < hlen) {
 		dup = m_pullup(dup, hlen);
 		if (dup == NULL)
 			return;
 	}
 
 	/* If needed, compute the checksum and mark it as valid. */
 	if (dup->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 		in_delayed_cksum(dup);
 		dup->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 		dup->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 		dup->m_pkthdr.csum_data = 0xffff;
 	}
 
 	/*
 	 * No fragmentation is attempted even if the IP length exceeds
 	 * the interface's MTU.  Can this possibly matter?
 	 */
 	hdr = mtod(dup, struct ip *);
 	hdr->ip_off = htons(hdr->ip_off);
 	hdr->ip_len = htons(hdr->ip_len);
 	hdr->ip_sum = 0;
 	hdr->ip_sum = in_cksum(dup, hlen);
 
 	/*
 	 * NB:
 	 * Delivering through ip_input() directly might expose lingering
 	 * reentrancy problems elsewhere (in particular, in anything
 	 * that modifies the packet in place).  Going through the protosw
 	 * directly would be yet another option.  For the moment we stay
 	 * on the safe side and use if_simloop().
 	 */
 #if 1 /* XXX */
 	if (dst->sin_family != AF_INET) {
 		printf("ip_mloopback: bad address family %d\n",
 		    dst->sin_family);
 		dst->sin_family = AF_INET;
 	}
 #endif
 
 #ifdef notdef
 	dup->m_pkthdr.rcvif = ifp;
 	ip_input(dup);
 #else
 	if_simloop(ifp, dup, dst->sin_family, 0);
 #endif
 }
Index: head/sys/netinet/ip_var.h
===================================================================
--- head/sys/netinet/ip_var.h	(revision 178887)
+++ head/sys/netinet/ip_var.h	(revision 178888)
@@ -1,248 +1,248 @@
 /*-
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_var.h	8.2 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #ifndef _NETINET_IP_VAR_H_
 #define	_NETINET_IP_VAR_H_
 
 #include <sys/queue.h>
 
 /*
  * Overlay for ip header used by other protocols (tcp, udp).
  *
  * Only the protocol, length and address fields are named; the leading
  * nine bytes are an unnamed placeholder.  Because this is laid over an
  * IP header, the order and sizes of the fields must not be changed.
  */
 struct ipovly {
 	u_char	ih_x1[9];		/* (unused) */
 	u_char	ih_pr;			/* protocol */
 	u_short	ih_len;			/* protocol length */
 	struct	in_addr ih_src;		/* source internet address */
 	struct	in_addr ih_dst;		/* destination internet address */
 };
 
 #ifdef _KERNEL
 /*
  * Ip reassembly queue structure.  Each fragment
  * being reassembled is attached to one of these structures.
  * They are timed out after ipq_ttl drops to 0, and may also
  * be reclaimed if memory becomes tight.
  *
  * Entries are chained to one another through ipq_list (a TAILQ link).
  */
 struct ipq {
 	TAILQ_ENTRY(ipq) ipq_list;	/* to other reass headers */
 	u_char	ipq_ttl;		/* time for reass q to live */
 	u_char	ipq_p;			/* protocol of this fragment */
 	u_short	ipq_id;			/* sequence id for reassembly */
 	struct mbuf *ipq_frags;		/* to ip headers of fragments */
 	struct	in_addr ipq_src,ipq_dst;	/* addresses of the datagram */
 	/* u_char counter: cannot count beyond UCHAR_MAX fragments. */
 	u_char	ipq_nfrags;		/* # frags in this packet */
 	struct label *ipq_label;	/* MAC label */
 };
 #endif /* _KERNEL */
 
 /*
  * Structure stored in mbuf in inpcb.inp_options
  * and passed to ip_output when ip options are in use.
  * The actual length of the options (including ipopt_dst)
  * is in m_len.
  */
 /*
  * Maximum number of option bytes.
  * NOTE(review): presumably the 60-byte maximum IP header (4-bit ip_hl
  * counted in 32-bit words) minus the 20-byte fixed header -- confirm.
  */
 #define MAX_IPOPTLEN	40
 
 struct ipoption {
 	struct	in_addr ipopt_dst;	/* first-hop dst if source routed */
 	char	ipopt_list[MAX_IPOPTLEN];	/* options proper */
 };
 
 /*
  * Multicast source list entry.
  *
  * Entries are linked through ims_next into the imf_sources list of a
  * struct in_mfilter.
  */
 struct in_msource {
 	TAILQ_ENTRY(in_msource) ims_next;	/* next source */
 	struct sockaddr_storage ims_addr;	/* address of this source */
 };
 
 /*
  * Multicast filter descriptor; there is one instance per group membership
  * on a socket, allocated as an expandable vector hung off ip_moptions.
  * struct in_multi contains separate IPv4-stack-wide state for IGMPv3.
  *
  * The vector is reached through ip_moptions.imo_mfilters; the sources of
  * one filter are struct in_msource entries on imf_sources.
  */
 struct in_mfilter {
 	uint16_t	imf_fmode;	/* filter mode for this socket/group */
 	uint16_t	imf_nsources;	/* # of sources for this socket/group */
 	TAILQ_HEAD(, in_msource) imf_sources;	/* source list */
 };
 
 /*
  * Structure attached to inpcb.ip_moptions and
  * passed to ip_output when IP multicast options are in use.
  * This structure is lazy-allocated.
  *
  * NOTE(review): imo_membership and imo_mfilters look like parallel vectors
  * with one slot per group membership (see the comment on struct
  * in_mfilter) -- confirm against the in_mcast code.
  */
 struct ip_moptions {
 	struct	ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */
 	struct in_addr imo_multicast_addr; /* ifindex/addr on MULTICAST_IF */
 	u_long	imo_multicast_vif;	/* vif num outgoing multicasts */
 	u_char	imo_multicast_ttl;	/* TTL for outgoing multicasts */
 	u_char	imo_multicast_loop;	/* 1 => hear sends if a member */
 	u_short	imo_num_memberships;	/* no. memberships this socket */
 	u_short	imo_max_memberships;	/* max memberships this socket */
 	struct	in_multi **imo_membership;	/* group memberships */
 	struct	in_mfilter *imo_mfilters;	/* source filters */
 };
 
 /*
  * IP statistics counters.  The kernel's instance is the global named
  * ipstat, declared extern further down in this header.
  *
  * NOTE(review): this structure is not inside #ifdef _KERNEL, so it is
  * presumably also read by userland tools; if so, fields should only ever
  * be appended -- confirm before changing the layout.
  */
 struct	ipstat {
 	u_long	ips_total;		/* total packets received */
 	u_long	ips_badsum;		/* checksum bad */
 	u_long	ips_tooshort;		/* packet too short */
 	u_long	ips_toosmall;		/* not enough data */
 	u_long	ips_badhlen;		/* ip header length < data size */
 	u_long	ips_badlen;		/* ip length < ip header length */
 	u_long	ips_fragments;		/* fragments received */
 	u_long	ips_fragdropped;	/* frags dropped (dups, out of space) */
 	u_long	ips_fragtimeout;	/* fragments timed out */
 	u_long	ips_forward;		/* packets forwarded */
 	u_long	ips_fastforward;	/* packets fast forwarded */
 	u_long	ips_cantforward;	/* packets rcvd for unreachable dest */
 	u_long	ips_redirectsent;	/* packets forwarded on same net */
 	u_long	ips_noproto;		/* unknown or unsupported protocol */
 	u_long	ips_delivered;		/* datagrams delivered to upper level*/
 	u_long	ips_localout;		/* total ip packets generated here */
 	u_long	ips_odropped;		/* lost packets due to nobufs, etc. */
 	u_long	ips_reassembled;	/* total packets reassembled ok */
 	u_long	ips_fragmented;		/* datagrams successfully fragmented */
 	u_long	ips_ofragments;		/* output fragments created */
 	u_long	ips_cantfrag;		/* don't fragment flag was set, etc. */
 	u_long	ips_badoptions;		/* error in option processing */
 	u_long	ips_noroute;		/* packets discarded due to no route */
 	u_long	ips_badvers;		/* ip version != 4 */
 	u_long	ips_rawout;		/* total raw ip packets generated */
 	u_long	ips_toolong;		/* ip length > max ip packet size */
 	u_long	ips_notmember;		/* multicasts for unregistered grps */
 	u_long	ips_nogif;		/* no match gif found */
 	u_long	ips_badaddr;		/* invalid address on header */
 };
 
 #ifdef _KERNEL
 
 /* flags passed to ip_output as last parameter */
 #define	IP_FORWARDING		0x1		/* most of ip header exists */
 #define	IP_RAWOUTPUT		0x2		/* raw ip header exists */
 #define	IP_SENDONES		0x4		/* send all-ones broadcast */
 #define	IP_SENDTOIF		0x8		/* send on specific ifnet */
 #define IP_ROUTETOIF		SO_DONTROUTE	/* 0x10 bypass routing tables */
 #define IP_ALLOWBROADCAST	SO_BROADCAST	/* 0x20 can send broadcast packets */
 
 /*
  * mbuf flag used by ip_fastfwd
  */
 #define	M_FASTFWD_OURS		M_PROTO1	/* changed dst to local */
 
 #ifdef __NO_STRICT_ALIGNMENT
 #define IP_HDR_ALIGNED_P(ip)	1
 #else
 #define IP_HDR_ALIGNED_P(ip)	((((intptr_t) (ip)) & 3) == 0)
 #endif
 
 struct ip;
 struct inpcb;
 struct route;
 struct sockopt;
 
 extern struct	ipstat	ipstat;
 extern u_short	ip_id;			/* ip packet ctr, for ids */
 extern int	ip_defttl;		/* default IP ttl */
 extern int	ipforwarding;		/* ip forwarding */
 #ifdef IPSTEALTH
 extern int	ipstealth;		/* stealth forwarding */
 #endif
 extern u_char	ip_protox[];
 extern struct socket *ip_rsvpd;		/* reservation protocol daemon */
 extern struct socket *ip_mrouter;	/* multicast routing daemon */
 extern int	(*legal_vif_num)(int);
 extern u_long	(*ip_mcast_src)(int);
 extern int rsvp_on;
 extern struct	pr_usrreqs rip_usrreqs;
 
 void	inp_freemoptions(struct ip_moptions *);
 int	inp_getmoptions(struct inpcb *, struct sockopt *);
 int	inp_setmoptions(struct inpcb *, struct sockopt *);
 
 int	ip_ctloutput(struct socket *, struct sockopt *sopt);
 void	ip_drain(void);
 void	ip_fini(void *xtp);
 int	ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
 	    u_long if_hwassist_flags, int sw_csum);
 void	ip_forward(struct mbuf *m, int srcrt);
 void	ip_init(void);
 extern int
 	(*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 	    struct ip_moptions *);
 int	ip_output(struct mbuf *,
 	    struct mbuf *, struct route *, int, struct ip_moptions *,
 	    struct inpcb *);
 int	ipproto_register(u_char);
 int	ipproto_unregister(u_char);
 struct mbuf *
 	ip_reass(struct mbuf *);
 struct in_ifaddr *
-	ip_rtaddr(struct in_addr);
+	ip_rtaddr(struct in_addr, u_int fibnum);
 void	ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
 	    struct mbuf *);
 void	ip_slowtimo(void);
 u_int16_t	ip_randomid(void);
 int	rip_ctloutput(struct socket *, struct sockopt *);
 void	rip_ctlinput(int, struct sockaddr *, void *);
 void	rip_init(void);
 void	rip_input(struct mbuf *, int);
 int	rip_output(struct mbuf *, struct socket *, u_long);
 void	ipip_input(struct mbuf *, int);
 void	rsvp_input(struct mbuf *, int);
 int	ip_rsvp_init(struct socket *);
 int	ip_rsvp_done(void);
 extern int	(*ip_rsvp_vif)(struct socket *, struct sockopt *);
 extern void	(*ip_rsvp_force_done)(struct socket *);
 extern void	(*rsvp_input_p)(struct mbuf *m, int off);
 
 extern	struct pfil_head inet_pfil_hook;	/* packet filter hooks */
 
 void	in_delayed_cksum(struct mbuf *m);
 
 static __inline uint16_t ip_newid(void);
 extern int ip_do_randomid;
 
 static __inline uint16_t
 ip_newid(void)
 {
 	if (ip_do_randomid)
 		return ip_randomid();
 
 	return htons(ip_id++);
 }
 
 #endif /* _KERNEL */
 
 #endif /* !_NETINET_IP_VAR_H_ */
Index: head/sys/netinet/raw_ip.c
===================================================================
--- head/sys/netinet/raw_ip.c	(revision 178887)
+++ head/sys/netinet/raw_ip.c	(revision 178888)
@@ -1,918 +1,918 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_mroute.h>
 
 #include <netinet/ip_fw.h>
 #include <netinet/ip_dummynet.h>
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #endif /*IPSEC*/
 
 #include <security/mac/mac_framework.h>
 
 struct	inpcbhead ripcb;
 struct	inpcbinfo ripcbinfo;
 
 /* control hooks for ipfw and dummynet */
 ip_fw_ctl_t *ip_fw_ctl_ptr = NULL;
 ip_dn_ctl_t *ip_dn_ctl_ptr = NULL;
 
 /*
  * hooks for multicast routing. They all default to NULL,
  * so leave them not initialized and rely on BSS being set to 0.
  */
 
 /* The socket used to communicate with the multicast routing daemon.  */
 struct socket  *ip_mrouter;
 
 /* The various mrouter and rsvp functions */
 int (*ip_mrouter_set)(struct socket *, struct sockopt *);
 int (*ip_mrouter_get)(struct socket *, struct sockopt *);
 int (*ip_mrouter_done)(void);
 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 		   struct ip_moptions *);
-int (*mrt_ioctl)(int, caddr_t);
+int (*mrt_ioctl)(int, caddr_t, int);
 int (*legal_vif_num)(int);
 u_long (*ip_mcast_src)(int);
 
 void (*rsvp_input_p)(struct mbuf *m, int off);
 int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
 void (*ip_rsvp_force_done)(struct socket *);
 
 /*
  * Raw interface to IP protocol.
  */
 
 /*
  * Initialize raw connection block q.
  */
 static void
 rip_zone_change(void *tag)
 {
 
 	uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets);
 }
 
 static int
 rip_inpcb_init(void *mem, int size, int flags)
 {
 	struct inpcb *inp = mem;
 
 	INP_LOCK_INIT(inp, "inp", "rawinp");
 	return (0);
 }
 
 void
 rip_init(void)
 {
 
 	INP_INFO_LOCK_INIT(&ripcbinfo, "rip");
 	LIST_INIT(&ripcb);
 	ripcbinfo.ipi_listhead = &ripcb;
 	/*
 	 * XXX We don't use the hash list for raw IP, but it's easier
 	 * to allocate a one entry hash list than it is to check all
 	 * over the place for hashbase == NULL.
 	 */
 	ripcbinfo.ipi_hashbase = hashinit(1, M_PCB, &ripcbinfo.ipi_hashmask);
 	ripcbinfo.ipi_porthashbase = hashinit(1, M_PCB,
 	    &ripcbinfo.ipi_porthashmask);
 	ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb),
 	    NULL, NULL, rip_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets);
 	EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change,
 		NULL, EVENTHANDLER_PRI_ANY);
 }
 
 static struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
 
 static int
 raw_append(struct inpcb *last, struct ip *ip, struct mbuf *n)
 {
 	int policyfail = 0;
 
 	INP_RLOCK_ASSERT(last);
 
 #ifdef IPSEC
 	/* check AH/ESP integrity. */
 	if (ipsec4_in_reject(n, last)) {
 		policyfail = 1;
 	}
 #endif /* IPSEC */
 #ifdef MAC
 	if (!policyfail && mac_inpcb_check_deliver(last, n) != 0)
 		policyfail = 1;
 #endif
 	/* Check the minimum TTL for socket. */
 	if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl)
 		policyfail = 1;
 	if (!policyfail) {
 		struct mbuf *opts = NULL;
 		struct socket *so;
 
 		so = last->inp_socket;
 		if ((last->inp_flags & INP_CONTROLOPTS) ||
 		    (so->so_options & (SO_TIMESTAMP | SO_BINTIME)))
 			ip_savecontrol(last, &opts, ip, n);
 		SOCKBUF_LOCK(&so->so_rcv);
 		if (sbappendaddr_locked(&so->so_rcv,
 		    (struct sockaddr *)&ripsrc, n, opts) == 0) {
 			/* should notify about lost packet */
 			m_freem(n);
 			if (opts)
 				m_freem(opts);
 			SOCKBUF_UNLOCK(&so->so_rcv);
 		} else
 			sorwakeup_locked(so);
 	} else
 		m_freem(n);
 	return policyfail;
 }
 
 /*
  * Setup generic address and protocol structures
  * for raw_input routine, then pass them along with
  * mbuf chain.
  */
 void
 rip_input(struct mbuf *m, int off)
 {
 	struct ip *ip = mtod(m, struct ip *);
 	int proto = ip->ip_p;
 	struct inpcb *inp, *last;
 
 	INP_INFO_RLOCK(&ripcbinfo);
 	ripsrc.sin_addr = ip->ip_src;
 	last = NULL;
 	LIST_FOREACH(inp, &ripcb, inp_list) {
 		INP_RLOCK(inp);
 		if (inp->inp_ip_p && inp->inp_ip_p != proto) {
 	docontinue:
 			INP_RUNLOCK(inp);
 			continue;
 		}
 #ifdef INET6
 		if ((inp->inp_vflag & INP_IPV4) == 0)
 			goto docontinue;
 #endif
 		if (inp->inp_laddr.s_addr &&
 		    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
 			goto docontinue;
 		if (inp->inp_faddr.s_addr &&
 		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
 			goto docontinue;
 		if (jailed(inp->inp_socket->so_cred))
 			if (htonl(prison_getip(inp->inp_socket->so_cred)) !=
 			    ip->ip_dst.s_addr)
 				goto docontinue;
 		if (last) {
 			struct mbuf *n;
 
 			n = m_copy(m, 0, (int)M_COPYALL);
 			if (n != NULL)
 				(void) raw_append(last, ip, n);
 			/* XXX count dropped packet */
 			INP_RUNLOCK(last);
 		}
 		last = inp;
 	}
 	if (last != NULL) {
 		if (raw_append(last, ip, m) != 0)
 			ipstat.ips_delivered--;
 		INP_RUNLOCK(last);
 	} else {
 		m_freem(m);
 		ipstat.ips_noproto++;
 		ipstat.ips_delivered--;
 	}
 	INP_INFO_RUNLOCK(&ripcbinfo);
 }
 
 /*
  * Generate IP header and pass packet to ip_output.
  * Tack on options user may have setup with control call.
  */
 int
 rip_output(struct mbuf *m, struct socket *so, u_long dst)
 {
 	struct ip *ip;
 	int error;
 	struct inpcb *inp = sotoinpcb(so);
 	int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) |
 	    IP_ALLOWBROADCAST;
 
 	/*
 	 * If the user handed us a complete IP packet, use it.
 	 * Otherwise, allocate an mbuf for a header and fill it in.
 	 */
 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
 		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
 			m_freem(m);
 			return(EMSGSIZE);
 		}
 		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
 		if (m == NULL)
 			return(ENOBUFS);
 
 		INP_RLOCK(inp);
 		ip = mtod(m, struct ip *);
 		ip->ip_tos = inp->inp_ip_tos;
 		if (inp->inp_flags & INP_DONTFRAG)
 			ip->ip_off = IP_DF;
 		else
 			ip->ip_off = 0;
 		ip->ip_p = inp->inp_ip_p;
 		ip->ip_len = m->m_pkthdr.len;
 		if (jailed(inp->inp_socket->so_cred))
 			ip->ip_src.s_addr =
 			    htonl(prison_getip(inp->inp_socket->so_cred));
 		else
 			ip->ip_src = inp->inp_laddr;
 		ip->ip_dst.s_addr = dst;
 		ip->ip_ttl = inp->inp_ip_ttl;
 	} else {
 		if (m->m_pkthdr.len > IP_MAXPACKET) {
 			m_freem(m);
 			return(EMSGSIZE);
 		}
 		INP_RLOCK(inp);
 		ip = mtod(m, struct ip *);
 		if (jailed(inp->inp_socket->so_cred)) {
 			if (ip->ip_src.s_addr !=
 			    htonl(prison_getip(inp->inp_socket->so_cred))) {
 				INP_RUNLOCK(inp);
 				m_freem(m);
 				return (EPERM);
 			}
 		}
 		/* don't allow both user specified and setsockopt options,
 		   and don't allow packet length sizes that will crash */
 		if (((ip->ip_hl != (sizeof (*ip) >> 2))
 		     && inp->inp_options)
 		    || (ip->ip_len > m->m_pkthdr.len)
 		    || (ip->ip_len < (ip->ip_hl << 2))) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return EINVAL;
 		}
 		if (ip->ip_id == 0)
 			ip->ip_id = ip_newid();
 		/* XXX prevent ip_output from overwriting header fields */
 		flags |= IP_RAWOUTPUT;
 		ipstat.ips_rawout++;
 	}
 
 	if (inp->inp_flags & INP_ONESBCAST)
 		flags |= IP_SENDONES;
 
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
 #endif
 
 	error = ip_output(m, inp->inp_options, NULL, flags,
 	    inp->inp_moptions, inp);
 	INP_RUNLOCK(inp);
 	return error;
 }
 
 /*
  * Raw IP socket option processing.
  *
  * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could
  * only be created by a privileged process, and as such, socket option
  * operations to manage system properties on any raw socket were allowed to
  * take place without explicit additional access control checks.  However,
  * raw sockets can now also be created in jail(), and therefore explicit
  * checks are now required.  Likewise, raw sockets can be used by a process
  * after it gives up privilege, so some caution is required.  For options
  * passed down to the IP layer via ip_ctloutput(), checks are assumed to be
  * performed in ip_ctloutput() and therefore no check occurs here.
  * Unilaterally checking priv_check() here breaks normal IP socket option
  * operations on raw sockets.
  *
  * When adding new socket options here, make sure to add access control
  * checks here as necessary.
  */
 int
 rip_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct	inpcb *inp = sotoinpcb(so);
 	int	error, optval;
 
 	if (sopt->sopt_level != IPPROTO_IP)
 		return (EINVAL);
 
 	error = 0;
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case IP_HDRINCL:
 			optval = inp->inp_flags & INP_HDRINCL;
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		case IP_FW_ADD:	/* ADD actually returns the body... */
 		case IP_FW_GET:
 		case IP_FW_TABLE_GETSIZE:
 		case IP_FW_TABLE_LIST:
 		case IP_FW_NAT_GET_CONFIG:
 		case IP_FW_NAT_GET_LOG:
 			/*
 			 * XXXRW: Isn't this checked one layer down?  Yes, it
 			 * is.
 			 */
 			error = priv_check(curthread, PRIV_NETINET_IPFW);
 			if (error != 0)
 				return (error);
 			if (ip_fw_ctl_ptr != NULL)
 				error = ip_fw_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break;
 
 		case IP_DUMMYNET_GET:
 			error = priv_check(curthread, PRIV_NETINET_DUMMYNET);
 			if (error != 0)
 				return (error);
 			if (ip_dn_ctl_ptr != NULL)
 				error = ip_dn_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break ;
 
 		case MRT_INIT:
 		case MRT_DONE:
 		case MRT_ADD_VIF:
 		case MRT_DEL_VIF:
 		case MRT_ADD_MFC:
 		case MRT_DEL_MFC:
 		case MRT_VERSION:
 		case MRT_ASSERT:
 		case MRT_API_SUPPORT:
 		case MRT_API_CONFIG:
 		case MRT_ADD_BW_UPCALL:
 		case MRT_DEL_BW_UPCALL:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
 				EOPNOTSUPP;
 			break;
 
 		default:
 			error = ip_ctloutput(so, sopt);
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case IP_HDRINCL:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 			if (optval)
 				inp->inp_flags |= INP_HDRINCL;
 			else
 				inp->inp_flags &= ~INP_HDRINCL;
 			break;
 
 		case IP_FW_ADD:
 		case IP_FW_DEL:
 		case IP_FW_FLUSH:
 		case IP_FW_ZERO:
 		case IP_FW_RESETLOG:
 		case IP_FW_TABLE_ADD:
 		case IP_FW_TABLE_DEL:
 		case IP_FW_TABLE_FLUSH:
 		case IP_FW_NAT_CFG:
 		case IP_FW_NAT_DEL:
 			/*
 			 * XXXRW: Isn't this checked one layer down?
 			 */
 			error = priv_check(curthread, PRIV_NETINET_IPFW);
 			if (error != 0)
 				return (error);
 			if (ip_fw_ctl_ptr != NULL)
 				error = ip_fw_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break;
 
 		case IP_DUMMYNET_CONFIGURE:
 		case IP_DUMMYNET_DEL:
 		case IP_DUMMYNET_FLUSH:
 			error = priv_check(curthread, PRIV_NETINET_DUMMYNET);
 			if (error != 0)
 				return (error);
 			if (ip_dn_ctl_ptr != NULL)
 				error = ip_dn_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT ;
 			break ;
 
 		case IP_RSVP_ON:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_rsvp_init(so);
 			break;
 
 		case IP_RSVP_OFF:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_rsvp_done();
 			break;
 
 		case IP_RSVP_VIF_ON:
 		case IP_RSVP_VIF_OFF:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_rsvp_vif ?
 				ip_rsvp_vif(so, sopt) : EINVAL;
 			break;
 
 		case MRT_INIT:
 		case MRT_DONE:
 		case MRT_ADD_VIF:
 		case MRT_DEL_VIF:
 		case MRT_ADD_MFC:
 		case MRT_DEL_MFC:
 		case MRT_VERSION:
 		case MRT_ASSERT:
 		case MRT_API_SUPPORT:
 		case MRT_API_CONFIG:
 		case MRT_ADD_BW_UPCALL:
 		case MRT_DEL_BW_UPCALL:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
 					EOPNOTSUPP;
 			break;
 
 		default:
 			error = ip_ctloutput(so, sopt);
 			break;
 		}
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * This function exists solely to receive the PRC_IFDOWN messages which
  * are sent by if_down().  It looks for an ifaddr whose ifa_addr is sa,
  * and calls in_ifadown() to remove all routes corresponding to that address.
  * It also receives the PRC_IFUP messages from if_up() and reinstalls the
  * interface routes.
  */
 void
 rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 	struct in_ifaddr *ia;
 	struct ifnet *ifp;
 	int err;
 	int flags;
 
 	switch (cmd) {
 	case PRC_IFDOWN:
 		TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
 			if (ia->ia_ifa.ifa_addr == sa
 			    && (ia->ia_flags & IFA_ROUTE)) {
 				/*
 				 * in_ifscrub kills the interface route.
 				 */
 				in_ifscrub(ia->ia_ifp, ia);
 				/*
 				 * in_ifadown gets rid of all the rest of
 				 * the routes.  This is not quite the right
 				 * thing to do, but at least if we are running
 				 * a routing process they will come back.
 				 */
 				in_ifadown(&ia->ia_ifa, 0);
 				break;
 			}
 		}
 		break;
 
 	case PRC_IFUP:
 		TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
 			if (ia->ia_ifa.ifa_addr == sa)
 				break;
 		}
 		if (ia == 0 || (ia->ia_flags & IFA_ROUTE))
 			return;
 		flags = RTF_UP;
 		ifp = ia->ia_ifa.ifa_ifp;
 
 		if ((ifp->if_flags & IFF_LOOPBACK)
 		    || (ifp->if_flags & IFF_POINTOPOINT))
 			flags |= RTF_HOST;
 
 		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
 		if (err == 0)
 			ia->ia_flags |= IFA_ROUTE;
 		break;
 	}
 }
 
 u_long	rip_sendspace = 9216;
 u_long	rip_recvspace = 9216;
 
 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
     &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
     &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
 
 static int
 rip_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcb *inp;
 	int error;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp == NULL, ("rip_attach: inp != NULL"));
 
 	error = priv_check(td, PRIV_NETINET_RAW);
 	if (error)
 		return error;
 	if (proto >= IPPROTO_MAX || proto < 0)
 		return EPROTONOSUPPORT;
 	error = soreserve(so, rip_sendspace, rip_recvspace);
 	if (error)
 		return error;
 	INP_INFO_WLOCK(&ripcbinfo);
 	error = in_pcballoc(so, &ripcbinfo);
 	if (error) {
 		INP_INFO_WUNLOCK(&ripcbinfo);
 		return error;
 	}
 	inp = (struct inpcb *)so->so_pcb;
 	INP_INFO_WUNLOCK(&ripcbinfo);
 	inp->inp_vflag |= INP_IPV4;
 	inp->inp_ip_p = proto;
 	inp->inp_ip_ttl = ip_defttl;
 	INP_WUNLOCK(inp);
 	return 0;
 }
 
 static void
 rip_detach(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
 	KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, 
 	    ("rip_detach: not closed"));
 
 	INP_INFO_WLOCK(&ripcbinfo);
 	INP_WLOCK(inp);
 	if (so == ip_mrouter && ip_mrouter_done)
 		ip_mrouter_done();
 	if (ip_rsvp_force_done)
 		ip_rsvp_force_done(so);
 	if (so == ip_rsvpd)
 		ip_rsvp_done();
 	in_pcbdetach(inp);
 	in_pcbfree(inp);
 	INP_INFO_WUNLOCK(&ripcbinfo);
 }
 
 static void
 rip_dodisconnect(struct socket *so, struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 
 	inp->inp_faddr.s_addr = INADDR_ANY;
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTED;
 	SOCK_UNLOCK(so);
 }
 
 static void
 rip_abort(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_abort: inp == NULL"));
 
 	INP_INFO_WLOCK(&ripcbinfo);
 	INP_WLOCK(inp);
 	rip_dodisconnect(so, inp);
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(&ripcbinfo);
 }
 
 static void
 rip_close(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_close: inp == NULL"));
 
 	INP_INFO_WLOCK(&ripcbinfo);
 	INP_WLOCK(inp);
 	rip_dodisconnect(so, inp);
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(&ripcbinfo);
 }
 
 static int
 rip_disconnect(struct socket *so)
 {
 	struct inpcb *inp;
 
 	if ((so->so_state & SS_ISCONNECTED) == 0)
 		return ENOTCONN;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_disconnect: inp == NULL"));
 	INP_INFO_WLOCK(&ripcbinfo);
 	INP_WLOCK(inp);
 	rip_dodisconnect(so, inp);
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(&ripcbinfo);
 	return (0);
 }
 
 static int
 rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
 	struct inpcb *inp;
 
 	if (nam->sa_len != sizeof(*addr))
 		return EINVAL;
 
 	if (jailed(td->td_ucred)) {
 		if (addr->sin_addr.s_addr == INADDR_ANY)
 			addr->sin_addr.s_addr =
 			    htonl(prison_getip(td->td_ucred));
 		if (htonl(prison_getip(td->td_ucred)) != addr->sin_addr.s_addr)
 			return (EADDRNOTAVAIL);
 	}
 
 	if (TAILQ_EMPTY(&ifnet) ||
 	    (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) ||
 	    (addr->sin_addr.s_addr &&
 	     ifa_ifwithaddr((struct sockaddr *)addr) == 0))
 		return EADDRNOTAVAIL;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_bind: inp == NULL"));
 	INP_INFO_WLOCK(&ripcbinfo);
 	INP_WLOCK(inp);
 	inp->inp_laddr = addr->sin_addr;
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(&ripcbinfo);
 	return 0;
 }
 
 static int
 rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
 	struct inpcb *inp;
 
 	if (nam->sa_len != sizeof(*addr))
 		return EINVAL;
 	if (TAILQ_EMPTY(&ifnet))
 		return EADDRNOTAVAIL;
 	if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK)
 		return EAFNOSUPPORT;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_connect: inp == NULL"));
 	INP_INFO_WLOCK(&ripcbinfo);
 	INP_WLOCK(inp);
 	inp->inp_faddr = addr->sin_addr;
 	soisconnected(so);
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(&ripcbinfo);
 	return 0;
 }
 
 static int
 rip_shutdown(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_shutdown: inp == NULL"));
 	INP_WLOCK(inp);
 	socantsendmore(so);
 	INP_WUNLOCK(inp);
 	return 0;
 }
 
 static int
 rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
 	struct inpcb *inp;
 	u_long dst;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_send: inp == NULL"));
 	/*
 	 * Note: 'dst' reads below are unlocked.
 	 */
 	if (so->so_state & SS_ISCONNECTED) {
 		if (nam) {
 			m_freem(m);
 			return EISCONN;
 		}
 		dst = inp->inp_faddr.s_addr;	/* Unlocked read. */
 	} else {
 		if (nam == NULL) {
 			m_freem(m);
 			return ENOTCONN;
 		}
 		dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
 	}
 	return rip_output(m, so, dst);
 }
 
 static int
 rip_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	int error, i, n;
 	struct inpcb *inp, **inp_list;
 	inp_gen_t gencnt;
 	struct xinpgen xig;
 
 	/*
 	 * The process of preparing the TCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == 0) {
 		n = ripcbinfo.ipi_count;
 		req->oldidx = 2 * (sizeof xig)
 			+ (n + n/8) * sizeof(struct xinpcb);
 		return 0;
 	}
 
 	if (req->newptr != 0)
 		return EPERM;
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	INP_INFO_RLOCK(&ripcbinfo);
 	gencnt = ripcbinfo.ipi_gencnt;
 	n = ripcbinfo.ipi_count;
 	INP_INFO_RUNLOCK(&ripcbinfo);
 
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n;
 	xig.xig_gen = gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return error;
 
 	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
 	if (inp_list == 0)
 		return ENOMEM;
 
 	INP_INFO_RLOCK(&ripcbinfo);
 	for (inp = LIST_FIRST(ripcbinfo.ipi_listhead), i = 0; inp && i < n;
 	     inp = LIST_NEXT(inp, inp_list)) {
 		INP_RLOCK(inp);
 		if (inp->inp_gencnt <= gencnt &&
 		    cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) {
 			/* XXX held references? */
 			inp_list[i++] = inp;
 		}
 		INP_RUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(&ripcbinfo);
 	n = i;
 
 	error = 0;
 	for (i = 0; i < n; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (inp->inp_gencnt <= gencnt) {
 			struct xinpcb xi;
 			bzero(&xi, sizeof(xi));
 			xi.xi_len = sizeof xi;
 			/* XXX should avoid extra copy */
 			bcopy(inp, &xi.xi_inp, sizeof *inp);
 			if (inp->inp_socket)
 				sotoxsocket(inp->inp_socket, &xi.xi_socket);
 			INP_RUNLOCK(inp);
 			error = SYSCTL_OUT(req, &xi, sizeof xi);
 		} else
 			INP_RUNLOCK(inp);
 	}
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.
 		 * If the generation differs from what we told
 		 * her before, she knows that something happened
 		 * while we were processing this request, and it
 		 * might be necessary to retry.
 		 */
 		INP_INFO_RLOCK(&ripcbinfo);
 		xig.xig_gen = ripcbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = ripcbinfo.ipi_count;
 		INP_INFO_RUNLOCK(&ripcbinfo);
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 	free(inp_list, M_TEMP);
 	return error;
 }
 
 SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0,
 	    rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
 
 struct pr_usrreqs rip_usrreqs = {
 	.pru_abort =		rip_abort,
 	.pru_attach =		rip_attach,
 	.pru_bind =		rip_bind,
 	.pru_connect =		rip_connect,
 	.pru_control =		in_control,
 	.pru_detach =		rip_detach,
 	.pru_disconnect =	rip_disconnect,
 	.pru_peeraddr =		in_getpeeraddr,
 	.pru_send =		rip_send,
 	.pru_shutdown =		rip_shutdown,
 	.pru_sockaddr =		in_getsockaddr,
 	.pru_sosetlabel =	in_pcbsosetlabel,
 	.pru_close =		rip_close,
 };
Index: head/sys/netinet/sctp_os_bsd.h
===================================================================
--- head/sys/netinet/sctp_os_bsd.h	(revision 178887)
+++ head/sys/netinet/sctp_os_bsd.h	(revision 178888)
@@ -1,465 +1,465 @@
 /*-
  * Copyright (c) 2006-2007, by Cisco Systems, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * a) Redistributions of source code must retain the above copyright notice,
  *   this list of conditions and the following disclaimer.
  *
  * b) Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *   the documentation and/or other materials provided with the distribution.
  *
  * c) Neither the name of Cisco Systems, Inc. nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 #ifndef __sctp_os_bsd_h__
 #define __sctp_os_bsd_h__
 /*
  * includes
  */
 #include "opt_ipsec.h"
 #include "opt_compat.h"
 #include "opt_inet6.h"
 #include "opt_inet.h"
 #include "opt_sctp.h"
 #include <sys/param.h>
 #include <sys/ktr.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/jail.h>
 #include <sys/sysctl.h>
 #include <sys/resourcevar.h>
 #include <sys/uio.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/kthread.h>
 #include <sys/priv.h>
 #include <sys/random.h>
 #include <sys/limits.h>
 #include <sys/queue.h>
 #include <machine/cpu.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/icmp_var.h>
 
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/key.h>
 #endif				/* IPSEC */
 
 #ifdef INET6
 #include <sys/domain.h>
 #ifdef IPSEC
 #include <netipsec/ipsec6.h>
 #endif
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet/icmp6.h>
 #include <netinet6/ip6protosw.h>
 #include <netinet6/nd6.h>
 #include <netinet6/scope6_var.h>
 #endif				/* INET6 */
 
 
 #include <netinet/ip_options.h>
 
 #ifndef in6pcb
 #define in6pcb		inpcb
 #endif
 /* Declare all the malloc names for all the various mallocs */
 MALLOC_DECLARE(SCTP_M_MAP);
 MALLOC_DECLARE(SCTP_M_STRMI);
 MALLOC_DECLARE(SCTP_M_STRMO);
 MALLOC_DECLARE(SCTP_M_ASC_ADDR);
 MALLOC_DECLARE(SCTP_M_ASC_IT);
 MALLOC_DECLARE(SCTP_M_AUTH_CL);
 MALLOC_DECLARE(SCTP_M_AUTH_KY);
 MALLOC_DECLARE(SCTP_M_AUTH_HL);
 MALLOC_DECLARE(SCTP_M_AUTH_IF);
 MALLOC_DECLARE(SCTP_M_STRESET);
 MALLOC_DECLARE(SCTP_M_CMSG);
 MALLOC_DECLARE(SCTP_M_COPYAL);
 MALLOC_DECLARE(SCTP_M_VRF);
 MALLOC_DECLARE(SCTP_M_IFA);
 MALLOC_DECLARE(SCTP_M_IFN);
 MALLOC_DECLARE(SCTP_M_TIMW);
 MALLOC_DECLARE(SCTP_M_MVRF);
 MALLOC_DECLARE(SCTP_M_ITER);
 MALLOC_DECLARE(SCTP_M_SOCKOPT);
 
 #if defined(SCTP_LOCAL_TRACE_BUF)
 
 #define SCTP_GET_CYCLECOUNT get_cyclecount()
 #define SCTP_CTR6 sctp_log_trace
 
 #else
 #define SCTP_CTR6 CTR6
 #endif
 
 
 /*
  *
  */
 #define USER_ADDR_NULL	(NULL)	/* FIX ME: temp */
 #define SCTP_LIST_EMPTY(list)	LIST_EMPTY(list)
 
 #if defined(SCTP_DEBUG)
 #define SCTPDBG(level, params...)					\
 {									\
     do {								\
 	if (sctp_debug_on & level ) {					\
 	    printf(params);						\
 	}								\
     } while (0);							\
 }
 #define SCTPDBG_ADDR(level, addr)					\
 {									\
     do {								\
 	if (sctp_debug_on & level ) {					\
 	    sctp_print_address(addr);					\
 	}								\
     } while (0);							\
 }
 #define SCTPDBG_PKT(level, iph, sh)					\
 {									\
     do {								\
 	    if (sctp_debug_on & level) {				\
 		    sctp_print_address_pkt(iph, sh);			\
 	    }								\
     } while (0);							\
 }
 #else
 #define SCTPDBG(level, params...)
 #define SCTPDBG_ADDR(level, addr)
 #define SCTPDBG_PKT(level, iph, sh)
 #endif
 #define SCTP_PRINTF(params...)	printf(params)
 
 #ifdef SCTP_LTRACE_CHUNKS
 #define SCTP_LTRACE_CHK(a, b, c, d) if(sctp_logging_level & SCTP_LTRACE_CHUNK_ENABLE) CTR6(KTR_SUBSYS, "SCTP:%d[%d]:%x-%x-%x-%x", SCTP_LOG_CHUNK_PROC, 0, a, b, c, d)
 #else
 #define SCTP_LTRACE_CHK(a, b, c, d)
 #endif
 
 #ifdef SCTP_LTRACE_ERRORS
 #define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) if(sctp_logging_level & SCTP_LTRACE_ERROR_ENABLE) \
                                                          printf("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
 								     m, inp, stcb, net, file, __LINE__, err);
 #define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) if(sctp_logging_level & SCTP_LTRACE_ERROR_ENABLE) \
                                                           printf("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
 								     inp, stcb, net, file, __LINE__, err);
 #else
 #define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err)
 #define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err)
 #endif
 
 
 /*
  * Local address and interface list handling
  */
 #define SCTP_MAX_VRF_ID		0
 #define SCTP_SIZE_OF_VRF_HASH	3
 #define SCTP_IFNAMSIZ		IFNAMSIZ
 #define SCTP_DEFAULT_VRFID	0
 #define SCTP_VRF_ADDR_HASH_SIZE	16
 #define SCTP_VRF_IFN_HASH_SIZE	3
 #define	SCTP_INIT_VRF_TABLEID(vrf)
 
 #define SCTP_IFN_IS_IFT_LOOP(ifn) ((ifn)->ifn_type == IFT_LOOP)
 
 /*
  * Access to IFN's to help with src-addr-selection
  */
 /* This could return VOID if the index works but for BSD we provide both. */
 #define SCTP_GET_IFN_VOID_FROM_ROUTE(ro) (void *)ro->ro_rt->rt_ifp
 #define SCTP_GET_IF_INDEX_FROM_ROUTE(ro) (ro)->ro_rt->rt_ifp->if_index
 #define SCTP_ROUTE_HAS_VALID_IFN(ro) ((ro)->ro_rt && (ro)->ro_rt->rt_ifp)
 
 /*
  * general memory allocation
  */
 #define SCTP_MALLOC(var, type, size, name) \
     do { \
 	MALLOC(var, type, size, name, M_NOWAIT); \
     } while (0)
 
 #define SCTP_FREE(var, type)	FREE(var, type)
 
 #define SCTP_MALLOC_SONAME(var, type, size) \
     do { \
 	MALLOC(var, type, size, M_SONAME, M_WAITOK | M_ZERO); \
     } while (0)
 
 #define SCTP_FREE_SONAME(var)	FREE(var, M_SONAME)
 
 #define SCTP_PROCESS_STRUCT struct proc *
 
 /*
  * zone allocation functions
  */
 #include <vm/uma.h>
 /* SCTP_ZONE_INIT: initialize the zone */
 typedef struct uma_zone *sctp_zone_t;
 
 #define UMA_ZFLAG_FULL	0x0020
 #define SCTP_ZONE_INIT(zone, name, size, number) { \
 	zone = uma_zcreate(name, size, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,\
 		UMA_ZFLAG_FULL); \
 	uma_zone_set_max(zone, number); \
 }
 
 /* SCTP_ZONE_GET: allocate element from the zone */
 #define SCTP_ZONE_GET(zone, type) \
 	(type *)uma_zalloc(zone, M_NOWAIT);
 
 /* SCTP_ZONE_FREE: free element from the zone */
 #define SCTP_ZONE_FREE(zone, element) \
 	uma_zfree(zone, element);
 #define SCTP_HASH_INIT(size, hashmark) hashinit_flags(size, M_PCB, hashmark, HASH_NOWAIT)
 #define SCTP_HASH_FREE(table, hashmark) hashdestroy(table, M_PCB, hashmark)
 
 #define SCTP_M_COPYM	m_copym
 
 /*
  * timers
  */
 #include <sys/callout.h>
 typedef struct callout sctp_os_timer_t;
 
 #define SCTP_OS_TIMER_INIT(tmr)	callout_init(tmr, 1)
 #define SCTP_OS_TIMER_START	callout_reset
 #define SCTP_OS_TIMER_STOP	callout_stop
 #define SCTP_OS_TIMER_STOP_DRAIN callout_drain
 #define SCTP_OS_TIMER_PENDING	callout_pending
 #define SCTP_OS_TIMER_ACTIVE	callout_active
 #define SCTP_OS_TIMER_DEACTIVATE callout_deactivate
 
 #define sctp_get_tick_count() (ticks)
 
 /* The packed define for 64 bit platforms */
 #define SCTP_PACKED __attribute__((packed))
 #define SCTP_UNUSED __attribute__((unused))
 
 /*
  * Functions
  */
 /* Mbuf manipulation and access macros  */
 #define SCTP_BUF_LEN(m) (m->m_len)
 #define SCTP_BUF_NEXT(m) (m->m_next)
 #define SCTP_BUF_NEXT_PKT(m) (m->m_nextpkt)
 #define SCTP_BUF_RESV_UF(m, size) m->m_data += size
 #define SCTP_BUF_AT(m, size) m->m_data + size
 #define SCTP_BUF_IS_EXTENDED(m) (m->m_flags & M_EXT)
 #define SCTP_BUF_EXTEND_SIZE(m) (m->m_ext.ext_size)
 #define SCTP_BUF_TYPE(m) (m->m_type)
 #define SCTP_BUF_RECVIF(m) (m->m_pkthdr.rcvif)
 #define SCTP_BUF_PREPEND	M_PREPEND
 
 #define SCTP_ALIGN_TO_END(m, len) if(m->m_flags & M_PKTHDR) { \
                                      MH_ALIGN(m, len); \
                                   } else if ((m->m_flags & M_EXT) == 0) { \
                                      M_ALIGN(m, len); \
                                   }
 
 /* We make it so if you have up to 4 threads
  * writting based on the default size of
  * the packet log 65 k, that would be
  * 4 16k packets before we would hit
  * a problem.
  */
 #define SCTP_PKTLOG_WRITERS_NEED_LOCK 3
 
 /*************************/
 /*      MTU              */
 /*************************/
 #define SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, af) ((struct ifnet *)ifn)->if_mtu
 #define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, rt) ((rt != NULL) ? rt->rt_rmx.rmx_mtu : 0)
 #define SCTP_GATHER_MTU_FROM_INTFC(sctp_ifn) ((sctp_ifn->ifn_p != NULL) ? ((struct ifnet *)(sctp_ifn->ifn_p))->if_mtu : 0)
 #define SCTP_SET_MTU_OF_ROUTE(sa, rt, mtu) do { \
                                               if (rt != NULL) \
                                                  rt->rt_rmx.rmx_mtu = mtu; \
                                            } while(0)
 
 /* (de-)register interface event notifications */
 #define SCTP_REGISTER_INTERFACE(ifhandle, af)
 #define SCTP_DEREGISTER_INTERFACE(ifhandle, af)
 
 
 /*************************/
 /* These are for logging */
 /*************************/
 /* return the base ext data pointer */
 #define SCTP_BUF_EXTEND_BASE(m) (m->m_ext.ext_buf)
  /* return the refcnt of the data pointer */
 #define SCTP_BUF_EXTEND_REFCNT(m) (*m->m_ext.ref_cnt)
 /* return any buffer related flags, this is
  * used beyond logging for apple only.
  */
 #define SCTP_BUF_GET_FLAGS(m) (m->m_flags)
 
 /* For BSD this just accesses the M_PKTHDR length
  * so it operates on an mbuf with hdr flag. Other
  * O/S's may have seperate packet header and mbuf
  * chain pointers.. thus the macro.
  */
 #define SCTP_HEADER_TO_CHAIN(m) (m)
 #define SCTP_DETACH_HEADER_FROM_CHAIN(m)
 #define SCTP_HEADER_LEN(m) (m->m_pkthdr.len)
 #define SCTP_GET_HEADER_FOR_OUTPUT(o_pak) 0
 #define SCTP_RELEASE_HEADER(m)
 #define SCTP_RELEASE_PKT(m)	sctp_m_freem(m)
 
 #define SCTP_GET_PKT_VRFID(m, vrf_id)  ((vrf_id = SCTP_DEFAULT_VRFID) != SCTP_DEFAULT_VRFID)
 
 
 
 /* Attach the chain of data into the sendable packet. */
 #define SCTP_ATTACH_CHAIN(pak, m, packet_length) do { \
                                                  pak = m; \
                                                  pak->m_pkthdr.len = packet_length; \
                          } while(0)
 
 /* Other m_pkthdr type things */
 #define SCTP_IS_IT_BROADCAST(dst, m) ((m->m_flags & M_PKTHDR) ? in_broadcast(dst, m->m_pkthdr.rcvif) : 0)
 #define SCTP_IS_IT_LOOPBACK(m) ((m->m_flags & M_PKTHDR) && ((m->m_pkthdr.rcvif == NULL) || (m->m_pkthdr.rcvif->if_type == IFT_LOOP)))
 
 
 /* This converts any input packet header
  * into the chain of data holders, for BSD
  * its a NOP.
  */
 
 /* Macro's for getting length from V6/V4 header */
 #define SCTP_GET_IPV4_LENGTH(iph) (iph->ip_len)
 #define SCTP_GET_IPV6_LENGTH(ip6) (ntohs(ip6->ip6_plen))
 
 /* get the v6 hop limit */
 #define SCTP_GET_HLIM(inp, ro)	in6_selecthlim((struct in6pcb *)&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL)));
 
 /* is the endpoint v6only? */
 #define SCTP_IPV6_V6ONLY(inp)	(((struct inpcb *)inp)->inp_flags & IN6P_IPV6_V6ONLY)
 /* is the socket non-blocking? */
 #define SCTP_SO_IS_NBIO(so)	((so)->so_state & SS_NBIO)
 #define SCTP_SET_SO_NBIO(so)	((so)->so_state |= SS_NBIO)
 #define SCTP_CLEAR_SO_NBIO(so)	((so)->so_state &= ~SS_NBIO)
 /* get the socket type */
 #define SCTP_SO_TYPE(so)	((so)->so_type)
 /* reserve sb space for a socket */
 #define SCTP_SORESERVE(so, send, recv)	soreserve(so, send, recv)
 /* wakeup a socket */
 #define SCTP_SOWAKEUP(so)	wakeup(&(so)->so_timeo)
 /* clear the socket buffer state */
 #define SCTP_SB_CLEAR(sb)	\
 	(sb).sb_cc = 0;		\
 	(sb).sb_mb = NULL;	\
 	(sb).sb_mbcnt = 0;
 
 #define SCTP_SB_LIMIT_RCV(so) so->so_rcv.sb_hiwat
 #define SCTP_SB_LIMIT_SND(so) so->so_snd.sb_hiwat
 
 /*
  * routes, output, etc.
  */
 typedef struct route sctp_route_t;
 typedef struct rtentry sctp_rtentry_t;
 
-#define SCTP_RTALLOC(ro, vrf_id) rtalloc_ign((struct route *)ro, 0UL)
+#define SCTP_RTALLOC(ro, vrf_id) in_rtalloc_ign((struct route *)ro, 0UL, vrf_id)
 
 /* Future zero copy wakeup/send  function */
 #define SCTP_ZERO_COPY_EVENT(inp, so)
 /* This is re-pulse ourselves for sendbuf */
 #define SCTP_ZERO_COPY_SENDQ_EVENT(inp, so)
 
 /*
  * IP output routines
  */
 #define SCTP_IP_OUTPUT(result, o_pak, ro, stcb, vrf_id) \
 { \
 	int o_flgs = 0; \
 	if (stcb && stcb->sctp_ep && stcb->sctp_ep->sctp_socket) { \
 		o_flgs = IP_RAWOUTPUT | (stcb->sctp_ep->sctp_socket->so_options & SO_DONTROUTE); \
 	} else { \
 		o_flgs = IP_RAWOUTPUT; \
 	} \
 	result = ip_output(o_pak, NULL, ro, o_flgs, 0, NULL); \
 }
 
 #define SCTP_IP6_OUTPUT(result, o_pak, ro, ifp, stcb, vrf_id) \
 { \
  	if (stcb && stcb->sctp_ep) \
 		result = ip6_output(o_pak, \
 				    ((struct in6pcb *)(stcb->sctp_ep))->in6p_outputopts, \
 				    (ro), 0, 0, ifp, NULL); \
 	else \
 		result = ip6_output(o_pak, NULL, (ro), 0, 0, ifp, NULL); \
 }
 
 struct mbuf *
 sctp_get_mbuf_for_msg(unsigned int space_needed,
     int want_header, int how, int allonebuf, int type);
 
 
 /*
  * SCTP AUTH
  */
 #define HAVE_SHA2
 
 #define SCTP_READ_RANDOM(buf, len)	read_random(buf, len)
 
 #ifdef USE_SCTP_SHA1
 #include <netinet/sctp_sha1.h>
 #else
 #include <crypto/sha1.h>
 /* map standard crypto API names */
 #define SHA1_Init	SHA1Init
 #define SHA1_Update	SHA1Update
 #define SHA1_Final(x,y)	SHA1Final((caddr_t)x, y)
 #endif
 
 #if defined(HAVE_SHA2)
 #include <crypto/sha2/sha2.h>
 #endif
 
 #include <sys/md5.h>
 /* map standard crypto API names */
 #define MD5_Init	MD5Init
 #define MD5_Update	MD5Update
 #define MD5_Final	MD5Final
 
 #endif
Index: head/sys/netinet/tcp_input.c
===================================================================
--- head/sys/netinet/tcp_input.c	(revision 178887)
+++ head/sys/netinet/tcp_input.c	(revision 178888)
@@ -1,3033 +1,3034 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipfw.h"		/* for ipfw_fwd	*/
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_mac.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>		/* for proc0 declaration */
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 
 #include <machine/cpu.h>	/* before tcp_seq.h, for tcp_random18() */
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/route.h>
 
 #define TCPSTATES		/* for logging */
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>	/* required for icmp_var.h */
 #include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet6/tcp6_var.h>
 #include <netinet/tcpip.h>
 #include <netinet/tcp_syncache.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif /* TCPDEBUG */
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #endif /*IPSEC*/
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 static const int tcprexmtthresh = 3;
 
 struct	tcpstat tcpstat;
 SYSCTL_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RW,
     &tcpstat , tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
 
 int tcp_log_in_vain = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
     &tcp_log_in_vain, 0, "Log all incoming TCP segments to closed ports");
 
 static int blackhole = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_RW,
     &blackhole, 0, "Do not send RST on segments to closed ports");
 
 int tcp_delack_enabled = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW,
     &tcp_delack_enabled, 0,
     "Delay ACK to try and piggyback it onto a data packet");
 
 static int drop_synfin = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW,
     &drop_synfin, 0, "Drop TCP packets with SYN+FIN set");
 
 static int tcp_do_rfc3042 = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_RW,
     &tcp_do_rfc3042, 0, "Enable RFC 3042 (Limited Transmit)");
 
 static int tcp_do_rfc3390 = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW,
     &tcp_do_rfc3390, 0,
     "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
 
 static int tcp_insecure_rst = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_RW,
     &tcp_insecure_rst, 0,
     "Follow the old (insecure) criteria for accepting RST packets");
 
 int	tcp_do_autorcvbuf = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_RW,
     &tcp_do_autorcvbuf, 0, "Enable automatic receive buffer sizing");
 
 int	tcp_autorcvbuf_inc = 16*1024;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_RW,
     &tcp_autorcvbuf_inc, 0,
     "Incrementor step size of automatic receive buffer");
 
 int	tcp_autorcvbuf_max = 256*1024;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW,
     &tcp_autorcvbuf_max, 0, "Max size of automatic receive buffer");
 
 struct inpcbhead tcb;
 #define	tcb6	tcb  /* for KAME src sync over BSD*'s */
 struct inpcbinfo tcbinfo;
 
 static void	 tcp_dooptions(struct tcpopt *, u_char *, int, int);
 static void	 tcp_do_segment(struct mbuf *, struct tcphdr *,
 		     struct socket *, struct tcpcb *, int, int);
 static void	 tcp_dropwithreset(struct mbuf *, struct tcphdr *,
 		     struct tcpcb *, int, int);
 static void	 tcp_pulloutofband(struct socket *,
 		     struct tcphdr *, struct mbuf *, int);
 static void	 tcp_xmit_timer(struct tcpcb *, int);
 static void	 tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
 
 /* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
 #ifdef INET6
 #define ND6_HINT(tp) \
 do { \
 	if ((tp) && (tp)->t_inpcb && \
 	    ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0) \
 		nd6_nud_hint(NULL, NULL, 0); \
 } while (0)
 #else
 #define ND6_HINT(tp)
 #endif
 
 /*
  * Indicate whether this ack should be delayed.  We can delay the ack if
  *	- there is no delayed ack timer in progress and
  *	- our last ack wasn't a 0-sized window.  We never want to delay
  *	  the ack that opens up a 0-sized window and
  *		- delayed acks are enabled or
  *		- this is a half-synchronized T/TCP connection.
  */
 #define DELAY_ACK(tp)							\
 	((!tcp_timer_active(tp, TT_DELACK) &&				\
 	    (tp->t_flags & TF_RXWIN0SENT) == 0) &&			\
 	    (tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
 
 
 /*
  * TCP input handling is split into multiple parts:
  *   tcp6_input is a thin wrapper around tcp_input for the extended
  *	ip6_protox[] call format in ip6_input
  *   tcp_input handles primary segment validation, inpcb lookup and
  *	SYN processing on listen sockets
  *   tcp_do_segment processes the ACK and text of the segment for
  *	establishing, established and closing connections
  */
 #ifdef INET6
 int
 tcp6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
 	struct in6_ifaddr *ia6;
 
 	IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE);
 
 	/*
 	 * draft-itojun-ipv6-tcp-to-anycast
 	 * better place to put this in?
 	 */
 	ia6 = ip6_getdstifaddr(m);
 	if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) {
 		struct ip6_hdr *ip6;
 
 		ip6 = mtod(m, struct ip6_hdr *);
 		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR,
 			    (caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
 		return IPPROTO_DONE;
 	}
 
 	tcp_input(m, *offp);
 	return IPPROTO_DONE;
 }
 #endif
 
 void
 tcp_input(struct mbuf *m, int off0)
 {
 	struct tcphdr *th;
 	struct ip *ip = NULL;
 	struct ipovly *ipov;
 	struct inpcb *inp = NULL;
 	struct tcpcb *tp = NULL;
 	struct socket *so = NULL;
 	u_char *optp = NULL;
 	int optlen = 0;
 	int len, tlen, off;
 	int drop_hdrlen;
 	int thflags;
 	int rstreason = 0;	/* For badport_bandlim accounting purposes */
 #ifdef IPFIREWALL_FORWARD
 	struct m_tag *fwd_tag;
 #endif
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 	int isipv6;
 #else
 	const void *ip6 = NULL;
 	const int isipv6 = 0;
 #endif
 	struct tcpopt to;		/* options in this segment */
 	char *s = NULL;			/* address and port logging */
 
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
 
 #ifdef INET6
 	isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
 #endif
 
 	to.to_flags = 0;
 	tcpstat.tcps_rcvtotal++;
 
 	if (isipv6) {
 #ifdef INET6
 		/* IP6_EXTHDR_CHECK() is already done at tcp6_input(). */
 		ip6 = mtod(m, struct ip6_hdr *);
 		tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
 		if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) {
 			tcpstat.tcps_rcvbadsum++;
 			goto drop;
 		}
 		th = (struct tcphdr *)((caddr_t)ip6 + off0);
 
 		/*
 		 * Be proactive about unspecified IPv6 address in source.
 		 * As we use all-zero to indicate unbounded/unconnected pcb,
 		 * unspecified IPv6 address can be used to confuse us.
 		 *
 		 * Note that packets with unspecified IPv6 destination is
 		 * already dropped in ip6_input.
 		 */
 		if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
 			/* XXX stat */
 			goto drop;
 		}
 #else
 		th = NULL;		/* XXX: Avoid compiler warning. */
 #endif
 	} else {
 		/*
 		 * Get IP and TCP header together in first mbuf.
 		 * Note: IP leaves IP header in first mbuf.
 		 */
 		if (off0 > sizeof (struct ip)) {
 			ip_stripoptions(m, (struct mbuf *)0);
 			off0 = sizeof(struct ip);
 		}
 		if (m->m_len < sizeof (struct tcpiphdr)) {
 			if ((m = m_pullup(m, sizeof (struct tcpiphdr)))
 			    == NULL) {
 				tcpstat.tcps_rcvshort++;
 				return;
 			}
 		}
 		ip = mtod(m, struct ip *);
 		ipov = (struct ipovly *)ip;
 		th = (struct tcphdr *)((caddr_t)ip + off0);
 		tlen = ip->ip_len;
 
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 				th->th_sum = m->m_pkthdr.csum_data;
 			else
 				th->th_sum = in_pseudo(ip->ip_src.s_addr,
 						ip->ip_dst.s_addr,
 						htonl(m->m_pkthdr.csum_data +
 							ip->ip_len +
 							IPPROTO_TCP));
 			th->th_sum ^= 0xffff;
 #ifdef TCPDEBUG
 			ipov->ih_len = (u_short)tlen;
 			ipov->ih_len = htons(ipov->ih_len);
 #endif
 		} else {
 			/*
 			 * Checksum extended TCP header and data.
 			 */
 			len = sizeof (struct ip) + tlen;
 			bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
 			ipov->ih_len = (u_short)tlen;
 			ipov->ih_len = htons(ipov->ih_len);
 			th->th_sum = in_cksum(m, len);
 		}
 		if (th->th_sum) {
 			tcpstat.tcps_rcvbadsum++;
 			goto drop;
 		}
 		/* Re-initialization for later version check */
 		ip->ip_v = IPVERSION;
 	}
 
 	/*
 	 * Check that TCP offset makes sense,
 	 * pull out TCP options and adjust length.		XXX
 	 */
 	off = th->th_off << 2;
 	if (off < sizeof (struct tcphdr) || off > tlen) {
 		tcpstat.tcps_rcvbadoff++;
 		goto drop;
 	}
 	tlen -= off;	/* tlen is used instead of ti->ti_len */
 	if (off > sizeof (struct tcphdr)) {
 		if (isipv6) {
 #ifdef INET6
 			IP6_EXTHDR_CHECK(m, off0, off, );
 			ip6 = mtod(m, struct ip6_hdr *);
 			th = (struct tcphdr *)((caddr_t)ip6 + off0);
 #endif
 		} else {
 			if (m->m_len < sizeof(struct ip) + off) {
 				if ((m = m_pullup(m, sizeof (struct ip) + off))
 				    == NULL) {
 					tcpstat.tcps_rcvshort++;
 					return;
 				}
 				ip = mtod(m, struct ip *);
 				ipov = (struct ipovly *)ip;
 				th = (struct tcphdr *)((caddr_t)ip + off0);
 			}
 		}
 		optlen = off - sizeof (struct tcphdr);
 		optp = (u_char *)(th + 1);
 	}
 	thflags = th->th_flags;
 
 	/*
 	 * Convert TCP protocol specific fields to host format.
 	 */
 	th->th_seq = ntohl(th->th_seq);
 	th->th_ack = ntohl(th->th_ack);
 	th->th_win = ntohs(th->th_win);
 	th->th_urp = ntohs(th->th_urp);
 
 	/*
 	 * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options.
 	 */
 	drop_hdrlen = off0 + off;
 
 	/*
 	 * Locate pcb for segment.
 	 */
 	INP_INFO_WLOCK(&tcbinfo);
 findpcb:
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 #ifdef IPFIREWALL_FORWARD
 	/*
 	 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
 	 */
 	fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
 
 	if (fwd_tag != NULL && isipv6 == 0) {	/* IPv6 support is not yet */
 		struct sockaddr_in *next_hop;
 
 		next_hop = (struct sockaddr_in *)(fwd_tag+1);
 		/*
 		 * Transparently forwarded. Pretend to be the destination.
 		 * already got one like this?
 		 */
 		inp = in_pcblookup_hash(&tcbinfo,
 					ip->ip_src, th->th_sport,
 					ip->ip_dst, th->th_dport,
 					0, m->m_pkthdr.rcvif);
 		if (!inp) {
 			/* It's new.  Try to find the ambushing socket. */
 			inp = in_pcblookup_hash(&tcbinfo,
 						ip->ip_src, th->th_sport,
 						next_hop->sin_addr,
 						next_hop->sin_port ?
 						    ntohs(next_hop->sin_port) :
 						    th->th_dport,
 						INPLOOKUP_WILDCARD,
 						m->m_pkthdr.rcvif);
 		}
 		/* Remove the tag from the packet.  We don't need it anymore. */
 		m_tag_delete(m, fwd_tag);
 	} else
 #endif /* IPFIREWALL_FORWARD */
 	{
 		if (isipv6) {
 #ifdef INET6
 			inp = in6_pcblookup_hash(&tcbinfo,
 						 &ip6->ip6_src, th->th_sport,
 						 &ip6->ip6_dst, th->th_dport,
 						 INPLOOKUP_WILDCARD,
 						 m->m_pkthdr.rcvif);
 #endif
 		} else
 			inp = in_pcblookup_hash(&tcbinfo,
 						ip->ip_src, th->th_sport,
 						ip->ip_dst, th->th_dport,
 						INPLOOKUP_WILDCARD,
 						m->m_pkthdr.rcvif);
 	}
 
 	/*
 	 * If the INPCB does not exist then all data in the incoming
 	 * segment is discarded and an appropriate RST is sent back.
+	 * XXX MRT Send RST using which routing table?
 	 */
 	if (inp == NULL) {
 		/*
 		 * Log communication attempts to ports that are not
 		 * in use.
 		 */
 		if ((tcp_log_in_vain == 1 && (thflags & TH_SYN)) ||
 		    tcp_log_in_vain == 2) {
 			if ((s = tcp_log_addrs(NULL, th, (void *)ip, ip6)))
 				log(LOG_INFO, "%s; %s: Connection attempt "
 				    "to closed port\n", s, __func__);
 		}
 		/*
 		 * When blackholing do not respond with a RST but
 		 * completely ignore the segment and drop it.
 		 */
 		if ((blackhole == 1 && (thflags & TH_SYN)) ||
 		    blackhole == 2)
 			goto dropunlock;
 
 		rstreason = BANDLIM_RST_CLOSEDPORT;
 		goto dropwithreset;
 	}
 	INP_WLOCK(inp);
 
 #ifdef IPSEC
 #ifdef INET6
 	if (isipv6 && ipsec6_in_reject(m, inp)) {
 		ipsec6stat.in_polvio++;
 		goto dropunlock;
 	} else
 #endif /* INET6 */
 	if (ipsec4_in_reject(m, inp) != 0) {
 		ipsec4stat.in_polvio++;
 		goto dropunlock;
 	}
 #endif /* IPSEC */
 
 	/*
 	 * Check the minimum TTL for socket.
 	 */
 	if (inp->inp_ip_minttl != 0) {
 #ifdef INET6
 		if (isipv6 && inp->inp_ip_minttl > ip6->ip6_hlim)
 			goto dropunlock;
 		else
 #endif
 		if (inp->inp_ip_minttl > ip->ip_ttl)
 			goto dropunlock;
 	}
 
 	/*
 	 * A previous connection in TIMEWAIT state is supposed to catch
 	 * stray or duplicate segments arriving late.  If this segment
 	 * was a legitimate new connection attempt the old INPCB gets
 	 * removed and we can try again to find a listening socket.
 	 */
 	if (inp->inp_vflag & INP_TIMEWAIT) {
 		if (thflags & TH_SYN)
 			tcp_dooptions(&to, optp, optlen, TO_SYN);
 		/*
 		 * NB: tcp_twcheck unlocks the INP and frees the mbuf.
 		 */
 		if (tcp_twcheck(inp, &to, th, m, tlen))
 			goto findpcb;
 		INP_INFO_WUNLOCK(&tcbinfo);
 		return;
 	}
 	/*
 	 * The TCPCB may no longer exist if the connection is winding
 	 * down or it is in the CLOSED state.  Either way we drop the
 	 * segment and send an appropriate response.
 	 */
 	tp = intotcpcb(inp);
 	if (tp == NULL || tp->t_state == TCPS_CLOSED) {
 		rstreason = BANDLIM_RST_CLOSEDPORT;
 		goto dropwithreset;
 	}
 
 #ifdef MAC
 	INP_WLOCK_ASSERT(inp);
 	if (mac_inpcb_check_deliver(inp, m))
 		goto dropunlock;
 #endif
 	so = inp->inp_socket;
 	KASSERT(so != NULL, ("%s: so == NULL", __func__));
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG) {
 		ostate = tp->t_state;
 		if (isipv6) {
 #ifdef INET6
 			bcopy((char *)ip6, (char *)tcp_saveipgen, sizeof(*ip6));
 #endif
 		} else
 			bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip));
 		tcp_savetcp = *th;
 	}
 #endif
 	/*
 	 * When the socket is accepting connections (the INPCB is in LISTEN
 	 * state) we look into the SYN cache if this is a new connection
 	 * attempt or the completion of a previous one.
 	 */
 	if (so->so_options & SO_ACCEPTCONN) {
 		struct in_conninfo inc;
 
 		KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
 		    "tp not listening", __func__));
 
 		bzero(&inc, sizeof(inc));
 		inc.inc_isipv6 = isipv6;
 #ifdef INET6
 		if (isipv6) {
 			inc.inc6_faddr = ip6->ip6_src;
 			inc.inc6_laddr = ip6->ip6_dst;
 		} else
 #endif
 		{
 			inc.inc_faddr = ip->ip_src;
 			inc.inc_laddr = ip->ip_dst;
 		}
 		inc.inc_fport = th->th_sport;
 		inc.inc_lport = th->th_dport;
 
 		/*
 		 * Check for an existing connection attempt in syncache if
 		 * the flag is only ACK.  A successful lookup creates a new
 		 * socket appended to the listen queue in SYN_RECEIVED state.
 		 */
 		if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
 			/*
 			 * Parse the TCP options here because
 			 * syncookies need access to the reflected
 			 * timestamp.
 			 */
 			tcp_dooptions(&to, optp, optlen, 0);
 			/*
 			 * NB: syncache_expand() doesn't unlock
 			 * inp and tcpinfo locks.
 			 */
 			if (!syncache_expand(&inc, &to, th, &so, m)) {
 				/*
 				 * No syncache entry or ACK was not
 				 * for our SYN/ACK.  Send a RST.
 				 * NB: syncache did its own logging
 				 * of the failure cause.
 				 */
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 			}
 			if (so == NULL) {
 				/*
 				 * We completed the 3-way handshake
 				 * but could not allocate a socket
 				 * either due to memory shortage,
 				 * listen queue length limits or
 				 * global socket limits.  Send RST
 				 * or wait and have the remote end
 				 * retransmit the ACK for another
 				 * try.
 				 */
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 					log(LOG_DEBUG, "%s; %s: Listen socket: "
 					    "Socket allocation failed due to "
 					    "limits or memory shortage, %s\n",
 					    s, __func__, (tcp_sc_rst_sock_fail ?
 					    "sending RST" : "try again"));
 				if (tcp_sc_rst_sock_fail) {
 					rstreason = BANDLIM_UNLIMITED;
 					goto dropwithreset;
 				} else
 					goto dropunlock;
 			}
 			/*
 			 * Socket is created in state SYN_RECEIVED.
 			 * Unlock the listen socket, lock the newly
 			 * created socket and update the tp variable.
 			 */
 			INP_WUNLOCK(inp);	/* listen socket */
 			inp = sotoinpcb(so);
 			INP_WLOCK(inp);		/* new connection */
 			tp = intotcpcb(inp);
 			KASSERT(tp->t_state == TCPS_SYN_RECEIVED,
 			    ("%s: ", __func__));
 			/*
 			 * Process the segment and the data it
 			 * contains.  tcp_do_segment() consumes
 			 * the mbuf chain and unlocks the inpcb.
 			 */
 			tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen);
 			INP_INFO_UNLOCK_ASSERT(&tcbinfo);
 			return;
 		}
 		/*
 		 * Segment flag validation for new connection attempts:
 		 *
 		 * Our (SYN|ACK) response was rejected.
 		 * Check with syncache and remove entry to prevent
 		 * retransmits.
 		 *
 		 * NB: syncache_chkrst does its own logging of failure
 		 * causes.
 		 */
 		if (thflags & TH_RST) {
 			syncache_chkrst(&inc, th);
 			goto dropunlock;
 		}
 		/*
 		 * We can't do anything without SYN.
 		 */
 		if ((thflags & TH_SYN) == 0) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Listen socket: "
 				    "SYN is missing, segment ignored\n",
 				    s, __func__);
 			tcpstat.tcps_badsyn++;
 			goto dropunlock;
 		}
 		/*
 		 * (SYN|ACK) is bogus on a listen socket.
 		 */
 		if (thflags & TH_ACK) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Listen socket: "
 				    "SYN|ACK invalid, segment rejected\n",
 				    s, __func__);
 			syncache_badack(&inc);	/* XXX: Not needed! */
 			tcpstat.tcps_badsyn++;
 			rstreason = BANDLIM_RST_OPENPORT;
 			goto dropwithreset;
 		}
 		/*
 		 * If the drop_synfin option is enabled, drop all
 		 * segments with both the SYN and FIN bits set.
 		 * This prevents e.g. nmap from identifying the
 		 * TCP/IP stack.
 		 * XXX: Poor reasoning.  nmap has other methods
 		 * and is constantly refining its stack detection
 		 * strategies.
 		 * XXX: This is a violation of the TCP specification
 		 * and was used by RFC1644.
 		 */
 		if ((thflags & TH_FIN) && drop_synfin) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Listen socket: "
 				    "SYN|FIN segment ignored (based on "
 				    "sysctl setting)\n", s, __func__);
 			tcpstat.tcps_badsyn++;
                 	goto dropunlock;
 		}
 		/*
 		 * Segment's flags are (SYN) or (SYN|FIN).
 		 *
 		 * TH_PUSH, TH_URG, TH_ECE, TH_CWR are ignored
 		 * as they do not affect the state of the TCP FSM.
 		 * The data pointed to by TH_URG and th_urp is ignored.
 		 */
 		KASSERT((thflags & (TH_RST|TH_ACK)) == 0,
 		    ("%s: Listen socket: TH_RST or TH_ACK set", __func__));
 		KASSERT(thflags & (TH_SYN),
 		    ("%s: Listen socket: TH_SYN not set", __func__));
 #ifdef INET6
 		/*
 		 * If deprecated address is forbidden,
 		 * we do not accept SYN to deprecated interface
 		 * address to prevent any new inbound connection from
 		 * getting established.
 		 * When we do not accept SYN, we send a TCP RST,
 		 * with deprecated source address (instead of dropping
 		 * it).  We compromise it as it is much better for peer
 		 * to send a RST, and RST will be the final packet
 		 * for the exchange.
 		 *
 		 * If we do not forbid deprecated addresses, we accept
 		 * the SYN packet.  RFC2462 does not suggest dropping
 		 * SYN in this case.
 		 * If we decipher RFC2462 5.5.4, it says like this:
 		 * 1. use of deprecated addr with existing
 		 *    communication is okay - "SHOULD continue to be
 		 *    used"
 		 * 2. use of it with new communication:
 		 *   (2a) "SHOULD NOT be used if alternate address
 		 *        with sufficient scope is available"
 		 *   (2b) nothing mentioned otherwise.
 		 * Here we fall into (2b) case as we have no choice in
 		 * our source address selection - we must obey the peer.
 		 *
 		 * The wording in RFC2462 is confusing, and there are
 		 * multiple description text for deprecated address
 		 * handling - worse, they are not exactly the same.
 		 * I believe 5.5.4 is the best one, so we follow 5.5.4.
 		 */
 		if (isipv6 && !ip6_use_deprecated) {
 			struct in6_ifaddr *ia6;
 
 			if ((ia6 = ip6_getdstifaddr(m)) &&
 			    (ia6->ia6_flags & IN6_IFF_DEPRECATED)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt to deprecated "
 					"IPv6 address rejected\n",
 					s, __func__);
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 			}
 		}
 #endif
 		/*
 		 * Basic sanity checks on incoming SYN requests:
 		 *   Don't respond if the destination is a link layer
 		 *	broadcast according to RFC1122 4.2.3.10, p. 104.
 		 *   If it is from this socket it must be forged.
 		 *   Don't respond if the source or destination is a
 		 *	global or subnet broad- or multicast address.
 		 *   Note that it is quite possible to receive unicast
 		 *	link-layer packets with a broadcast IP address. Use
 		 *	in_broadcast() to find them.
 		 */
 		if (m->m_flags & (M_BCAST|M_MCAST)) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 			    log(LOG_DEBUG, "%s; %s: Listen socket: "
 				"Connection attempt from broad- or multicast "
 				"link layer address ignored\n", s, __func__);
 			goto dropunlock;
 		}
 		if (isipv6) {
 #ifdef INET6
 			if (th->th_dport == th->th_sport &&
 			    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt to/from self "
 					"ignored\n", s, __func__);
 				goto dropunlock;
 			}
 			if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
 			    IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt from/to multicast "
 					"address ignored\n", s, __func__);
 				goto dropunlock;
 			}
 #endif
 		} else {
 			if (th->th_dport == th->th_sport &&
 			    ip->ip_dst.s_addr == ip->ip_src.s_addr) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt from/to self "
 					"ignored\n", s, __func__);
 				goto dropunlock;
 			}
 			if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
 			    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
 			    ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
 			    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt from/to broad- "
 					"or multicast address ignored\n",
 					s, __func__);
 				goto dropunlock;
 			}
 		}
 		/*
 		 * SYN appears to be valid.  Create compressed TCP state
 		 * for syncache.
 		 */
 #ifdef TCPDEBUG
 		if (so->so_options & SO_DEBUG)
 			tcp_trace(TA_INPUT, ostate, tp,
 			    (void *)tcp_saveipgen, &tcp_savetcp, 0);
 #endif
 		tcp_dooptions(&to, optp, optlen, TO_SYN);
 		syncache_add(&inc, &to, th, inp, &so, m);
 		/*
 		 * Entry added to syncache and mbuf consumed.
 		 * Everything already unlocked by syncache_add().
 		 */
 		INP_INFO_UNLOCK_ASSERT(&tcbinfo);
 		return;
 	}
 
 	/*
 	 * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later
 	 * state.  tcp_do_segment() always consumes the mbuf chain, unlocks
 	 * the inpcb, and unlocks pcbinfo.
 	 */
 	tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen);
 	INP_INFO_UNLOCK_ASSERT(&tcbinfo);
 	return;
 
 dropwithreset:
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 	tcp_dropwithreset(m, th, tp, tlen, rstreason);
 	m = NULL;	/* mbuf chain got consumed. */
 dropunlock:
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(&tcbinfo);
 drop:
 	INP_INFO_UNLOCK_ASSERT(&tcbinfo);
 	if (s != NULL)
 		free(s, M_TCPLOG);
 	if (m != NULL)
 		m_freem(m);
 	return;
 }
 
 static void
 tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
     struct tcpcb *tp, int drop_hdrlen, int tlen)
 {
 	int thflags, acked, ourfinisacked, needoutput = 0;
 	int headlocked = 1;
 	int rstreason, todrop, win;
 	u_long tiwin;
 	struct tcpopt to;
 
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
 	thflags = th->th_flags;
 
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
 	    __func__));
 	KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
 	    __func__));
 
 	/*
 	 * Segment received on connection.
 	 * Reset idle time and keep-alive timer.
 	 * XXX: This should be done after segment
 	 * validation to ignore broken/spoofed segs.
 	 */
 	tp->t_rcvtime = ticks;
 	if (TCPS_HAVEESTABLISHED(tp->t_state))
 		tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
 
 	/*
 	 * Unscale the window into a 32-bit value.
 	 * For the SYN_SENT state the scale is zero.
 	 */
 	tiwin = th->th_win << tp->snd_scale;
 
 	/*
 	 * Parse options on any incoming segment.
 	 */
 	tcp_dooptions(&to, (u_char *)(th + 1),
 	    (th->th_off << 2) - sizeof(struct tcphdr),
 	    (thflags & TH_SYN) ? TO_SYN : 0);
 
 	/*
 	 * If echoed timestamp is later than the current time,
 	 * fall back to non RFC1323 RTT calculation.  Normalize
 	 * timestamp if syncookies were used when this connection
 	 * was established.
 	 */
 	if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
 		to.to_tsecr -= tp->ts_offset;
 		if (TSTMP_GT(to.to_tsecr, ticks))
 			to.to_tsecr = 0;
 	}
 
 	/*
 	 * Process options only when we get SYN/ACK back. The SYN case
 	 * for incoming connections is handled in tcp_syncache.
 	 * According to RFC1323 the window field in a SYN (i.e., a <SYN>
 	 * or <SYN,ACK>) segment itself is never scaled.
 	 * XXX this is traditional behavior, may need to be cleaned up.
 	 */
 	if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
 		if ((to.to_flags & TOF_SCALE) &&
 		    (tp->t_flags & TF_REQ_SCALE)) {
 			tp->t_flags |= TF_RCVD_SCALE;
 			tp->snd_scale = to.to_wscale;
 		}
 		/*
 		 * Initial send window.  It will be updated with
 		 * the next incoming segment to the scaled value.
 		 */
 		tp->snd_wnd = th->th_win;
 		if (to.to_flags & TOF_TS) {
 			tp->t_flags |= TF_RCVD_TSTMP;
 			tp->ts_recent = to.to_tsval;
 			tp->ts_recent_age = ticks;
 		}
 		if (to.to_flags & TOF_MSS)
 			tcp_mss(tp, to.to_mss);
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    (to.to_flags & TOF_SACKPERM) == 0)
 			tp->t_flags &= ~TF_SACK_PERMIT;
 	}
 
 	/*
 	 * Header prediction: check for the two common cases
 	 * of a uni-directional data xfer.  If the packet has
 	 * no control flags, is in-sequence, the window didn't
 	 * change and we're not retransmitting, it's a
 	 * candidate.  If the length is zero and the ack moved
 	 * forward, we're the sender side of the xfer.  Just
 	 * free the data acked & wake any higher level process
 	 * that was blocked waiting for space.  If the length
 	 * is non-zero and the ack didn't move, we're the
 	 * receiver side.  If we're getting packets in-order
 	 * (the reassembly queue is empty), add the data to
 	 * the socket buffer and note that we need a delayed ack.
 	 * Make sure that the hidden state-flags are also off.
 	 * Since we check for TCPS_ESTABLISHED first, it can only
 	 * be TH_NEEDSYN.
 	 */
 	if (tp->t_state == TCPS_ESTABLISHED &&
 	    th->th_seq == tp->rcv_nxt &&
 	    (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
 	    tp->snd_nxt == tp->snd_max &&
 	    tiwin && tiwin == tp->snd_wnd && 
 	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
 	    LIST_EMPTY(&tp->t_segq) &&
 	    ((to.to_flags & TOF_TS) == 0 ||
 	     TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
 
 		/*
 		 * If last ACK falls within this segment's sequence numbers,
 		 * record the timestamp.
 		 * NOTE that the test is modified according to the latest
 		 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
 		 */
 		if ((to.to_flags & TOF_TS) != 0 &&
 		    SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
 			tp->ts_recent_age = ticks;
 			tp->ts_recent = to.to_tsval;
 		}
 
 		if (tlen == 0) {
 			if (SEQ_GT(th->th_ack, tp->snd_una) &&
 			    SEQ_LEQ(th->th_ack, tp->snd_max) &&
 			    tp->snd_cwnd >= tp->snd_wnd &&
 			    ((!tcp_do_newreno &&
 			      !(tp->t_flags & TF_SACK_PERMIT) &&
 			      tp->t_dupacks < tcprexmtthresh) ||
 			     ((tcp_do_newreno ||
 			       (tp->t_flags & TF_SACK_PERMIT)) &&
 			      !IN_FASTRECOVERY(tp) &&
 			      (to.to_flags & TOF_SACK) == 0 &&
 			      TAILQ_EMPTY(&tp->snd_holes)))) {
 				KASSERT(headlocked,
 				    ("%s: headlocked", __func__));
 				INP_INFO_WUNLOCK(&tcbinfo);
 				headlocked = 0;
 				/*
 				 * This is a pure ack for outstanding data.
 				 */
 				++tcpstat.tcps_predack;
 				/*
 				 * "bad retransmit" recovery.
 				 */
 				if (tp->t_rxtshift == 1 &&
 				    ticks < tp->t_badrxtwin) {
 					++tcpstat.tcps_sndrexmitbad;
 					tp->snd_cwnd = tp->snd_cwnd_prev;
 					tp->snd_ssthresh =
 					    tp->snd_ssthresh_prev;
 					tp->snd_recover = tp->snd_recover_prev;
 					if (tp->t_flags & TF_WASFRECOVERY)
 					    ENTER_FASTRECOVERY(tp);
 					tp->snd_nxt = tp->snd_max;
 					tp->t_badrxtwin = 0;
 				}
 
 				/*
 				 * Recalculate the transmit timer / rtt.
 				 *
 				 * Some boxes send broken timestamp replies
 				 * during the SYN+ACK phase, ignore
 				 * timestamps of 0 or we could calculate a
 				 * huge RTT and blow up the retransmit timer.
 				 */
 				if ((to.to_flags & TOF_TS) != 0 &&
 				    to.to_tsecr) {
 					if (!tp->t_rttlow ||
 					    tp->t_rttlow > ticks - to.to_tsecr)
 						tp->t_rttlow = ticks - to.to_tsecr;
 					tcp_xmit_timer(tp,
 					    ticks - to.to_tsecr + 1);
 				} else if (tp->t_rtttime &&
 				    SEQ_GT(th->th_ack, tp->t_rtseq)) {
 					if (!tp->t_rttlow ||
 					    tp->t_rttlow > ticks - tp->t_rtttime)
 						tp->t_rttlow = ticks - tp->t_rtttime;
 					tcp_xmit_timer(tp,
 							ticks - tp->t_rtttime);
 				}
 				tcp_xmit_bandwidth_limit(tp, th->th_ack);
 				acked = th->th_ack - tp->snd_una;
 				tcpstat.tcps_rcvackpack++;
 				tcpstat.tcps_rcvackbyte += acked;
 				sbdrop(&so->so_snd, acked);
 				if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
 				    SEQ_LEQ(th->th_ack, tp->snd_recover))
 					tp->snd_recover = th->th_ack - 1;
 				tp->snd_una = th->th_ack;
 				/*
 				 * Pull snd_wl2 up to prevent seq wrap relative
 				 * to th_ack.
 				 */
 				tp->snd_wl2 = th->th_ack;
 				tp->t_dupacks = 0;
 				m_freem(m);
 				ND6_HINT(tp); /* Some progress has been made. */
 
 				/*
 				 * If all outstanding data are acked, stop
 				 * retransmit timer, otherwise restart timer
 				 * using current (possibly backed-off) value.
 				 * If process is waiting for space,
 				 * wakeup/selwakeup/signal.  If data
 				 * are ready to send, let tcp_output
 				 * decide between more output or persist.
 				 */
 #ifdef TCPDEBUG
 				if (so->so_options & SO_DEBUG)
 					tcp_trace(TA_INPUT, ostate, tp,
 					    (void *)tcp_saveipgen,
 					    &tcp_savetcp, 0);
 #endif
 				if (tp->snd_una == tp->snd_max)
 					tcp_timer_activate(tp, TT_REXMT, 0);
 				else if (!tcp_timer_active(tp, TT_PERSIST))
 					tcp_timer_activate(tp, TT_REXMT,
 						      tp->t_rxtcur);
 				sowwakeup(so);
 				if (so->so_snd.sb_cc)
 					(void) tcp_output(tp);
 				goto check_delack;
 			}
 		} else if (th->th_ack == tp->snd_una &&
 		    tlen <= sbspace(&so->so_rcv)) {
 			int newsize = 0;	/* automatic sockbuf scaling */
 
 			KASSERT(headlocked, ("%s: headlocked", __func__));
 			INP_INFO_WUNLOCK(&tcbinfo);
 			headlocked = 0;
 			/*
 			 * This is a pure, in-sequence data packet
 			 * with nothing on the reassembly queue and
 			 * we have enough buffer space to take it.
 			 */
 			/* Clean receiver SACK report if present */
 			if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
 				tcp_clean_sackreport(tp);
 			++tcpstat.tcps_preddat;
 			tp->rcv_nxt += tlen;
 			/*
 			 * Pull snd_wl1 up to prevent seq wrap relative to
 			 * th_seq.
 			 */
 			tp->snd_wl1 = th->th_seq;
 			/*
 			 * Pull rcv_up up to prevent seq wrap relative to
 			 * rcv_nxt.
 			 */
 			tp->rcv_up = tp->rcv_nxt;
 			tcpstat.tcps_rcvpack++;
 			tcpstat.tcps_rcvbyte += tlen;
 			ND6_HINT(tp);	/* Some progress has been made */
 #ifdef TCPDEBUG
 			if (so->so_options & SO_DEBUG)
 				tcp_trace(TA_INPUT, ostate, tp,
 				    (void *)tcp_saveipgen, &tcp_savetcp, 0);
 #endif
 		/*
 		 * Automatic sizing of receive socket buffer.  Often the send
 		 * buffer size is not optimally adjusted to the actual network
 		 * conditions at hand (delay bandwidth product).  Setting the
 		 * buffer size too small limits throughput on links with high
 		 * bandwidth and high delay (eg. trans-continental/oceanic links).
 		 *
 		 * On the receive side the socket buffer memory is only rarely
 		 * used to any significant extent.  This allows us to be much
 		 * more aggressive in scaling the receive socket buffer.  For
 		 * the case that the buffer space is actually used to a large
 		 * extent and we run out of kernel memory we can simply drop
 		 * the new segments; TCP on the sender will just retransmit it
 		 * later.  Setting the buffer size too big may only consume too
 		 * much kernel memory if the application doesn't read() from
 		 * the socket or packet loss or reordering makes use of the
 		 * reassembly queue.
 		 *
 		 * The criteria to step up the receive buffer one notch are:
 		 *  1. the number of bytes received during the time it takes
 		 *     one timestamp to be reflected back to us (the RTT);
 		 *  2. received bytes per RTT is within seven eighth of the
 		 *     current socket buffer size;
 		 *  3. receive buffer size has not hit maximal automatic size;
 		 *
 		 * This algorithm does one step per RTT at most and only if
 		 * we receive a bulk stream w/o packet losses or reorderings.
 		 * Shrinking the buffer during idle times is not necessary as
 		 * it doesn't consume any memory when idle.
 		 *
 		 * TODO: Only step up if the application is actually serving
 		 * the buffer to better manage the socket buffer resources.
 		 */
 			if (tcp_do_autorcvbuf &&
 			    to.to_tsecr &&
 			    (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
 				if (to.to_tsecr > tp->rfbuf_ts &&
 				    to.to_tsecr - tp->rfbuf_ts < hz) {
 					if (tp->rfbuf_cnt >
 					    (so->so_rcv.sb_hiwat / 8 * 7) &&
 					    so->so_rcv.sb_hiwat <
 					    tcp_autorcvbuf_max) {
 						newsize =
 						    min(so->so_rcv.sb_hiwat +
 						    tcp_autorcvbuf_inc,
 						    tcp_autorcvbuf_max);
 					}
 					/* Start over with next RTT. */
 					tp->rfbuf_ts = 0;
 					tp->rfbuf_cnt = 0;
 				} else
 					tp->rfbuf_cnt += tlen;	/* add up */
 			}
 
 			/* Add data to socket buffer. */
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 				m_freem(m);
 			} else {
 				/*
 				 * Set new socket buffer size.
 				 * Give up when limit is reached.
 				 */
 				if (newsize)
 					if (!sbreserve_locked(&so->so_rcv,
 					    newsize, so, curthread))
 						so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
 				m_adj(m, drop_hdrlen);	/* delayed header drop */
 				sbappendstream_locked(&so->so_rcv, m);
 			}
 			/* NB: sorwakeup_locked() does an implicit unlock. */
 			sorwakeup_locked(so);
 			if (DELAY_ACK(tp)) {
 				tp->t_flags |= TF_DELACK;
 			} else {
 				tp->t_flags |= TF_ACKNOW;
 				tcp_output(tp);
 			}
 			goto check_delack;
 		}
 	}
 
 	/*
 	 * Calculate amount of space in receive window,
 	 * and then do TCP input processing.
 	 * Receive window is amount of space in rcv queue,
 	 * but not less than advertised window.
 	 */
 	win = sbspace(&so->so_rcv);
 	if (win < 0)
 		win = 0;
 	tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
 
 	/* Reset receive buffer auto scaling when not in bulk receive mode. */
 	tp->rfbuf_ts = 0;
 	tp->rfbuf_cnt = 0;
 
 	switch (tp->t_state) {
 
 	/*
 	 * If the state is SYN_RECEIVED:
 	 *	if seg contains an ACK, but not for our SYN/ACK, send a RST.
 	 */
 	case TCPS_SYN_RECEIVED:
 		if ((thflags & TH_ACK) &&
 		    (SEQ_LEQ(th->th_ack, tp->snd_una) ||
 		     SEQ_GT(th->th_ack, tp->snd_max))) {
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 		}
 		break;
 
 	/*
 	 * If the state is SYN_SENT:
 	 *	if seg contains an ACK, but not for our SYN, drop the input.
 	 *	if seg contains a RST, then drop the connection.
 	 *	if seg does not contain SYN, then drop it.
 	 * Otherwise this is an acceptable SYN segment
 	 *	initialize tp->rcv_nxt and tp->irs
 	 *	if seg contains ack then advance tp->snd_una
 	 *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
 	 *	arrange for segment to be acked (eventually)
 	 *	continue processing rest of data/controls, beginning with URG
 	 */
 	case TCPS_SYN_SENT:
 		if ((thflags & TH_ACK) &&
 		    (SEQ_LEQ(th->th_ack, tp->iss) ||
 		     SEQ_GT(th->th_ack, tp->snd_max))) {
 			rstreason = BANDLIM_UNLIMITED;
 			goto dropwithreset;
 		}
 		if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST))
 			tp = tcp_drop(tp, ECONNREFUSED);
 		if (thflags & TH_RST)
 			goto drop;
 		if (!(thflags & TH_SYN))
 			goto drop;
 
 		tp->irs = th->th_seq;
 		tcp_rcvseqinit(tp);
 		if (thflags & TH_ACK) {
 			tcpstat.tcps_connects++;
 			soisconnected(so);
 #ifdef MAC
 			SOCK_LOCK(so);
 			mac_socketpeer_set_from_mbuf(m, so);
 			SOCK_UNLOCK(so);
 #endif
 			/* Do window scaling on this connection? */
 			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 				tp->rcv_scale = tp->request_r_scale;
 			}
 			tp->rcv_adv += tp->rcv_wnd;
 			tp->snd_una++;		/* SYN is acked */
 			/*
 			 * If there's data, delay ACK; if there's also a FIN
 			 * ACKNOW will be turned on later.
 			 */
 			if (DELAY_ACK(tp) && tlen != 0)
 				tcp_timer_activate(tp, TT_DELACK,
 				    tcp_delacktime);
 			else
 				tp->t_flags |= TF_ACKNOW;
 			/*
 			 * Received <SYN,ACK> in SYN_SENT[*] state.
 			 * Transitions:
 			 *	SYN_SENT  --> ESTABLISHED
 			 *	SYN_SENT* --> FIN_WAIT_1
 			 */
 			tp->t_starttime = ticks;
 			if (tp->t_flags & TF_NEEDFIN) {
 				tp->t_state = TCPS_FIN_WAIT_1;
 				tp->t_flags &= ~TF_NEEDFIN;
 				thflags &= ~TH_SYN;
 			} else {
 				tp->t_state = TCPS_ESTABLISHED;
 				tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
 			}
 		} else {
 			/*
 			 * Received initial SYN in SYN-SENT[*] state =>
 			 * simultaneous open.  If segment contains CC option
 			 * and there is a cached CC, apply TAO test.
 			 * If it succeeds, connection is * half-synchronized.
 			 * Otherwise, do 3-way handshake:
 			 *        SYN-SENT -> SYN-RECEIVED
 			 *        SYN-SENT* -> SYN-RECEIVED*
 			 * If there was no CC option, clear cached CC value.
 			 */
 			tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			tp->t_state = TCPS_SYN_RECEIVED;
 		}
 
 		KASSERT(headlocked, ("%s: trimthenstep6: head not locked",
 		    __func__));
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		/*
 		 * Advance th->th_seq to correspond to first data byte.
 		 * If data, trim to stay within window,
 		 * dropping FIN if necessary.
 		 */
 		th->th_seq++;
 		if (tlen > tp->rcv_wnd) {
 			todrop = tlen - tp->rcv_wnd;
 			m_adj(m, -todrop);
 			tlen = tp->rcv_wnd;
 			thflags &= ~TH_FIN;
 			tcpstat.tcps_rcvpackafterwin++;
 			tcpstat.tcps_rcvbyteafterwin += todrop;
 		}
 		tp->snd_wl1 = th->th_seq - 1;
 		tp->rcv_up = th->th_seq;
 		/*
 		 * Client side of transaction: already sent SYN and data.
 		 * If the remote host used T/TCP to validate the SYN,
 		 * our data will be ACK'd; if so, enter normal data segment
 		 * processing in the middle of step 5, ack processing.
 		 * Otherwise, goto step 6.
 		 */
 		if (thflags & TH_ACK)
 			goto process_ACK;
 
 		goto step6;
 
 	/*
 	 * If the state is LAST_ACK or CLOSING or TIME_WAIT:
 	 *      do normal processing.
 	 *
 	 * NB: Leftover from RFC1644 T/TCP.  Cases to be reused later.
 	 */
 	case TCPS_LAST_ACK:
 	case TCPS_CLOSING:
 		break;  /* continue normal processing */
 	}
 
 	/*
 	 * States other than LISTEN or SYN_SENT.
 	 * First check the RST flag and sequence number since reset segments
 	 * are exempt from the timestamp and connection count tests.  This
 	 * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix
 	 * below which allowed reset segments in half the sequence space
 	 * to fall though and be processed (which gives forged reset
 	 * segments with a random sequence number a 50 percent chance of
 	 * killing a connection).
 	 * Then check timestamp, if present.
 	 * Then check the connection count, if present.
 	 * Then check that at least some bytes of segment are within
 	 * receive window.  If segment begins before rcv_nxt,
 	 * drop leading data (and SYN); if nothing left, just ack.
 	 *
 	 *
 	 * If the RST bit is set, check the sequence number to see
 	 * if this is a valid reset segment.
 	 * RFC 793 page 37:
 	 *   In all states except SYN-SENT, all reset (RST) segments
 	 *   are validated by checking their SEQ-fields.  A reset is
 	 *   valid if its sequence number is in the window.
 	 * Note: this does not take into account delayed ACKs, so
 	 *   we should test against last_ack_sent instead of rcv_nxt.
 	 *   The sequence number in the reset segment is normally an
 	 *   echo of our outgoing acknowlegement numbers, but some hosts
 	 *   send a reset with the sequence number at the rightmost edge
 	 *   of our receive window, and we have to handle this case.
 	 * Note 2: Paul Watson's paper "Slipping in the Window" has shown
 	 *   that brute force RST attacks are possible.  To combat this,
 	 *   we use a much stricter check while in the ESTABLISHED state,
 	 *   only accepting RSTs where the sequence number is equal to
 	 *   last_ack_sent.  In all other states (the states in which a
 	 *   RST is more likely), the more permissive check is used.
 	 * If we have multiple segments in flight, the intial reset
 	 * segment sequence numbers will be to the left of last_ack_sent,
 	 * but they will eventually catch up.
 	 * In any case, it never made sense to trim reset segments to
 	 * fit the receive window since RFC 1122 says:
 	 *   4.2.2.12  RST Segment: RFC-793 Section 3.4
 	 *
 	 *    A TCP SHOULD allow a received RST segment to include data.
 	 *
 	 *    DISCUSSION
 	 *         It has been suggested that a RST segment could contain
 	 *         ASCII text that encoded and explained the cause of the
 	 *         RST.  No standard has yet been established for such
 	 *         data.
 	 *
 	 * If the reset segment passes the sequence number test examine
 	 * the state:
 	 *    SYN_RECEIVED STATE:
 	 *	If passive open, return to LISTEN state.
 	 *	If active open, inform user that connection was refused.
 	 *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSE_WAIT STATES:
 	 *	Inform user that connection was reset, and close tcb.
 	 *    CLOSING, LAST_ACK STATES:
 	 *	Close the tcb.
 	 *    TIME_WAIT STATE:
 	 *	Drop the segment - see Stevens, vol. 2, p. 964 and
 	 *      RFC 1337.
 	 */
 	if (thflags & TH_RST) {
 		if (SEQ_GEQ(th->th_seq, tp->last_ack_sent - 1) &&
 		    SEQ_LEQ(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
 			switch (tp->t_state) {
 
 			case TCPS_SYN_RECEIVED:
 				so->so_error = ECONNREFUSED;
 				goto close;
 
 			case TCPS_ESTABLISHED:
 				if (tcp_insecure_rst == 0 &&
 				    !(SEQ_GEQ(th->th_seq, tp->rcv_nxt - 1) &&
 				    SEQ_LEQ(th->th_seq, tp->rcv_nxt + 1)) &&
 				    !(SEQ_GEQ(th->th_seq, tp->last_ack_sent - 1) &&
 				    SEQ_LEQ(th->th_seq, tp->last_ack_sent + 1))) {
 					tcpstat.tcps_badrst++;
 					goto drop;
 				}
 				/* FALLTHROUGH */
 			case TCPS_FIN_WAIT_1:
 			case TCPS_FIN_WAIT_2:
 			case TCPS_CLOSE_WAIT:
 				so->so_error = ECONNRESET;
 			close:
 				tp->t_state = TCPS_CLOSED;
 				tcpstat.tcps_drops++;
 				KASSERT(headlocked, ("%s: trimthenstep6: "
 				    "tcp_close: head not locked", __func__));
 				tp = tcp_close(tp);
 				break;
 
 			case TCPS_CLOSING:
 			case TCPS_LAST_ACK:
 				KASSERT(headlocked, ("%s: trimthenstep6: "
 				    "tcp_close.2: head not locked", __func__));
 				tp = tcp_close(tp);
 				break;
 			}
 		}
 		goto drop;
 	}
 
 	/*
 	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
 	 * and it's less than ts_recent, drop it.
 	 */
 	if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
 	    TSTMP_LT(to.to_tsval, tp->ts_recent)) {
 
 		/* Check to see if ts_recent is over 24 days old.  */
 		if ((int)(ticks - tp->ts_recent_age) > TCP_PAWS_IDLE) {
 			/*
 			 * Invalidate ts_recent.  If this segment updates
 			 * ts_recent, the age will be reset later and ts_recent
 			 * will get a valid value.  If it does not, setting
 			 * ts_recent to zero will at least satisfy the
 			 * requirement that zero be placed in the timestamp
 			 * echo reply when ts_recent isn't valid.  The
 			 * age isn't reset until we get a valid ts_recent
 			 * because we don't want out-of-order segments to be
 			 * dropped when ts_recent is old.
 			 */
 			tp->ts_recent = 0;
 		} else {
 			tcpstat.tcps_rcvduppack++;
 			tcpstat.tcps_rcvdupbyte += tlen;
 			tcpstat.tcps_pawsdrop++;
 			if (tlen)
 				goto dropafterack;
 			goto drop;
 		}
 	}
 
 	/*
 	 * In the SYN-RECEIVED state, validate that the packet belongs to
 	 * this connection before trimming the data to fit the receive
 	 * window.  Check the sequence number versus IRS since we know
 	 * the sequence numbers haven't wrapped.  This is a partial fix
 	 * for the "LAND" DoS attack.
 	 */
 	if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) {
 		rstreason = BANDLIM_RST_OPENPORT;
 		goto dropwithreset;
 	}
 
 	todrop = tp->rcv_nxt - th->th_seq;
 	if (todrop > 0) {
 		if (thflags & TH_SYN) {
 			thflags &= ~TH_SYN;
 			th->th_seq++;
 			if (th->th_urp > 1)
 				th->th_urp--;
 			else
 				thflags &= ~TH_URG;
 			todrop--;
 		}
 		/*
 		 * Following if statement from Stevens, vol. 2, p. 960.
 		 */
 		if (todrop > tlen
 		    || (todrop == tlen && (thflags & TH_FIN) == 0)) {
 			/*
 			 * Any valid FIN must be to the left of the window.
 			 * At this point the FIN must be a duplicate or out
 			 * of sequence; drop it.
 			 */
 			thflags &= ~TH_FIN;
 
 			/*
 			 * Send an ACK to resynchronize and drop any data.
 			 * But keep on processing for RST or ACK.
 			 */
 			tp->t_flags |= TF_ACKNOW;
 			todrop = tlen;
 			tcpstat.tcps_rcvduppack++;
 			tcpstat.tcps_rcvdupbyte += todrop;
 		} else {
 			tcpstat.tcps_rcvpartduppack++;
 			tcpstat.tcps_rcvpartdupbyte += todrop;
 		}
 		drop_hdrlen += todrop;	/* drop from the top afterwards */
 		th->th_seq += todrop;
 		tlen -= todrop;
 		if (th->th_urp > todrop)
 			th->th_urp -= todrop;
 		else {
 			thflags &= ~TH_URG;
 			th->th_urp = 0;
 		}
 	}
 
 	/*
 	 * If new data are received on a connection after the
 	 * user processes are gone, then RST the other end.
 	 */
 	if ((so->so_state & SS_NOFDREF) &&
 	    tp->t_state > TCPS_CLOSE_WAIT && tlen) {
 		char *s;
 
 		KASSERT(headlocked, ("%s: trimthenstep6: tcp_close.3: head "
 		    "not locked", __func__));
 		if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data after socket "
 			    "was closed, sending RST and removing tcpcb\n",
 			    s, __func__, tcpstates[tp->t_state], tlen);
 			free(s, M_TCPLOG);
 		}
 		tp = tcp_close(tp);
 		tcpstat.tcps_rcvafterclose++;
 		rstreason = BANDLIM_UNLIMITED;
 		goto dropwithreset;
 	}
 
 	/*
 	 * If segment ends after window, drop trailing data
 	 * (and PUSH and FIN); if nothing left, just ACK.
 	 */
 	todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd);
 	if (todrop > 0) {
 		tcpstat.tcps_rcvpackafterwin++;
 		if (todrop >= tlen) {
 			tcpstat.tcps_rcvbyteafterwin += tlen;
 			/*
 			 * If window is closed can only take segments at
 			 * window edge, and have to drop data and PUSH from
 			 * incoming segments.  Continue processing, but
 			 * remember to ack.  Otherwise, drop segment
 			 * and ack.
 			 */
 			if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
 				tp->t_flags |= TF_ACKNOW;
 				tcpstat.tcps_rcvwinprobe++;
 			} else
 				goto dropafterack;
 		} else
 			tcpstat.tcps_rcvbyteafterwin += todrop;
 		m_adj(m, -todrop);
 		tlen -= todrop;
 		thflags &= ~(TH_PUSH|TH_FIN);
 	}
 
 	/*
 	 * If last ACK falls within this segment's sequence numbers,
 	 * record its timestamp.
 	 * NOTE: 
 	 * 1) That the test incorporates suggestions from the latest
 	 *    proposal of the tcplw@cray.com list (Braden 1993/04/26).
 	 * 2) That updating only on newer timestamps interferes with
 	 *    our earlier PAWS tests, so this check should be solely
 	 *    predicated on the sequence space of this segment.
 	 * 3) That we modify the segment boundary check to be 
 	 *        Last.ACK.Sent <= SEG.SEQ + SEG.Len  
 	 *    instead of RFC1323's
 	 *        Last.ACK.Sent < SEG.SEQ + SEG.Len,
 	 *    This modified check allows us to overcome RFC1323's
 	 *    limitations as described in Stevens TCP/IP Illustrated
 	 *    Vol. 2 p.869. In such cases, we can still calculate the
 	 *    RTT correctly when RCV.NXT == Last.ACK.Sent.
 	 */
 	if ((to.to_flags & TOF_TS) != 0 &&
 	    SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
 	    SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
 		((thflags & (TH_SYN|TH_FIN)) != 0))) {
 		tp->ts_recent_age = ticks;
 		tp->ts_recent = to.to_tsval;
 	}
 
 	/*
 	 * If a SYN is in the window, then this is an
 	 * error and we send an RST and drop the connection.
 	 */
 	if (thflags & TH_SYN) {
 		KASSERT(headlocked, ("%s: tcp_drop: trimthenstep6: "
 		    "head not locked", __func__));
 		tp = tcp_drop(tp, ECONNRESET);
 		rstreason = BANDLIM_UNLIMITED;
 		goto drop;
 	}
 
 	/*
 	 * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN
 	 * flag is on (half-synchronized state), then queue data for
 	 * later processing; else drop segment and return.
 	 */
 	if ((thflags & TH_ACK) == 0) {
 		if (tp->t_state == TCPS_SYN_RECEIVED ||
 		    (tp->t_flags & TF_NEEDSYN))
 			goto step6;
 		else if (tp->t_flags & TF_ACKNOW)
 			goto dropafterack;
 		else
 			goto drop;
 	}
 
 	/*
 	 * Ack processing.
 	 */
 	switch (tp->t_state) {
 
 	/*
 	 * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
 	 * ESTABLISHED state and continue processing.
 	 * The ACK was checked above.
 	 */
 	case TCPS_SYN_RECEIVED:
 
 		tcpstat.tcps_connects++;
 		soisconnected(so);
 		/* Do window scaling? */
 		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 			tp->rcv_scale = tp->request_r_scale;
 			tp->snd_wnd = tiwin;
 		}
 		/*
 		 * Make transitions:
 		 *      SYN-RECEIVED  -> ESTABLISHED
 		 *      SYN-RECEIVED* -> FIN-WAIT-1
 		 */
 		tp->t_starttime = ticks;
 		if (tp->t_flags & TF_NEEDFIN) {
 			tp->t_state = TCPS_FIN_WAIT_1;
 			tp->t_flags &= ~TF_NEEDFIN;
 		} else {
 			tp->t_state = TCPS_ESTABLISHED;
 			tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
 		}
 		/*
 		 * If segment contains data or ACK, will call tcp_reass()
 		 * later; if not, do so now to pass queued data to user.
 		 */
 		if (tlen == 0 && (thflags & TH_FIN) == 0)
 			(void) tcp_reass(tp, (struct tcphdr *)0, 0,
 			    (struct mbuf *)0);
 		tp->snd_wl1 = th->th_seq - 1;
 		/* FALLTHROUGH */
 
 	/*
 	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
 	 * ACKs.  If the ack is in the range
 	 *	tp->snd_una < th->th_ack <= tp->snd_max
 	 * then advance tp->snd_una to th->th_ack and drop
 	 * data from the retransmission queue.  If this ACK reflects
 	 * more up to date window information we update our window information.
 	 */
 	case TCPS_ESTABLISHED:
 	case TCPS_FIN_WAIT_1:
 	case TCPS_FIN_WAIT_2:
 	case TCPS_CLOSE_WAIT:
 	case TCPS_CLOSING:
 	case TCPS_LAST_ACK:
 		if (SEQ_GT(th->th_ack, tp->snd_max)) {
 			tcpstat.tcps_rcvacktoomuch++;
 			goto dropafterack;
 		}
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    ((to.to_flags & TOF_SACK) ||
 		     !TAILQ_EMPTY(&tp->snd_holes)))
 			tcp_sack_doack(tp, &to, th->th_ack);
 		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
 			if (tlen == 0 && tiwin == tp->snd_wnd) {
 				tcpstat.tcps_rcvdupack++;
 				/*
 				 * If we have outstanding data (other than
 				 * a window probe), this is a completely
 				 * duplicate ack (ie, window info didn't
 				 * change), the ack is the biggest we've
 				 * seen and we've seen exactly our rexmt
 				 * threshhold of them, assume a packet
 				 * has been dropped and retransmit it.
 				 * Kludge snd_nxt & the congestion
 				 * window so we send only this one
 				 * packet.
 				 *
 				 * We know we're losing at the current
 				 * window size so do congestion avoidance
 				 * (set ssthresh to half the current window
 				 * and pull our congestion window back to
 				 * the new ssthresh).
 				 *
 				 * Dup acks mean that packets have left the
 				 * network (they're now cached at the receiver)
 				 * so bump cwnd by the amount in the receiver
 				 * to keep a constant cwnd packets in the
 				 * network.
 				 */
 				if (!tcp_timer_active(tp, TT_REXMT) ||
 				    th->th_ack != tp->snd_una)
 					tp->t_dupacks = 0;
 				else if (++tp->t_dupacks > tcprexmtthresh ||
 				    ((tcp_do_newreno ||
 				      (tp->t_flags & TF_SACK_PERMIT)) &&
 				     IN_FASTRECOVERY(tp))) {
 					if ((tp->t_flags & TF_SACK_PERMIT) &&
 					    IN_FASTRECOVERY(tp)) {
 						int awnd;
 						
 						/*
 						 * Compute the amount of data in flight first.
 						 * We can inject new data into the pipe iff 
 						 * we have less than 1/2 the original window's 	
 						 * worth of data in flight.
 						 */
 						awnd = (tp->snd_nxt - tp->snd_fack) +
 							tp->sackhint.sack_bytes_rexmit;
 						if (awnd < tp->snd_ssthresh) {
 							tp->snd_cwnd += tp->t_maxseg;
 							if (tp->snd_cwnd > tp->snd_ssthresh)
 								tp->snd_cwnd = tp->snd_ssthresh;
 						}
 					} else
 						tp->snd_cwnd += tp->t_maxseg;
 					(void) tcp_output(tp);
 					goto drop;
 				} else if (tp->t_dupacks == tcprexmtthresh) {
 					tcp_seq onxt = tp->snd_nxt;
 					u_int win;
 
 					/*
 					 * If we're doing sack, check to
 					 * see if we're already in sack
 					 * recovery. If we're not doing sack,
 					 * check to see if we're in newreno
 					 * recovery.
 					 */
 					if (tp->t_flags & TF_SACK_PERMIT) {
 						if (IN_FASTRECOVERY(tp)) {
 							tp->t_dupacks = 0;
 							break;
 						}
 					} else if (tcp_do_newreno) {
 						if (SEQ_LEQ(th->th_ack,
 						    tp->snd_recover)) {
 							tp->t_dupacks = 0;
 							break;
 						}
 					}
 					win = min(tp->snd_wnd, tp->snd_cwnd) /
 					    2 / tp->t_maxseg;
 					if (win < 2)
 						win = 2;
 					tp->snd_ssthresh = win * tp->t_maxseg;
 					ENTER_FASTRECOVERY(tp);
 					tp->snd_recover = tp->snd_max;
 					tcp_timer_activate(tp, TT_REXMT, 0);
 					tp->t_rtttime = 0;
 					if (tp->t_flags & TF_SACK_PERMIT) {
 						tcpstat.tcps_sack_recovery_episode++;
 						tp->sack_newdata = tp->snd_nxt;
 						tp->snd_cwnd = tp->t_maxseg;
 						(void) tcp_output(tp);
 						goto drop;
 					}
 					tp->snd_nxt = th->th_ack;
 					tp->snd_cwnd = tp->t_maxseg;
 					(void) tcp_output(tp);
 					KASSERT(tp->snd_limited <= 2,
 					    ("%s: tp->snd_limited too big",
 					    __func__));
 					tp->snd_cwnd = tp->snd_ssthresh +
 					     tp->t_maxseg *
 					     (tp->t_dupacks - tp->snd_limited);
 					if (SEQ_GT(onxt, tp->snd_nxt))
 						tp->snd_nxt = onxt;
 					goto drop;
 				} else if (tcp_do_rfc3042) {
 					u_long oldcwnd = tp->snd_cwnd;
 					tcp_seq oldsndmax = tp->snd_max;
 					u_int sent;
 
 					KASSERT(tp->t_dupacks == 1 ||
 					    tp->t_dupacks == 2,
 					    ("%s: dupacks not 1 or 2",
 					    __func__));
 					if (tp->t_dupacks == 1)
 						tp->snd_limited = 0;
 					tp->snd_cwnd =
 					    (tp->snd_nxt - tp->snd_una) +
 					    (tp->t_dupacks - tp->snd_limited) *
 					    tp->t_maxseg;
 					(void) tcp_output(tp);
 					sent = tp->snd_max - oldsndmax;
 					if (sent > tp->t_maxseg) {
 						KASSERT((tp->t_dupacks == 2 &&
 						    tp->snd_limited == 0) ||
 						   (sent == tp->t_maxseg + 1 &&
 						    tp->t_flags & TF_SENTFIN),
 						    ("%s: sent too much",
 						    __func__));
 						tp->snd_limited = 2;
 					} else if (sent > 0)
 						++tp->snd_limited;
 					tp->snd_cwnd = oldcwnd;
 					goto drop;
 				}
 			} else
 				tp->t_dupacks = 0;
 			break;
 		}
 
 		KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
 		    ("%s: th_ack <= snd_una", __func__));
 
 		/*
 		 * If the congestion window was inflated to account
 		 * for the other side's cached packets, retract it.
 		 */
 		if (tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) {
 			if (IN_FASTRECOVERY(tp)) {
 				if (SEQ_LT(th->th_ack, tp->snd_recover)) {
 					if (tp->t_flags & TF_SACK_PERMIT)
 						tcp_sack_partialack(tp, th);
 					else
 						tcp_newreno_partial_ack(tp, th);
 				} else {
 					/*
 					 * Out of fast recovery.
 					 * Window inflation should have left us
 					 * with approximately snd_ssthresh
 					 * outstanding data.
 					 * But in case we would be inclined to
 					 * send a burst, better to do it via
 					 * the slow start mechanism.
 					 */
 					if (SEQ_GT(th->th_ack +
 							tp->snd_ssthresh,
 						   tp->snd_max))
 						tp->snd_cwnd = tp->snd_max -
 								th->th_ack +
 								tp->t_maxseg;
 					else
 						tp->snd_cwnd = tp->snd_ssthresh;
 				}
 			}
 		} else {
 			if (tp->t_dupacks >= tcprexmtthresh &&
 			    tp->snd_cwnd > tp->snd_ssthresh)
 				tp->snd_cwnd = tp->snd_ssthresh;
 		}
 		tp->t_dupacks = 0;
 		/*
 		 * If we reach this point, ACK is not a duplicate,
 		 *     i.e., it ACKs something we sent.
 		 */
 		if (tp->t_flags & TF_NEEDSYN) {
 			/*
 			 * T/TCP: Connection was half-synchronized, and our
 			 * SYN has been ACK'd (so connection is now fully
 			 * synchronized).  Go to non-starred state,
 			 * increment snd_una for ACK of SYN, and check if
 			 * we can do window scaling.
 			 */
 			tp->t_flags &= ~TF_NEEDSYN;
 			tp->snd_una++;
 			/* Do window scaling? */
 			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 				tp->rcv_scale = tp->request_r_scale;
 				/* Send window already scaled. */
 			}
 		}
 
 process_ACK:
 		KASSERT(headlocked, ("%s: process_ACK: head not locked",
 		    __func__));
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		acked = th->th_ack - tp->snd_una;
 		tcpstat.tcps_rcvackpack++;
 		tcpstat.tcps_rcvackbyte += acked;
 
 		/*
 		 * If we just performed our first retransmit, and the ACK
 		 * arrives within our recovery window, then it was a mistake
 		 * to do the retransmit in the first place.  Recover our
 		 * original cwnd and ssthresh, and proceed to transmit where
 		 * we left off.
 		 */
 		if (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin) {
 			++tcpstat.tcps_sndrexmitbad;
 			tp->snd_cwnd = tp->snd_cwnd_prev;
 			tp->snd_ssthresh = tp->snd_ssthresh_prev;
 			tp->snd_recover = tp->snd_recover_prev;
 			if (tp->t_flags & TF_WASFRECOVERY)
 				ENTER_FASTRECOVERY(tp);
 			tp->snd_nxt = tp->snd_max;
 			tp->t_badrxtwin = 0;	/* XXX probably not required */
 		}
 
 		/*
 		 * If we have a timestamp reply, update smoothed
 		 * round trip time.  If no timestamp is present but
 		 * transmit timer is running and timed sequence
 		 * number was acked, update smoothed round trip time.
 		 * Since we now have an rtt measurement, cancel the
 		 * timer backoff (cf., Phil Karn's retransmit alg.).
 		 * Recompute the initial retransmit timer.
 		 *
 		 * Some boxes send broken timestamp replies
 		 * during the SYN+ACK phase, ignore
 		 * timestamps of 0 or we could calculate a
 		 * huge RTT and blow up the retransmit timer.
 		 */
 		if ((to.to_flags & TOF_TS) != 0 &&
 		    to.to_tsecr) {
 			if (!tp->t_rttlow || tp->t_rttlow > ticks - to.to_tsecr)
 				tp->t_rttlow = ticks - to.to_tsecr;
 			tcp_xmit_timer(tp, ticks - to.to_tsecr + 1);
 		} else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
 			if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime)
 				tp->t_rttlow = ticks - tp->t_rtttime;
 			tcp_xmit_timer(tp, ticks - tp->t_rtttime);
 		}
 		tcp_xmit_bandwidth_limit(tp, th->th_ack);
 
 		/*
 		 * If all outstanding data is acked, stop retransmit
 		 * timer and remember to restart (more output or persist).
 		 * If there is more data to be acked, restart retransmit
 		 * timer, using current (possibly backed-off) value.
 		 */
 		if (th->th_ack == tp->snd_max) {
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			needoutput = 1;
 		} else if (!tcp_timer_active(tp, TT_PERSIST))
 			tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
 
 		/*
 		 * If no data (only SYN) was ACK'd,
 		 *    skip rest of ACK processing.
 		 */
 		if (acked == 0)
 			goto step6;
 
 		/*
 		 * When new data is acked, open the congestion window.
 		 * If the window gives us less than ssthresh packets
 		 * in flight, open exponentially (maxseg per packet).
 		 * Otherwise open linearly: maxseg per window
 		 * (maxseg^2 / cwnd per packet).
 		 */
 		if ((!tcp_do_newreno && !(tp->t_flags & TF_SACK_PERMIT)) ||
 		    !IN_FASTRECOVERY(tp)) {
 			u_int cw = tp->snd_cwnd;
 			u_int incr = tp->t_maxseg;
 			if (cw > tp->snd_ssthresh)
 				incr = incr * incr / cw;
 			tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
 		}
 		SOCKBUF_LOCK(&so->so_snd);
 		if (acked > so->so_snd.sb_cc) {
 			tp->snd_wnd -= so->so_snd.sb_cc;
 			sbdrop_locked(&so->so_snd, (int)so->so_snd.sb_cc);
 			ourfinisacked = 1;
 		} else {
 			sbdrop_locked(&so->so_snd, acked);
 			tp->snd_wnd -= acked;
 			ourfinisacked = 0;
 		}
 		/* NB: sowwakeup_locked() does an implicit unlock. */
 		sowwakeup_locked(so);
 		/* Detect una wraparound. */
 		if ((tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) &&
 		    !IN_FASTRECOVERY(tp) &&
 		    SEQ_GT(tp->snd_una, tp->snd_recover) &&
 		    SEQ_LEQ(th->th_ack, tp->snd_recover))
 			tp->snd_recover = th->th_ack - 1;
 		if ((tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) &&
 		    IN_FASTRECOVERY(tp) &&
 		    SEQ_GEQ(th->th_ack, tp->snd_recover))
 			EXIT_FASTRECOVERY(tp);
 		tp->snd_una = th->th_ack;
 		if (tp->t_flags & TF_SACK_PERMIT) {
 			if (SEQ_GT(tp->snd_una, tp->snd_recover))
 				tp->snd_recover = tp->snd_una;
 		}
 		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
 			tp->snd_nxt = tp->snd_una;
 
 		switch (tp->t_state) {
 
 		/*
 		 * In FIN_WAIT_1 STATE in addition to the processing
 		 * for the ESTABLISHED state if our FIN is now acknowledged
 		 * then enter FIN_WAIT_2.
 		 */
 		case TCPS_FIN_WAIT_1:
 			if (ourfinisacked) {
 				/*
 				 * If we can't receive any more
 				 * data, then closing user can proceed.
 				 * Starting the timer is contrary to the
 				 * specification, but if we don't get a FIN
 				 * we'll hang forever.
 				 *
 				 * XXXjl:
 				 * we should release the tp also, and use a
 				 * compressed state.
 				 */
 				if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 					int timeout;
 
 					soisdisconnected(so);
 					timeout = (tcp_fast_finwait2_recycle) ? 
 						tcp_finwait2_timeout : tcp_maxidle;
 					tcp_timer_activate(tp, TT_2MSL, timeout);
 				}
 				tp->t_state = TCPS_FIN_WAIT_2;
 			}
 			break;
 
 		/*
 		 * In CLOSING STATE in addition to the processing for
 		 * the ESTABLISHED state if the ACK acknowledges our FIN
 		 * then enter the TIME-WAIT state, otherwise ignore
 		 * the segment.
 		 */
 		case TCPS_CLOSING:
 			if (ourfinisacked) {
 				KASSERT(headlocked, ("%s: process_ACK: "
 				    "head not locked", __func__));
 				tcp_twstart(tp);
 				INP_INFO_WUNLOCK(&tcbinfo);
 				headlocked = 0;
 				m_freem(m);
 				return;
 			}
 			break;
 
 		/*
 		 * In LAST_ACK, we may still be waiting for data to drain
 		 * and/or to be acked, as well as for the ack of our FIN.
 		 * If our FIN is now acknowledged, delete the TCB,
 		 * enter the closed state and return.
 		 */
 		case TCPS_LAST_ACK:
 			if (ourfinisacked) {
 				KASSERT(headlocked, ("%s: process_ACK: "
 				    "tcp_close: head not locked", __func__));
 				tp = tcp_close(tp);
 				goto drop;
 			}
 			break;
 		}
 	}
 
 step6:
 	KASSERT(headlocked, ("%s: step6: head not locked", __func__));
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Update window information.
 	 * Don't look at window if no ACK: TAC's send garbage on first SYN.
 	 */
 	if ((thflags & TH_ACK) &&
 	    (SEQ_LT(tp->snd_wl1, th->th_seq) ||
 	    (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
 	     (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
 		/* keep track of pure window updates */
 		if (tlen == 0 &&
 		    tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
 			tcpstat.tcps_rcvwinupd++;
 		tp->snd_wnd = tiwin;
 		tp->snd_wl1 = th->th_seq;
 		tp->snd_wl2 = th->th_ack;
 		if (tp->snd_wnd > tp->max_sndwnd)
 			tp->max_sndwnd = tp->snd_wnd;
 		needoutput = 1;
 	}
 
 	/*
 	 * Process segments with URG.
 	 */
 	if ((thflags & TH_URG) && th->th_urp &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		/*
 		 * This is a kludge, but if we receive and accept
 		 * random urgent pointers, we'll crash in
 		 * soreceive.  It's hard to imagine someone
 		 * actually wanting to send this much urgent data.
 		 */
 		SOCKBUF_LOCK(&so->so_rcv);
 		if (th->th_urp + so->so_rcv.sb_cc > sb_max) {
 			th->th_urp = 0;			/* XXX */
 			thflags &= ~TH_URG;		/* XXX */
 			SOCKBUF_UNLOCK(&so->so_rcv);	/* XXX */
 			goto dodata;			/* XXX */
 		}
 		/*
 		 * If this segment advances the known urgent pointer,
 		 * then mark the data stream.  This should not happen
 		 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
 		 * a FIN has been received from the remote side.
 		 * In these states we ignore the URG.
 		 *
 		 * According to RFC961 (Assigned Protocols),
 		 * the urgent pointer points to the last octet
 		 * of urgent data.  We continue, however,
 		 * to consider it to indicate the first octet
 		 * of data past the urgent section as the original
 		 * spec states (in one of two places).
 		 */
 		if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
 			tp->rcv_up = th->th_seq + th->th_urp;
 			so->so_oobmark = so->so_rcv.sb_cc +
 			    (tp->rcv_up - tp->rcv_nxt) - 1;
 			if (so->so_oobmark == 0)
 				so->so_rcv.sb_state |= SBS_RCVATMARK;
 			sohasoutofband(so);
 			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
 		}
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		/*
 		 * Remove out of band data so doesn't get presented to user.
 		 * This can happen independent of advancing the URG pointer,
 		 * but if two URG's are pending at once, some out-of-band
 		 * data may creep in... ick.
 		 */
 		if (th->th_urp <= (u_long)tlen &&
 		    !(so->so_options & SO_OOBINLINE)) {
 			/* hdr drop is delayed */
 			tcp_pulloutofband(so, th, m, drop_hdrlen);
 		}
 	} else {
 		/*
 		 * If no out of band data is expected,
 		 * pull receive urgent pointer along
 		 * with the receive window.
 		 */
 		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
 			tp->rcv_up = tp->rcv_nxt;
 	}
 dodata:							/* XXX */
 	KASSERT(headlocked, ("%s: dodata: head not locked", __func__));
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Process the segment text, merging it into the TCP sequencing queue,
 	 * and arranging for acknowledgment of receipt if necessary.
 	 * This process logically involves adjusting tp->rcv_wnd as data
 	 * is presented to the user (this happens in tcp_usrreq.c,
 	 * case PRU_RCVD).  If a FIN has already been received on this
 	 * connection then we just ignore the text.
 	 */
 	if ((tlen || (thflags & TH_FIN)) &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		tcp_seq save_start = th->th_seq;
 		m_adj(m, drop_hdrlen);	/* delayed header drop */
 		/*
 		 * Insert segment which includes th into TCP reassembly queue
 		 * with control block tp.  Set thflags to whether reassembly now
 		 * includes a segment with FIN.  This handles the common case
 		 * inline (segment is the next to be received on an established
 		 * connection, and the queue is empty), avoiding linkage into
 		 * and removal from the queue and repetition of various
 		 * conversions.
 		 * Set DELACK for segments received in order, but ack
 		 * immediately when segments are out of order (so
 		 * fast retransmit can work).
 		 */
 		if (th->th_seq == tp->rcv_nxt &&
 		    LIST_EMPTY(&tp->t_segq) &&
 		    TCPS_HAVEESTABLISHED(tp->t_state)) {
 			if (DELAY_ACK(tp))
 				tp->t_flags |= TF_DELACK;
 			else
 				tp->t_flags |= TF_ACKNOW;
 			tp->rcv_nxt += tlen;
 			thflags = th->th_flags & TH_FIN;
 			tcpstat.tcps_rcvpack++;
 			tcpstat.tcps_rcvbyte += tlen;
 			ND6_HINT(tp);
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 				m_freem(m);
 			else
 				sbappendstream_locked(&so->so_rcv, m);
 			/* NB: sorwakeup_locked() does an implicit unlock. */
 			sorwakeup_locked(so);
 		} else {
 			/*
 			 * XXX: Due to the header drop above "th" is
 			 * theoretically invalid by now.  Fortunately
 			 * m_adj() doesn't actually free any mbufs
 			 * when trimming from the head.
 			 */
 			thflags = tcp_reass(tp, th, &tlen, m);
 			tp->t_flags |= TF_ACKNOW;
 		}
 		if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
 			tcp_update_sack_list(tp, save_start, save_start + tlen);
 #if 0
 		/*
 		 * Note the amount of data that peer has sent into
 		 * our window, in order to estimate the sender's
 		 * buffer size.
 		 * XXX: Unused.
 		 */
 		len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
 #endif
 	} else {
 		m_freem(m);
 		thflags &= ~TH_FIN;
 	}
 
 	/*
 	 * If FIN is received ACK the FIN and let the user know
 	 * that the connection is closing.
 	 */
 	if (thflags & TH_FIN) {
 		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 			socantrcvmore(so);
 			/*
 			 * If connection is half-synchronized
 			 * (ie NEEDSYN flag on) then delay ACK,
 			 * so it may be piggybacked when SYN is sent.
 			 * Otherwise, since we received a FIN then no
 			 * more input can be expected, send ACK now.
 			 */
 			if (tp->t_flags & TF_NEEDSYN)
 				tp->t_flags |= TF_DELACK;
 			else
 				tp->t_flags |= TF_ACKNOW;
 			tp->rcv_nxt++;
 		}
 		switch (tp->t_state) {
 
 		/*
 		 * In SYN_RECEIVED and ESTABLISHED STATES
 		 * enter the CLOSE_WAIT state.
 		 */
 		case TCPS_SYN_RECEIVED:
 			tp->t_starttime = ticks;
 			/* FALLTHROUGH */
 		case TCPS_ESTABLISHED:
 			tp->t_state = TCPS_CLOSE_WAIT;
 			break;
 
 		/*
 		 * If still in FIN_WAIT_1 STATE FIN has not been acked so
 		 * enter the CLOSING state.
 		 */
 		case TCPS_FIN_WAIT_1:
 			tp->t_state = TCPS_CLOSING;
 			break;
 
 		/*
 		 * In FIN_WAIT_2 state enter the TIME_WAIT state,
 		 * starting the time-wait timer, turning off the other
 		 * standard timers.
 		 */
 		case TCPS_FIN_WAIT_2:
 			KASSERT(headlocked == 1, ("%s: dodata: "
 			    "TCP_FIN_WAIT_2: head not locked", __func__));
 			tcp_twstart(tp);
 			INP_INFO_WUNLOCK(&tcbinfo);
 			return;
 		}
 	}
 	INP_INFO_WUNLOCK(&tcbinfo);
 	headlocked = 0;
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 
 	/*
 	 * Return any desired output.
 	 */
 	if (needoutput || (tp->t_flags & TF_ACKNOW))
 		(void) tcp_output(tp);
 
 check_delack:
 	KASSERT(headlocked == 0, ("%s: check_delack: head locked",
 	    __func__));
 	INP_INFO_UNLOCK_ASSERT(&tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	if (tp->t_flags & TF_DELACK) {
 		tp->t_flags &= ~TF_DELACK;
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 	}
 	INP_WUNLOCK(tp->t_inpcb);
 	return;
 
 dropafterack:
 	KASSERT(headlocked, ("%s: dropafterack: head not locked", __func__));
 	/*
 	 * Generate an ACK dropping incoming segment if it occupies
 	 * sequence space, where the ACK reflects our state.
 	 *
 	 * We can now skip the test for the RST flag since all
 	 * paths to this code happen after packets containing
 	 * RST have been dropped.
 	 *
 	 * In the SYN-RECEIVED state, don't send an ACK unless the
 	 * segment we received passes the SYN-RECEIVED ACK test.
 	 * If it fails send a RST.  This breaks the loop in the
 	 * "LAND" DoS attack, and also prevents an ACK storm
 	 * between two listening ports that have been sent forged
 	 * SYN segments, each with the source address of the other.
 	 */
 	if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) &&
 	    (SEQ_GT(tp->snd_una, th->th_ack) ||
 	     SEQ_GT(th->th_ack, tp->snd_max)) ) {
 		rstreason = BANDLIM_RST_OPENPORT;
 		goto dropwithreset;
 	}
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	KASSERT(headlocked, ("%s: headlocked should be 1", __func__));
 	INP_INFO_WUNLOCK(&tcbinfo);
 	tp->t_flags |= TF_ACKNOW;
 	(void) tcp_output(tp);
 	INP_WUNLOCK(tp->t_inpcb);
 	m_freem(m);
 	return;
 
 dropwithreset:
 	KASSERT(headlocked, ("%s: dropwithreset: head not locked", __func__));
 
 	tcp_dropwithreset(m, th, tp, tlen, rstreason);
 
 	if (tp != NULL)
 		INP_WUNLOCK(tp->t_inpcb);
 	if (headlocked)
 		INP_INFO_WUNLOCK(&tcbinfo);
 	return;
 
 drop:
 	/*
 	 * Drop space held by incoming segment and return.
 	 */
 #ifdef TCPDEBUG
 	if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	if (tp != NULL)
 		INP_WUNLOCK(tp->t_inpcb);
 	if (headlocked)
 		INP_INFO_WUNLOCK(&tcbinfo);
 	m_freem(m);
 	return;
 }
 
 /*
  * Answer an unwanted segment with a RST that its originator will find
  * acceptable, or quietly discard the segment when no reset may be sent.
  *
  * m         - received packet; must still start with the original packet
  *             header.  It is handed to tcp_respond() (which consumes the
  *             chain) or freed here.
  * th        - TCP header of the received segment.
  * tp        - owning control block, or NULL.
  * tlen      - length of the segment as counted by the caller.
  * rstreason - category passed to badport_bandlim() for rate limiting.
  */
 static void
 tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
     int tlen, int rstreason)
 {
 	struct ip *ip;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 
 	if (tp != NULL) {
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 	}
 
 	/*
 	 * No reset in reply to a RST, nor for a packet whose mbuf is
 	 * flagged as broadcast or multicast.
 	 */
 	if ((th->th_flags & TH_RST) != 0 ||
 	    (m->m_flags & (M_BCAST|M_MCAST)) != 0)
 		goto drop;
 
 	/* Likewise when either address is multicast/broadcast. */
 #ifdef INET6
 	if (mtod(m, struct ip *)->ip_v == 6) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
 			goto drop;
 		if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
 			goto drop;
 		/* IPv6 anycast check is done at tcp6_input() */
 	} else
 #endif
 	{
 		ip = mtod(m, struct ip *);
 		if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)))
 			goto drop;
 		if (IN_MULTICAST(ntohl(ip->ip_src.s_addr)))
 			goto drop;
 		if (ip->ip_src.s_addr == htonl(INADDR_BROADCAST))
 			goto drop;
 		if (in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
 			goto drop;
 	}
 
 	/* Stay silent when the rate limiter says so. */
 	if (badport_bandlim(rstreason) < 0)
 		goto drop;
 
 	/*
 	 * Build the reset; tcp_respond() takes ownership of the mbuf chain.
 	 * Without an ACK in the offending segment we acknowledge everything
 	 * it carried (a SYN adds one to the length) and send RST|ACK with a
 	 * zero sequence number.  With an ACK, the reset's sequence number is
 	 * the segment's own ack value.
 	 */
 	if ((th->th_flags & TH_ACK) == 0) {
 		if (th->th_flags & TH_SYN)
 			tlen++;
 		tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
 		    (tcp_seq)0, TH_RST|TH_ACK);
 	} else {
 		tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0,
 		    th->th_ack, TH_RST);
 	}
 	return;
 
 drop:
 	m_freem(m);
 }
 
 /*
  * Scan the TCP option bytes of a segment and record what was found.
  *
  * to    - result; to_flags is cleared first and then gets one TOF_* bit
  *         per recognized option, with the matching value field filled in.
  *         to_sacks (and to_signature, when built with TCP_SIGNATURE)
  *         point into the option buffer rather than copying it.
  * cp    - first option byte.
  * cnt   - number of option bytes available at cp.
  * flags - TO_SYN when the segment carries a SYN.  MSS, window scale and
  *         SACK-permitted are honored only with TO_SYN; SACK blocks only
  *         without it.
  *
  * Scanning stops at an end-of-list option or at an option whose length
  * byte is missing, below 2, or larger than the bytes remaining.  A known
  * option with an unexpected length is skipped, as is any unknown option.
  */
 static void
 tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
 {
 	int kind, olen;
 	const int syn = (flags & TO_SYN) != 0;
 
 	to->to_flags = 0;
 	for (; cnt > 0; cnt -= olen, cp += olen) {
 		kind = cp[0];
 		if (kind == TCPOPT_EOL)
 			break;
 
 		/* NOP is a lone byte; everything else has a length byte. */
 		if (kind == TCPOPT_NOP) {
 			olen = 1;
 		} else if (cnt < 2) {
 			break;
 		} else {
 			olen = cp[1];
 			if (olen < 2 || olen > cnt)
 				break;
 		}
 
 		switch (kind) {
 		case TCPOPT_MAXSEG:
 			if (olen == TCPOLEN_MAXSEG && syn) {
 				to->to_flags |= TOF_MSS;
 				bcopy((char *)cp + 2,
 				    (char *)&to->to_mss, sizeof(to->to_mss));
 				to->to_mss = ntohs(to->to_mss);
 			}
 			break;
 
 		case TCPOPT_WINDOW:
 			if (olen == TCPOLEN_WINDOW && syn) {
 				to->to_flags |= TOF_SCALE;
 				/* Clamp the advertised shift count. */
 				to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT);
 			}
 			break;
 
 		case TCPOPT_TIMESTAMP:
 			if (olen == TCPOLEN_TIMESTAMP) {
 				to->to_flags |= TOF_TS;
 				bcopy((char *)cp + 2,
 				    (char *)&to->to_tsval,
 				    sizeof(to->to_tsval));
 				to->to_tsval = ntohl(to->to_tsval);
 				bcopy((char *)cp + 6,
 				    (char *)&to->to_tsecr,
 				    sizeof(to->to_tsecr));
 				to->to_tsecr = ntohl(to->to_tsecr);
 			}
 			break;
 
 #ifdef TCP_SIGNATURE
 		/*
 		 * XXX A peer that put the TCP_SIGNATURE option in its
 		 * initial SYN can only be answered correctly if the
 		 * syncache code knows the option was seen, so note it here.
 		 */
 		case TCPOPT_SIGNATURE:
 			if (olen == TCPOLEN_SIGNATURE) {
 				to->to_flags |= TOF_SIGNATURE;
 				to->to_signature = cp + 2;
 			}
 			break;
 #endif
 
 		case TCPOPT_SACK_PERMITTED:
 			if (olen == TCPOLEN_SACK_PERMITTED && syn &&
 			    tcp_do_sack)
 				to->to_flags |= TOF_SACKPERM;
 			break;
 
 		case TCPOPT_SACK:
 			/* Payload must be a whole, non-zero number of blocks. */
 			if (olen > 2 && (olen - 2) % TCPOLEN_SACK == 0 &&
 			    !syn) {
 				to->to_flags |= TOF_SACK;
 				to->to_nsacks = (olen - 2) / TCPOLEN_SACK;
 				to->to_sacks = cp + 2;
 				tcpstat.tcps_sack_rcv_blocks++;
 			}
 			break;
 
 		default:
 			break;
 		}
 	}
 }
 
 /*
  * Remove the out-of-band byte from a received segment so that it is not
  * delivered with the normal data.  The byte is saved in the connection's
  * t_iobc and TCPOOB_HAVEDATA is set; the segment length used for
  * sequencing is not adjusted here.
  *
  * so  - socket the segment belongs to.
  * th  - TCP header; th_urp locates the byte.
  * m   - mbuf chain holding the segment.
  * off - bytes preceding the TCP data in the chain.
  *
  * Panics if the byte cannot be found in the chain.
  */
 static void
 tcp_pulloutofband(struct socket *so, struct tcphdr *th, struct mbuf *m,
     int off)
 {
 	struct tcpcb *tp;
 	char *oob;
 	int skip = off + th->th_urp - 1;	/* offset of the byte */
 
 	if (skip >= 0) {
 		do {
 			if (skip < m->m_len) {
 				/* The byte lives in this mbuf. */
 				oob = mtod(m, caddr_t) + skip;
 				tp = sototcpcb(so);
 
 				INP_WLOCK_ASSERT(tp->t_inpcb);
 
 				tp->t_iobc = *oob;
 				tp->t_oobflags |= TCPOOB_HAVEDATA;
 				/* Close the gap left by the removed byte. */
 				bcopy(oob+1, oob,
 				    (unsigned)(m->m_len - skip - 1));
 				m->m_len--;
 				if (m->m_flags & M_PKTHDR)
 					m->m_pkthdr.len--;
 				return;
 			}
 			/* Not here: step over this mbuf. */
 			skip -= m->m_len;
 			m = m->m_next;
 		} while (m != NULL && skip >= 0);
 	}
 	panic("tcp_pulloutofband");
 }
 
 /*
  * Feed one round-trip time sample into the connection's estimator:
  * update the smoothed rtt and its variance, clear the rtt timing and
  * retransmit backoff state, and recompute the retransmit timeout.
  *
  * tp  - connection being updated.
  * rtt - the new sample, as passed by the caller (the smoothing step
  *       below treats it as origin 1 and subtracts one).
  */
 static void
 tcp_xmit_timer(struct tcpcb *tp, int rtt)
 {
 	int delta;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	tcpstat.tcps_rttupdated++;
 	tp->t_rttupdated++;
 	if (tp->t_srtt == 0) {
 		/*
 		 * First sample for this connection: take it unsmoothed.
 		 * t_srtt holds rtt shifted left by TCP_RTT_SHIFT, and the
 		 * variance starts at half the rtt in TCP_RTTVAR_SHIFT fixed
 		 * point (so our first retransmit happens at 3*rtt).
 		 */
 		tp->t_srtt = rtt << TCP_RTT_SHIFT;
 		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
 		tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
 	} else {
 		/*
 		 * Fixed-point form of the rfc793 smoothing with an alpha
 		 * of .875 (srtt = rtt/8 + srtt*7/8).  The sample is first
 		 * adjusted to origin 0.
 		 */
 		delta = ((rtt - 1) << TCP_DELTA_SHIFT)
 			- (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
 
 		tp->t_srtt += delta;
 		if (tp->t_srtt <= 0)
 			tp->t_srtt = 1;
 
 		/*
 		 * rttvar is a smoothed mean difference rather than a true
 		 * variance: rttvar = rttvar*3/4 + |delta| / 4, again in
 		 * fixed point.  The retransmit timer is later derived from
 		 * smoothed rtt + 4 times this value, which replaces
 		 * rfc793's wired-in beta.
 		 */
 		if (delta < 0)
 			delta = -delta;
 		delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
 		tp->t_rttvar += delta;
 		if (tp->t_rttvar <= 0)
 			tp->t_rttvar = 1;
 
 		/* Remember the lowest srtt + rttvar seen. */
 		if (tp->t_rttbest > tp->t_srtt + tp->t_rttvar)
 			tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
 	}
 
 	/* The sample has been used: stop timing, cancel any backoff. */
 	tp->t_rtttime = 0;
 	tp->t_rxtshift = 0;
 
 	/*
 	 * The retransmit should happen at rtt + 4 * rttvar.  The smoothing
 	 * above leaves srtt and rttvar each with about +1/2 tick of bias
 	 * on average, which supplies the 1/2 tick of rounding plus the
 	 * 1 tick of timer-firing uncertainty that we want.  That bias is
 	 * only statistical, though, so the result is also clamped from
 	 * below (the minimum feasible timer is 2 ticks) and from above by
 	 * TCPTV_REXMTMAX.
 	 */
 	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
 		      max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
 
 	/*
 	 * An ack arrived for a packet that wasn't retransmitted, so any
 	 * recently recorded error indication is probably stale and is
 	 * discarded.  This isn't quite right, but close enough for now
 	 * (a route might have failed after we sent a segment, and the
 	 * return path might not be symmetrical).
 	 */
 	tp->t_softerror = 0;
 }
 
 /*
  * Determine a reasonable value for maxseg size.
  * If the route is known, check route for mtu.
  * If none, use an mss that can be handled on the outgoing
  * interface without forcing IP to fragment; if bigger than
  * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
  * to utilize large mbufs.  If no route is found, route has no mtu,
  * or the destination isn't local, use a default, hopefully conservative
  * size (usually 512 or the default IP max size, but no more than the mtu
  * of the interface), as we can't discover anything about intervening
  * gateways or networks.  We also initialize the congestion/slow start
  * window to be a single segment if the destination isn't local.
  * While looking at the routing entry, we also initialize other path-dependent
  * parameters from pre-set or cached values in the routing entry.
  *
  * Also take into account the space needed for options that we
  * send regularly.  Make maxseg shorter by that amount to assure
  * that we can send maxseg amount of data even when the options
  * are present.  Store the upper limit of the length of options plus
  * data in maxopd.
  *
  * In case of T/TCP, we call this routine during implicit connection
  * setup as well (offer = -1), to initialize maxseg from the cached
  * MSS of our peer.
  *
  * NOTE that this routine is only called when we process an incoming
  * segment. Outgoing SYN/ACK MSS settings are handled in tcp_mssopt().
  */
 void
 tcp_mss(struct tcpcb *tp, int offer)
 {
 	int rtt, mss;
 	u_long bufsize;
 	u_long maxmtu;
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so;
 	struct hc_metrics_lite metrics;
 	int origoffer = offer;
 	int mtuflags = 0;
 #ifdef INET6
 	int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
 	size_t min_protoh = isipv6 ?
 			    sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
 			    sizeof (struct tcpiphdr);
 #else
 	const size_t min_protoh = sizeof(struct tcpiphdr);
 #endif
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* Initialize. */
 #ifdef INET6
 	if (isipv6) {
 		maxmtu = tcp_maxmtu6(&inp->inp_inc, &mtuflags);
 		tp->t_maxopd = tp->t_maxseg = tcp_v6mssdflt;
 	} else
 #endif
 	{
 		maxmtu = tcp_maxmtu(&inp->inp_inc, &mtuflags);
 		tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
 	}
 
 	/*
 	 * No route to sender, stay with default mss and return.
 	 */
 	if (maxmtu == 0)
 		return;
 
 	/* What have we got? */
 	switch (offer) {
 		case 0:
 			/*
 			 * Offer == 0 means that there was no MSS on the SYN
 			 * segment, in this case we use tcp_mssdflt as
 			 * already assigned to t_maxopd above.
 			 */
 			offer = tp->t_maxopd;
 			break;
 
 		case -1:
 			/*
 			 * Offer == -1 means that we didn't receive SYN yet.
 			 */
 			/* FALLTHROUGH */
 
 		default:
 			/*
 			 * Prevent DoS attack with too small MSS. Round up
 			 * to at least minmss.
 			 */
 			offer = max(offer, tcp_minmss);
 			/*
 			 * Sanity check: make sure that maxopd will be large
 			 * enough to allow some data on segments even if the
 			 * all the option space is used (40bytes).  Otherwise
 			 * funny things may happen in tcp_output.
 			 */
 			offer = max(offer, 64);
 	}
 
 	/*
 	 * rmx information is now retrieved from tcp_hostcache.
 	 */
 	tcp_hc_get(&inp->inp_inc, &metrics);
 
 	/*
 	 * If there's a discovered mtu in tcp hostcache, use it
 	 * else, use the link mtu.
 	 */
 	if (metrics.rmx_mtu)
 		mss = min(metrics.rmx_mtu, maxmtu) - min_protoh;
 	else {
 #ifdef INET6
 		if (isipv6) {
 			mss = maxmtu - min_protoh;
 			if (!path_mtu_discovery &&
 			    !in6_localaddr(&inp->in6p_faddr))
 				mss = min(mss, tcp_v6mssdflt);
 		} else
 #endif
 		{
 			mss = maxmtu - min_protoh;
 			if (!path_mtu_discovery &&
 			    !in_localaddr(inp->inp_faddr))
 				mss = min(mss, tcp_mssdflt);
 		}
 	}
 	mss = min(mss, offer);
 
 	/*
 	 * maxopd stores the maximum length of data AND options
 	 * in a segment; maxseg is the amount of data in a normal
 	 * segment.  We need to store this value (maxopd) apart
 	 * from maxseg, because now every segment carries options
 	 * and thus we normally have somewhat less data in segments.
 	 */
 	tp->t_maxopd = mss;
 
 	/*
 	 * origoffer==-1 indicates that no segments were received yet.
 	 * In this case we just guess.
 	 */
 	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
 	    (origoffer == -1 ||
 	     (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
 		mss -= TCPOLEN_TSTAMP_APPA;
 
 #if	(MCLBYTES & (MCLBYTES - 1)) == 0
 	if (mss > MCLBYTES)
 		mss &= ~(MCLBYTES-1);
 #else
 	if (mss > MCLBYTES)
 		mss = mss / MCLBYTES * MCLBYTES;
 #endif
 	tp->t_maxseg = mss;
 
 	/*
 	 * If there's a pipesize, change the socket buffer to that size,
 	 * don't change if sb_hiwat is different than default (then it
 	 * has been changed on purpose with setsockopt).
 	 * Make the socket buffers an integral number of mss units;
 	 * if the mss is larger than the socket buffer, decrease the mss.
 	 */
 	so = inp->inp_socket;
 	SOCKBUF_LOCK(&so->so_snd);
 	if ((so->so_snd.sb_hiwat == tcp_sendspace) && metrics.rmx_sendpipe)
 		bufsize = metrics.rmx_sendpipe;
 	else
 		bufsize = so->so_snd.sb_hiwat;
 	if (bufsize < mss)
 		mss = bufsize;
 	else {
 		bufsize = roundup(bufsize, mss);
 		if (bufsize > sb_max)
 			bufsize = sb_max;
 		if (bufsize > so->so_snd.sb_hiwat)
 			(void)sbreserve_locked(&so->so_snd, bufsize, so, NULL);
 	}
 	SOCKBUF_UNLOCK(&so->so_snd);
 	tp->t_maxseg = mss;
 
 	SOCKBUF_LOCK(&so->so_rcv);
 	if ((so->so_rcv.sb_hiwat == tcp_recvspace) && metrics.rmx_recvpipe)
 		bufsize = metrics.rmx_recvpipe;
 	else
 		bufsize = so->so_rcv.sb_hiwat;
 	if (bufsize > mss) {
 		bufsize = roundup(bufsize, mss);
 		if (bufsize > sb_max)
 			bufsize = sb_max;
 		if (bufsize > so->so_rcv.sb_hiwat)
 			(void)sbreserve_locked(&so->so_rcv, bufsize, so, NULL);
 	}
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	/*
 	 * While we're here, check the others too.
 	 */
 	if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) {
 		tp->t_srtt = rtt;
 		tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE;
 		tcpstat.tcps_usedrtt++;
 		if (metrics.rmx_rttvar) {
 			tp->t_rttvar = metrics.rmx_rttvar;
 			tcpstat.tcps_usedrttvar++;
 		} else {
 			/* default variation is +- 1 rtt */
 			tp->t_rttvar =
 			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
 		}
 		TCPT_RANGESET(tp->t_rxtcur,
 			      ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
 			      tp->t_rttmin, TCPTV_REXMTMAX);
 	}
 	if (metrics.rmx_ssthresh) {
 		/*
 		 * There's some sort of gateway or interface
 		 * buffer limit on the path.  Use this to set
 		 * the slow start threshold, but set the
 		 * threshold to no less than 2*mss.
 		 */
 		tp->snd_ssthresh = max(2 * mss, metrics.rmx_ssthresh);
 		tcpstat.tcps_usedssthresh++;
 	}
 	if (metrics.rmx_bandwidth)
 		tp->snd_bandwidth = metrics.rmx_bandwidth;
 
 	/*
 	 * Set the slow-start flight size depending on whether this
 	 * is a local network or not.
 	 *
 	 * Extend this so we cache the cwnd too and retrieve it here.
 	 * Make cwnd even bigger than RFC3390 suggests but only if we
 	 * have previous experience with the remote host. Be careful
 	 * not to make cwnd bigger than the remote receive window or our
 	 * own send socket buffer. Maybe put some additional upper bound
 	 * on the retrieved cwnd. Should do incremental updates to
 	 * hostcache when cwnd collapses so the next connection doesn't
 	 * overload the path again.
 	 *
 	 * RFC3390 says only do this if SYN or SYN/ACK didn't get lost.
 	 * We currently check only in syncache_socket for that.
 	 */
 #define TCP_METRICS_CWND
 #ifdef TCP_METRICS_CWND
 	if (metrics.rmx_cwnd)
 		tp->snd_cwnd = max(mss,
 				min(metrics.rmx_cwnd / 2,
 				 min(tp->snd_wnd, so->so_snd.sb_hiwat)));
 	else
 #endif
 	if (tcp_do_rfc3390)
 		tp->snd_cwnd = min(4 * mss, max(2 * mss, 4380));
 #ifdef INET6
 	else if ((isipv6 && in6_localaddr(&inp->in6p_faddr)) ||
 		 (!isipv6 && in_localaddr(inp->inp_faddr)))
 #else
 	else if (in_localaddr(inp->inp_faddr))
 #endif
 		tp->snd_cwnd = mss * ss_fltsz_local;
 	else
 		tp->snd_cwnd = mss * ss_fltsz;
 
 	/* Check the interface for TSO capabilities. */
 	if (mtuflags & CSUM_TSO)
 		tp->t_flags |= TF_TSO;
 }
 
 /*
  * Determine the MSS option to send on an outgoing SYN.
  *
  * Starts from the protocol default (tcp_v6mssdflt or tcp_mssdflt).  If
  * both the MTU reported by tcp_maxmtu()/tcp_maxmtu6() and the MTU
  * reported by tcp_hc_getmtu() are non-zero, the smaller one is used;
  * if only one is non-zero, that one is used.  The minimum IP + TCP
  * header size is subtracted from the chosen MTU to obtain the MSS.
  *
  * inc must not be NULL.  Returns the computed MSS.
  */
 int
 tcp_mssopt(struct in_conninfo *inc)
 {
 	int mss = 0;
 	u_long maxmtu = 0;
 	u_long thcmtu = 0;
 	size_t min_protoh;
 #ifdef INET6
 	int isipv6;
 #endif
 
 	KASSERT(inc != NULL, ("tcp_mssopt with NULL in_conninfo pointer"));
 
 #ifdef INET6
 	/*
 	 * Read inc only after the assertion above; dereferencing it in the
 	 * declaration would make the NULL check unreachable.
 	 */
 	isipv6 = inc->inc_isipv6 ? 1 : 0;
 	if (isipv6) {
 		mss = tcp_v6mssdflt;
 		maxmtu = tcp_maxmtu6(inc, NULL);
 		thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
 		min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
 	} else
 #endif
 	{
 		mss = tcp_mssdflt;
 		maxmtu = tcp_maxmtu(inc, NULL);
 		thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
 		min_protoh = sizeof(struct tcpiphdr);
 	}
 	/*
 	 * With both MTUs known take the smaller; with only one known,
 	 * max() picks it because the other is zero.
 	 */
 	if (maxmtu && thcmtu)
 		mss = min(maxmtu, thcmtu) - min_protoh;
 	else if (maxmtu || thcmtu)
 		mss = max(maxmtu, thcmtu) - min_protoh;
 
 	return (mss);
 }
 
 
 /*
  * When a partial ack arrives, force the retransmission of the
  * next unacknowledged segment.  tp->t_dupacks is deliberately left
  * untouched.  Pointing snd_nxt at th_ack forces the retransmission
  * timer to be started again.
  */
 static void
 tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
 {
 	tcp_seq saved_nxt;
 	u_long saved_cwnd;
 
 	/* Remember the values that are restored after the forced send. */
 	saved_nxt = tp->snd_nxt;
 	saved_cwnd = tp->snd_cwnd;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	tcp_timer_activate(tp, TT_REXMT, 0);
 	tp->t_rtttime = 0;
 	tp->snd_nxt = th->th_ack;
 
 	/*
 	 * Temporarily set snd_cwnd to one segment beyond the acknowledged
 	 * offset.  (tp->snd_una has not yet been updated when this
 	 * function is called.)
 	 */
 	tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
 	tp->t_flags |= TF_ACKNOW;
 	(void) tcp_output(tp);
 
 	/* Put back the saved window and, if it was further along, snd_nxt. */
 	tp->snd_cwnd = saved_cwnd;
 	if (SEQ_GT(saved_nxt, tp->snd_nxt))
 		tp->snd_nxt = saved_nxt;
 
 	/*
 	 * Partial window deflation: shrink cwnd by the amount just acked
 	 * (clamping at zero), then add back one segment.  Relies on the
 	 * fact that tp->snd_una has not been updated yet.
 	 */
 	tp->snd_cwnd = (tp->snd_cwnd > th->th_ack - tp->snd_una) ?
 	    tp->snd_cwnd - (th->th_ack - tp->snd_una) : 0;
 	tp->snd_cwnd += tp->t_maxseg;
 }
Index: head/sys/netinet/tcp_subr.c
===================================================================
--- head/sys/netinet/tcp_subr.c	(revision 178887)
+++ head/sys/netinet/tcp_subr.c	(revision 178888)
@@ -1,2171 +1,2177 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_mac.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/callout.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #ifdef INET6
 #include <sys/domain.h>
 #endif
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/random.h>
 
 #include <vm/uma.h>
 
 #include <net/route.h>
 #include <net/if.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 #include <netinet/in_pcb.h>
 #ifdef INET6
 #include <netinet6/in6_pcb.h>
 #endif
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #ifdef INET6
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
 #endif
 #include <netinet/ip_icmp.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
 #include <netinet/tcp_offload.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
 #include <netinet/tcpip.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
 #include <netinet6/ip6protosw.h>
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/xform.h>
 #ifdef INET6
 #include <netipsec/ipsec6.h>
 #endif
 #include <netipsec/key.h>
 #endif /*IPSEC*/
 
 #include <machine/in_cksum.h>
 #include <sys/md5.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Default maximum segment size, initialised to TCP_MSS and exported
  * as the read/write sysctl "mssdflt" under _net_inet_tcp.
  */
 int	tcp_mssdflt = TCP_MSS;
 SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW,
     &tcp_mssdflt, 0, "Default TCP Maximum Segment Size");
 
 #ifdef INET6
 /*
  * IPv6 counterpart of tcp_mssdflt, initialised to TCP6_MSS and exported
  * as the read/write sysctl "v6mssdflt".  Only built with INET6.
  */
 int	tcp_v6mssdflt = TCP6_MSS;
 SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
     CTLFLAG_RW, &tcp_v6mssdflt , 0,
     "Default TCP Maximum Segment Size for IPv6");
 #endif
 
 /*
  * Minimum MSS we accept and use. This prevents DoS attacks where
  * we are forced to a ridiculously low MSS like 20 and send hundreds
  * of packets instead of one. The effect scales with the available
  * bandwidth and quickly saturates the CPU and network interface
  * with packet generation and sending. Set to zero to disable MINMSS
  * checking. This setting prevents us from sending too small packets.
  *
  * Initialised to TCP_MINMSS and exported as the read/write sysctl
  * "minmss" under _net_inet_tcp.
  */
 int	tcp_minmss = TCP_MINMSS;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW,
     &tcp_minmss , 0, "Minimum TCP Maximum Segment Size");
 
 /* RFC 1323 extensions switch; enabled (1) by default. */
 int	tcp_do_rfc1323 = 1;
 SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW,
     &tcp_do_rfc1323, 0, "Enable rfc1323 (high performance TCP) extensions");
 
 /* Logging of errors caused by incoming segments; off (0) by default. */
 static int	tcp_log_debug = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW,
     &tcp_log_debug, 0, "Log errors caused by incoming TCP segments");
 
 /*
  * Size of the PCB hash table.  Zero here; tcp_init() stores the value
  * it actually used.  Exported with CTLFLAG_RDTUN.
  */
 static int	tcp_tcbhashsize = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN,
     &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
 
 /* Gate checked by tcp_drain() before it flushes reassembly queues. */
 static int	do_tcpdrain = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW,
     &do_tcpdrain, 0,
     "Enable tcp_drain routine for extra help when low on mbufs");
 
 /* Read-only view of tcbinfo.ipi_count. */
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD,
     &tcbinfo.ipi_count, 0, "Number of active PCBs");
 
 /* Not referenced in the visible part of this file; see the description string. */
 static int	icmp_may_rst = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW,
     &icmp_may_rst, 0,
     "Certain ICMP unreachable messages may abort connections in SYN_SENT");
 
 /* ISN secret reseed interval in seconds (per the description); 0 by default. */
 static int	tcp_isn_reseed_interval = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW,
     &tcp_isn_reseed_interval, 0, "Seconds between reseeding of ISN secret");
 
 /*
  * TCP bandwidth limiting sysctls.  Note that the default lower bound of
  * 1024 exists only for debugging.  A good production default would be
  * something like 6100.
  *
  * NOTE(review): tcp_inflight_min below is initialised to 6144, not 1024,
  * so the sentence above looks stale -- confirm and update.
  */
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, inflight, CTLFLAG_RW, 0,
     "TCP inflight data limiting");
 
 /* Master switch for inflight limiting; on (1) by default. */
 static int	tcp_inflight_enable = 1;
 SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, enable, CTLFLAG_RW,
     &tcp_inflight_enable, 0, "Enable automatic TCP inflight data limiting");
 
 /* Debug switch for the inflight calculations; off (0) by default. */
 static int	tcp_inflight_debug = 0;
 SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, debug, CTLFLAG_RW,
     &tcp_inflight_debug, 0, "Debug TCP inflight calculations");
 
 /*
  * No static initialiser: tcp_init() assigns TCPTV_INFLIGHT_RTTTHRESH.
  * Exported through the sysctl_msec_to_ticks handler.
  */
 static int	tcp_inflight_rttthresh;
 SYSCTL_PROC(_net_inet_tcp_inflight, OID_AUTO, rttthresh, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_inflight_rttthresh, 0, sysctl_msec_to_ticks, "I",
     "RTT threshold below which inflight will deactivate itself");
 
 /* Lower bound for the inflight window. */
 static int	tcp_inflight_min = 6144;
 SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, min, CTLFLAG_RW,
     &tcp_inflight_min, 0, "Lower-bound for TCP inflight window");
 
 /* Upper bound for the inflight window: TCP_MAXWIN shifted by TCP_MAX_WINSHIFT. */
 static int	tcp_inflight_max = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, max, CTLFLAG_RW,
     &tcp_inflight_max, 0, "Upper-bound for TCP inflight window");
 
 /* Stabilisation parameter; per the description string, 20 means 2 packets. */
 static int	tcp_inflight_stab = 20;
 SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, stab, CTLFLAG_RW,
     &tcp_inflight_stab, 0, "Inflight Algorithm Stabilization 20 = 2 packets");
 
 /* UMA zone for struct sackhole; created in tcp_init(). */
 uma_zone_t sack_hole_zone;
 
 /* Forward declarations of file-local functions. */
 static struct inpcb *tcp_notify(struct inpcb *, int);
 static void	tcp_isn_tick(void *);
 
 /*
  * Target size of TCP PCB hash tables. Must be a power of two.
  *
  * Note that this can be overridden by the kernel environment
  * variable net.inet.tcp.tcbhashsize
  */
 #ifndef TCBHASHSIZE
 #define TCBHASHSIZE	512
 #endif
 
 /*
  * XXX
  * Callouts should be moved into struct tcp directly.  They are currently
  * separate because the tcpcb structure is exported to userland for sysctl
  * parsing purposes, which do not know about callouts.
  *
  * tcp_newtcpcb() allocates one tcpcb_mem per connection and points
  * tcb.t_timers at the embedded tt member.
  */
 struct tcpcb_mem {
 	struct	tcpcb		tcb;	/* the control block itself */
 	struct	tcp_timer	tt;	/* its timers, reached via tcb.t_timers */
 };
 
 /* UMA zone of struct tcpcb_mem; created in tcp_init(). */
 static uma_zone_t tcpcb_zone;
 MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
 /* ISN callout: initialised in tcp_init(), stopped in tcp_fini(). */
 struct callout isn_callout;
 /* Mutex wrapped by the ISN_LOCK*() macros below. */
 static struct mtx isn_mtx;
 
 #define	ISN_LOCK_INIT()	mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
 #define	ISN_LOCK()	mtx_lock(&isn_mtx)
 #define	ISN_UNLOCK()	mtx_unlock(&isn_mtx)
 
 /*
  * Event handler registered in tcp_init() for maxsockets_change:
  * re-applies the current maxsockets value as the limit of the inpcb
  * and tcpcb zones and calls tcp_tw_zone_change().
  * The tag argument is unused.
  */
 static void
 tcp_zone_change(void *tag)
 {
 
 	uma_zone_set_max(tcbinfo.ipi_zone, maxsockets);
 	uma_zone_set_max(tcpcb_zone, maxsockets);
 	tcp_tw_zone_change();
 }
 
 /*
  * Zone init callback handed to uma_zcreate() for the "inpcb" zone in
  * tcp_init(): sets up the lock of the inpcb stored at mem.
  * size and flags are unused.  Always returns 0.
  */
 static int
 tcp_inpcb_init(void *mem, int size, int flags)
 {
 	struct inpcb *pcb;
 
 	pcb = mem;
 	INP_LOCK_INIT(pcb, "inp", "tcpinp");
 
 	return (0);
 }
 
 /*
  * TCP initialization: set the timer defaults, create the PCB hash
  * tables and UMA zones, initialise the dependent subsystems and the
  * ISN callout, and register the shutdown and maxsockets event handlers.
  */
 void
 tcp_init(void)
 {
 
 	int hashsize = TCBHASHSIZE;
 	/* Load the timer variables from their TCPTV_* constants. */
 	tcp_delacktime = TCPTV_DELACK;
 	tcp_keepinit = TCPTV_KEEP_INIT;
 	tcp_keepidle = TCPTV_KEEP_IDLE;
 	tcp_keepintvl = TCPTV_KEEPINTVL;
 	tcp_maxpersistidle = TCPTV_KEEP_IDLE;
 	tcp_msl = TCPTV_MSL;
 	tcp_rexmit_min = TCPTV_MIN;
 	/* Never let the minimum retransmit value drop below 1. */
 	if (tcp_rexmit_min < 1)
 		tcp_rexmit_min = 1;
 	tcp_rexmit_slop = TCPTV_CPU_VAR;
 	tcp_inflight_rttthresh = TCPTV_INFLIGHT_RTTTHRESH;
 	tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
 
 	INP_INFO_LOCK_INIT(&tcbinfo, "tcp");
 	LIST_INIT(&tcb);
 	tcbinfo.ipi_listhead = &tcb;
 	/* The tunable may override the compile-time hash size. */
 	TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize);
 	if (!powerof2(hashsize)) {
 		printf("WARNING: TCB hash size not a power of 2\n");
 		hashsize = 512; /* safe default */
 	}
 	/* Record the size actually used; it is exported via sysctl. */
 	tcp_tcbhashsize = hashsize;
 	tcbinfo.ipi_hashbase = hashinit(hashsize, M_PCB,
 	    &tcbinfo.ipi_hashmask);
 	tcbinfo.ipi_porthashbase = hashinit(hashsize, M_PCB,
 	    &tcbinfo.ipi_porthashmask);
 	tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb),
 	    NULL, NULL, tcp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uma_zone_set_max(tcbinfo.ipi_zone, maxsockets);
 	/*
 	 * TCP_MINPROTOHDR is the header size used for the two checks
 	 * below; with INET6 it is the IPv6 + TCP header size.
 	 */
 #ifdef INET6
 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
 #else /* INET6 */
 #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr))
 #endif /* INET6 */
 	if (max_protohdr < TCP_MINPROTOHDR)
 		max_protohdr = TCP_MINPROTOHDR;
 	/* Link header plus protocol headers must fit within MHLEN. */
 	if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
 		panic("tcp_init");
 #undef TCP_MINPROTOHDR
 	/*
 	 * These have to be type stable for the benefit of the timers.
 	 */
 	tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uma_zone_set_max(tcpcb_zone, maxsockets);
 	tcp_tw_init();
 	syncache_init();
 	tcp_hc_init();
 	tcp_reass_init();
 	ISN_LOCK_INIT();
 	callout_init(&isn_callout, CALLOUT_MPSAFE);
 	/* First ISN tick is invoked directly, after the lock and callout exist. */
 	tcp_isn_tick(NULL);
 	/* tcp_fini() runs at shutdown_pre_sync. */
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL,
 		SHUTDOWN_PRI_DEFAULT);
 	sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	/* tcp_zone_change() runs on maxsockets_change. */
 	EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
 		EVENTHANDLER_PRI_ANY);
 }
 
 /*
  * Shutdown handler registered in tcp_init() for shutdown_pre_sync:
  * stops the ISN callout.  The xtp argument is unused.
  */
 void
 tcp_fini(void *xtp)
 {
 
 	callout_stop(&isn_callout);
 }
 
 /*
  * Fill in the IP and TCP headers for an outgoing packet, given the inpcb.
  * tcp_template used to store this data in mbufs, but we now recopy it out
  * of the pcb each time to conserve mbufs.
  *
  * inp      connection supplying addresses, ports and TOS/TTL or flow
  *          info; its write lock must be held.
  * ip_ptr   receives a struct ip6_hdr when INP_IPV6 is set in
  *          inp->inp_vflag (INET6 kernels only), otherwise a struct ip.
  * tcp_ptr  receives the struct tcphdr.
  */
 void
 tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
 {
 	struct tcphdr *th = (struct tcphdr *)tcp_ptr;
 
 	INP_WLOCK_ASSERT(inp);
 
 #ifdef INET6
 	if ((inp->inp_vflag & INP_IPV6) != 0) {
 		struct ip6_hdr *ip6;
 
 		ip6 = (struct ip6_hdr *)ip_ptr;
 		/* Replace only the flow-info and version bits. */
 		ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
 			(inp->in6p_flowinfo & IPV6_FLOWINFO_MASK);
 		ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
 			(IPV6_VERSION & IPV6_VERSION_MASK);
 		ip6->ip6_nxt = IPPROTO_TCP;
 		/*
 		 * ip6_plen is stored in network byte order, as
 		 * tcp_respond() does; without htons() the value is
 		 * wrong on little-endian machines.
 		 */
 		ip6->ip6_plen = htons(sizeof(struct tcphdr));
 		ip6->ip6_src = inp->in6p_laddr;
 		ip6->ip6_dst = inp->in6p_faddr;
 	} else
 #endif
 	{
 		struct ip *ip;
 
 		ip = (struct ip *)ip_ptr;
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = 5;		/* header length in 32-bit words */
 		ip->ip_tos = inp->inp_ip_tos;
 		/* Length, id, offset and checksum are left zero here. */
 		ip->ip_len = 0;
 		ip->ip_id = 0;
 		ip->ip_off = 0;
 		ip->ip_ttl = inp->inp_ip_ttl;
 		ip->ip_sum = 0;
 		ip->ip_p = IPPROTO_TCP;
 		ip->ip_src = inp->inp_laddr;
 		ip->ip_dst = inp->inp_faddr;
 	}
 	/* TCP header: ports from the pcb, everything else cleared. */
 	th->th_sport = inp->inp_lport;
 	th->th_dport = inp->inp_fport;
 	th->th_seq = 0;
 	th->th_ack = 0;
 	th->th_x2 = 0;
 	th->th_off = 5;		/* header length in 32-bit words */
 	th->th_flags = 0;
 	th->th_win = 0;
 	th->th_urp = 0;
 	th->th_sum = 0;		/* in_pseudo() is called later for ipv4 */
 }
 
 /*
  * Build the header template used to send tcp packets on a connection:
  * grab an mbuf and fill in a skeletal tcp/ip header inside it.  The only
  * consumer of this function is the keepalive code, which hands the
  * result to tcp_respond.
  *
  * Returns a pointer to the template stored in the mbuf's data area,
  * or a null pointer when no mbuf could be obtained.
  */
 struct tcptemp *
 tcpip_maketemplate(struct inpcb *inp)
 {
 	struct mbuf *mb;
 	struct tcptemp *tmpl;
 
 	mb = m_get(M_DONTWAIT, MT_DATA);
 	if (mb == NULL)
 		return (NULL);
 
 	mb->m_len = sizeof(struct tcptemp);
 	tmpl = mtod(mb, struct tcptemp *);
 	tcpip_fillheaders(inp, (void *)&tmpl->tt_ipgen, (void *)&tmpl->tt_t);
 
 	return (tmpl);
 }
 
 /*
  * Send a single message to the TCP at address specified by
  * the given TCP/IP header.  If m == NULL, then we make a copy
  * of the tcpiphdr at ti and send directly to the addressed host.
  * This is used to force keep alive messages out using the TCP
  * template for a connection.  If flags are given then we send
  * a message back to the TCP which originated the segment ti,
  * and discard the mbuf containing it and any other attached mbufs.
  *
  * In any case the ack and sequence number of the transmitted
  * segment are as specified by the parameters.
  *
  * NOTE: If m != NULL, then ti must point to *inside* the mbuf.
  *
  * NOTE(review): "ti" predates the current signature; it appears to
  * correspond to the ipgen/th arguments -- confirm.
  *
  * At least one of tp and m must be non-NULL.  When tp is non-NULL its
  * inpcb must be write-locked.
  */
 void
 tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
     tcp_seq ack, tcp_seq seq, int flags)
 {
 	int tlen;
 	int win = 0;
 	struct ip *ip;
 	struct tcphdr *nth;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 	int isipv6;
 #endif /* INET6 */
 	int ipflags = 0;
 	struct inpcb *inp;
 
 	KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
 
 #ifdef INET6
 	/* The version field of the supplied header selects IPv4 or IPv6. */
 	isipv6 = ((struct ip *)ipgen)->ip_v == 6;
 	ip6 = ipgen;
 #endif /* INET6 */
 	ip = ipgen;
 
 	if (tp != NULL) {
 		inp = tp->t_inpcb;
 		KASSERT(inp != NULL, ("tcp control block w/o inpcb"));
 		INP_WLOCK_ASSERT(inp);
 	} else
 		inp = NULL;
 
 	/*
 	 * With a tcpcb and no RST, advertise the receive buffer space,
 	 * capped at TCP_MAXWIN << rcv_scale.  Otherwise win stays 0.
 	 */
 	if (tp != NULL) {
 		if (!(flags & TH_RST)) {
 			win = sbspace(&inp->inp_socket->so_rcv);
 			if (win > (long)TCP_MAXWIN << tp->rcv_scale)
 				win = (long)TCP_MAXWIN << tp->rcv_scale;
 		}
 	}
 	if (m == NULL) {
 		/* No mbuf supplied: allocate one and copy the headers in. */
 		m = m_gethdr(M_DONTWAIT, MT_DATA);
 		if (m == NULL)
 			return;
 		tlen = 0;
 		m->m_data += max_linkhdr;
 #ifdef INET6
 		if (isipv6) {
 			bcopy((caddr_t)ip6, mtod(m, caddr_t),
 			      sizeof(struct ip6_hdr));
 			ip6 = mtod(m, struct ip6_hdr *);
 			nth = (struct tcphdr *)(ip6 + 1);
 		} else
 #endif /* INET6 */
 	      {
 		bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
 		ip = mtod(m, struct ip *);
 		nth = (struct tcphdr *)(ip + 1);
 	      }
 		bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
 		/* On this path the caller's flags are replaced by TH_ACK. */
 		flags = TH_ACK;
 	} else {
+		/*
+		 * Reuse the mbuf.
+		 * XXX MRT We inherit the FIB, which is lucky.
+		 */
 		m_freem(m->m_next);
 		m->m_next = NULL;
 		m->m_data = (caddr_t)ipgen;
 		/* m_len is set later */
 		tlen = 0;
 		/* Swap source and destination so the reply goes back to the sender. */
 #define xchg(a,b,type) { type t; t=a; a=b; b=t; }
 #ifdef INET6
 		if (isipv6) {
 			xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
 			nth = (struct tcphdr *)(ip6 + 1);
 		} else
 #endif /* INET6 */
 	      {
 		xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, n_long);
 		nth = (struct tcphdr *)(ip + 1);
 	      }
 		if (th != nth) {
 			/*
 			 * this is usually a case when an extension header
 			 * exists between the IPv6 header and the
 			 * TCP header.
 			 */
 			nth->th_sport = th->th_sport;
 			nth->th_dport = th->th_dport;
 		}
 		xchg(nth->th_dport, nth->th_sport, n_short);
 #undef xchg
 	}
 #ifdef INET6
 	if (isipv6) {
 		ip6->ip6_flow = 0;
 		ip6->ip6_vfc = IPV6_VERSION;
 		ip6->ip6_nxt = IPPROTO_TCP;
 		ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) +
 						tlen));
 		tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
 	} else
 #endif
 	{
 		tlen += sizeof (struct tcpiphdr);
 		ip->ip_len = tlen;
 		ip->ip_ttl = ip_defttl;
 		if (path_mtu_discovery)
 			ip->ip_off |= IP_DF;
 	}
 	/* The packet consists of the headers only. */
 	m->m_len = tlen;
 	m->m_pkthdr.len = tlen;
 	m->m_pkthdr.rcvif = NULL;
 #ifdef MAC
 	if (inp != NULL) {
 		/*
 		 * Packet is associated with a socket, so allow the
 		 * label of the response to reflect the socket label.
 		 */
 		INP_WLOCK_ASSERT(inp);
 		mac_inpcb_create_mbuf(inp, m);
 	} else {
 		/*
 		 * Packet is not associated with a socket, so possibly
 		 * update the label in place.
 		 */
 		mac_netinet_tcp_reply(m);
 	}
 #endif
 	/* Fill in the TCP header fields of the outgoing segment. */
 	nth->th_seq = htonl(seq);
 	nth->th_ack = htonl(ack);
 	nth->th_x2 = 0;
 	nth->th_off = sizeof (struct tcphdr) >> 2;
 	nth->th_flags = flags;
 	/* With a tcpcb the window is shifted down by rcv_scale. */
 	if (tp != NULL)
 		nth->th_win = htons((u_short) (win >> tp->rcv_scale));
 	else
 		nth->th_win = htons((u_short)win);
 	nth->th_urp = 0;
 #ifdef INET6
 	if (isipv6) {
 		/* IPv6: checksum computed here with in6_cksum(). */
 		nth->th_sum = 0;
 		nth->th_sum = in6_cksum(m, IPPROTO_TCP,
 					sizeof(struct ip6_hdr),
 					tlen - sizeof(struct ip6_hdr));
 		ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb :
 		    NULL, NULL);
 	} else
 #endif /* INET6 */
 	{
 		/*
 		 * IPv4: store the pseudo-header sum and set CSUM_TCP in
 		 * csum_flags with the th_sum offset in csum_data.
 		 */
 		nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 		    htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
 		m->m_pkthdr.csum_flags = CSUM_TCP;
 		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 	}
 #ifdef TCPDEBUG
 	if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
 #endif
 	/* Hand the packet to the IP layer; the return value is ignored. */
 #ifdef INET6
 	if (isipv6)
 		(void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp);
 	else
 #endif /* INET6 */
 	(void) ip_output(m, NULL, NULL, ipflags, NULL, inp);
 }
 
 /*
  * Create a new TCP control block, making an
  * empty reassembly queue and hooking it to the argument
  * protocol control block.  The `inp' parameter must have
  * come from the zone allocator set up in tcp_init().
  */
 struct tcpcb *
 tcp_newtcpcb(struct inpcb *inp)
 {
 	struct tcpcb_mem *tm;
 	struct tcpcb *tp;
 #ifdef INET6
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
 
 	tm = uma_zalloc(tcpcb_zone, M_NOWAIT | M_ZERO);
 	if (tm == NULL)
 		return (NULL);
 	tp = &tm->tcb;
 	tp->t_timers = &tm->tt;
 	/*	LIST_INIT(&tp->t_segq); */	/* XXX covered by M_ZERO */
 	tp->t_maxseg = tp->t_maxopd =
 #ifdef INET6
 		isipv6 ? tcp_v6mssdflt :
 #endif /* INET6 */
 		tcp_mssdflt;
 
 	/* Set up our timeouts. */
 	callout_init(&tp->t_timers->tt_rexmt, CALLOUT_MPSAFE);
 	callout_init(&tp->t_timers->tt_persist, CALLOUT_MPSAFE);
 	callout_init(&tp->t_timers->tt_keep, CALLOUT_MPSAFE);
 	callout_init(&tp->t_timers->tt_2msl, CALLOUT_MPSAFE);
 	callout_init(&tp->t_timers->tt_delack, CALLOUT_MPSAFE);
 
 	if (tcp_do_rfc1323)
 		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
 	if (tcp_do_sack)
 		tp->t_flags |= TF_SACK_PERMIT;
 	TAILQ_INIT(&tp->snd_holes);
 	tp->t_inpcb = inp;	/* XXX */
 	/*
 	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
 	 * rtt estimate.  Set rttvar so that srtt + 4 * rttvar gives
 	 * reasonable initial retransmit time.
 	 */
 	tp->t_srtt = TCPTV_SRTTBASE;
 	tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
 	tp->t_rttmin = tcp_rexmit_min;
 	tp->t_rxtcur = TCPTV_RTOBASE;
 	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->t_rcvtime = ticks;
 	tp->t_bw_rtttime = ticks;
 	/*
 	 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
 	 * because the socket may be bound to an IPv6 wildcard address,
 	 * which may match an IPv4-mapped IPv6 address.
 	 */
 	inp->inp_ip_ttl = ip_defttl;
 	inp->inp_ppcb = tp;
 	return (tp);		/* XXX */
 }
 
 /*
  * Drop a TCP connection and report the given error on its socket.
  * If a SYN has been received on the connection, the state is set to
  * CLOSED and a RST is sent to the peer first.  An ETIMEDOUT error is
  * replaced by the recorded soft error when there is one.
  *
  * Requires the tcbinfo write lock and the inpcb write lock.
  * Returns whatever tcp_close() returns.
  */
 struct tcpcb *
 tcp_drop(struct tcpcb *tp, int errno)
 {
 	struct socket *so;
 
 	so = tp->t_inpcb->inp_socket;
 
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	if (!TCPS_HAVERCVDSYN(tp->t_state)) {
 		tcpstat.tcps_conndrops++;
 	} else {
 		tp->t_state = TCPS_CLOSED;
 		(void) tcp_output_reset(tp);
 		tcpstat.tcps_drops++;
 	}
 
 	/* Prefer the earlier soft error over a bare timeout. */
 	if (errno == ETIMEDOUT && tp->t_softerror)
 		errno = tp->t_softerror;
 	so->so_error = errno;
 
 	return (tcp_close(tp));
 }
 
 /*
  * Tear down a tcpcb: stop its timers, pass connection metrics to
  * tcp_hc_update() when at least 4 rtt samples were taken, free the
  * reassembly queue and the SACK holes, detach any offload device,
  * unlink the tcpcb from its inpcb and return it to tcpcb_zone.
  *
  * The inpcb write lock must be held.
  */
 void
 tcp_discardcb(struct tcpcb *tp)
 {
 	struct tseg_qent *q;
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so = inp->inp_socket;
 #ifdef INET6
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
 
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * Make sure that all of our timers are stopped before we
 	 * delete the PCB.
 	 */
 	callout_stop(&tp->t_timers->tt_rexmt);
 	callout_stop(&tp->t_timers->tt_persist);
 	callout_stop(&tp->t_timers->tt_keep);
 	callout_stop(&tp->t_timers->tt_2msl);
 	callout_stop(&tp->t_timers->tt_delack);
 
 	/*
 	 * If we got enough samples through the srtt filter,
 	 * save the rtt and rttvar in the routing entry.
 	 * 'Enough' is arbitrarily defined as 4 rtt samples.
 	 * 4 samples is enough for the srtt filter to converge
 	 * to within enough % of the correct value; fewer samples
 	 * and we could save a bogus rtt. The danger is not high
 	 * as tcp quickly recovers from everything.
 	 * XXX: Works very well but needs some more statistics!
 	 */
 	if (tp->t_rttupdated >= 4) {
 		struct hc_metrics_lite metrics;
 		u_long ssthresh;
 
 		bzero(&metrics, sizeof(metrics));
 		/*
 		 * Update the ssthresh always when the conditions below
 		 * are satisfied. This gives us better new start value
 		 * for the congestion avoidance for new connections.
 		 * ssthresh is only set if packet loss occurred on a session.
 		 *
 		 * XXXRW: 'so' may be NULL here, and/or socket buffer may be
 		 * being torn down.  Ideally this code would not use 'so'.
 		 */
 		ssthresh = tp->snd_ssthresh;
 		if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) {
 			/*
 			 * convert the limit from user data bytes to
 			 * packets then to packet data bytes.
 			 */
 			/* Round to the nearest whole segment, minimum 2. */
 			ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg;
 			if (ssthresh < 2)
 				ssthresh = 2;
 			/*
 			 * Multiply by segment size plus header size; the
 			 * header size depends on the address family when
 			 * INET6 is compiled in.
 			 */
 			ssthresh *= (u_long)(tp->t_maxseg +
 #ifdef INET6
 				      (isipv6 ? sizeof (struct ip6_hdr) +
 					       sizeof (struct tcphdr) :
 #endif
 				       sizeof (struct tcpiphdr)
 #ifdef INET6
 				       )
 #endif
 				      );
 		} else
 			ssthresh = 0;
 		metrics.rmx_ssthresh = ssthresh;
 
 		metrics.rmx_rtt = tp->t_srtt;
 		metrics.rmx_rttvar = tp->t_rttvar;
 		/* XXX: This wraps if the pipe is more than 4 Gbit per second */
 		metrics.rmx_bandwidth = tp->snd_bandwidth;
 		metrics.rmx_cwnd = tp->snd_cwnd;
 		metrics.rmx_sendpipe = 0;
 		metrics.rmx_recvpipe = 0;
 
 		tcp_hc_update(&inp->inp_inc, &metrics);
 	}
 
 	/* free the reassembly queue, if any */
 	while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
 		LIST_REMOVE(q, tqe_q);
 		m_freem(q->tqe_m);
 		uma_zfree(tcp_reass_zone, q);
 		/* Keep the per-connection and global counts in step. */
 		tp->t_segqlen--;
 		tcp_reass_qsize--;
 	}
 	/* Disconnect offload device, if any. */
 	tcp_offload_detach(tp);
 		
 	tcp_free_sackholes(tp);
 	/* Break the links in both directions before freeing. */
 	inp->inp_ppcb = NULL;
 	tp->t_inpcb = NULL;
 	uma_zfree(tcpcb_zone, tp);
 }
 
 /*
  * Try to close a TCP control block: mark the pcb as dropped and, when
  * the pcb holds a reference on the socket (INP_SOCKREF), give that
  * reference up and free the socket.
  *
  * Requires the tcbinfo write lock and the inpcb write lock.
  * Returns NULL when the socket reference was released (the inpcb is
  * unlocked on that path), otherwise returns tp.
  */
 struct tcpcb *
 tcp_close(struct tcpcb *tp)
 {
 	struct inpcb *inp;
 	struct socket *so;
 
 	inp = tp->t_inpcb;
 
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	/* Let offload devices know when a listener goes away. */
 	if (tp->t_state == TCPS_LISTEN)
 		tcp_offload_listen_close(tp);
 
 	in_pcbdrop(inp);
 	tcpstat.tcps_closed++;
 
 	KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
 	so = inp->inp_socket;
 	soisdisconnected(so);
 
 	/* Without a socket reference there is nothing more to release. */
 	if ((inp->inp_vflag & INP_SOCKREF) == 0)
 		return (tp);
 
 	KASSERT(so->so_state & SS_PROTOREF,
 	    ("tcp_close: !SS_PROTOREF"));
 	inp->inp_vflag &= ~INP_SOCKREF;
 	INP_WUNLOCK(inp);
 	ACCEPT_LOCK();
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_PROTOREF;
 	sofree(so);
 
 	return (NULL);
 }
 
 /*
  * Release memory held by TCP: when do_tcpdrain is set, walk every pcb
  * that is not in TIME_WAIT, flush its reassembly queue and clean its
  * SACK report.
  *
  * XXX: The "Net/3" implementation doesn't imply that the TCP
  *      reassembly queue should be flushed, but in a situation
  *      where we're really low on mbufs, this is potentially
  *      useful.
  */
 void
 tcp_drain(void)
 {
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct tseg_qent *qe;
 
 	if (!do_tcpdrain)
 		return;
 
 	INP_INFO_RLOCK(&tcbinfo);
 	LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) {
 		if (inp->inp_vflag & INP_TIMEWAIT)
 			continue;
 
 		INP_WLOCK(inp);
 		tp = intotcpcb(inp);
 		if (tp != NULL) {
 			/* Pop and free queued segments until none are left. */
 			for (;;) {
 				qe = LIST_FIRST(&tp->t_segq);
 				if (qe == NULL)
 					break;
 				LIST_REMOVE(qe, tqe_q);
 				m_freem(qe->tqe_m);
 				uma_zfree(tcp_reass_zone, qe);
 				tp->t_segqlen--;
 				tcp_reass_qsize--;
 			}
 			tcp_clean_sackreport(tp);
 		}
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(&tcbinfo);
 }
 
 /*
  * Notify a tcp user of an asynchronous error;
  * store error as soft error, but wake up user
  * (for now, won't do anything until can select for soft error).
  *
  * Do not wake up user since there currently is no mechanism for
  * reporting soft errors (yet - a kqueue filter may be added).
  */
 static struct inpcb *
 tcp_notify(struct inpcb *inp, int error)
 {
 	struct tcpcb *tp;
 
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	if ((inp->inp_vflag & INP_TIMEWAIT) ||
 	    (inp->inp_vflag & INP_DROPPED))
 		return (inp);
 
 	tp = intotcpcb(inp);
 	KASSERT(tp != NULL, ("tcp_notify: tp == NULL"));
 
 	/*
 	 * Ignore some errors if we are hooked up.
 	 * If connection hasn't completed, has retransmitted several times,
 	 * and receives a second error, give up now.  This is better
 	 * than waiting a long time to establish a connection that
 	 * can never complete.
 	 */
 	if (tp->t_state == TCPS_ESTABLISHED &&
 	    (error == EHOSTUNREACH || error == ENETUNREACH ||
 	     error == EHOSTDOWN)) {
 		return (inp);
 	} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
 	    tp->t_softerror) {
 		tp = tcp_drop(tp, error);
 		if (tp != NULL)
 			return (inp);
 		else
 			return (NULL);
 	} else {
 		tp->t_softerror = error;
 		return (inp);
 	}
 #if 0
 	wakeup( &so->so_timeo);
 	sorwakeup(so);
 	sowwakeup(so);
 #endif
 }
 
 /*
  * Sysctl handler that exports the list of TCP connections.
  *
  * Output layout: a leading struct xinpgen, whatever syncache_pcblist()
  * emits, one struct xtcpcb per inpcb that the requesting credential may
  * see, and a trailing struct xinpgen carrying the generation counts as
  * they stand at the end of the request, so the caller can tell whether
  * the set of connections changed while it was being copied out.
  *
  * When no output buffer is supplied, only a size estimate (with n/8
  * extra entries of slop) is stored in req->oldidx.  The node cannot be
  * written: a request carrying new data fails with EPERM.
  *
  * Returns 0 on success or an errno value.  Copying stops at the first
  * SYSCTL_OUT() failure and that error is returned.
  */
 static int
 tcp_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	int error, i, m, n, pcb_count;
 	struct inpcb *inp, **inp_list;
 	inp_gen_t gencnt;
 	struct xinpgen xig;
 
 	/*
 	 * The process of preparing the TCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == NULL) {
 		m = syncache_pcbcount();
 		n = tcbinfo.ipi_count;
 		req->oldidx = 2 * (sizeof xig)
 			+ ((m + n) + n/8) * sizeof(struct xtcpcb);
 		return (0);
 	}
 
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	INP_INFO_RLOCK(&tcbinfo);
 	gencnt = tcbinfo.ipi_gencnt;
 	n = tcbinfo.ipi_count;
 	INP_INFO_RUNLOCK(&tcbinfo);
 
 	m = syncache_pcbcount();
 
 	error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
 		+ (n + m) * sizeof(struct xtcpcb));
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Zero the header first so that no uninitialized stack bytes
 	 * (e.g. structure padding) are copied out to userland.
 	 */
 	bzero(&xig, sizeof(xig));
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n + m;
 	xig.xig_gen = gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return (error);
 
 	error = syncache_pcblist(req, m, &pcb_count);
 	if (error)
 		return (error);
 
 	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
 	if (inp_list == NULL)
 		return (ENOMEM);
 
 	/*
 	 * Pass 1: under the pcbinfo lock, collect up to n inpcbs that are
 	 * no newer than the sampled generation and that the caller is
 	 * allowed to see.
 	 */
 	INP_INFO_RLOCK(&tcbinfo);
 	for (inp = LIST_FIRST(tcbinfo.ipi_listhead), i = 0; inp != NULL && i
 	    < n; inp = LIST_NEXT(inp, inp_list)) {
 		INP_WLOCK(inp);
 		if (inp->inp_gencnt <= gencnt) {
 			/*
 			 * XXX: This use of cr_cansee(), introduced with
 			 * TCP state changes, is not quite right, but for
 			 * now, better than nothing.
 			 */
 			if (inp->inp_vflag & INP_TIMEWAIT) {
 				if (intotw(inp) != NULL)
 					error = cr_cansee(req->td->td_ucred,
 					    intotw(inp)->tw_cred);
 				else
 					error = EINVAL;	/* Skip this inp. */
 			} else
 				error = cr_canseesocket(req->td->td_ucred,
 				    inp->inp_socket);
 			if (error == 0)
 				inp_list[i++] = inp;
 		}
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(&tcbinfo);
 	n = i;
 
 	/*
 	 * Pass 2: snapshot each collected inpcb under its own lock and copy
 	 * the snapshot out after the lock is dropped.  Stop as soon as a
 	 * copy-out fails so that the failure is not overwritten by a later
 	 * iteration.
 	 */
 	error = 0;
 	for (i = 0; error == 0 && i < n; i++) {
 		inp = inp_list[i];
 		INP_WLOCK(inp);
 		if (inp->inp_gencnt <= gencnt) {
 			struct xtcpcb xt;
 			void *inp_ppcb;
 
 			bzero(&xt, sizeof(xt));
 			xt.xt_len = sizeof xt;
 			/* XXX should avoid extra copy */
 			bcopy(inp, &xt.xt_inp, sizeof *inp);
 			inp_ppcb = inp->inp_ppcb;
 			if (inp_ppcb == NULL)
 				bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
 			else if (inp->inp_vflag & INP_TIMEWAIT) {
 				/* Only the state is reported for TIME_WAIT. */
 				bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
 				xt.xt_tp.t_state = TCPS_TIME_WAIT;
 			} else
 				bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
 			if (inp->inp_socket != NULL)
 				sotoxsocket(inp->inp_socket, &xt.xt_socket);
 			else {
 				bzero(&xt.xt_socket, sizeof xt.xt_socket);
 				xt.xt_socket.xso_protocol = IPPROTO_TCP;
 			}
 			xt.xt_inp.inp_gencnt = inp->inp_gencnt;
 			INP_WUNLOCK(inp);
 			error = SYSCTL_OUT(req, &xt, sizeof xt);
 		} else
 			INP_WUNLOCK(inp);
 	}
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.
 		 * If the generation differs from what we told
 		 * her before, she knows that something happened
 		 * while we were processing this request, and it
 		 * might be necessary to retry.
 		 */
 		INP_INFO_RLOCK(&tcbinfo);
 		xig.xig_gen = tcbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = tcbinfo.ipi_count + pcb_count;
 		INP_INFO_RUNLOCK(&tcbinfo);
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 	free(inp_list, M_TEMP);
 	return (error);
 }
 
 /*
  * Attach tcp_pcblist() as the read-only "pcblist" node under _net_inet_tcp;
  * the exported records are struct xtcpcb.
  */
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
     tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
 
 static int
 tcp_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct xucred xuc;
 	struct sockaddr_in addrs[2];
 	struct inpcb *inp;
 	int error;
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error)
 		return (error);
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error)
 		return (error);
 	INP_INFO_RLOCK(&tcbinfo);
 	inp = in_pcblookup_hash(&tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
 	    addrs[0].sin_addr, addrs[0].sin_port, 0, NULL);
 	if (inp == NULL) {
 		error = ENOENT;
 		goto outunlocked;
 	}
 	INP_WLOCK(inp);
 	if (inp->inp_socket == NULL) {
 		error = ENOENT;
 		goto out;
 	}
 	error = cr_canseesocket(req->td->td_ucred, inp->inp_socket);
 	if (error)
 		goto out;
 	cru2x(inp->inp_socket->so_cred, &xuc);
 out:
 	INP_WUNLOCK(inp);
 outunlocked:
 	INP_INFO_RUNLOCK(&tcbinfo);
 	if (error == 0)
 		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
 	return (error);
 }
 
 /*
  * Attach tcp_getcred() as the "getcred" node under _net_inet_tcp.  The
  * node is read/write because the handler reads the connection addresses
  * from the request's new data and returns a struct xucred as old data.
  */
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred,
     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
     tcp_getcred, "S,xucred", "Get the xucred of a TCP connection");
 
 #ifdef INET6
 static int
 tcp6_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct xucred xuc;
 	struct sockaddr_in6 addrs[2];
 	struct inpcb *inp;
 	int error, mapped = 0;
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error)
 		return (error);
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error)
 		return (error);
 	if ((error = sa6_embedscope(&addrs[0], ip6_use_defzone)) != 0 ||
 	    (error = sa6_embedscope(&addrs[1], ip6_use_defzone)) != 0) {
 		return (error);
 	}
 	if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) {
 		if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr))
 			mapped = 1;
 		else
 			return (EINVAL);
 	}
 
 	INP_INFO_RLOCK(&tcbinfo);
 	if (mapped == 1)
 		inp = in_pcblookup_hash(&tcbinfo,
 			*(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12],
 			addrs[1].sin6_port,
 			*(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12],
 			addrs[0].sin6_port,
 			0, NULL);
 	else
 		inp = in6_pcblookup_hash(&tcbinfo,
 			&addrs[1].sin6_addr, addrs[1].sin6_port,
 			&addrs[0].sin6_addr, addrs[0].sin6_port, 0, NULL);
 	if (inp == NULL) {
 		error = ENOENT;
 		goto outunlocked;
 	}
 	INP_WLOCK(inp);
 	if (inp->inp_socket == NULL) {
 		error = ENOENT;
 		goto out;
 	}
 	error = cr_canseesocket(req->td->td_ucred, inp->inp_socket);
 	if (error)
 		goto out;
 	cru2x(inp->inp_socket->so_cred, &xuc);
 out:
 	INP_WUNLOCK(inp);
 outunlocked:
 	INP_INFO_RUNLOCK(&tcbinfo);
 	if (error == 0)
 		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
 	return (error);
 }
 
 /*
  * Attach tcp6_getcred() as the "getcred" node under _net_inet6_tcp6.  The
  * node is read/write because the handler reads the connection addresses
  * from the request's new data and returns a struct xucred as old data.
  */
 SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred,
     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
     tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection");
 #endif
 
 
 /*
  * Control-input handler for IPv4 TCP.
  *
  * cmd is a PRC_* command, sa the address the command refers to (only
  * AF_INET with a non-wildcard address is acted upon), and vip, when
  * non-NULL, points at the IP header that is embedded in an ICMP message.
  *
  * The command selects a notify routine: tcp_mtudisc() for PRC_MSGSIZE,
  * tcp_drop_syn_sent() for selected unreachable/time-exceeded commands when
  * icmp_may_rst is set and an IP header was supplied, tcp_notify()
  * otherwise.  Redirects, source quench, and commands without an
  * inetctlerrmap[] entry are ignored.  With an IP header, only the
  * connection named by that header is notified, and only if the quoted
  * sequence number lies within [snd_una, snd_max); if no inpcb matches, the
  * event is passed to syncache_unreach().  Without an IP header, every
  * connection to the address is notified via in_pcbnotifyall().
  */
 void
 tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 	struct ip *ip = vip;
 	struct tcphdr *th;
 	struct in_addr faddr;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
 	struct icmp *icp;
 	struct in_conninfo inc;
 	tcp_seq icmp_tcp_seq;
 	int mtu;
 
 	faddr = ((struct sockaddr_in *)sa)->sin_addr;
 	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
 		return;
 
 	if (cmd == PRC_MSGSIZE)
 		notify = tcp_mtudisc;
 	else if (icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
 		cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip)
 		notify = tcp_drop_syn_sent;
 	/*
 	 * Redirects don't need to be handled up here.
 	 */
 	else if (PRC_IS_REDIRECT(cmd))
 		return;
 	/*
 	 * Source quench is deprecated.
 	 */
 	else if (cmd == PRC_QUENCH)
 		return;
 	/*
 	 * Hostdead is ugly because it goes linearly through all PCBs.
 	 * XXX: We never get this from ICMP, otherwise it makes an
 	 * excellent DoS attack on machines with many connections.
 	 */
 	else if (cmd == PRC_HOSTDEAD)
 		ip = NULL;
 	else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
 		return;
 	if (ip != NULL) {
 		/*
 		 * ip is the icmp_ip member of an ICMP message: step back to
 		 * the enclosing ICMP header and forward to the TCP header
 		 * that follows the quoted IP header.
 		 */
 		icp = (struct icmp *)((caddr_t)ip
 				      - offsetof(struct icmp, icmp_ip));
 		th = (struct tcphdr *)((caddr_t)ip
 				       + (ip->ip_hl << 2));
 		INP_INFO_WLOCK(&tcbinfo);
 		inp = in_pcblookup_hash(&tcbinfo, faddr, th->th_dport,
 		    ip->ip_src, th->th_sport, 0, NULL);
 		if (inp != NULL)  {
 			INP_WLOCK(inp);
 			if (!(inp->inp_vflag & INP_TIMEWAIT) &&
 			    !(inp->inp_vflag & INP_DROPPED) &&
 			    !(inp->inp_socket == NULL)) {
 				icmp_tcp_seq = htonl(th->th_seq);
 				tp = intotcpcb(inp);
 				/*
 				 * Act only if the quoted sequence number is
 				 * within [snd_una, snd_max).
 				 */
 				if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) &&
 				    SEQ_LT(icmp_tcp_seq, tp->snd_max)) {
 					if (cmd == PRC_MSGSIZE) {
 					    /*
 					     * MTU discovery:
 					     * If we got a needfrag set the MTU
 					     * in the route to the suggested new
 					     * value (if given) and then notify.
 					     */
 					    bzero(&inc, sizeof(inc));
 					    inc.inc_flags = 0;	/* IPv4 */
 					    inc.inc_faddr = faddr;
+					    inc.inc_fibnum =
+						inp->inp_inc.inc_fibnum;
 
 					    mtu = ntohs(icp->icmp_nextmtu);
 					    /*
 					     * If no alternative MTU was
 					     * proposed, try the next smaller
 					     * one.  ip->ip_len has already
 					     * been swapped in icmp_input().
 					     */
 					    if (!mtu)
 						mtu = ip_next_mtu(ip->ip_len,
 						 1);
 					    /*
 					     * Discard values below the floor
 					     * and fall back to the default MSS
 					     * plus headers.
 					     */
 					    if (mtu < max(296, (tcp_minmss)
 						 + sizeof(struct tcpiphdr)))
 						mtu = 0;
 					    if (!mtu)
 						mtu = tcp_mssdflt
 						 + sizeof(struct tcpiphdr);
 					    /*
 					     * Only cache the MTU if it
 					     * is smaller than the interface
 					     * or route MTU.  tcp_mtudisc()
 					     * will do right thing by itself.
 					     */
 					    if (mtu <= tcp_maxmtu(&inc, NULL))
 						tcp_hc_updatemtu(&inc, mtu);
 					}
 
 					/*
 					 * The notify routine returns NULL
 					 * instead of inp in some cases (see
 					 * tcp_drop_syn_sent()); the unlock
 					 * below is skipped then.
 					 */
 					inp = (*notify)(inp, inetctlerrmap[cmd]);
 				}
 			}
 			if (inp != NULL)
 				INP_WUNLOCK(inp);
 		} else {
 			/* No inpcb matched: let the syncache look at it. */
 			inc.inc_fport = th->th_dport;
 			inc.inc_lport = th->th_sport;
 			inc.inc_faddr = faddr;
 			inc.inc_laddr = ip->ip_src;
 #ifdef INET6
 			inc.inc_isipv6 = 0;
 #endif
 			syncache_unreach(&inc, th);
 		}
 		INP_INFO_WUNLOCK(&tcbinfo);
 	} else
 		in_pcbnotifyall(&tcbinfo, faddr, inetctlerrmap[cmd], notify);
 }
 
 #ifdef INET6
 /*
  * Control-input handler for IPv6 TCP.
  *
  * cmd is a PRC_* command and sa must be a struct sockaddr_in6.  d, when
  * non-NULL, is a struct ip6ctlparam supplying the mbuf, the IPv6 header,
  * the offset of the transport header and the source address.
  *
  * PRC_MSGSIZE selects tcp_mtudisc() as the notify routine, everything else
  * uses tcp_notify().  Non-redirect commands that are out of range or have
  * no inet6ctlerrmap[] entry are ignored, as is PRC_QUENCH.  When an IPv6
  * header is available, the connection named by the ports found at the
  * given offset is notified through in6_pcbnotify() and the event is also
  * passed to syncache_unreach(); otherwise in6_pcbnotify() is called with
  * zero ports.
  */
 void
 tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
 {
 	struct tcphdr th;
 	struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
 	struct ip6_hdr *ip6;
 	struct mbuf *m;
 	struct ip6ctlparam *ip6cp = NULL;
 	const struct sockaddr_in6 *sa6_src = NULL;
 	int off;
 	/* Just the two port fields that lead a TCP header. */
 	struct tcp_portonly {
 		u_int16_t th_sport;
 		u_int16_t th_dport;
 	} *thp;
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
 		return;
 
 	if (cmd == PRC_MSGSIZE)
 		notify = tcp_mtudisc;
 	else if (!PRC_IS_REDIRECT(cmd) &&
 		 ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0))
 		return;
 	/* Source quench is deprecated. */
 	else if (cmd == PRC_QUENCH)
 		return;
 
 	/* if the parameter is from icmp6, decode it. */
 	if (d != NULL) {
 		ip6cp = (struct ip6ctlparam *)d;
 		m = ip6cp->ip6c_m;
 		ip6 = ip6cp->ip6c_ip6;
 		off = ip6cp->ip6c_off;
 		sa6_src = ip6cp->ip6c_src;
 	} else {
 		m = NULL;
 		ip6 = NULL;
 		off = 0;	/* fool gcc */
 		sa6_src = &sa6_any;
 	}
 
 	if (ip6 != NULL) {
 		struct in_conninfo inc;
 		/*
 		 * XXX: We assume that when IPV6 is non NULL,
 		 * M and OFF are valid.
 		 */
 
 		/* check if we can safely examine src and dst ports */
 		if (m->m_pkthdr.len < off + sizeof(*thp))
 			return;
 
 		/*
 		 * Only the ports are copied out of the mbuf; the rest of
 		 * th stays zero.
 		 */
 		bzero(&th, sizeof(th));
 		m_copydata(m, off, sizeof(*thp), (caddr_t)&th);
 
 		in6_pcbnotify(&tcbinfo, sa, th.th_dport,
 		    (struct sockaddr *)ip6cp->ip6c_src,
 		    th.th_sport, cmd, NULL, notify);
 
 		/* Also give the syncache a chance to act on the event. */
 		inc.inc_fport = th.th_dport;
 		inc.inc_lport = th.th_sport;
 		inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr;
 		inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr;
 		inc.inc_isipv6 = 1;
 		INP_INFO_WLOCK(&tcbinfo);
 		syncache_unreach(&inc, &th);
 		INP_INFO_WUNLOCK(&tcbinfo);
 	} else
 		in6_pcbnotify(&tcbinfo, sa, 0, (const struct sockaddr *)sa6_src,
 			      0, cmd, NULL, notify);
 }
 #endif /* INET6 */
 
 
 /*
  * Following is where TCP initial sequence number generation occurs.
  *
  * There are two places where we must use initial sequence numbers:
  * 1.  In SYN-ACK packets.
  * 2.  In SYN packets.
  *
  * All ISNs for SYN-ACK packets are generated by the syncache.  See
  * tcp_syncache.c for details.
  *
  * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling
  * depends on this property.  In addition, these ISNs should be
  * unguessable so as to prevent connection hijacking.  To satisfy
  * the requirements of this situation, the algorithm outlined in
  * RFC 1948 is used, with only small modifications.
  *
  * Implementation details:
  *
  * Time is based off the system timer, and is corrected so that it
  * increases by one megabyte per second.  This allows for proper
  * recycling on high speed LANs while still leaving over an hour
  * before rollover.
  *
  * As reading the *exact* system time is too expensive to be done
  * whenever setting up a TCP connection, we increment the time
  * offset in two ways.  First, a small random positive increment
  * is added to isn_offset for each connection that is set up.
  * Second, the function tcp_isn_tick fires once per clock tick
  * and increments isn_offset as necessary so that sequence numbers
  * are incremented at approximately ISN_BYTES_PER_SECOND.  The
  * random positive increments serve only to ensure that the same
  * exact sequence number is never sent out twice (as could otherwise
  * happen when a port is recycled in less than the system tick
  * interval.)
  *
  * net.inet.tcp.isn_reseed_interval controls the number of seconds
  * between seeding of isn_secret.  This is normally set to zero,
  * as reseeding should not be necessary.
  *
  * Locking of the global variables isn_secret, isn_last_reseed, isn_offset,
  * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock.  In
  * general, this means holding an exclusive (write) lock.
  */
 
 /* Target rate at which the ISN offset advances (used by tcp_isn_tick()). */
 #define ISN_BYTES_PER_SECOND 1048576
 /* Fixed amount added to isn_offset for every ISN handed out. */
 #define ISN_STATIC_INCREMENT 4096
 /* Mask applied to arc4random() for the per-ISN random increment. */
 #define ISN_RANDOM_INCREMENT (4096 - 1)
 
 static u_char isn_secret[32];	/* secret mixed into the MD5 hash */
 static int isn_last_reseed;	/* value of ticks at last (re)seed; 0 = never */
 /* Current ISN offset and its value as recorded by the last tcp_isn_tick(). */
 static u_int32_t isn_offset, isn_offset_old;
 static MD5_CTX isn_ctx;		/* shared MD5 context used by tcp_new_isn() */
 
 tcp_seq
 tcp_new_isn(struct tcpcb *tp)
 {
 	u_int32_t md5_buffer[4];
 	tcp_seq new_isn;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	ISN_LOCK();
 	/* Seed if this is the first use, reseed if requested. */
 	if ((isn_last_reseed == 0) || ((tcp_isn_reseed_interval > 0) &&
 	     (((u_int)isn_last_reseed + (u_int)tcp_isn_reseed_interval*hz)
 		< (u_int)ticks))) {
 		read_random(&isn_secret, sizeof(isn_secret));
 		isn_last_reseed = ticks;
 	}
 
 	/* Compute the md5 hash and return the ISN. */
 	MD5Init(&isn_ctx);
 	MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short));
 	MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short));
 #ifdef INET6
 	if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) {
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr,
 			  sizeof(struct in6_addr));
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_laddr,
 			  sizeof(struct in6_addr));
 	} else
 #endif
 	{
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_faddr,
 			  sizeof(struct in_addr));
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_laddr,
 			  sizeof(struct in_addr));
 	}
 	MD5Update(&isn_ctx, (u_char *) &isn_secret, sizeof(isn_secret));
 	MD5Final((u_char *) &md5_buffer, &isn_ctx);
 	new_isn = (tcp_seq) md5_buffer[0];
 	isn_offset += ISN_STATIC_INCREMENT +
 		(arc4random() & ISN_RANDOM_INCREMENT);
 	new_isn += isn_offset;
 	ISN_UNLOCK();
 	return (new_isn);
 }
 
 /*
  * Periodic callout that keeps the ISN offset moving at a steady pace.
  * Each run projects the offset recorded by the previous run forward by
  * ISN_BYTES_PER_SECOND / 100; if the per-connection increments have not
  * already carried isn_offset past that point, isn_offset is raised to it.
  * The callout then re-arms itself for hz/100 ticks later.
  */
 static void
 tcp_isn_tick(void *xtp)
 {
 	u_int32_t target;
 
 	ISN_LOCK();
 	target = isn_offset_old + ISN_BYTES_PER_SECOND / 100;
 	if (SEQ_GT(target, isn_offset)) {
 		/* Not enough organic growth since the last run: catch up. */
 		isn_offset = target;
 	}
 	/* Remember where this run left the offset for the next projection. */
 	isn_offset_old = isn_offset;
 	callout_reset(&isn_callout, hz/100, tcp_isn_tick, NULL);
 	ISN_UNLOCK();
 }
 
 /*
  * Notify routine used for specific ICMP unreachable messages: a connection
  * that is still in SYN-SENT is dropped with the given error; connections
  * in any other state, or in timewait/dropped condition, are left alone.
  * Whether this routine is selected at all is controlled by the
  * icmp_may_rst sysctl.
  *
  * Returns inp, or NULL when tcp_drop() returned NULL.
  */
 struct inpcb *
 tcp_drop_syn_sent(struct inpcb *inp, int errno)
 {
 	struct tcpcb *tp;
 
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	if ((inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) != 0)
 		return (inp);
 
 	tp = intotcpcb(inp);
 	if (tp->t_state == TCPS_SYN_SENT) {
 		if (tcp_drop(tp, errno) == NULL)
 			return (NULL);
 	}
 	return (inp);
 }
 
 /*
  * When `need fragmentation' ICMP is received, update our idea of the MSS
  * based on the new value in the route.  Also nudge TCP to send something,
  * since we know the packet we just sent was dropped.
  * This duplicates some code in the tcp_mss() function in tcp_input.c.
  */
 /*
  * Notify routine for PRC_MSGSIZE.
  *
  * Recomputes the connection's maximum segment size from the smaller of the
  * host-cache MTU and the route/interface MTU.  If the new value is lower
  * than the current t_maxopd, t_maxopd and t_maxseg are reduced, the send
  * state is rewound to snd_una, and output is kicked.  The errno argument
  * is not used.
  *
  * The caller must hold the inpcb write lock.  Always returns inp.
  */
 struct inpcb *
 tcp_mtudisc(struct inpcb *inp, int errno)
 {
 	struct tcpcb *tp;
 	struct socket *so = inp->inp_socket;
 	u_int maxmtu;
 	u_int romtu;
 	int mss;
 #ifdef INET6
 	int isipv6;
 #endif /* INET6 */
 
 	INP_WLOCK_ASSERT(inp);
 	if ((inp->inp_vflag & INP_TIMEWAIT) ||
 	    (inp->inp_vflag & INP_DROPPED))
 		return (inp);
 
 	tp = intotcpcb(inp);
 	KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL"));
 
 #ifdef INET6
 	isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
 #endif
 	/*
 	 * maxmtu: value cached in the host cache (0 if none);
 	 * romtu: value derived from the route/interface.
 	 */
 	maxmtu = tcp_hc_getmtu(&inp->inp_inc); /* IPv4 and IPv6 */
 	romtu =
 #ifdef INET6
 	    isipv6 ? tcp_maxmtu6(&inp->inp_inc, NULL) :
 #endif /* INET6 */
 	    tcp_maxmtu(&inp->inp_inc, NULL);
 	if (!maxmtu)
 		maxmtu = romtu;
 	else
 		maxmtu = min(maxmtu, romtu);
 	if (!maxmtu) {
 		/* Nothing known about the path: use the default MSS. */
 		tp->t_maxopd = tp->t_maxseg =
 #ifdef INET6
 			isipv6 ? tcp_v6mssdflt :
 #endif /* INET6 */
 			tcp_mssdflt;
 		return (inp);
 	}
 	/* MSS is the MTU less the IP and TCP header sizes. */
 	mss = maxmtu -
 #ifdef INET6
 		(isipv6 ? sizeof(struct ip6_hdr) + sizeof(struct tcphdr) :
 #endif /* INET6 */
 		 sizeof(struct tcpiphdr)
 #ifdef INET6
 		 )
 #endif /* INET6 */
 		;
 
 	/*
 	 * XXX - The above conditional probably violates the TCP
 	 * spec.  The problem is that, since we don't know the
 	 * other end's MSS, we are supposed to use a conservative
 	 * default.  But, if we do that, then MTU discovery will
 	 * never actually take place, because the conservative
 	 * default is much less than the MTUs typically seen
 	 * on the Internet today.  For the moment, we'll sweep
 	 * this under the carpet.
 	 *
 	 * The conservative default might not actually be a problem
 	 * if the only case this occurs is when sending an initial
 	 * SYN with options and data to a host we've never talked
 	 * to before.  Then, they will reply with an MSS value which
 	 * will get recorded and the new parameters should get
 	 * recomputed.  For Further Study.
 	 */
 	/* Never raise the segment size here; only shrink it. */
 	if (tp->t_maxopd <= mss)
 		return (inp);
 	tp->t_maxopd = mss;
 
 	/* Leave room for the timestamp option when it is in use. */
 	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
 	    (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
 		mss -= TCPOLEN_TSTAMP_APPA;
 	/* Round an mss larger than MCLBYTES down to a multiple of MCLBYTES. */
 #if	(MCLBYTES & (MCLBYTES - 1)) == 0
 	if (mss > MCLBYTES)
 		mss &= ~(MCLBYTES-1);
 #else
 	if (mss > MCLBYTES)
 		mss = mss / MCLBYTES * MCLBYTES;
 #endif
 	/* Do not exceed the send buffer's high-water mark. */
 	if (so->so_snd.sb_hiwat < mss)
 		mss = so->so_snd.sb_hiwat;
 
 	tp->t_maxseg = mss;
 
 	/*
 	 * Rewind the send pointer to the oldest unacknowledged byte and
 	 * send again with the new segment size.
 	 */
 	tcpstat.tcps_mturesent++;
 	tp->t_rtttime = 0;
 	tp->snd_nxt = tp->snd_una;
 	tcp_free_sackholes(tp);
 	tp->snd_recover = tp->snd_max;
 	if (tp->t_flags & TF_SACK_PERMIT)
 		EXIT_FASTRECOVERY(tp);
 	tcp_output_send(tp);
 	return (inp);
 }
 
 /*
  * Look-up the routing entry to the peer of this inpcb.  If no route
  * is found and it cannot be allocated, then return 0.  This routine
  * is called by TCP routines that access the rmx structure and by tcp_mss
  * to get the interface MTU.
  */
 /*
  * Return the MTU usable towards the IPv4 foreign address in inc: the
  * route's rmx_mtu capped by the interface MTU, or the interface MTU alone
  * when the route carries no MTU.  Returns 0 when the foreign address is
  * INADDR_ANY or no route was obtained.  If flags is non-NULL, CSUM_TSO is
  * or'ed into *flags when the interface has TSO4 enabled; *flags is never
  * cleared here.
  */
 u_long
 tcp_maxmtu(struct in_conninfo *inc, int *flags)
 {
 	struct route sro;
 	struct sockaddr_in *dst;
 	struct ifnet *ifp;
 	u_long maxmtu = 0;
 
 	KASSERT(inc != NULL, ("tcp_maxmtu with NULL in_conninfo pointer"));
 
 	bzero(&sro, sizeof(sro));
 	if (inc->inc_faddr.s_addr != INADDR_ANY) {
 	        dst = (struct sockaddr_in *)&sro.ro_dst;
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = inc->inc_faddr;
-		rtalloc_ign(&sro, RTF_CLONING);
+		in_rtalloc_ign(&sro, RTF_CLONING, inc->inc_fibnum);
 	}
 	if (sro.ro_rt != NULL) {
 		ifp = sro.ro_rt->rt_ifp;
 		/* A zero rmx_mtu means the route carries no MTU of its own. */
 		if (sro.ro_rt->rt_rmx.rmx_mtu == 0)
 			maxmtu = ifp->if_mtu;
 		else
 			maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
 
 		/* Report additional interface capabilities. */
 		if (flags != NULL) {
 			if (ifp->if_capenable & IFCAP_TSO4 &&
 			    ifp->if_hwassist & CSUM_TSO)
 				*flags |= CSUM_TSO;
 		}
 		/* Drop the route reference taken by the lookup above. */
 		RTFREE(sro.ro_rt);
 	}
 	return (maxmtu);
 }
 
 #ifdef INET6
 /*
  * IPv6 counterpart of tcp_maxmtu(): return the MTU usable towards the
  * IPv6 foreign address in inc, i.e. the route's rmx_mtu capped by the
  * interface link MTU, or the link MTU alone when the route carries none.
  * Returns 0 when the foreign address is unspecified or no route was
  * obtained.  If flags is non-NULL, CSUM_TSO is or'ed into *flags when the
  * interface has TSO6 enabled.
  */
 u_long
 tcp_maxmtu6(struct in_conninfo *inc, int *flags)
 {
 	struct route_in6 sro6;
 	struct ifnet *ifp;
 	u_long maxmtu;
 
 	KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer"));
 
 	bzero(&sro6, sizeof(sro6));
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) {
 		sro6.ro_dst.sin6_family = AF_INET6;
 		sro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
 		sro6.ro_dst.sin6_addr = inc->inc6_faddr;
 		rtalloc_ign((struct route *)&sro6, RTF_CLONING);
 	}
 
 	/* Without a route there is nothing to report. */
 	if (sro6.ro_rt == NULL)
 		return (0);
 
 	ifp = sro6.ro_rt->rt_ifp;
 	if (sro6.ro_rt->rt_rmx.rmx_mtu != 0)
 		maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu,
 		    IN6_LINKMTU(sro6.ro_rt->rt_ifp));
 	else
 		maxmtu = IN6_LINKMTU(sro6.ro_rt->rt_ifp);
 
 	/* Report additional interface capabilities. */
 	if (flags != NULL && (ifp->if_capenable & IFCAP_TSO6) &&
 	    (ifp->if_hwassist & CSUM_TSO))
 		*flags |= CSUM_TSO;
 
 	RTFREE(sro6.ro_rt);
 	return (maxmtu);
 }
 #endif /* INET6 */
 
 #ifdef IPSEC
 /*
  * Compute the ESP/AH header size for TCP, including the outer IP header.
  *
  * A scratch mbuf is filled with template IP (or IPv6) and TCP headers for
  * the connection and handed to ipsec4_hdrsiz()/ipsec6_hdrsiz().  Returns 0
  * when tp or its inpcb is NULL or when no mbuf could be allocated.
  */
 size_t
 ipsec_hdrsiz_tcp(struct tcpcb *tp)
 {
 	struct inpcb *inp;
 	struct mbuf *m;
 	struct tcphdr *th;
 	struct ip *ip;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 	size_t hdrsiz;
 
 	if (tp == NULL)
 		return (0);
 	inp = tp->t_inpcb;
 	if (inp == NULL)
 		return (0);
 
 	MGETHDR(m, M_DONTWAIT, MT_DATA);
 	if (m == NULL)
 		return (0);
 
 #ifdef INET6
 	if ((inp->inp_vflag & INP_IPV6) != 0) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		th = (struct tcphdr *)(ip6 + 1);
 		m->m_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
 		m->m_pkthdr.len = m->m_len;
 		tcpip_fillheaders(inp, ip6, th);
 		hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
 	} else
 #endif /* INET6 */
 	{
 		ip = mtod(m, struct ip *);
 		th = (struct tcphdr *)(ip + 1);
 		m->m_len = sizeof(struct tcpiphdr);
 		m->m_pkthdr.len = m->m_len;
 		tcpip_fillheaders(inp, ip, th);
 		hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
 	}
 
 	/* The mbuf was only needed for the size computation. */
 	m_free(m);
 	return (hdrsiz);
 }
 #endif /* IPSEC */
 
 /*
  * TCP BANDWIDTH DELAY PRODUCT WINDOW LIMITING
  *
  * This code attempts to calculate the bandwidth-delay product as a
  * means of determining the optimal window size to maximize bandwidth,
  * minimize RTT, and avoid the over-allocation of buffers on interfaces and
  * routers.  This code also does a fairly good job keeping RTTs in check
  * across slow links like modems.  We implement an algorithm which is very
  * similar (but not meant to be) TCP/Vegas.  The code operates on the
  * transmitter side of a TCP connection and so only affects the transmit
  * side of the connection.
  *
  * BACKGROUND:  TCP makes no provision for the management of buffer space
  * at the end points or at the intermediate routers and switches.  A TCP
  * stream, whether using NewReno or not, will eventually buffer as
  * many packets as it is able and the only reason this typically works is
  * due to the fairly small default buffers made available for a connection
  * (typically 16K or 32K).  As machines use larger windows and/or window
  * scaling it is now fairly easy for even a single TCP connection to blow-out
  * all available buffer space not only on the local interface, but on
  * intermediate routers and switches as well.  NewReno makes a misguided
  * attempt to 'solve' this problem by waiting for an actual failure to occur,
  * then backing off, then steadily increasing the window again until another
  * failure occurs, ad-infinitum.  This results in terrible oscillation that
  * is only made worse as network loads increase and the idea of intentionally
  * blowing out network buffers is, frankly, a terrible way to manage network
  * resources.
  *
  * It is far better to limit the transmit window prior to the failure
  * condition being achieved.  There are two general ways to do this:  First
  * you can 'scan' through different transmit window sizes and locate the
  * point where the RTT stops increasing, indicating that you have filled the
  * pipe, then scan backwards until you note that RTT stops decreasing, then
  * repeat ad-infinitum.  This method works in principle but has severe
  * implementation issues due to RTT variances, timer granularity, and
  * instability in the algorithm which can lead to many false positives and
  * create oscillations as well as interact badly with other TCP streams
  * implementing the same algorithm.
  *
  * The second method is to limit the window to the bandwidth delay product
  * of the link.  This is the method we implement.  RTT variances and our
  * own manipulation of the congestion window, bwnd, can potentially
  * destabilize the algorithm.  For this reason we have to stabilize the
  * elements used to calculate the window.  We do this by using the minimum
  * observed RTT, the long term average of the observed bandwidth, and
  * by adding two segments worth of slop.  It isn't perfect but it is able
  * to react to changing conditions and gives us a very stable basis on
  * which to extend the algorithm.
  */
 /*
  * Recompute tp->snd_bwnd, the bandwidth-delay-product window limit, from
  * the progress of the acknowledged sequence number ack_seq since the
  * previous call.
  *
  * The instantaneous bandwidth is folded into tp->snd_bandwidth with a
  * 15/16 weight on the old value; the window is that bandwidth times the
  * mean of t_srtt and t_rttbest, plus tcp_inflight_stab tenths of a
  * segment, clamped to [tcp_inflight_min, tcp_inflight_max] and to at least
  * two segments.  When the feature is disabled or the lowest observed RTT
  * is below tcp_inflight_rttthresh, the limit is effectively removed.
  *
  * The caller must hold the inpcb write lock.
  */
 void
 tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq)
 {
 	u_long bw;
 	u_long bwnd;
 	int save_ticks;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * If inflight_enable is disabled in the middle of a tcp connection,
 	 * make sure snd_bwnd is effectively disabled.
 	 */
 	if (tcp_inflight_enable == 0 || tp->t_rttlow < tcp_inflight_rttthresh) {
 		tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 		tp->snd_bandwidth = 0;
 		return;
 	}
 
 	/*
 	 * Figure out the bandwidth.  Due to the tick granularity this
 	 * is a very rough number and it MUST be averaged over a fairly
 	 * long period of time.  XXX we need to take into account a link
 	 * that is not using all available bandwidth, but for now our
 	 * slop will ramp us up if this case occurs and the bandwidth later
 	 * increases.
 	 *
 	 * Note: if ticks rollover 'bw' may wind up negative.  We must
 	 * effectively reset t_bw_rtttime for this case.
 	 */
 	save_ticks = ticks;
 	/* Need at least one elapsed tick to avoid dividing by zero below. */
 	if ((u_int)(save_ticks - tp->t_bw_rtttime) < 1)
 		return;
 
 	/* Bytes acknowledged since the last sample, scaled to per second. */
 	bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz /
 	    (save_ticks - tp->t_bw_rtttime);
 	tp->t_bw_rtttime = save_ticks;
 	tp->t_bw_rtseq = ack_seq;
 	/* The sample point is updated above even when the sample is dropped. */
 	if (tp->t_bw_rtttime == 0 || (int)bw < 0)
 		return;
 	/* Weighted average: 15/16 old value, 1/16 new sample. */
 	bw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4;
 
 	tp->snd_bandwidth = bw;
 
 	/*
 	 * Calculate the semi-static bandwidth delay product, plus two maximal
 	 * segments.  The additional slop puts us squarely in the sweet
 	 * spot and also handles the bandwidth run-up case and stabilization.
 	 * Without the slop we could be locking ourselves into a lower
 	 * bandwidth.
 	 *
 	 * Situations Handled:
 	 *	(1) Prevents over-queueing of packets on LANs, especially on
 	 *	    high speed LANs, allowing larger TCP buffers to be
 	 *	    specified, and also does a good job preventing
 	 *	    over-queueing of packets over choke points like modems
 	 *	    (at least for the transmit side).
 	 *
 	 *	(2) Is able to handle changing network loads (bandwidth
 	 *	    drops so bwnd drops, bandwidth increases so bwnd
 	 *	    increases).
 	 *
 	 *	(3) Theoretically should stabilize in the face of multiple
 	 *	    connections implementing the same algorithm (this may need
 	 *	    a little work).
 	 *
 	 *	(4) Stability value (defaults to 20 = 2 maximal packets) can
 	 *	    be adjusted with a sysctl but typically only needs to be
 	 *	    on very slow connections.  A value no smaller than 5
 	 *	    should be used, but only reduce this default if you have
 	 *	    no other choice.
 	 */
 #define USERTT	((tp->t_srtt + tp->t_rttbest) / 2)
 	bwnd = (int64_t)bw * USERTT / (hz << TCP_RTT_SHIFT) + tcp_inflight_stab * tp->t_maxseg / 10;
 #undef USERTT
 
 	if (tcp_inflight_debug > 0) {
 		/* Time of the last debug line; shared by all connections. */
 		static int ltime;
 		/* Print at most tcp_inflight_debug lines per second. */
 		if ((u_int)(ticks - ltime) >= hz / tcp_inflight_debug) {
 			ltime = ticks;
 			printf("%p bw %ld rttbest %d srtt %d bwnd %ld\n",
 			    tp,
 			    bw,
 			    tp->t_rttbest,
 			    tp->t_srtt,
 			    bwnd
 			);
 		}
 	}
 	/* Clamp to the configured bounds, then to at least two segments. */
 	if ((long)bwnd < tcp_inflight_min)
 		bwnd = tcp_inflight_min;
 	if (bwnd > tcp_inflight_max)
 		bwnd = tcp_inflight_max;
 	if ((long)bwnd < tp->t_maxseg * 2)
 		bwnd = tp->t_maxseg * 2;
 	tp->snd_bwnd = bwnd;
 }
 
 #ifdef TCP_SIGNATURE
 /*
  * m_apply() callback: feed one contiguous run of TCP segment data from an
  * mbuf chain into the MD5 context passed as fstate.  Always returns 0.
  */
 static int
 tcp_signature_apply(void *fstate, void *data, u_int len)
 {
 	u_char *bytes = (u_char *)data;
 
 	MD5Update(fstate, bytes, len);
 	return (0);
 }
 
 /*
  * Compute TCP-MD5 hash of a TCPv4 segment. (RFC2385)
  *
  * Parameters:
  * m		pointer to head of mbuf chain
  * off0		offset to TCP header within the mbuf chain
  * len		length of TCP segment data, excluding options
  * optlen	length of TCP segment options
  * buf		pointer to storage for computed MD5 digest
  * direction	direction of flow (IPSEC_DIR_INBOUND or OUTBOUND)
  *
  * We do this over ip, tcphdr, segment data, and the key in the SADB.
  * When called from tcp_input(), we can be sure that th_sum has been
  * zeroed out and verified already.
  *
  * This function is for IPv4 use only. Calling this function with an
  * IPv6 packet in the mbuf chain will yield undefined results.
  *
  * Return 0 if successful, otherwise return EINVAL (SADB lookup failed).
  *
  * XXX The key is retrieved from the system's PF_KEY SADB, by keying a
  * search with the destination IP address, and a 'magic SPI' to be
  * determined by the application. This is hardcoded elsewhere to 1179
  * right now. Another branch of this code exists which uses the SPD to
  * specify per-application flows but it is unstable.
  */
 /*
  * Computes the digest into buf in four steps: IP pseudo-header, TCP header
  * with a zeroed checksum, segment data, then the key taken from the
  * security association found for the peer address.  The peer is the IP
  * source for IPSEC_DIR_INBOUND and the IP destination otherwise.  Returns
  * 0 on success and EINVAL when no security association is found.
  */
 int
 tcp_signature_compute(struct mbuf *m, int off0, int len, int optlen,
     u_char *buf, u_int direction)
 {
 	union sockaddr_union dst;
 	struct ippseudo ippseudo;
 	MD5_CTX ctx;
 	int doff;
 	struct ip *ip;
 	struct ipovly *ipovly;
 	struct secasvar *sav;
 	struct tcphdr *th;
 	u_short savecsum;
 
 	KASSERT(m != NULL, ("NULL mbuf chain"));
 	KASSERT(buf != NULL, ("NULL signature pointer"));
 
 	/* Extract the destination from the IP header in the mbuf. */
 	ip = mtod(m, struct ip *);
 	bzero(&dst, sizeof(union sockaddr_union));
 	dst.sa.sa_len = sizeof(struct sockaddr_in);
 	dst.sa.sa_family = AF_INET;
 	dst.sin.sin_addr = (direction == IPSEC_DIR_INBOUND) ?
 	    ip->ip_src : ip->ip_dst;
 
 	/* Look up an SADB entry which matches the address of the peer. */
 	sav = KEY_ALLOCSA(&dst, IPPROTO_TCP, htonl(TCP_SIG_SPI));
 	if (sav == NULL) {
 		printf("%s: SADB lookup failed for %s\n", __func__,
 		    inet_ntoa(dst.sin.sin_addr));
 		return (EINVAL);
 	}
 
 	MD5Init(&ctx);
 	ipovly = (struct ipovly *)ip;
 	th = (struct tcphdr *)((u_char *)ip + off0);
 	/* doff: offset of the segment data, past the TCP header and options. */
 	doff = off0 + sizeof(struct tcphdr) + optlen;
 
 	/*
 	 * Step 1: Update MD5 hash with IP pseudo-header.
 	 *
 	 * XXX The ippseudo header MUST be digested in network byte order,
 	 * or else we'll fail the regression test. Assume all fields we've
 	 * been doing arithmetic on have been in host byte order.
 	 * XXX One cannot depend on ipovly->ih_len here. When called from
 	 * tcp_output(), the underlying ip_len member has not yet been set.
 	 */
 	ippseudo.ippseudo_src = ipovly->ih_src;
 	ippseudo.ippseudo_dst = ipovly->ih_dst;
 	ippseudo.ippseudo_pad = 0;
 	ippseudo.ippseudo_p = IPPROTO_TCP;
 	ippseudo.ippseudo_len = htons(len + sizeof(struct tcphdr) + optlen);
 	MD5Update(&ctx, (char *)&ippseudo, sizeof(struct ippseudo));
 
 	/*
 	 * Step 2: Update MD5 hash with TCP header, excluding options.
 	 * The TCP checksum must be set to zero.
 	 */
 	/* The checksum is zeroed only for hashing and restored right after. */
 	savecsum = th->th_sum;
 	th->th_sum = 0;
 	MD5Update(&ctx, (char *)th, sizeof(struct tcphdr));
 	th->th_sum = savecsum;
 
 	/*
 	 * Step 3: Update MD5 hash with TCP segment data.
 	 *         Use m_apply() to avoid an early m_pullup().
 	 */
 	if (len > 0)
 		m_apply(m, doff, len, tcp_signature_apply, &ctx);
 
 	/*
 	 * Step 4: Update MD5 hash with shared secret.
 	 */
 	MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth));
 	MD5Final(buf, &ctx);
 
 	/* Account the use of the SA, then release the lookup's reference. */
 	key_sa_recordxfer(sav, m);
 	KEY_FREESAV(&sav);
 	return (0);
 }
 #endif /* TCP_SIGNATURE */
 
 /*
  * Handler for the net.inet.tcp.drop sysctl: forcibly drop the TCP
  * connection identified by a pair of endpoints.
  *
  * The new value must hold two struct sockaddr_storage: addrs[0] is the
  * foreign endpoint and addrs[1] the local one.  The address family is
  * taken from addrs[0]; addrs[1] is only checked for a matching sockaddr
  * length.  AF_INET6 endpoints holding IPv4-mapped addresses are converted
  * in place and looked up as AF_INET.
  *
  * Returns:
  *   0       a matching inpcb was found (it is dropped unless it is
  *           already dropped, is a listener, or has lost its timewait
  *           state),
  *   EINVAL  an old-value buffer was supplied, or the addresses are
  *           malformed or of an unsupported family,
  *   EPERM   no new value was supplied,
  *   ENOMEM  the new value is shorter than two sockaddr_storage,
  *   ESRCH   no matching inpcb exists,
  *   or the error reported by SYSCTL_IN() / sa6_embedscope().
  */
 static int
 sysctl_drop(SYSCTL_HANDLER_ARGS)
 {
 	/* addrs[0] is a foreign socket, addrs[1] is a local one. */
 	struct sockaddr_storage addrs[2];
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct tcptw *tw;
 	struct sockaddr_in *fin, *lin;
 #ifdef INET6
 	struct sockaddr_in6 *fin6, *lin6;
 #endif
 	int error;
 
 	inp = NULL;
 	fin = lin = NULL;
 #ifdef INET6
 	fin6 = lin6 = NULL;
 #endif
 	error = 0;
 
 	/* This node is write-only and needs both addresses in one write. */
 	if (req->oldptr != NULL || req->oldlen != 0)
 		return (EINVAL);
 	if (req->newptr == NULL)
 		return (EPERM);
 	if (req->newlen < sizeof(addrs))
 		return (ENOMEM);
 	error = SYSCTL_IN(req, &addrs, sizeof(addrs));
 	if (error)
 		return (error);
 
 	/* Validate the addresses and normalise them for the lookup below. */
 	switch (addrs[0].ss_family) {
 #ifdef INET6
 	case AF_INET6:
 		fin6 = (struct sockaddr_in6 *)&addrs[0];
 		lin6 = (struct sockaddr_in6 *)&addrs[1];
 		if (fin6->sin6_len != sizeof(struct sockaddr_in6) ||
 		    lin6->sin6_len != sizeof(struct sockaddr_in6))
 			return (EINVAL);
 		if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) {
 			/* Both ends must be v4-mapped to be treated as IPv4. */
 			if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr))
 				return (EINVAL);
 			in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]);
 			in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]);
 			fin = (struct sockaddr_in *)&addrs[0];
 			lin = (struct sockaddr_in *)&addrs[1];
 			break;
 		}
 		error = sa6_embedscope(fin6, ip6_use_defzone);
 		if (error)
 			return (error);
 		error = sa6_embedscope(lin6, ip6_use_defzone);
 		if (error)
 			return (error);
 		break;
 #endif
 	case AF_INET:
 		fin = (struct sockaddr_in *)&addrs[0];
 		lin = (struct sockaddr_in *)&addrs[1];
 		if (fin->sin_len != sizeof(struct sockaddr_in) ||
 		    lin->sin_len != sizeof(struct sockaddr_in))
 			return (EINVAL);
 		break;
 	default:
 		return (EINVAL);
 	}
 	INP_INFO_WLOCK(&tcbinfo);
 	/*
 	 * Re-read the family: the v4-mapped conversion above rewrote
 	 * addrs[] in place, and only the fin/lin pointers are set for it.
 	 */
 	switch (addrs[0].ss_family) {
 #ifdef INET6
 	case AF_INET6:
 		/*
 		 * Look up using the caller's (now scope-embedded) addresses.
 		 * Passing separate, never-initialised in6_addr temporaries
 		 * here would make the lookup operate on stack garbage.
 		 */
 		inp = in6_pcblookup_hash(&tcbinfo, &fin6->sin6_addr,
 		    fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port,
 		    0, NULL);
 		break;
 #endif
 	case AF_INET:
 		inp = in_pcblookup_hash(&tcbinfo, fin->sin_addr, fin->sin_port,
 		    lin->sin_addr, lin->sin_port, 0, NULL);
 		break;
 	}
 	if (inp != NULL) {
 		INP_WLOCK(inp);
 		if (inp->inp_vflag & INP_TIMEWAIT) {
 			/*
 			 * XXXRW: There currently exists a state where an
 			 * inpcb is present, but its timewait state has been
 			 * discarded.  For now, don't allow dropping of this
 			 * type of inpcb.
 			 */
 			tw = intotw(inp);
 			if (tw != NULL)
 				tcp_twclose(tw, 0);
 			else
 				INP_WUNLOCK(inp);
 		} else if (!(inp->inp_vflag & INP_DROPPED) &&
 			   !(inp->inp_socket->so_options & SO_ACCEPTCONN)) {
 			tp = intotcpcb(inp);
 			tp = tcp_drop(tp, ECONNABORTED);
 			/* Unlock only if tcp_drop() handed the tcpcb back. */
 			if (tp != NULL)
 				INP_WUNLOCK(inp);
 		} else
 			INP_WUNLOCK(inp);
 	} else
 		error = ESRCH;
 	INP_INFO_WUNLOCK(&tcbinfo);
 	return (error);
 }
 
 /*
  * Attach sysctl_drop() below _net_inet_tcp as the "drop" node
  * (identifier TCPCTL_DROP).  The node is struct-typed and writable
  * (CTLTYPE_STRUCT|CTLFLAG_WR) and also carries CTLFLAG_SKIP.
  */
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop,
     CTLTYPE_STRUCT|CTLFLAG_WR|CTLFLAG_SKIP, NULL,
     0, sysctl_drop, "", "Drop TCP connection");
 
 /*
  * Generate a standardized TCP log line for use throughout the
  * tcp subsystem.  Memory allocation is done with M_NOWAIT to
  * allow use in the interrupt context.
  *
  * The endpoints are taken from inc when it is non-NULL; otherwise from
  * the IPv6 header (INET6 kernels only) or the IPv4 header, each combined
  * with the ports in th.  When th is non-NULL its flags are appended.
  *
  * NB: The caller MUST free(s, M_TCPLOG) the returned string.
  * NB: The function returns NULL if logging is disabled (tcp_log_debug
  *     and tcp_log_in_vain are both 0), if memory allocation failed, or
  *     if none of the address sources above is usable.
  *
  * Due to header inclusion and ordering limitations the struct ip
  * and ip6_hdr pointers have to be passed as void pointers.
  */
 char *
 tcp_log_addrs(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
     const void *ip6hdr)
 {
 	char *s, *sp;
 	size_t size;
 	struct ip *ip;
 #ifdef INET6
 	const struct ip6_hdr *ip6;
 
 	ip6 = (const struct ip6_hdr *)ip6hdr;
 #endif /* INET6 */
 	ip = (struct ip *)ip4hdr;
 
 	/*
 	 * The log line looks like this:
 	 * "TCP: [1.2.3.4]:50332 to [1.2.3.4]:80 tcpflags 0x2<SYN>"
 	 *
 	 * The buffer is sized for the fixed text, the flag names and two
 	 * addresses of the widest family compiled in.
 	 */
 	size = sizeof("TCP: []:12345 to []:12345 tcpflags 0x2<>") +
 	    sizeof(PRINT_TH_FLAGS) + 1 +
 #ifdef INET6
 	    2 * INET6_ADDRSTRLEN;
 #else
 	    2 * INET_ADDRSTRLEN;
 #endif /* INET6 */
 
 	/* Is logging enabled? */
 	if (tcp_log_debug == 0 && tcp_log_in_vain == 0)
 		return (NULL);
 
 	/* M_ZERO: the buffer starts as an empty string and stays terminated. */
 	s = malloc(size, M_TCPLOG, M_ZERO|M_NOWAIT);
 	if (s == NULL)
 		return (NULL);
 
 	strcat(s, "TCP: [");
 	sp = s + strlen(s);
 
 	/*
 	 * Each branch appends "<foreign>]:<port> to [<local>]:<port>",
 	 * re-pointing sp at the current end of the string after every piece.
 	 */
 	if (inc && inc->inc_isipv6 == 0) {
 		inet_ntoa_r(inc->inc_faddr, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
 		sp = s + strlen(s);
 		inet_ntoa_r(inc->inc_laddr, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(inc->inc_lport));
 #ifdef INET6
 	} else if (inc) {
 		ip6_sprintf(sp, &inc->inc6_faddr);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
 		sp = s + strlen(s);
 		ip6_sprintf(sp, &inc->inc6_laddr);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(inc->inc_lport));
 	} else if (ip6 && th) {
 		ip6_sprintf(sp, &ip6->ip6_src);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(th->th_sport));
 		sp = s + strlen(s);
 		ip6_sprintf(sp, &ip6->ip6_dst);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(th->th_dport));
 #endif /* INET6 */
 	} else if (ip && th) {
 		inet_ntoa_r(ip->ip_src, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(th->th_sport));
 		sp = s + strlen(s);
 		inet_ntoa_r(ip->ip_dst, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(th->th_dport));
 	} else {
 		/* No address source at all: nothing sensible to log. */
 		free(s, M_TCPLOG);
 		return (NULL);
 	}
 	sp = s + strlen(s);
 	if (th)
 		sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS);
 	/*
 	 * The last byte was zeroed at allocation; if it is no longer zero
 	 * the unbounded writes above ran to the end of the buffer.
 	 */
 	if (*(s + size - 1) != '\0')
 		panic("%s: string too long", __func__);
 	return (s);
 }
Index: head/sys/netinet/tcp_syncache.c
===================================================================
--- head/sys/netinet/tcp_syncache.c	(revision 178887)
+++ head/sys/netinet/tcp_syncache.c	(revision 178888)
@@ -1,1756 +1,1760 @@
 /*-
  * Copyright (c) 2001 McAfee, Inc.
  * Copyright (c) 2006 Andre Oppermann, Internet Business Solutions AG
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Jonathan Lemon
  * and McAfee Research, the Security Research Division of McAfee, Inc. under
  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
  * DARPA CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_mac.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/md5.h>
 #include <sys/proc.h>		/* for proc0 declaration */
 #include <sys/random.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/syslog.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/nd6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_pcb.h>
 #endif
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
 #include <netinet/tcp_offload.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #ifdef INET6
 #include <netipsec/ipsec6.h>
 #endif
 #include <netipsec/key.h>
 #endif /*IPSEC*/
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /* net.inet.tcp.syncookies: read/write switch, enabled by default. */
 static int tcp_syncookies = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_RW,
     &tcp_syncookies, 0,
     "Use TCP SYN cookies if the syncache overflows");
 
 /* net.inet.tcp.syncookies_only: read/write switch, disabled by default. */
 static int tcp_syncookiesonly = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_RW,
     &tcp_syncookiesonly, 0,
     "Use only TCP SYN cookies");
 
 /* Length of each per-bucket secret array in struct syncache_head. */
 #define	SYNCOOKIE_SECRET_SIZE	8	/* dwords */
 #define	SYNCOOKIE_LIFETIME	16	/* seconds */
 
 /*
  * One syncache entry: the state kept for a connection attempt from the
  * time it is inserted into a bucket row until it is dropped or turned
  * into a socket by syncache_socket().
  */
 struct syncache {
 	TAILQ_ENTRY(syncache)	sc_hash;	/* linkage in sch_bucket */
 	struct		in_conninfo sc_inc;	/* addresses */
 	int		sc_rxttime;		/* retransmit time */
 	u_int16_t	sc_rxmits;		/* retransmit counter */
 
 	u_int32_t	sc_tsreflect;		/* timestamp to reflect */
 	u_int32_t	sc_ts;			/* our timestamp to send */
 	u_int32_t	sc_tsoff;		/* ts offset w/ syncookies */
 	u_int32_t	sc_flowlabel;		/* IPv6 flowlabel */
 	tcp_seq		sc_irs;			/* seq from peer */
 	tcp_seq		sc_iss;			/* our ISS */
 	struct		mbuf *sc_ipopts;	/* source route */
 
 	u_int16_t	sc_peer_mss;		/* peer's MSS */
 	u_int16_t	sc_wnd;			/* advertised window */
 	u_int8_t	sc_ip_ttl;		/* IPv4 TTL */
 	u_int8_t	sc_ip_tos;		/* IPv4 TOS */
 	/* Copied to tp->snd_scale and tp->request_r_scale respectively. */
 	u_int8_t	sc_requested_s_scale:4,
 			sc_requested_r_scale:4;
 	u_int8_t	sc_flags;		/* SCF_* bits below */
 #define SCF_NOOPT	0x01			/* no TCP options */
 #define SCF_WINSCALE	0x02			/* negotiated window scaling */
 #define SCF_TIMESTAMP	0x04			/* negotiated timestamps */
 						/* MSS is implicit */
 #define SCF_UNREACH	0x10			/* icmp unreachable received */
 #define SCF_SIGNATURE	0x20			/* send MD5 digests */
 #define SCF_SACK	0x80			/* send SACK option */
 #ifndef TCP_OFFLOAD_DISABLE
 	struct toe_usrreqs *sc_tu;		/* TOE operations */
 	void 		*sc_toepcb;		/* TOE protocol block */
 #endif			
 #ifdef MAC
 	struct label	*sc_label;		/* MAC label reference */
 #endif
 };
 
 /*
  * True when the entry has a TOE protocol block attached; constant
  * false when offload support is compiled out (the field does not
  * exist then).
  */
 #ifdef TCP_OFFLOAD_DISABLE
 #define TOEPCB_ISSET(sc) (0)
 #else
 #define TOEPCB_ISSET(sc) ((sc)->sc_toepcb != NULL)
 #endif
 
 
 /*
  * One row of the syncache hash table: a list of entries with its own
  * mutex and retransmit callout.
  */
 struct syncache_head {
 	struct mtx	sch_mtx;	/* protects this row (SCH_LOCK) */
 	TAILQ_HEAD(sch_head, syncache)	sch_bucket;	/* newest entry first */
 	struct callout	sch_timer;	/* runs syncache_timer() for the row */
 	int		sch_nextc;	/* earliest sc_rxttime in the row */
 	u_int		sch_length;	/* number of entries in sch_bucket */
 	/* NOTE(review): the next three are used by syncookie code only. */
 	u_int		sch_oddeven;
 	u_int32_t	sch_secbits_odd[SYNCOOKIE_SECRET_SIZE];
 	u_int32_t	sch_secbits_even[SYNCOOKIE_SECRET_SIZE];
 	u_int		sch_reseed;		/* time_uptime, seconds */
 };
 
 /*
  * Forward declarations for the helpers defined later in this file.
  * syncache_lookup() is the only one declared without static.
  */
 static void	 syncache_drop(struct syncache *, struct syncache_head *);
 static void	 syncache_free(struct syncache *);
 static void	 syncache_insert(struct syncache *, struct syncache_head *);
 struct syncache *syncache_lookup(struct in_conninfo *, struct syncache_head **);
 static int	 syncache_respond(struct syncache *);
 static struct	 socket *syncache_socket(struct syncache *, struct socket *,
 		    struct mbuf *m);
 static void	 syncache_timeout(struct syncache *sc, struct syncache_head *sch,
 		    int docallout);
 static void	 syncache_timer(void *);
 static void	 syncookie_generate(struct syncache_head *, struct syncache *,
 		    u_int32_t *);
 static struct syncache
 		*syncookie_lookup(struct in_conninfo *, struct syncache_head *,
 		    struct syncache *, struct tcpopt *, struct tcphdr *,
 		    struct socket *);
 
 /*
  * Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies.
  * 3 retransmits correspond to a timeout of 3 * (1 + 2 + 4 + 8) == 45 seconds;
  * the odds are that the user has given up attempting to connect by then.
  *
  * This is only the default for tcp_syncache.rexmt_limit.
  */
 #define SYNCACHE_MAXREXMTS		3
 
 /*
  * Arbitrary values: defaults for the number of hash rows and the number
  * of entries allowed per row; syncache_init() lets tunables override them.
  */
 #define TCP_SYNCACHE_HASHSIZE		512
 #define TCP_SYNCACHE_BUCKETLIMIT	30
 
 /*
  * Global syncache state: the hash table, the allocation zone for
  * entries and the limits set up in syncache_init().
  */
 struct tcp_syncache {
 	struct	syncache_head *hashbase;	/* array of hashsize rows */
 	uma_zone_t zone;		/* zone struct syncache comes from */
 	u_int	hashsize;		/* number of rows, a power of 2 */
 	u_int	hashmask;		/* hashsize - 1 */
 	u_int	bucket_limit;		/* max entries per row */
 	u_int	cache_count;		/* XXX: unprotected */
 	u_int	cache_limit;		/* max entries overall (zone max) */
 	u_int	rexmt_limit;		/* retransmits before an entry expires */
 	u_int	hash_secret;		/* random value mixed into the hash */
 };
 static struct tcp_syncache tcp_syncache;
 
 /*
  * net.inet.tcp.syncache.*: the table geometry and limits are exported
  * read-only (CTLFLAG_RDTUN / CTLFLAG_RD); rexmtlimit and
  * rst_on_sock_fail are writable at run time.
  */
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache");
 
 SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN,
      &tcp_syncache.bucket_limit, 0, "Per-bucket hash limit for syncache");
 
 SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
      &tcp_syncache.cache_limit, 0, "Overall entry limit for syncache");
 
 SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD,
      &tcp_syncache.cache_count, 0, "Current number of entries in syncache");
 
 SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
      &tcp_syncache.hashsize, 0, "Size of TCP syncache hashtable");
 
 SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW,
      &tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions");
 
 /* Not static: visible to other files. */
 int	tcp_sc_rst_sock_fail = 1;
 SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rst_on_sock_fail, CTLFLAG_RW,
      &tcp_sc_rst_sock_fail, 0, "Send reset on socket allocation failure");
 
 /* Malloc type used for the hash table itself. */
 static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache");
 
 /*
  * Row index for an IPv4 connection: the per-boot secret, the foreign
  * address (and its upper half) and both ports are XORed together and
  * reduced with mask.  Every macro argument is parenthesized so that a
  * compound expression can be passed safely.
  */
 #define SYNCACHE_HASH(inc, mask)					\
 	((tcp_syncache.hash_secret ^					\
 	  (inc)->inc_faddr.s_addr ^					\
 	  ((inc)->inc_faddr.s_addr >> 16) ^				\
 	  (inc)->inc_fport ^ (inc)->inc_lport) & (mask))
 
 /*
  * Row index for an IPv6 connection: as above, but mixing the first and
  * last 32-bit words of the foreign address.
  */
 #define SYNCACHE_HASH6(inc, mask)					\
 	((tcp_syncache.hash_secret ^					\
 	  (inc)->inc6_faddr.s6_addr32[0] ^				\
 	  (inc)->inc6_faddr.s6_addr32[3] ^				\
 	  (inc)->inc_fport ^ (inc)->inc_lport) & (mask))
 
 /* IPv4 endpoint comparison: both ports and both addresses must match. */
 #define ENDPTS_EQ(a, b) (						\
 	(a)->ie_fport == (b)->ie_fport &&				\
 	(a)->ie_lport == (b)->ie_lport &&				\
 	(a)->ie_faddr.s_addr == (b)->ie_faddr.s_addr &&			\
 	(a)->ie_laddr.s_addr == (b)->ie_laddr.s_addr			\
 )
 
 /* IPv6 endpoint comparison: bytewise over the whole object a points to. */
 #define ENDPTS6_EQ(a, b) (memcmp((a), (b), sizeof(*(a))) == 0)
 
 /* Per-row locking helpers. */
 #define	SCH_LOCK(sch)		mtx_lock(&(sch)->sch_mtx)
 #define	SCH_UNLOCK(sch)		mtx_unlock(&(sch)->sch_mtx)
 #define	SCH_LOCK_ASSERT(sch)	mtx_assert(&(sch)->sch_mtx, MA_OWNED)
 
 /*
  * Requires the syncache entry to be already removed from the bucket list.
  */
 static void
 syncache_free(struct syncache *sc)
 {
 	if (sc->sc_ipopts)
 		(void) m_free(sc->sc_ipopts);
 #ifdef MAC
 	mac_syncache_destroy(&sc->sc_label);
 #endif
 
 	uma_zfree(tcp_syncache.zone, sc);
 }
 
 void
 syncache_init(void)
 {
 	int i;
 
 	tcp_syncache.cache_count = 0;
 	tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
 	tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
 	tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
 	tcp_syncache.hash_secret = arc4random();
 
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize",
 	    &tcp_syncache.hashsize);
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit",
 	    &tcp_syncache.bucket_limit);
 	if (!powerof2(tcp_syncache.hashsize) || tcp_syncache.hashsize == 0) {
 		printf("WARNING: syncache hash size is not a power of 2.\n");
 		tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
 	}
 	tcp_syncache.hashmask = tcp_syncache.hashsize - 1;
 
 	/* Set limits. */
 	tcp_syncache.cache_limit =
 	    tcp_syncache.hashsize * tcp_syncache.bucket_limit;
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit",
 	    &tcp_syncache.cache_limit);
 
 	/* Allocate the hash table. */
 	MALLOC(tcp_syncache.hashbase, struct syncache_head *,
 	    tcp_syncache.hashsize * sizeof(struct syncache_head),
 	    M_SYNCACHE, M_WAITOK | M_ZERO);
 
 	/* Initialize the hash buckets. */
 	for (i = 0; i < tcp_syncache.hashsize; i++) {
 		TAILQ_INIT(&tcp_syncache.hashbase[i].sch_bucket);
 		mtx_init(&tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
 			 NULL, MTX_DEF);
 		callout_init_mtx(&tcp_syncache.hashbase[i].sch_timer,
 			 &tcp_syncache.hashbase[i].sch_mtx, 0);
 		tcp_syncache.hashbase[i].sch_length = 0;
 	}
 
 	/* Create the syncache entry zone. */
 	tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	uma_zone_set_max(tcp_syncache.zone, tcp_syncache.cache_limit);
 }
 
 /*
  * Add a syncache entry to the given bucket row and start its
  * retransmit timing.  Takes and releases the row lock itself.
  */
 static void
 syncache_insert(struct syncache *sc, struct syncache_head *sch)
 {
 	struct syncache *victim;
 
 	SCH_LOCK(sch);
 
 	/*
 	 * Enforce the per-row limit: when the row is full, the entry at
 	 * the tail (the one inserted longest ago) is dropped to make room.
 	 */
 	if (sch->sch_length >= tcp_syncache.bucket_limit) {
 		KASSERT(!TAILQ_EMPTY(&sch->sch_bucket),
 			("sch->sch_length incorrect"));
 		victim = TAILQ_LAST(&sch->sch_bucket, sch_head);
 		syncache_drop(victim, sch);
 		tcpstat.tcps_sc_bucketoverflow++;
 	}
 
 	/* New entries go to the head of the row. */
 	TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
 
 	/*
 	 * A row that just became non-empty has no meaningful deadline
 	 * yet; push it far out so the new entry's timeout wins.
 	 */
 	if (++sch->sch_length == 1)
 		sch->sch_nextc = ticks + INT_MAX;
 	syncache_timeout(sc, sch, 1);
 
 	SCH_UNLOCK(sch);
 
 	/* Global statistics are updated outside the row lock. */
 	tcp_syncache.cache_count++;
 	tcpstat.tcps_sc_added++;
 }
 
 /*
  * Remove and free entry from syncache bucket row.
  * Expects locked syncache head.
  */
 static void
 syncache_drop(struct syncache *sc, struct syncache_head *sch)
 {
 
 	SCH_LOCK_ASSERT(sch);
 
 	TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
 	sch->sch_length--;
 
 #ifndef TCP_OFFLOAD_DISABLE
 	if (sc->sc_tu)
 		sc->sc_tu->tu_syncache_event(TOE_SC_DROP, sc->sc_toepcb);
 #endif		    
 	syncache_free(sc);
 	tcp_syncache.cache_count--;
 }
 
 /*
  * Compute the next retransmit time for an entry, count the
  * (re)transmission, and pull the row's deadline forward if this entry
  * is now the earliest.  The row callout is only re-armed when
  * docallout is non-zero.
  */
 static void
 syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout)
 {
 	sc->sc_rxttime = ticks +
 		TCPTV_RTOBASE * (tcp_backoff[sc->sc_rxmits++]);
 
 	/* Some other entry in the row is still due first. */
 	if (!TSTMP_LT(sc->sc_rxttime, sch->sch_nextc))
 		return;
 
 	sch->sch_nextc = sc->sc_rxttime;
 	if (docallout)
 		callout_reset(&sch->sch_timer, sch->sch_nextc - ticks,
 		    syncache_timer, (void *)sch);
 }
 
 /*
  * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
  * If we have retransmitted an entry the maximum number of times, expire it.
  * One separate timer for each bucket row.
  *
  * xsch is the struct syncache_head of the row this callout belongs to.
  */
 static void
 syncache_timer(void *xsch)
 {
 	struct syncache_head *sch = (struct syncache_head *)xsch;
 	struct syncache *sc, *nsc;
 	int tick = ticks;	/* one snapshot used for the whole pass */
 	char *s;
 
 	/* NB: syncache_head has already been locked by the callout. */
 	SCH_LOCK_ASSERT(sch);
 
 	/*
 	 * In the following cycle we may remove some entries and/or
 	 * advance some timeouts, so re-initialize the bucket timer.
 	 */
 	sch->sch_nextc = tick + INT_MAX;
 
 	/* _SAFE variant: entries may be dropped while iterating. */
 	TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc) {
 		/*
 		 * We do not check if the listen socket still exists
 		 * and accept the case where the listen socket may be
 		 * gone by the time we resend the SYN/ACK.  We do
 		 * not expect this to happen often. If it does,
 		 * then the RST will be sent by the time the remote
 		 * host does the SYN/ACK->ACK.
 		 */
 		/* Not due yet: just track the earliest pending deadline. */
 		if (TSTMP_GT(sc->sc_rxttime, tick)) {
 			if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc))
 				sch->sch_nextc = sc->sc_rxttime;
 			continue;
 		}
 		/* Due, but out of retransmits: expire the entry. */
 		if (sc->sc_rxmits > tcp_syncache.rexmt_limit) {
 			if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: Retransmits exhausted, "
 				    "giving up and removing syncache entry\n",
 				    s, __func__);
 				free(s, M_TCPLOG);
 			}
 			syncache_drop(sc, sch);
 			tcpstat.tcps_sc_stale++;
 			continue;
 		}
 		/* Due with retransmits left: resend and reschedule. */
 		if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Response timeout, "
 			    "retransmitting (%u) SYN|ACK\n",
 			    s, __func__, sc->sc_rxmits);
 			free(s, M_TCPLOG);
 		}
 
 		(void) syncache_respond(sc);
 		tcpstat.tcps_sc_retransmitted++;
 		/* docallout == 0: the callout is re-armed once, below. */
 		syncache_timeout(sc, sch, 0);
 	}
 	/* Re-arm for the earliest remaining deadline, if any entry is left. */
 	if (!TAILQ_EMPTY(&(sch)->sch_bucket))
 		callout_reset(&(sch)->sch_timer, (sch)->sch_nextc - tick,
 			syncache_timer, (void *)(sch));
 }
 
 /*
  * Find an entry in the syncache.
  * Returns always with locked syncache_head plus a matching entry or NULL.
  *
  * inc selects the bucket row (via SYNCACHE_HASH or SYNCACHE_HASH6) and
  * supplies the endpoints to match.  *schp is set to that row before it
  * is locked; the caller is responsible for SCH_UNLOCK()ing it.
  */
 struct syncache *
 syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
 {
 	struct syncache *sc;
 	struct syncache_head *sch;
 
 #ifdef INET6
 	if (inc->inc_isipv6) {
 		sch = &tcp_syncache.hashbase[
 		    SYNCACHE_HASH6(inc, tcp_syncache.hashmask)];
 		*schp = sch;
 
 		SCH_LOCK(sch);
 
 		/* Circle through bucket row to find matching entry. */
 		TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
 			if (ENDPTS6_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie))
 				return (sc);
 		}
 	} else
 #endif
 	{
 		sch = &tcp_syncache.hashbase[
 		    SYNCACHE_HASH(inc, tcp_syncache.hashmask)];
 		*schp = sch;
 
 		SCH_LOCK(sch);
 
 		/* Circle through bucket row to find matching entry. */
 		TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
 #ifdef INET6
 			/* IPv4 and IPv6 entries share rows; skip the latter. */
 			if (sc->sc_inc.inc_isipv6)
 				continue;
 #endif
 			if (ENDPTS_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie))
 				return (sc);
 		}
 	}
 	SCH_LOCK_ASSERT(*schp);
 	return (NULL);			/* always returns with locked sch */
 }
 
 /*
  * This function is called when we get a RST for a
  * non-existent connection, so that we can see if the
  * connection is in the syn cache.  If it is, zap it.
  *
  * The entry is only dropped when the segment carries no ACK, SYN or FIN
  * flag and its sequence number lies within [sc_irs, sc_irs + sc_wnd];
  * every other case is counted in tcps_badrst and ignored.
  */
 void
 syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th)
 {
 	struct syncache *sc;
 	struct syncache_head *sch;
 	char *s = NULL;		/* log line, freed at done: if allocated */
 
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 
 	/*
 	 * Any RST to our SYN|ACK must not carry ACK, SYN or FIN flags.
 	 * See RFC 793 page 65, section SEGMENT ARRIVES.
 	 */
 	if (th->th_flags & (TH_ACK|TH_SYN|TH_FIN)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: Spurious RST with ACK, SYN or "
 			    "FIN flag set, segment ignored\n", s, __func__);
 		tcpstat.tcps_badrst++;
 		goto done;
 	}
 
 	/*
 	 * No corresponding connection was found in syncache.
 	 * If syncookies are enabled and possibly exclusively
 	 * used, or we are under memory pressure, a valid RST
 	 * may not find a syncache entry.  In that case we're
 	 * done and no SYN|ACK retransmissions will happen.
 	 * Otherwise the RST was misdirected or spoofed.
 	 */
 	if (sc == NULL) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: Spurious RST without matching "
 			    "syncache entry (possibly syncookie only), "
 			    "segment ignored\n", s, __func__);
 		tcpstat.tcps_badrst++;
 		goto done;
 	}
 
 	/*
 	 * If the RST bit is set, check the sequence number to see
 	 * if this is a valid reset segment.
 	 * RFC 793 page 37:
 	 *   In all states except SYN-SENT, all reset (RST) segments
 	 *   are validated by checking their SEQ-fields.  A reset is
 	 *   valid if its sequence number is in the window.
 	 *
 	 *   The sequence number in the reset segment is normally an
 	 *   echo of our outgoing acknowledgement numbers, but some hosts
 	 *   send a reset with the sequence number at the rightmost edge
 	 *   of our receive window, and we have to handle this case.
 	 *
 	 * NOTE(review): th_seq is used here without ntohl(), unlike in
 	 * syncache_unreach(); presumably the caller has already converted
 	 * the header to host byte order -- confirm.
 	 */
 	if (SEQ_GEQ(th->th_seq, sc->sc_irs) &&
 	    SEQ_LEQ(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
 		/* sc is freed here and must not be touched afterwards. */
 		syncache_drop(sc, sch);
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: Our SYN|ACK was rejected, "
 			    "connection attempt aborted by remote endpoint\n",
 			    s, __func__);
 		tcpstat.tcps_sc_reset++;
 	} else {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: RST with invalid SEQ %u != "
 			    "IRS %u (+WND %u), segment ignored\n",
 			    s, __func__, th->th_seq, sc->sc_irs, sc->sc_wnd);
 		tcpstat.tcps_badrst++;
 	}
 
 done:
 	if (s != NULL)
 		free(s, M_TCPLOG);
 	SCH_UNLOCK(sch);
 }
 
 /*
  * Drop the syncache entry matching inc, if there is one, and count it
  * as removed because of a bad ACK.
  */
 void
 syncache_badack(struct in_conninfo *inc)
 {
 	struct syncache_head *sch;
 	struct syncache *sc;
 
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 	if (sc == NULL) {
 		/* Nothing cached for this connection. */
 		SCH_UNLOCK(sch);
 		return;
 	}
 	syncache_drop(sc, sch);
 	tcpstat.tcps_sc_badack++;
 	SCH_UNLOCK(sch);
 }
 
 /*
  * Handle an unreachable indication for a connection that may be in the
  * syncache.  th supplies the sequence number that is checked against
  * the entry's ISS.
  */
 void
 syncache_unreach(struct in_conninfo *inc, struct tcphdr *th)
 {
 	struct syncache_head *sch;
 	struct syncache *sc;
 
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 
 	/*
 	 * Only act on an existing entry, and only when the sequence
 	 * number equals sc_iss; anything else is a bogus ICMP message.
 	 */
 	if (sc != NULL && ntohl(th->th_seq) == sc->sc_iss) {
 		/*
 		 * The entry is removed only if an earlier error was
 		 * already recorded and it has been retransmitted 3
 		 * times.  Until then the error is merely remembered, so
 		 * that a spurious network outage does not nuke the entry.
 		 *
 		 * See tcp_notify().
 		 */
 		if ((sc->sc_flags & SCF_UNREACH) != 0 &&
 		    sc->sc_rxmits >= 3 + 1) {
 			syncache_drop(sc, sch);
 			tcpstat.tcps_sc_unreach++;
 		} else
 			sc->sc_flags |= SCF_UNREACH;
 	}
 	SCH_UNLOCK(sch);
 }
 
 /*
  * Build a new TCP socket structure from a syncache entry.
  *
  * sc   the syncache entry describing the connection,
  * lso  the listening socket the new socket is derived from,
  * m    the received segment; may be NULL (it is only consulted for the
  *      MAC peer label and the IPv4 source route).
  *
  * Returns the new socket with its tcpcb in SYN_RECEIVED state, or NULL
  * if the socket could not be created or connected (a socket that was
  * already created is aborted).  The caller must hold the tcbinfo
  * write lock.
  */
 static struct socket *
 syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
 {
 	struct inpcb *inp = NULL;
 	struct socket *so;
 	struct tcpcb *tp;
 	char *s;
 
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 
 	/*
 	 * Ok, create the full blown connection, and set things up
 	 * as they would have been set up if we had created the
 	 * connection when the SYN arrived.  If we can't create
 	 * the connection, abort it.
 	 */
 	so = sonewconn(lso, SS_ISCONNECTED);
 	if (so == NULL) {
 		/*
 		 * Drop the connection; we will either send a RST or
 		 * have the peer retransmit its SYN again after its
 		 * RTO and try again.
 		 */
 		tcpstat.tcps_listendrop++;
 		if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Socket create failed "
 			    "due to limits or memory shortage\n",
 			    s, __func__);
 			free(s, M_TCPLOG);
 		}
 		goto abort2;
 	}
 #ifdef MAC
 	SOCK_LOCK(so);
 	mac_socketpeer_set_from_mbuf(m, so);
 	SOCK_UNLOCK(so);
 #endif
 
 	inp = sotoinpcb(so);
 	/* Carry the FIB number recorded in the entry over to pcb and socket. */
+	inp->inp_inc.inc_fibnum = sc->sc_inc.inc_fibnum;
+	so->so_fibnum = sc->sc_inc.inc_fibnum;
 	INP_WLOCK(inp);
 
 	/* Insert new socket into PCB hash list. */
 	inp->inp_inc.inc_isipv6 = sc->sc_inc.inc_isipv6;
 #ifdef INET6
 	if (sc->sc_inc.inc_isipv6) {
 		inp->in6p_laddr = sc->sc_inc.inc6_laddr;
 	} else {
 		inp->inp_vflag &= ~INP_IPV6;
 		inp->inp_vflag |= INP_IPV4;
 #endif
 		inp->inp_laddr = sc->sc_inc.inc_laddr;
 #ifdef INET6
 	}
 #endif
 	inp->inp_lport = sc->sc_inc.inc_lport;
 	if (in_pcbinshash(inp) != 0) {
 		/*
 		 * Undo the assignments above if we failed to
 		 * put the PCB on the hash lists.
 		 */
 #ifdef INET6
 		if (sc->sc_inc.inc_isipv6)
 			inp->in6p_laddr = in6addr_any;
 		else
 #endif
 			inp->inp_laddr.s_addr = INADDR_ANY;
 		inp->inp_lport = 0;
 		goto abort;
 	}
 #ifdef IPSEC
 	/* Copy old policy into new socket's. */
 	if (ipsec_copy_policy(sotoinpcb(lso)->inp_sp, inp->inp_sp))
 		printf("syncache_socket: could not copy policy\n");
 #endif
 #ifdef INET6
 	if (sc->sc_inc.inc_isipv6) {
 		struct inpcb *oinp = sotoinpcb(lso);
 		struct in6_addr laddr6;
 		struct sockaddr_in6 sin6;
 		/*
 		 * Inherit socket options from the listening socket.
 		 * Note that in6p_inputopts are not (and should not be)
 		 * copied, since it stores previously received options and is
 		 * used to detect if each new option is different than the
 		 * previous one and hence should be passed to a user.
 		 * If we copied in6p_inputopts, a user would not be able to
 		 * receive options just after calling the accept system call.
 		 */
 		inp->inp_flags |= oinp->inp_flags & INP_CONTROLOPTS;
 		if (oinp->in6p_outputopts)
 			inp->in6p_outputopts =
 			    ip6_copypktopts(oinp->in6p_outputopts, M_NOWAIT);
 
 		/* Connect the pcb to the peer recorded in the entry. */
 		sin6.sin6_family = AF_INET6;
 		sin6.sin6_len = sizeof(sin6);
 		sin6.sin6_addr = sc->sc_inc.inc6_faddr;
 		sin6.sin6_port = sc->sc_inc.inc_fport;
 		sin6.sin6_flowinfo = sin6.sin6_scope_id = 0;
 		/* Remember the local address so a failure can restore it. */
 		laddr6 = inp->in6p_laddr;
 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
 			inp->in6p_laddr = sc->sc_inc.inc6_laddr;
 		if (in6_pcbconnect(inp, (struct sockaddr *)&sin6,
 		    thread0.td_ucred)) {
 			inp->in6p_laddr = laddr6;
 			goto abort;
 		}
 		/* Override flowlabel from in6_pcbconnect. */
 		inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
 		inp->in6p_flowinfo |= sc->sc_flowlabel;
 	} else
 #endif
 	{
 		struct in_addr laddr;
 		struct sockaddr_in sin;
 
 		/* Prefer a source route from this segment, if there is one. */
 		inp->inp_options = (m) ? ip_srcroute(m) : NULL;
 		
 		/* Otherwise hand over the options saved in the entry. */
 		if (inp->inp_options == NULL) {
 			inp->inp_options = sc->sc_ipopts;
 			sc->sc_ipopts = NULL;
 		}
 
 		/* Connect the pcb to the peer recorded in the entry. */
 		sin.sin_family = AF_INET;
 		sin.sin_len = sizeof(sin);
 		sin.sin_addr = sc->sc_inc.inc_faddr;
 		sin.sin_port = sc->sc_inc.inc_fport;
 		bzero((caddr_t)sin.sin_zero, sizeof(sin.sin_zero));
 		/* Remember the local address so a failure can restore it. */
 		laddr = inp->inp_laddr;
 		if (inp->inp_laddr.s_addr == INADDR_ANY)
 			inp->inp_laddr = sc->sc_inc.inc_laddr;
 		if (in_pcbconnect(inp, (struct sockaddr *)&sin,
 		    thread0.td_ucred)) {
 			inp->inp_laddr = laddr;
 			goto abort;
 		}
 	}
 	/* Seed the tcpcb with the sequence state kept in the entry. */
 	tp = intotcpcb(inp);
 	tp->t_state = TCPS_SYN_RECEIVED;
 	tp->iss = sc->sc_iss;
 	tp->irs = sc->sc_irs;
 	tcp_rcvseqinit(tp);
 	tcp_sendseqinit(tp);
 	tp->snd_wl1 = sc->sc_irs;
 	tp->snd_max = tp->iss + 1;
 	tp->snd_nxt = tp->iss + 1;
 	tp->rcv_up = sc->sc_irs + 1;
 	tp->rcv_wnd = sc->sc_wnd;
 	tp->rcv_adv += tp->rcv_wnd;
 	tp->last_ack_sent = tp->rcv_nxt;
 
 	/* Only TF_NOPUSH and TF_NODELAY are inherited from the listener. */
 	tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH|TF_NODELAY);
 	/* Translate the entry's SCF_* flags into tcpcb flags. */
 	if (sc->sc_flags & SCF_NOOPT)
 		tp->t_flags |= TF_NOOPT;
 	else {
 		if (sc->sc_flags & SCF_WINSCALE) {
 			tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
 			tp->snd_scale = sc->sc_requested_s_scale;
 			tp->request_r_scale = sc->sc_requested_r_scale;
 		}
 		if (sc->sc_flags & SCF_TIMESTAMP) {
 			tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
 			tp->ts_recent = sc->sc_tsreflect;
 			tp->ts_recent_age = ticks;
 			tp->ts_offset = sc->sc_tsoff;
 		}
 #ifdef TCP_SIGNATURE
 		if (sc->sc_flags & SCF_SIGNATURE)
 			tp->t_flags |= TF_SIGNATURE;
 #endif
 		if (sc->sc_flags & SCF_SACK)
 			tp->t_flags |= TF_SACK_PERMIT;
 	}
 
 	/*
 	 * Set up MSS and get cached values from tcp_hostcache.
 	 * This might overwrite some of the defaults we just set.
 	 */
 	tcp_mss(tp, sc->sc_peer_mss);
 
 	/*
 	 * If the SYN,ACK was retransmitted, reset cwnd to 1 segment.
 	 */
 	if (sc->sc_rxmits)
 		tp->snd_cwnd = tp->t_maxseg;
 	tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
 
 	INP_WUNLOCK(inp);
 
 	tcpstat.tcps_accepts++;
 	return (so);
 
 	/* abort: inp is locked; abort2: no inpcb lock is held. */
 abort:
 	INP_WUNLOCK(inp);
 abort2:
 	if (so != NULL)
 		soabort(so);
 	return (NULL);
 }
 
 /*
  * This function gets called when we receive an ACK for a
  * socket in the LISTEN state.  We look up the connection
  * in the syncache, and if it's there, we pull it out of
  * the cache and turn it into a full-blown connection in
  * the SYN-RECEIVED state.
  */
 int
 syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
     struct socket **lsop, struct mbuf *m)
 {
 	struct syncache *sc;
 	struct syncache_head *sch;
 	struct syncache scs;
 	char *s;
 
 	/*
 	 * Global TCP locks are held because we manipulate the PCB lists
 	 * and create a new socket.
 	 */
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 	KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
 	    ("%s: can handle only ACK", __func__));
 
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 	if (sc == NULL) {
 		/*
 		 * There is no syncache entry, so see if this ACK is
 		 * a returning syncookie.  To do this, first:
 		 *  A. See if this socket has had a syncache entry dropped in
 		 *     the past.  We don't want to accept a bogus syncookie
 		 *     if we've never received a SYN.
 		 *  B. check that the syncookie is valid.  If it is, then
 		 *     cobble up a fake syncache entry, and return.
 		 */
 		if (!tcp_syncookies) {
 			SCH_UNLOCK(sch);
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Spurious ACK, "
 				    "segment rejected (syncookies disabled)\n",
 				    s, __func__);
 			goto failed;
 		}
 		bzero(&scs, sizeof(scs));
 		sc = syncookie_lookup(inc, sch, &scs, to, th, *lsop);
 		SCH_UNLOCK(sch);
 		if (sc == NULL) {
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Segment failed "
 				    "SYNCOOKIE authentication, segment rejected "
 				    "(probably spoofed)\n", s, __func__);
 			goto failed;
 		}
 	} else {
 		/* Pull out the entry to unlock the bucket row. */
 		TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
 		sch->sch_length--;
 		tcp_syncache.cache_count--;
 		SCH_UNLOCK(sch);
 	}
 
 	/*
 	 * Segment validation:
 	 * ACK must match our initial sequence number + 1 (the SYN|ACK).
 	 */
 	if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
 			    "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
 		goto failed;
 	}
 	/*
 	 * The SEQ must match the received initial receive sequence
 	 * number + 1 (the SYN) because we didn't ACK any data that
 	 * may have come with the SYN.
 	 */
 	if (th->th_seq != sc->sc_irs + 1 && !TOEPCB_ISSET(sc)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
 			    "rejected\n", s, __func__, th->th_seq, sc->sc_irs);
 		goto failed;
 	}
 
 	if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
 			    "segment rejected\n", s, __func__);
 		goto failed;
 	}
 	/*
 	 * If timestamps were negotiated the reflected timestamp
 	 * must be equal to what we actually sent in the SYN|ACK.
 	 */
 	if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts &&
 	    !TOEPCB_ISSET(sc)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
 			    "segment rejected\n",
 			    s, __func__, to->to_tsecr, sc->sc_ts);
 		goto failed;
 	}
 
 	*lsop = syncache_socket(sc, *lsop, m);
 
 	if (*lsop == NULL)
 		tcpstat.tcps_sc_aborted++;
 	else
 		tcpstat.tcps_sc_completed++;
 
+/* how do we find the inp for the new socket? */
 	if (sc != &scs)
 		syncache_free(sc);
 	return (1);
 failed:
 	if (sc != NULL && sc != &scs)
 		syncache_free(sc);
 	if (s != NULL)
 		free(s, M_TCPLOG);
 	*lsop = NULL;
 	return (0);
 }
 
 /*
  * Given a LISTEN socket and an inbound SYN request, add
  * this to the syn cache, and send back a segment:
  *	<SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
  * to the source.
  *
  * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN.
  * Doing so would require that we hold onto the data and deliver it
  * to the application.  However, if we are the target of a SYN-flood
  * DoS attack, an attacker could send data which would eventually
  * consume all available buffer space if it were ACKed.  By not ACKing
  * the data, we avoid this DoS scenario.
  *
  * Arguments:
  *	inc	connection identification of the received SYN
  *	to	TCP options parsed from the SYN
  *	th	TCP header of the SYN (SYN set, ACK and RST clear)
  *	inp	inpcb of the listen socket
  *	lsop	the listen socket; set to NULL on return when m != NULL
  *	m	the received SYN, or NULL; freed here when non-NULL
  *	tu	offload callbacks recorded in the entry, or NULL
  *	toepcb	offload state recorded in the entry, or NULL
  *
  * Locking: entered with the tcbinfo lock and the inp lock write-held;
  * both are released here, so inp must not be touched after the unlock.
  */
 static void
 _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
     struct inpcb *inp, struct socket **lsop, struct mbuf *m,
     struct toe_usrreqs *tu, void *toepcb)
 {
 	struct tcpcb *tp;
 	struct socket *so;
 	struct syncache *sc = NULL;
 	struct syncache_head *sch;
 	struct mbuf *ipopts = NULL;
 	u_int32_t flowtmp;
 	int win, sb_hiwat, ip_ttl, ip_tos, noopt;
 	char *s;
 #ifdef INET6
 	int autoflowlabel = 0;
 #endif
 #ifdef MAC
 	struct label *maclabel;
 #endif
 	struct syncache scs;
 
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 	INP_WLOCK_ASSERT(inp);			/* listen socket */
 	KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
 	    ("%s: unexpected tcp flags", __func__));
 
 	/*
 	 * Combine all so/tp operations very early to drop the INP lock as
 	 * soon as possible.
 	 */
 	so = *lsop;
 	tp = sototcpcb(so);
 
 #ifdef INET6
 	if (inc->inc_isipv6 &&
 	    (inp->in6p_flags & IN6P_AUTOFLOWLABEL))
 		autoflowlabel = 1;
 #endif
 	ip_ttl = inp->inp_ip_ttl;
 	ip_tos = inp->inp_ip_tos;
 	win = sbspace(&so->so_rcv);
 	sb_hiwat = so->so_rcv.sb_hiwat;
 	noopt = (tp->t_flags & TF_NOOPT);
 
 	/* Make accidental use after the unlock below fail loudly. */
 	so = NULL;
 	tp = NULL;
 
 #ifdef MAC
 	if (mac_syncache_init(&maclabel) != 0) {
 		INP_WUNLOCK(inp);
 		INP_INFO_WUNLOCK(&tcbinfo);
 		goto done;
 	} else
 		mac_syncache_create(maclabel, inp);
 #endif
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(&tcbinfo);
 
 	/*
 	 * Remember the IP options, if any.
 	 */
 #ifdef INET6
 	if (!inc->inc_isipv6)
 #endif
 		ipopts = (m) ? ip_srcroute(m) : NULL;
 
 	/*
 	 * See if we already have an entry for this connection.
 	 * If we do, resend the SYN,ACK, and reset the retransmit timer.
 	 *
 	 * XXX: should the syncache be re-initialized with the contents
 	 * of the new SYN here (which may have different options?)
 	 *
 	 * XXX: We do not check the sequence number to see if this is a
 	 * real retransmit or a new connection attempt.  The question is
 	 * how to handle such a case; either ignore it as spoofed, or
 	 * drop the current entry and create a new one?
 	 */
 	sc = syncache_lookup(inc, &sch);	/* returns locked entry */
 	SCH_LOCK_ASSERT(sch);
 	if (sc != NULL) {
 #ifndef TCP_OFFLOAD_DISABLE
 		if (sc->sc_tu)
 			sc->sc_tu->tu_syncache_event(TOE_SC_ENTRY_PRESENT,
 			    sc->sc_toepcb);
 #endif
 		tcpstat.tcps_sc_dupsyn++;
 		if (ipopts) {
 			/*
 			 * If we were remembering a previous source route,
 			 * forget it and use the new one we've been given.
 			 */
 			if (sc->sc_ipopts)
 				(void) m_free(sc->sc_ipopts);
 			sc->sc_ipopts = ipopts;
 		}
 		/*
 		 * Update timestamp if present.
 		 */
 		if ((sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS))
 			sc->sc_tsreflect = to->to_tsval;
 		else
 			sc->sc_flags &= ~SCF_TIMESTAMP;
 #ifdef MAC
 		/*
 		 * Since we have already unconditionally allocated label
 		 * storage, free it up.  The syncache entry will already
 		 * have an initialized label we can use.
 		 */
 		mac_syncache_destroy(&maclabel);
 		KASSERT(sc->sc_label != NULL,
 		    ("%s: label not initialized", __func__));
 #endif
 		/* Retransmit SYN|ACK and reset retransmit count. */
 		if ((s = tcp_log_addrs(&sc->sc_inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Received duplicate SYN, "
 			    "resetting timer and retransmitting SYN|ACK\n",
 			    s, __func__);
 			free(s, M_TCPLOG);
 		}
 		if (!TOEPCB_ISSET(sc) && syncache_respond(sc) == 0) {
 			sc->sc_rxmits = 0;
 			syncache_timeout(sc, sch, 1);
 			tcpstat.tcps_sndacks++;
 			tcpstat.tcps_sndtotal++;
 		}
 		SCH_UNLOCK(sch);
 		goto done;
 	}
 
 	sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO);
 	if (sc == NULL) {
 		/*
 		 * The zone allocator couldn't provide more entries.
 		 * Treat this as if the cache was full; drop the oldest
 		 * entry and insert the new one.
 		 */
 		tcpstat.tcps_sc_zonefail++;
 		if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL)
 			syncache_drop(sc, sch);
 		sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO);
 		if (sc == NULL) {
 			if (tcp_syncookies) {
 				/* Fall back to a cookie-only, on-stack entry. */
 				bzero(&scs, sizeof(scs));
 				sc = &scs;
 			} else {
 				/*
 				 * NOTE(review): with MAC compiled in, maclabel
 				 * does not appear to be released on this path
 				 * (sc is NULL at "done") -- confirm.
 				 */
 				SCH_UNLOCK(sch);
 				if (ipopts)
 					(void) m_free(ipopts);
 				goto done;
 			}
 		}
 	}
 
 	/*
 	 * Fill in the syncache values.
 	 */
 #ifdef MAC
 	sc->sc_label = maclabel;
 #endif
 	sc->sc_ipopts = ipopts;
 	/*
 	 * Copy the connection identification wholesale.  The copy covers
 	 * all of sc_inc, inc_fibnum included, so the FIB recorded in the
 	 * entry is the one the caller supplied in inc.  Do not store
 	 * inp->inp_inc.inc_fibnum here: the inp lock has been dropped,
 	 * and a store placed before this copy is simply overwritten.
 	 * NOTE(review): if callers do not fill in inc->inc_fibnum, sample
 	 * the listener's FIB next to ip_ttl/ip_tos above (lock held) and
 	 * apply it consistently with syncookie_lookup(), which hashes inc.
 	 */
 	bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
 #ifdef INET6
 	if (!inc->inc_isipv6)
 #endif
 	{
 		sc->sc_ip_tos = ip_tos;
 		sc->sc_ip_ttl = ip_ttl;
 	}
 #ifndef TCP_OFFLOAD_DISABLE
 	sc->sc_tu = tu;
 	sc->sc_toepcb = toepcb;
 #endif
 	sc->sc_irs = th->th_seq;
 	sc->sc_iss = arc4random();
 	sc->sc_flags = 0;
 	sc->sc_flowlabel = 0;
 
 	/*
 	 * Initial receive window: clip sbspace to [0 .. TCP_MAXWIN].
 	 * win was derived from socket earlier in the function.
 	 */
 	win = imax(win, 0);
 	win = imin(win, TCP_MAXWIN);
 	sc->sc_wnd = win;
 
 	if (tcp_do_rfc1323) {
 		/*
 		 * A timestamp received in a SYN makes
 		 * it ok to send timestamp requests and replies.
 		 */
 		if (to->to_flags & TOF_TS) {
 			sc->sc_tsreflect = to->to_tsval;
 			sc->sc_ts = ticks;
 			sc->sc_flags |= SCF_TIMESTAMP;
 		}
 		if (to->to_flags & TOF_SCALE) {
 			int wscale = 0;
 
 			/*
 			 * Pick the smallest possible scaling factor that
 			 * will still allow us to scale up to sb_max, aka
 			 * kern.ipc.maxsockbuf.
 			 *
 			 * We do this because there are broken firewalls that
 			 * will corrupt the window scale option, leading to
 			 * the other endpoint believing that our advertised
 			 * window is unscaled.  At scale factors larger than
 			 * 5 the unscaled window will drop below 1500 bytes,
 			 * leading to serious problems when traversing these
 			 * broken firewalls.
 			 *
 			 * With the default maxsockbuf of 256K, a scale factor
 			 * of 3 will be chosen by this algorithm.  Those who
 			 * choose a larger maxsockbuf should watch out
 			 * for the compatibility problems mentioned above.
 			 *
 			 * RFC1323: The Window field in a SYN (i.e., a <SYN>
 			 * or <SYN,ACK>) segment itself is never scaled.
 			 */
 			while (wscale < TCP_MAX_WINSHIFT &&
 			    (TCP_MAXWIN << wscale) < sb_max)
 				wscale++;
 			sc->sc_requested_r_scale = wscale;
 			sc->sc_requested_s_scale = to->to_wscale;
 			sc->sc_flags |= SCF_WINSCALE;
 		}
 	}
 #ifdef TCP_SIGNATURE
 	/*
 	 * If listening socket requested TCP digests, and received SYN
 	 * contains the option, flag this in the syncache so that
 	 * syncache_respond() will do the right thing with the SYN+ACK.
 	 * XXX: Currently we always record the option by default and will
 	 * attempt to use it in syncache_respond().
 	 */
 	if (to->to_flags & TOF_SIGNATURE)
 		sc->sc_flags |= SCF_SIGNATURE;
 #endif
 	if (to->to_flags & TOF_SACKPERM)
 		sc->sc_flags |= SCF_SACK;
 	if (to->to_flags & TOF_MSS)
 		sc->sc_peer_mss = to->to_mss;	/* peer mss may be zero */
 	if (noopt)
 		sc->sc_flags |= SCF_NOOPT;
 
 	if (tcp_syncookies) {
 		/* Replaces the random sc_iss chosen above with a cookie. */
 		syncookie_generate(sch, sc, &flowtmp);
 #ifdef INET6
 		if (autoflowlabel)
 			sc->sc_flowlabel = flowtmp;
 #endif
 	} else {
 #ifdef INET6
 		if (autoflowlabel)
 			sc->sc_flowlabel =
 			    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
 #endif
 	}
 	SCH_UNLOCK(sch);
 
 	/*
 	 * Do a standard 3-way handshake.
 	 */
 	if (TOEPCB_ISSET(sc) || syncache_respond(sc) == 0) {
 		if (tcp_syncookies && tcp_syncookiesonly && sc != &scs)
 			syncache_free(sc);
 		else if (sc != &scs)
 			syncache_insert(sc, sch);   /* locks and unlocks sch */
 		tcpstat.tcps_sndacks++;
 		tcpstat.tcps_sndtotal++;
 	} else {
 		if (sc != &scs)
 			syncache_free(sc);
 		tcpstat.tcps_sc_dropped++;
 	}
 
 done:
 #ifdef MAC
 	/* The on-stack entry is never freed, so release its label here. */
 	if (sc == &scs)
 		mac_syncache_destroy(&maclabel);
 #endif
 	if (m) {
 		*lsop = NULL;
 		m_freem(m);
 	}
 	return;
 }
 
 static int
 syncache_respond(struct syncache *sc)
 {
 	struct ip *ip = NULL;
 	struct mbuf *m;
 	struct tcphdr *th;
 	int optlen, error;
 	u_int16_t hlen, tlen, mssopt;
 	struct tcpopt to;
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 #endif
 
 	hlen =
 #ifdef INET6
 	       (sc->sc_inc.inc_isipv6) ? sizeof(struct ip6_hdr) :
 #endif
 		sizeof(struct ip);
 	tlen = hlen + sizeof(struct tcphdr);
 
 	/* Determine MSS we advertize to other end of connection. */
 	mssopt = tcp_mssopt(&sc->sc_inc);
 	if (sc->sc_peer_mss)
 		mssopt = max( min(sc->sc_peer_mss, mssopt), tcp_minmss);
 
 	/* XXX: Assume that the entire packet will fit in a header mbuf. */
 	KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN,
 	    ("syncache: mbuf too small"));
 
 	/* Create the IP+TCP header from scratch. */
 	m = m_gethdr(M_DONTWAIT, MT_DATA);
 	if (m == NULL)
 		return (ENOBUFS);
 #ifdef MAC
 	mac_syncache_create_mbuf(sc->sc_label, m);
 #endif
 	m->m_data += max_linkhdr;
 	m->m_len = tlen;
 	m->m_pkthdr.len = tlen;
 	m->m_pkthdr.rcvif = NULL;
 
 #ifdef INET6
 	if (sc->sc_inc.inc_isipv6) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		ip6->ip6_vfc = IPV6_VERSION;
 		ip6->ip6_nxt = IPPROTO_TCP;
 		ip6->ip6_src = sc->sc_inc.inc6_laddr;
 		ip6->ip6_dst = sc->sc_inc.inc6_faddr;
 		ip6->ip6_plen = htons(tlen - hlen);
 		/* ip6_hlim is set after checksum */
 		ip6->ip6_flow &= ~IPV6_FLOWLABEL_MASK;
 		ip6->ip6_flow |= sc->sc_flowlabel;
 
 		th = (struct tcphdr *)(ip6 + 1);
 	} else
 #endif
 	{
 		ip = mtod(m, struct ip *);
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = sizeof(struct ip) >> 2;
 		ip->ip_len = tlen;
 		ip->ip_id = 0;
 		ip->ip_off = 0;
 		ip->ip_sum = 0;
 		ip->ip_p = IPPROTO_TCP;
 		ip->ip_src = sc->sc_inc.inc_laddr;
 		ip->ip_dst = sc->sc_inc.inc_faddr;
 		ip->ip_ttl = sc->sc_ip_ttl;
 		ip->ip_tos = sc->sc_ip_tos;
 
 		/*
 		 * See if we should do MTU discovery.  Route lookups are
 		 * expensive, so we will only unset the DF bit if:
 		 *
 		 *	1) path_mtu_discovery is disabled
 		 *	2) the SCF_UNREACH flag has been set
 		 */
 		if (path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
 		       ip->ip_off |= IP_DF;
 
 		th = (struct tcphdr *)(ip + 1);
 	}
 	th->th_sport = sc->sc_inc.inc_lport;
 	th->th_dport = sc->sc_inc.inc_fport;
 
 	th->th_seq = htonl(sc->sc_iss);
 	th->th_ack = htonl(sc->sc_irs + 1);
 	th->th_off = sizeof(struct tcphdr) >> 2;
 	th->th_x2 = 0;
 	th->th_flags = TH_SYN|TH_ACK;
 	th->th_win = htons(sc->sc_wnd);
 	th->th_urp = 0;
 
 	/* Tack on the TCP options. */
 	if ((sc->sc_flags & SCF_NOOPT) == 0) {
 		to.to_flags = 0;
 
 		to.to_mss = mssopt;
 		to.to_flags = TOF_MSS;
 		if (sc->sc_flags & SCF_WINSCALE) {
 			to.to_wscale = sc->sc_requested_r_scale;
 			to.to_flags |= TOF_SCALE;
 		}
 		if (sc->sc_flags & SCF_TIMESTAMP) {
 			/* Virgin timestamp or TCP cookie enhanced one. */
 			to.to_tsval = sc->sc_ts;
 			to.to_tsecr = sc->sc_tsreflect;
 			to.to_flags |= TOF_TS;
 		}
 		if (sc->sc_flags & SCF_SACK)
 			to.to_flags |= TOF_SACKPERM;
 #ifdef TCP_SIGNATURE
 		if (sc->sc_flags & SCF_SIGNATURE)
 			to.to_flags |= TOF_SIGNATURE;
 #endif
 		optlen = tcp_addoptions(&to, (u_char *)(th + 1));
 
 		/* Adjust headers by option size. */
 		th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
 		m->m_len += optlen;
 		m->m_pkthdr.len += optlen;
 
 #ifdef TCP_SIGNATURE
 		if (sc->sc_flags & SCF_SIGNATURE)
 			tcp_signature_compute(m, sizeof(struct ip), 0, optlen,
 			    to.to_signature, IPSEC_DIR_OUTBOUND);
 #endif
 #ifdef INET6
 		if (sc->sc_inc.inc_isipv6)
 			ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen);
 		else
 #endif
 			ip->ip_len += optlen;
 	} else
 		optlen = 0;
 
 #ifdef INET6
 	if (sc->sc_inc.inc_isipv6) {
 		th->th_sum = 0;
 		th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen,
 				       tlen + optlen - hlen);
 		ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
 		error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
 	} else
 #endif
 	{
 		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 		    htons(tlen + optlen - hlen + IPPROTO_TCP));
 		m->m_pkthdr.csum_flags = CSUM_TCP;
 		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 		error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
 	}
 	return (error);
 }
 
 /*
  * Add an inbound SYN to the syncache.  Forwards to _syncache_add() with
  * no offload callbacks and no offload state (tu and toepcb are NULL).
  */
 void
 syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
     struct inpcb *inp, struct socket **lsop, struct mbuf *m)
 {
 	_syncache_add(inc, to, th, inp, lsop, m, NULL, NULL);
 }
 
 /*
  * Offload variant of syncache_add(): there is no mbuf (m is passed as
  * NULL) and the offload callbacks and state are recorded in the entry.
  * The tcbinfo and inp locks are taken here; _syncache_add() is the one
  * that releases them.
  */
 void
 syncache_offload_add(struct in_conninfo *inc, struct tcpopt *to,
     struct tcphdr *th, struct inpcb *inp, struct socket **lsop,
     struct toe_usrreqs *tu, void *toepcb)
 {
 
 	INP_INFO_WLOCK(&tcbinfo);
 	INP_WLOCK(inp);
 	_syncache_add(inc, to, th, inp, lsop, NULL, tu, toepcb);
 }
 
 /*
  * The purpose of SYN cookies is to avoid keeping track of all SYN's we
  * receive and to be able to handle SYN floods from bogus source addresses
  * (where we will never receive any reply).  SYN floods try to exhaust all
  * our memory and available slots in the SYN cache table to cause a denial
  * of service to legitimate users of the local host.
  *
  * The idea of SYN cookies is to encode and include all necessary information
  * about the connection setup state within the SYN-ACK we send back and thus
  * to get along without keeping any local state until the ACK to the SYN-ACK
  * arrives (if ever).  Everything we need to know should be available from
  * the information we encoded in the SYN-ACK.
  *
  * More information about the theory behind SYN cookies and its first
  * discussion and specification can be found at:
  *  http://cr.yp.to/syncookies.html    (overview)
  *  http://cr.yp.to/syncookies/archive (gory details)
  *
  * This implementation extends the original idea and first implementation
  * of FreeBSD by using not only the initial sequence number field to store
  * information but also the timestamp field if present.  This way we can
  * keep track of the entire state we need to know to recreate the session in
  * its original form.  Almost all TCP speakers implement RFC1323 timestamps
  * these days.  For those that do not we still have to live with the known
  * shortcomings of the ISN only SYN cookies.
  *
  * Cookie layers:
  *
  * Initial sequence number we send:
  * 31|................................|0
  *    DDDDDDDDDDDDDDDDDDDDDDDDDMMMRRRP
  *    D = MD5 Digest (first dword)
  *    M = MSS index
  *    R = Rotation of secret
  *    P = Odd or Even secret
  *
  * The MD5 Digest is computed over the following parameters:
  *  a) randomly rotated secret
  *  b) struct in_conninfo containing the remote/local ip/port (IPv4&IPv6)
  *  c) the received initial sequence number from remote host
  *  d) the rotation offset and odd/even bit
  *
  * Timestamp we send:
  * 31|................................|0
  *    DDDDDDDDDDDDDDDDDDDDDDSSSSRRRRA5
  *    D = MD5 Digest (third dword) (only as filler)
  *    S = Requested send window scale
  *    R = Requested receive window scale
  *    A = SACK allowed
  *    5 = TCP-MD5 enabled (not implemented yet)
  *    XORed with MD5 Digest (fourth dword)
  *
  * The timestamp isn't cryptographically secure and doesn't need to be.
  * The double use of the MD5 digest dwords ties it to a specific remote/
  * local host/port, remote initial sequence number and our local time
  * limited secret.  A received timestamp is reverted (XORed) and then
  * the contained MD5 dword is compared to the computed one to ensure the
  * timestamp belongs to the SYN-ACK we sent.  The other parameters may
  * have been tampered with but this isn't different from supplying bogus
  * values in the SYN in the first place.
  *
  * Some problems with SYN cookies remain however:
  * Consider the problem of a recreated (and retransmitted) cookie.  If the
  * original SYN was accepted, the connection is established.  The second
  * SYN is inflight, and if it arrives with an ISN that falls within the
  * receive window, the connection is killed.
  *
  * Notes:
  * A heuristic to determine when to accept syn cookies is not necessary.
  * An ACK flood would cause the syncookie verification to be attempted,
  * but a SYN flood causes syncookies to be generated.  Both are of equal
  * cost, so there's no point in trying to optimize the ACK flood case.
  * Also, if you don't process certain ACKs for some reason, then all someone
  * would have to do is launch a SYN and ACK flood at the same time, which
  * would stop cookie verification and defeat the entire purpose of syncookies.
  */
 /*
  * MSS values a syncookie can represent.  syncookie_generate() encodes the
  * index of the largest entry not exceeding the MSS in a 3-bit field of
  * the ISN; syncookie_lookup() maps that index back to an MSS.
  */
 static int tcp_sc_msstab[] = { 0, 256, 468, 536, 996, 1452, 1460, 8960 };
 
 /*
  * Compute the syncookie for a syncache entry.  The cookie replaces
  * sc->sc_iss and, when the entry has SCF_TIMESTAMP set, sc->sc_ts and
  * sc->sc_tsoff as well.  With INET6 a flow label derived from the digest
  * is returned through flowlabel.  The bucket lock must be held; the
  * bucket's secret is re-seeded here when it has expired.
  */
 static void
 syncookie_generate(struct syncache_head *sch, struct syncache *sc,
     u_int32_t *flowlabel)
 {
 	MD5_CTX md5;
 	u_int32_t digest[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
 	u_int32_t *secret;
 	u_int32_t bits;
 	u_int rot, peer_mss, idx;
 	int k;
 
 	SCH_LOCK_ASSERT(sch);
 
 	if (sch->sch_reseed < time_uptime) {
 		/* Seed too old: flip to the other secret and refill it. */
 		sch->sch_oddeven = sch->sch_oddeven ? 0 : 1;
 		secret = sch->sch_oddeven ?
 		    sch->sch_secbits_odd : sch->sch_secbits_even;
 		for (k = 0; k < SYNCOOKIE_SECRET_SIZE; k++)
 			secret[k] = arc4random();
 		sch->sch_reseed = time_uptime + SYNCOOKIE_LIFETIME;
 	} else {
 		/* Keep using the currently selected secret. */
 		secret = sch->sch_oddeven ?
 		    sch->sch_secbits_odd : sch->sch_secbits_even;
 	}
 
 	/* Rotation offset into the secret, taken from the low ISS bits. */
 	rot = sc->sc_iss & 0x7;
 
 	/* Largest table entry that does not exceed the MSS to encode. */
 	peer_mss = max( min(sc->sc_peer_mss, tcp_mssopt(&sc->sc_inc)),
 	    tcp_minmss);
 	idx = sizeof(tcp_sc_msstab) / sizeof(tcp_sc_msstab[0]) - 1;
 	while (idx > 0 && tcp_sc_msstab[idx] > peer_mss)
 		idx--;
 
 	/*
 	 * Low seven bits of the ISN: bit 0 selects the secret, bits 1-3
 	 * hold the rotation offset, bits 4-6 the MSS table index.
 	 */
 	bits = sch->sch_oddeven;
 	bits |= rot << 1;
 	bits |= idx << 4;
 
 	/* Digest over rotated secret, connection info, IRS and the bits. */
 	MD5Init(&md5);
 	MD5Update(&md5, ((u_int8_t *)secret) + rot,
 	    SYNCOOKIE_SECRET_SIZE * sizeof(*secret) - rot);
 	MD5Update(&md5, secret, rot);
 	MD5Update(&md5, &sc->sc_inc, sizeof(sc->sc_inc));
 	MD5Update(&md5, &sc->sc_irs, sizeof(sc->sc_irs));
 	MD5Update(&md5, &bits, sizeof(bits));
 	MD5Final((u_int8_t *)&digest, &md5);
 
 	/* The remaining ISN bits come from the first digest word. */
 	bits |= (digest[0] << 7);
 	sc->sc_iss = bits;
 
 #ifdef INET6
 	*flowlabel = digest[1] & IPV6_FLOWLABEL_MASK;
 #endif
 
 	/* With timestamps, further parameters ride in the timestamp. */
 	if (sc->sc_flags & SCF_TIMESTAMP) {
 		bits = (sc->sc_flags & SCF_SIGNATURE) ? 1 : 0;	/* bit 0 */
 		bits |= ((sc->sc_flags & SCF_SACK) ? 1 : 0) << 1; /* bit 1 */
 		bits |= sc->sc_requested_s_scale << 2;	/* send scale */
 		bits |= sc->sc_requested_r_scale << 6;	/* receive scale */
 		bits |= digest[2] << 10;		/* digest filler */
 		bits ^= digest[3];
 		sc->sc_ts = bits;
 		sc->sc_tsoff = bits - ticks;		/* uses XORed value */
 	}
 
 	tcpstat.tcps_sc_sendcookie++;
 }
 
 static struct syncache *
 syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, 
     struct syncache *sc, struct tcpopt *to, struct tcphdr *th,
     struct socket *so)
 {
 	MD5_CTX ctx;
 	u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
 	u_int32_t data = 0;
 	u_int32_t *secbits;
 	tcp_seq ack, seq;
 	int off, mss, wnd, flags;
 
 	SCH_LOCK_ASSERT(sch);
 
 	/*
 	 * Pull information out of SYN-ACK/ACK and
 	 * revert sequence number advances.
 	 */
 	ack = th->th_ack - 1;
 	seq = th->th_seq - 1;
 	off = (ack >> 1) & 0x7;
 	mss = (ack >> 4) & 0x7;
 	flags = ack & 0x7f;
 
 	/* Which of the two secrets to use. */
 	secbits = (flags & 0x1) ? sch->sch_secbits_odd : sch->sch_secbits_even;
 
 	/*
 	 * The secret wasn't updated for the lifetime of a syncookie,
 	 * so this SYN-ACK/ACK is either too old (replay) or totally bogus.
 	 */
 	if (sch->sch_reseed < time_uptime) {
 		return (NULL);
 	}
 
 	/* Recompute the digest so we can compare it. */
 	MD5Init(&ctx);
 	MD5Update(&ctx, ((u_int8_t *)secbits) + off,
 	    SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
 	MD5Update(&ctx, secbits, off);
 	MD5Update(&ctx, inc, sizeof(*inc));
 	MD5Update(&ctx, &seq, sizeof(seq));
 	MD5Update(&ctx, &flags, sizeof(flags));
 	MD5Final((u_int8_t *)&md5_buffer, &ctx);
 
 	/* Does the digest part of or ACK'ed ISS match? */
 	if ((ack & (~0x7f)) != (md5_buffer[0] << 7))
 		return (NULL);
 
 	/* Does the digest part of our reflected timestamp match? */
 	if (to->to_flags & TOF_TS) {
 		data = md5_buffer[3] ^ to->to_tsecr;
 		if ((data & (~0x3ff)) != (md5_buffer[2] << 10))
 			return (NULL);
 	}
 
 	/* Fill in the syncache values. */
 	bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
 	sc->sc_ipopts = NULL;
 	
 	sc->sc_irs = seq;
 	sc->sc_iss = ack;
 
 #ifdef INET6
 	if (inc->inc_isipv6) {
 		if (sotoinpcb(so)->in6p_flags & IN6P_AUTOFLOWLABEL)
 			sc->sc_flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
 	} else
 #endif
 	{
 		sc->sc_ip_ttl = sotoinpcb(so)->inp_ip_ttl;
 		sc->sc_ip_tos = sotoinpcb(so)->inp_ip_tos;
 	}
 
 	/* Additional parameters that were encoded in the timestamp. */
 	if (data) {
 		sc->sc_flags |= SCF_TIMESTAMP;
 		sc->sc_tsreflect = to->to_tsval;
 		sc->sc_ts = to->to_tsecr;
 		sc->sc_tsoff = to->to_tsecr - ticks;
 		sc->sc_flags |= (data & 0x1) ? SCF_SIGNATURE : 0;
 		sc->sc_flags |= ((data >> 1) & 0x1) ? SCF_SACK : 0;
 		sc->sc_requested_s_scale = min((data >> 2) & 0xf,
 		    TCP_MAX_WINSHIFT);
 		sc->sc_requested_r_scale = min((data >> 6) & 0xf,
 		    TCP_MAX_WINSHIFT);
 		if (sc->sc_requested_s_scale || sc->sc_requested_r_scale)
 			sc->sc_flags |= SCF_WINSCALE;
 	} else
 		sc->sc_flags |= SCF_NOOPT;
 
 	wnd = sbspace(&so->so_rcv);
 	wnd = imax(wnd, 0);
 	wnd = imin(wnd, TCP_MAXWIN);
 	sc->sc_wnd = wnd;
 
 	sc->sc_rxmits = 0;
 	sc->sc_peer_mss = tcp_sc_msstab[mss];
 
 	tcpstat.tcps_sc_recvcookie++;
 	return (sc);
 }
 
 /*
  * Report how many entries the syncache holds right now by summing the
  * per-bucket lengths.  The buckets are read without locking, so the
  * figure may already be stale by the time syncache_pcblist is called.
  */
 
 int
 syncache_pcbcount(void)
 {
 	int bucket, total = 0;
 
 	/* Plain reads only; no bucket lock is taken. */
 	for (bucket = 0; bucket < tcp_syncache.hashsize; bucket++)
 		total += tcp_syncache.hashbase[bucket].sch_length;
 	return total;
 }
 
 /*
  * Exports the syncache entries to userland so that netstat can display
  * them alongside the other sockets.  This function is intended to be
  * called only from tcp_pcblist.
  *
  * Due to concurrency on an active system, the number of pcbs exported
  * may have no relation to max_pcbs.  max_pcbs merely indicates the
  * amount of space the caller allocated for this function to use.
  */
 int
 syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported)
 {
 	struct xtcpcb xt;
 	struct syncache *sc;
 	struct syncache_head *sch;
 	int count, error, i;
 
 	for (count = 0, error = 0, i = 0; i < tcp_syncache.hashsize; i++) {
 		sch = &tcp_syncache.hashbase[i];
 		SCH_LOCK(sch);
 		TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
 			if (count >= max_pcbs) {
 				SCH_UNLOCK(sch);
 				goto exit;
 			}
 			bzero(&xt, sizeof(xt));
 			xt.xt_len = sizeof(xt);
 			if (sc->sc_inc.inc_isipv6)
 				xt.xt_inp.inp_vflag = INP_IPV6;
 			else
 				xt.xt_inp.inp_vflag = INP_IPV4;
 			bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc, sizeof (struct in_conninfo));
 			xt.xt_tp.t_inpcb = &xt.xt_inp;
 			xt.xt_tp.t_state = TCPS_SYN_RECEIVED;
 			xt.xt_socket.xso_protocol = IPPROTO_TCP;
 			xt.xt_socket.xso_len = sizeof (struct xsocket);
 			xt.xt_socket.so_type = SOCK_STREAM;
 			xt.xt_socket.so_state = SS_ISCONNECTING;
 			error = SYSCTL_OUT(req, &xt, sizeof xt);
 			if (error) {
 				SCH_UNLOCK(sch);
 				goto exit;
 			}
 			count++;
 		}
 		SCH_UNLOCK(sch);
 	}
 exit:
 	*pcbs_exported = count;
 	return error;
 }
Index: head/sys/netinet6/in6.c
===================================================================
--- head/sys/netinet6/in6.c	(revision 178887)
+++ head/sys/netinet6/in6.c	(revision 178888)
@@ -1,2314 +1,2315 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in.c	8.2 (Berkeley) 11/15/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/systm.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/if_dl.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/mld6_var.h>
 #include <netinet6/ip6_mroute.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/in6_pcb.h>
 
 MALLOC_DEFINE(M_IP6MADDR, "in6_multi", "internet multicast address");
 
 /*
  * Definitions of some constant IP6 addresses.
  */
 const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
 const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
 const struct in6_addr in6addr_nodelocal_allnodes =
 	IN6ADDR_NODELOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allnodes =
 	IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allrouters =
 	IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
 
 /*
  * Constant masks.  In this file in6mask128 is used as an all-ones
  * netmask for host routes and in6mask32 as a 32-bit netmask.
  */
 const struct in6_addr in6mask0 = IN6MASK0;
 const struct in6_addr in6mask32 = IN6MASK32;
 const struct in6_addr in6mask64 = IN6MASK64;
 const struct in6_addr in6mask96 = IN6MASK96;
 const struct in6_addr in6mask128 = IN6MASK128;
 
 /* Constant AF_INET6 socket address carrying the unspecified address. */
 const struct sockaddr_in6 sa6_any =
 	{ sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 };
 
 /* Forward declarations of the file-local helpers. */
 static int in6_lifaddr_ioctl __P((struct socket *, u_long, caddr_t,
 	struct ifnet *, struct thread *));
 static int in6_ifinit __P((struct ifnet *, struct in6_ifaddr *,
 	struct sockaddr_in6 *, int));
 static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *);
 
 struct in6_multihead in6_multihead;	/* XXX BSS initialization */
 /*
  * Function-pointer hook; not assigned in this part of the file.
  * NOTE(review): presumably installed by the faith interface code - confirm.
  */
 int	(*faithprefix_p)(struct in6_addr *);
 
 /*
  * Subroutine for in6_ifaddloop() and in6_ifremloop().
  * This routine does actual work.
  */
 static void
 in6_ifloop_request(int cmd, struct ifaddr *ifa)
 {
 	struct sockaddr_in6 all1_sa;
 	struct rtentry *nrt = NULL;
 	int e;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	bzero(&all1_sa, sizeof(all1_sa));
 	all1_sa.sin6_family = AF_INET6;
 	all1_sa.sin6_len = sizeof(struct sockaddr_in6);
 	all1_sa.sin6_addr = in6mask128;
 
 	/*
 	 * We specify the address itself as the gateway, and set the
 	 * RTF_LLINFO flag, so that the corresponding host route would have
 	 * the flag, and thus applications that assume traditional behavior
 	 * would be happy.  Note that we assume the caller of the function
 	 * (probably implicitly) set nd6_rtrequest() to ifa->ifa_rtrequest,
 	 * which changes the outgoing interface to the loopback interface.
 	 */
 	e = rtrequest(cmd, ifa->ifa_addr, ifa->ifa_addr,
 	    (struct sockaddr *)&all1_sa, RTF_UP|RTF_HOST|RTF_LLINFO, &nrt);
 	if (e != 0) {
 		/* XXX need more descriptive message */
 
 		log(LOG_ERR, "in6_ifloop_request: "
 		    "%s operation failed for %s (errno=%d)\n",
 		    cmd == RTM_ADD ? "ADD" : "DELETE",
 		    ip6_sprintf(ip6buf,
 			    &((struct in6_ifaddr *)ifa)->ia_addr.sin6_addr), e);
 	}
 
 	/*
 	 * Report the addition/removal of the address to the routing socket.
 	 * XXX: since we called rtinit for a p2p interface with a destination,
 	 *      we end up reporting twice in such a case.  Should we rather
 	 *      omit the second report?
 	 */
 	if (nrt) {
 		RT_LOCK(nrt);
 		/*
 		 * Make sure rt_ifa be equal to IFA, the second argument of
 		 * the function.  We need this because when we refer to
 		 * rt_ifa->ia6_flags in ip6_input, we assume that the rt_ifa
 		 * points to the address instead of the loopback address.
 		 */
 		if (cmd == RTM_ADD && ifa != nrt->rt_ifa) {
 			IFAFREE(nrt->rt_ifa);
 			IFAREF(ifa);
 			nrt->rt_ifa = ifa;
 		}
 
 		rt_newaddrmsg(cmd, ifa, e, nrt);
 		if (cmd == RTM_DELETE)
 			RTFREE_LOCKED(nrt);
 		else {
 			/* the cmd must be RTM_ADD here */
 			RT_REMREF(nrt);
 			RT_UNLOCK(nrt);
 		}
 	}
 }
 
 /*
  * Install a loopback host route for our own address "ifa".  The route
  * used to be added only when necessary (e.g. on a p2p link), but since
  * addresses are now managed separately from prefixes it is always added;
  * cloning from the corresponding interface route can no longer be relied
  * upon.
  */
 void
 in6_ifaddloop(struct ifaddr *ifa)
 {
 	struct rtentry *match;
 	int have_loop;
 
 	/*
 	 * Look for an existing host route through a loopback interface and
 	 * release the lookup result straight away.
 	 */
 	have_loop = 0;
 	match = rtalloc1(ifa->ifa_addr, 0, 0);
 	if (match != NULL) {
 		if ((match->rt_flags & RTF_HOST) != 0 &&
 		    (match->rt_ifp->if_flags & IFF_LOOPBACK) != 0)
 			have_loop = 1;
 		RTFREE_LOCKED(match);
 	}
 
 	/* If there is no loopback entry, allocate one. */
 	if (!have_loop)
 		in6_ifloop_request(RTM_ADD, ifa);
 }
 
 /*
  * Remove the loopback rtentry of ownaddr generated by in6_ifaddloop(),
  * if it exists.
  *
  * The route is deleted only when "ifa" is the sole configured instance of
  * its address and the route found for the address is a host route through
  * a loopback interface.
  */
 void
 in6_ifremloop(struct ifaddr *ifa)
 {
 	struct in6_ifaddr *ia;
 	struct rtentry *rt;
 	int ia_count = 0;
 	int is_loop_host;
 
 	/*
 	 * Some of BSD variants do not remove cloned routes
 	 * from an interface direct route, when removing the direct route
 	 * (see comments in net/net_osdep.h).  Even for variants that do remove
 	 * cloned routes, they could fail to remove the cloned routes when
 	 * we handle multiple addresses that share a common prefix.
 	 * So, we should remove the route corresponding to the deleted address.
 	 */
 
 	/*
 	 * Delete the entry only if exactly one ifa exists.  More than one ifa
 	 * can exist if we assign the same single address to multiple
 	 * (probably p2p) interfaces.
 	 * XXX: we should avoid such a configuration in IPv6...
 	 */
 	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
 		if (IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa), &ia->ia_addr.sin6_addr)) {
 			ia_count++;
 			if (ia_count > 1)
 				break;
 		}
 	}
 	if (ia_count != 1)
 		return;
 
 	/*
 	 * Before deleting, check that a corresponding loopbacked host
 	 * route really exists.  With this check, we avoid deleting an
 	 * interface direct route whose destination is the same as the
 	 * address being removed.  This can happen when removing a
 	 * subnet-router anycast address on an interface attached to a
 	 * shared medium.
 	 */
 	rt = rtalloc1(ifa->ifa_addr, 0, 0);
 	if (rt == NULL)
 		return;
 	is_loop_host = (rt->rt_flags & RTF_HOST) != 0 &&
 	    (rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0;
 
 	/*
 	 * rtalloc1() returns the route locked with a reference held, so the
 	 * reference must be dropped on every path (in6_ifaddloop() releases
 	 * its lookup the same way).  Merely unlocking the route in the
 	 * non-matching case would leak that reference.
 	 */
 	RTFREE_LOCKED(rt);
 
 	if (is_loop_host)
 		in6_ifloop_request(RTM_DELETE, ifa);
 }
 
 /*
  * Convert an IPv6 netmask into a prefix length.
  *
  * mask: the netmask to examine.
  * lim0: one past the last mask byte to look at.  When it is NULL, or lies
  *       more than sizeof(*mask) bytes beyond the start of the mask, the
  *       whole address is examined.
  *
  * Returns the number of leading one bits, or -1 when any one bit follows
  * the first zero bit (i.e. the mask is not contiguous).
  */
 int
 in6_mask2len(struct in6_addr *mask, u_char *lim0)
 {
 	int x = 0, y;
 	u_char *lim = lim0, *p;
 
 	/* ignore the scope_id part */
 	if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask))
 		lim = (u_char *)mask + sizeof(*mask);
 
 	/* x: number of leading all-ones bytes. */
 	for (p = (u_char *)mask; p < lim; x++, p++) {
 		if (*p != 0xff)
 			break;
 	}
 
 	/* y: number of leading one bits in the first byte that is not 0xff. */
 	y = 0;
 	if (p < lim) {
 		for (y = 0; y < 8; y++) {
 			if ((*p & (0x80 >> y)) == 0)
 				break;
 		}
 	}
 
 	/*
 	 * Everything after the leading run of ones must be zero.  The
 	 * residual bits of the partial byte are checked even when y == 0
 	 * ((0xff >> 0) covers the whole byte); otherwise a byte such as
 	 * 0x7f would slip through and a non-contiguous mask would be
 	 * accepted.
 	 */
 	if (p < lim) {
 		if ((*p & (0x00ff >> y)) != 0)
 			return (-1);
 		for (p = p + 1; p < lim; p++)
 			if (*p != 0)
 				return (-1);
 	}
 
 	return x * 8 + y;
 }
 
 /* Reinterpret a generic ifaddr pointer as a pointer to an in6_ifaddr. */
 #define ifa2ia6(ifa)	((struct in6_ifaddr *)(ifa))
 /* Address of the ia_ifa member of an in6_ifaddr. */
 #define ia62ifa(ia6)	(&((ia6)->ia_ifa))
 
 /*
  * Handle an IPv6 ioctl request "cmd" issued on socket "so" against the
  * interface "ifp".
  *
  * "data" is accessed as a struct in6_ifreq or a struct in6_aliasreq,
  * depending on the command.  "td" is the requesting thread; when it is
  * non-NULL, privileged commands are gated by priv_check().
  *
  * The function is a sequence of switch statements on "cmd":
  *  - requests that are handed off wholesale (mrt6_ioctl, in6_src_ioctl,
  *    nd6_ioctl, scope6_*, in6_lifaddr_ioctl) return directly from the
  *    early switches;
  *  - for the remaining address-oriented requests, the target address is
  *    located in "data", converted to the scope-embedded form and looked
  *    up on "ifp" (result in "ia", NULL when not found);
  *  - a validation switch enforces privileges and per-command
  *    preconditions;
  *  - the final switch performs the operation; commands it does not know
  *    are passed on to the interface's if_ioctl routine.
  *
  * Returns 0 on success or an errno value.
  */
 int
 in6_control(struct socket *so, u_long cmd, caddr_t data,
     struct ifnet *ifp, struct thread *td)
 {
 	struct	in6_ifreq *ifr = (struct in6_ifreq *)data;
 	struct	in6_ifaddr *ia = NULL;
 	struct	in6_aliasreq *ifra = (struct in6_aliasreq *)data;
 	struct sockaddr_in6 *sa6;
 	int error;
 
 	/* Multicast routing counters: use the mrt6_ioctl hook if it is set. */
 	switch (cmd) {
 	case SIOCGETSGCNT_IN6:
 	case SIOCGETMIFCNT_IN6:
 		return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP);
 	}
 
 	/* Address selection policy: privileged, handled by in6_src_ioctl(). */
 	switch(cmd) {
 	case SIOCAADDRCTL_POLICY:
 	case SIOCDADDRCTL_POLICY:
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NETINET_ADDRCTRL6);
 			if (error)
 				return (error);
 		}
 		return (in6_src_ioctl(cmd, data));
 	}
 
 	/* Everything below needs an interface. */
 	if (ifp == NULL)
 		return (EOPNOTSUPP);
 
 	/*
 	 * Neighbor discovery requests go to nd6_ioctl(); the first group of
 	 * cases additionally requires PRIV_NETINET_ND6.
 	 */
 	switch (cmd) {
 	case SIOCSNDFLUSH_IN6:
 	case SIOCSPFXFLUSH_IN6:
 	case SIOCSRTRFLUSH_IN6:
 	case SIOCSDEFIFACE_IN6:
 	case SIOCSIFINFO_FLAGS:
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NETINET_ND6);
 			if (error)
 				return (error);
 		}
 		/* FALLTHROUGH */
 	case OSIOCGIFINFO_IN6:
 	case SIOCGIFINFO_IN6:
 	case SIOCSIFINFO_IN6:
 	case SIOCGDRLST_IN6:
 	case SIOCGPRLST_IN6:
 	case SIOCGNBRINFO_IN6:
 	case SIOCGDEFIFACE_IN6:
 		return (nd6_ioctl(cmd, data, ifp));
 	}
 
 	/* The prefix ioctls are no longer supported. */
 	switch (cmd) {
 	case SIOCSIFPREFIX_IN6:
 	case SIOCDIFPREFIX_IN6:
 	case SIOCAIFPREFIX_IN6:
 	case SIOCCIFPREFIX_IN6:
 	case SIOCSGIFPREFIX_IN6:
 	case SIOCGIFPREFIX_IN6:
 		log(LOG_NOTICE,
 		    "prefix ioctls are now invalidated. "
 		    "please use ifconfig.\n");
 		return (EOPNOTSUPP);
 	}
 
 	/* Scope zone identifiers; only the "set" operation is privileged. */
 	switch (cmd) {
 	case SIOCSSCOPE6:
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NETINET_SCOPE6);
 			if (error)
 				return (error);
 		}
 		return (scope6_set(ifp,
 		    (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
 	case SIOCGSCOPE6:
 		return (scope6_get(ifp,
 		    (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
 	case SIOCGSCOPE6DEF:
 		return (scope6_get_default((struct scope6_id *)
 		    ifr->ifr_ifru.ifru_scope_id));
 	}
 
 	/*
 	 * SIOC[ADG]LIFADDR are handled by in6_lifaddr_ioctl(); add and delete
 	 * are privileged, get is not.
 	 */
 	switch (cmd) {
 	case SIOCALIFADDR:
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NET_ADDIFADDR);
 			if (error)
 				return (error);
 		}
 		return in6_lifaddr_ioctl(so, cmd, data, ifp, td);
 
 	case SIOCDLIFADDR:
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NET_DELIFADDR);
 			if (error)
 				return (error);
 		}
 		/* FALLTHROUGH */
 	case SIOCGLIFADDR:
 		return in6_lifaddr_ioctl(so, cmd, data, ifp, td);
 	}
 
 	/*
 	 * Find address for this interface, if it exists.
 	 *
 	 * In netinet code, we have checked ifra_addr in SIOCSIF*ADDR operation
 	 * only, and used the first interface address as the target of other
 	 * operations (without checking ifra_addr).  This was because netinet
 	 * code/API assumed at most 1 interface address per interface.
 	 * Since IPv6 allows a node to assign multiple addresses
 	 * on a single interface, we almost always look and check the
 	 * presence of ifra_addr, and reject invalid ones here.
 	 * It also decreases duplicated code among SIOC*_IN6 operations.
 	 */
 	switch (cmd) {
 	case SIOCAIFADDR_IN6:
 	case SIOCSIFPHYADDR_IN6:
 		sa6 = &ifra->ifra_addr;
 		break;
 	case SIOCSIFADDR_IN6:
 	case SIOCGIFADDR_IN6:
 	case SIOCSIFDSTADDR_IN6:
 	case SIOCSIFNETMASK_IN6:
 	case SIOCGIFDSTADDR_IN6:
 	case SIOCGIFNETMASK_IN6:
 	case SIOCDIFADDR_IN6:
 	case SIOCGIFPSRCADDR_IN6:
 	case SIOCGIFPDSTADDR_IN6:
 	case SIOCGIFAFLAG_IN6:
 	/*
 	 * NOTE(review): the three *FLUSH commands below already returned
 	 * through nd6_ioctl() above, so these labels look unreachable.
 	 */
 	case SIOCSNDFLUSH_IN6:
 	case SIOCSPFXFLUSH_IN6:
 	case SIOCSRTRFLUSH_IN6:
 	case SIOCGIFALIFETIME_IN6:
 	case SIOCSIFALIFETIME_IN6:
 	case SIOCGIFSTAT_IN6:
 	case SIOCGIFSTAT_ICMP6:
 		sa6 = &ifr->ifr_addr;
 		break;
 	default:
 		sa6 = NULL;
 		break;
 	}
 	if (sa6 && sa6->sin6_family == AF_INET6) {
 		/* NOTE(review): shadows the function-scope "error". */
 		int error = 0;
 
 		/*
 		 * Use the caller-supplied scope id when there is one,
 		 * otherwise derive the scope from the interface.
 		 */
 		if (sa6->sin6_scope_id != 0)
 			error = sa6_embedscope(sa6, 0);
 		else
 			error = in6_setscope(&sa6->sin6_addr, ifp, NULL);
 		if (error != 0)
 			return (error);
 		ia = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr);
 	} else
 		ia = NULL;
 
 	/* Per-command validation and privilege checks. */
 	switch (cmd) {
 	case SIOCSIFADDR_IN6:
 	case SIOCSIFDSTADDR_IN6:
 	case SIOCSIFNETMASK_IN6:
 		/*
 		 * Since IPv6 allows a node to assign multiple addresses
 		 * on a single interface, SIOCSIFxxx ioctls are deprecated.
 		 */
 		/* we decided to obsolete this command (20000704) */
 		return (EINVAL);
 
 	case SIOCDIFADDR_IN6:
 		/*
 		 * for IPv4, we look for existing in_ifaddr here to allow
 		 * "ifconfig if0 delete" to remove the first IPv4 address on
 		 * the interface.  For IPv6, as the spec allows multiple
 		 * interface address from the day one, we consider "remove the
 		 * first one" semantics to be not preferable.
 		 */
 		if (ia == NULL)
 			return (EADDRNOTAVAIL);
 		/* FALLTHROUGH */
 	case SIOCAIFADDR_IN6:
 		/*
 		 * We always require users to specify a valid IPv6 address for
 		 * the corresponding operation.
 		 */
 		if (ifra->ifra_addr.sin6_family != AF_INET6 ||
 		    ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6))
 			return (EAFNOSUPPORT);
 
 		if (td != NULL) {
 			error = priv_check(td, (cmd == SIOCDIFADDR_IN6) ? 
 			    PRIV_NET_DELIFADDR : PRIV_NET_ADDIFADDR);
 			if (error)
 				return (error);
 		}
 
 		break;
 
 	case SIOCGIFADDR_IN6:
 		/* This interface is basically deprecated. use SIOCGIFCONF. */
 		/* FALLTHROUGH */
 	case SIOCGIFAFLAG_IN6:
 	case SIOCGIFNETMASK_IN6:
 	case SIOCGIFDSTADDR_IN6:
 	case SIOCGIFALIFETIME_IN6:
 		/* must think again about its semantics */
 		if (ia == NULL)
 			return (EADDRNOTAVAIL);
 		break;
 	case SIOCSIFALIFETIME_IN6:
 	    {
 		struct in6_addrlifetime *lt;
 
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NETINET_ALIFETIME6);
 			if (error)
 				return (error);
 		}
 		if (ia == NULL)
 			return (EADDRNOTAVAIL);
 		/* sanity for overflow - beware unsigned */
 		lt = &ifr->ifr_ifru.ifru_lifetime;
 		if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME &&
 		    lt->ia6t_vltime + time_second < time_second) {
 			return EINVAL;
 		}
 		if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME &&
 		    lt->ia6t_pltime + time_second < time_second) {
 			return EINVAL;
 		}
 		break;
 	    }
 	}
 
 	/* Perform the operation. */
 	switch (cmd) {
 
 	case SIOCGIFADDR_IN6:
 		ifr->ifr_addr = ia->ia_addr;
 		if ((error = sa6_recoverscope(&ifr->ifr_addr)) != 0)
 			return (error);
 		break;
 
 	case SIOCGIFDSTADDR_IN6:
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
 			return (EINVAL);
 		/*
 		 * XXX: should we check if ifa_dstaddr is NULL and return
 		 * an error?
 		 */
 		ifr->ifr_dstaddr = ia->ia_dstaddr;
 		if ((error = sa6_recoverscope(&ifr->ifr_dstaddr)) != 0)
 			return (error);
 		break;
 
 	case SIOCGIFNETMASK_IN6:
 		ifr->ifr_addr = ia->ia_prefixmask;
 		break;
 
 	case SIOCGIFAFLAG_IN6:
 		ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags;
 		break;
 
 	case SIOCGIFSTAT_IN6:
 		/* ifp was already checked for NULL above; this is redundant. */
 		if (ifp == NULL)
 			return EINVAL;
 		bzero(&ifr->ifr_ifru.ifru_stat,
 		    sizeof(ifr->ifr_ifru.ifru_stat));
 		ifr->ifr_ifru.ifru_stat =
 		    *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->in6_ifstat;
 		break;
 
 	case SIOCGIFSTAT_ICMP6:
 		/* ifp was already checked for NULL above; this is redundant. */
 		if (ifp == NULL)
 			return EINVAL;
 		bzero(&ifr->ifr_ifru.ifru_icmp6stat,
 		    sizeof(ifr->ifr_ifru.ifru_icmp6stat));
 		ifr->ifr_ifru.ifru_icmp6stat =
 		    *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->icmp6_ifstat;
 		break;
 
 	case SIOCGIFALIFETIME_IN6:
 		/*
 		 * Return the stored lifetimes.  For finite lifetimes the
 		 * absolute expiration times are recomputed from the last
 		 * update time and clamped to the largest positive time_t.
 		 */
 		ifr->ifr_ifru.ifru_lifetime = ia->ia6_lifetime;
 		if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 			time_t maxexpire;
 			struct in6_addrlifetime *retlt =
 			    &ifr->ifr_ifru.ifru_lifetime;
 
 			/*
 			 * XXX: adjust expiration time assuming time_t is
 			 * signed.
 			 */
 			maxexpire = (-1) &
 			    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
 			if (ia->ia6_lifetime.ia6t_vltime <
 			    maxexpire - ia->ia6_updatetime) {
 				retlt->ia6t_expire = ia->ia6_updatetime +
 				    ia->ia6_lifetime.ia6t_vltime;
 			} else
 				retlt->ia6t_expire = maxexpire;
 		}
 		if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 			time_t maxexpire;
 			struct in6_addrlifetime *retlt =
 			    &ifr->ifr_ifru.ifru_lifetime;
 
 			/*
 			 * XXX: adjust expiration time assuming time_t is
 			 * signed.
 			 */
 			maxexpire = (-1) &
 			    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
 			if (ia->ia6_lifetime.ia6t_pltime <
 			    maxexpire - ia->ia6_updatetime) {
 				retlt->ia6t_preferred = ia->ia6_updatetime +
 				    ia->ia6_lifetime.ia6t_pltime;
 			} else
 				retlt->ia6t_preferred = maxexpire;
 		}
 		break;
 
 	case SIOCSIFALIFETIME_IN6:
 		ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime;
 		/* for sanity */
 		if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 			ia->ia6_lifetime.ia6t_expire =
 				time_second + ia->ia6_lifetime.ia6t_vltime;
 		} else
 			ia->ia6_lifetime.ia6t_expire = 0;
 		if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 			ia->ia6_lifetime.ia6t_preferred =
 				time_second + ia->ia6_lifetime.ia6t_pltime;
 		} else
 			ia->ia6_lifetime.ia6t_preferred = 0;
 		break;
 
 	case SIOCAIFADDR_IN6:
 	{
 		/* NOTE(review): this "error" shadows the function-scope one. */
 		int i, error = 0;
 		struct nd_prefixctl pr0;
 		struct nd_prefix *pr;
 
 		/*
 		 * first, make or update the interface address structure,
 		 * and link it to the list.
 		 */
 		if ((error = in6_update_ifa(ifp, ifra, ia, 0)) != 0)
 			return (error);
 		if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr))
 		    == NULL) {
 			/*
 			 * this can happen when the user specify the 0 valid
 			 * lifetime.
 			 */
 			break;
 		}
 
 		/*
 		 * then, make the prefix on-link on the interface.
 		 * XXX: we'd rather create the prefix before the address, but
 		 * we need at least one address to install the corresponding
 		 * interface route, so we configure the address first.
 		 */
 
 		/*
 		 * convert mask to prefix length (prefixmask has already
 		 * been validated in in6_update_ifa().
 		 */
 		bzero(&pr0, sizeof(pr0));
 		pr0.ndpr_ifp = ifp;
 		pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
 		    NULL);
 		if (pr0.ndpr_plen == 128) {
 			break;	/* we don't need to install a host route. */
 		}
 		pr0.ndpr_prefix = ifra->ifra_addr;
 		/* apply the mask for safety. */
 		for (i = 0; i < 4; i++) {
 			pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &=
 			    ifra->ifra_prefixmask.sin6_addr.s6_addr32[i];
 		}
 		/*
 		 * XXX: since we don't have an API to set prefix (not address)
 		 * lifetimes, we just use the same lifetimes as addresses.
 		 * The (temporarily) installed lifetimes can be overridden by
 		 * later advertised RAs (when accept_rtadv is non 0), which is
 		 * an intended behavior.
 		 */
 		pr0.ndpr_raf_onlink = 1; /* should be configurable? */
 		pr0.ndpr_raf_auto =
 		    ((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0);
 		pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime;
 		pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime;
 
 		/* add the prefix if not yet. */
 		if ((pr = nd6_prefix_lookup(&pr0)) == NULL) {
 			/*
 			 * nd6_prelist_add will install the corresponding
 			 * interface route.
 			 */
 			if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0)
 				return (error);
 			if (pr == NULL) {
 				log(LOG_ERR, "nd6_prelist_add succeeded but "
 				    "no prefix\n");
 				return (EINVAL); /* XXX panic here? */
 			}
 		}
 
 		/* relate the address to the prefix */
 		if (ia->ia6_ndpr == NULL) {
 			ia->ia6_ndpr = pr;
 			pr->ndpr_refcnt++;
 
 			/*
 			 * If this is the first autoconf address from the
 			 * prefix, create a temporary address as well
 			 * (when required).
 			 */
 			if ((ia->ia6_flags & IN6_IFF_AUTOCONF) &&
 			    ip6_use_tempaddr && pr->ndpr_refcnt == 1) {
 				int e;
 				if ((e = in6_tmpifadd(ia, 1, 0)) != 0) {
 					log(LOG_NOTICE, "in6_control: failed "
 					    "to create a temporary address, "
 					    "errno=%d\n", e);
 				}
 			}
 		}
 
 		/*
 		 * this might affect the status of autoconfigured addresses,
 		 * that is, this address might make other addresses detached.
 		 */
 		pfxlist_onlink_check();
 		/*
 		 * NOTE(review): every non-zero error and a NULL ia have
 		 * already left this case above, so the test below looks
 		 * always true.
 		 */
 		if (error == 0 && ia)
 			EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 		break;
 	}
 
 	case SIOCDIFADDR_IN6:
 	{
 		struct nd_prefix *pr;
 
 		/*
 		 * If the address being deleted is the only one that owns
 		 * the corresponding prefix, expire the prefix as well.
 		 * XXX: theoretically, we don't have to worry about such
 		 * relationship, since we separate the address management
 		 * and the prefix management.  We do this, however, to provide
 		 * as much backward compatibility as possible in terms of
 		 * the ioctl operation.
 		 * Note that in6_purgeaddr() will decrement ndpr_refcnt.
 		 */
 		pr = ia->ia6_ndpr;
 		in6_purgeaddr(&ia->ia_ifa);
 		if (pr && pr->ndpr_refcnt == 0)
 			prelist_remove(pr);
 		EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 		break;
 	}
 
 	default:
 		/* Not handled here: pass the request to the interface driver. */
 		if (ifp == NULL || ifp->if_ioctl == 0)
 			return (EOPNOTSUPP);
 		return ((*ifp->if_ioctl)(ifp, cmd, data));
 	}
 
 	return (0);
 }
 
 /*
  * Update parameters of an IPv6 interface address.
  * If necessary, a new entry is created and linked into address chains.
  * This function is separated from in6_control().
  * XXX: should this be performed under splnet()?
  */
 int
 in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int flags)
 {
 	int error = 0, hostIsNew = 0, plen = -1;
 	struct in6_ifaddr *oia;
 	struct sockaddr_in6 dst6;
 	struct in6_addrlifetime *lt;
 	struct in6_multi_mship *imm;
 	struct in6_multi *in6m_sol;
 	struct rtentry *rt;
 	int delay;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/* Validate parameters */
 	if (ifp == NULL || ifra == NULL) /* this maybe redundant */
 		return (EINVAL);
 
 	/*
 	 * The destination address for a p2p link must have a family
 	 * of AF_UNSPEC or AF_INET6.
 	 */
 	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
 	    ifra->ifra_dstaddr.sin6_family != AF_INET6 &&
 	    ifra->ifra_dstaddr.sin6_family != AF_UNSPEC)
 		return (EAFNOSUPPORT);
 	/*
 	 * validate ifra_prefixmask.  don't check sin6_family, netmask
 	 * does not carry fields other than sin6_len.
 	 */
 	if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6))
 		return (EINVAL);
 	/*
 	 * Because the IPv6 address architecture is classless, we require
 	 * users to specify a (non 0) prefix length (mask) for a new address.
 	 * We also require the prefix (when specified) mask is valid, and thus
 	 * reject a non-consecutive mask.
 	 */
 	if (ia == NULL && ifra->ifra_prefixmask.sin6_len == 0)
 		return (EINVAL);
 	if (ifra->ifra_prefixmask.sin6_len != 0) {
 		plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
 		    (u_char *)&ifra->ifra_prefixmask +
 		    ifra->ifra_prefixmask.sin6_len);
 		if (plen <= 0)
 			return (EINVAL);
 	} else {
 		/*
 		 * In this case, ia must not be NULL.  We just use its prefix
 		 * length.
 		 */
 		plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);
 	}
 	/*
 	 * If the destination address on a p2p interface is specified,
 	 * and the address is a scoped one, validate/set the scope
 	 * zone identifier.
 	 */
 	dst6 = ifra->ifra_dstaddr;
 	if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) != 0 &&
 	    (dst6.sin6_family == AF_INET6)) {
 		struct in6_addr in6_tmp;
 		u_int32_t zoneid;
 
 		in6_tmp = dst6.sin6_addr;
 		if (in6_setscope(&in6_tmp, ifp, &zoneid))
 			return (EINVAL); /* XXX: should be impossible */
 
 		if (dst6.sin6_scope_id != 0) {
 			if (dst6.sin6_scope_id != zoneid)
 				return (EINVAL);
 		} else		/* user omit to specify the ID. */
 			dst6.sin6_scope_id = zoneid;
 
 		/* convert into the internal form */
 		if (sa6_embedscope(&dst6, 0))
 			return (EINVAL); /* XXX: should be impossible */
 	}
 	/*
 	 * The destination address can be specified only for a p2p or a
 	 * loopback interface.  If specified, the corresponding prefix length
 	 * must be 128.
 	 */
 	if (ifra->ifra_dstaddr.sin6_family == AF_INET6) {
 		if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) == 0) {
 			/* XXX: noisy message */
 			nd6log((LOG_INFO, "in6_update_ifa: a destination can "
 			    "be specified for a p2p or a loopback IF only\n"));
 			return (EINVAL);
 		}
 		if (plen != 128) {
 			nd6log((LOG_INFO, "in6_update_ifa: prefixlen should "
 			    "be 128 when dstaddr is specified\n"));
 			return (EINVAL);
 		}
 	}
 	/* lifetime consistency check */
 	lt = &ifra->ifra_lifetime;
 	if (lt->ia6t_pltime > lt->ia6t_vltime)
 		return (EINVAL);
 	if (lt->ia6t_vltime == 0) {
 		/*
 		 * the following log might be noisy, but this is a typical
 		 * configuration mistake or a tool's bug.
 		 */
 		nd6log((LOG_INFO,
 		    "in6_update_ifa: valid lifetime is 0 for %s\n",
 		    ip6_sprintf(ip6buf, &ifra->ifra_addr.sin6_addr)));
 
 		if (ia == NULL)
 			return (0); /* there's nothing to do */
 	}
 
 	/*
 	 * If this is a new address, allocate a new ifaddr and link it
 	 * into chains.
 	 */
 	if (ia == NULL) {
 		hostIsNew = 1;
 		/*
 		 * When in6_update_ifa() is called in a process of a received
 		 * RA, it is called under an interrupt context.  So, we should
 		 * call malloc with M_NOWAIT.
 		 */
 		ia = (struct in6_ifaddr *) malloc(sizeof(*ia), M_IFADDR,
 		    M_NOWAIT);
 		if (ia == NULL)
 			return (ENOBUFS);
 		bzero((caddr_t)ia, sizeof(*ia));
 		LIST_INIT(&ia->ia6_memberships);
 		/* Initialize the address and masks, and put time stamp */
 		IFA_LOCK_INIT(&ia->ia_ifa);
 		ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
 		ia->ia_addr.sin6_family = AF_INET6;
 		ia->ia_addr.sin6_len = sizeof(ia->ia_addr);
 		ia->ia6_createtime = time_second;
 		if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
 			/*
 			 * XXX: some functions expect that ifa_dstaddr is not
 			 * NULL for p2p interfaces.
 			 */
 			ia->ia_ifa.ifa_dstaddr =
 			    (struct sockaddr *)&ia->ia_dstaddr;
 		} else {
 			ia->ia_ifa.ifa_dstaddr = NULL;
 		}
 		ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask;
 
 		ia->ia_ifp = ifp;
 		if ((oia = in6_ifaddr) != NULL) {
 			for ( ; oia->ia_next; oia = oia->ia_next)
 				continue;
 			oia->ia_next = ia;
 		} else
 			in6_ifaddr = ia;
 
 		ia->ia_ifa.ifa_refcnt = 1;
 		TAILQ_INSERT_TAIL(&ifp->if_addrlist, &ia->ia_ifa, ifa_list);
 	}
 
 	/* update timestamp */
 	ia->ia6_updatetime = time_second;
 
 	/* set prefix mask */
 	if (ifra->ifra_prefixmask.sin6_len) {
 		/*
 		 * We prohibit changing the prefix length of an existing
 		 * address, because
 		 * + such an operation should be rare in IPv6, and
 		 * + the operation would confuse prefix management.
 		 */
 		if (ia->ia_prefixmask.sin6_len &&
 		    in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) {
 			nd6log((LOG_INFO, "in6_update_ifa: the prefix length of an"
 			    " existing (%s) address should not be changed\n",
 			    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
 			error = EINVAL;
 			goto unlink;
 		}
 		ia->ia_prefixmask = ifra->ifra_prefixmask;
 	}
 
 	/*
 	 * If a new destination address is specified, scrub the old one and
 	 * install the new destination.  Note that the interface must be
 	 * p2p or loopback (see the check above.)
 	 */
 	if (dst6.sin6_family == AF_INET6 &&
 	    !IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr, &ia->ia_dstaddr.sin6_addr)) {
 		int e;
 
 		if ((ia->ia_flags & IFA_ROUTE) != 0 &&
 		    (e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) {
 			nd6log((LOG_ERR, "in6_update_ifa: failed to remove "
 			    "a route to the old destination: %s\n",
 			    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
 			/* proceed anyway... */
 		} else
 			ia->ia_flags &= ~IFA_ROUTE;
 		ia->ia_dstaddr = dst6;
 	}
 
 	/*
 	 * Set lifetimes.  We do not refer to ia6t_expire and ia6t_preferred
 	 * to see if the address is deprecated or invalidated, but initialize
 	 * these members for applications.
 	 */
 	ia->ia6_lifetime = ifra->ifra_lifetime;
 	if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 		ia->ia6_lifetime.ia6t_expire =
 		    time_second + ia->ia6_lifetime.ia6t_vltime;
 	} else
 		ia->ia6_lifetime.ia6t_expire = 0;
 	if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 		ia->ia6_lifetime.ia6t_preferred =
 		    time_second + ia->ia6_lifetime.ia6t_pltime;
 	} else
 		ia->ia6_lifetime.ia6t_preferred = 0;
 
 	/* reset the interface and routing table appropriately. */
 	if ((error = in6_ifinit(ifp, ia, &ifra->ifra_addr, hostIsNew)) != 0)
 		goto unlink;
 
 	/*
 	 * configure address flags.
 	 */
 	ia->ia6_flags = ifra->ifra_flags;
 	/*
 	 * backward compatibility - if IN6_IFF_DEPRECATED is set from the
 	 * userland, make it deprecated.
 	 */
 	if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) {
 		ia->ia6_lifetime.ia6t_pltime = 0;
 		ia->ia6_lifetime.ia6t_preferred = time_second;
 	}
 	/*
 	 * Make the address tentative before joining multicast addresses,
 	 * so that corresponding MLD responses would not have a tentative
 	 * source address.
 	 */
 	ia->ia6_flags &= ~IN6_IFF_DUPLICATED;	/* safety */
 	if (hostIsNew && in6if_do_dad(ifp))
 		ia->ia6_flags |= IN6_IFF_TENTATIVE;
 
 	/*
 	 * We are done if we have simply modified an existing address.
 	 */
 	if (!hostIsNew)
 		return (error);
 
 	/*
 	 * Beyond this point, we should call in6_purgeaddr upon an error,
 	 * not just go to unlink.
 	 */
 
 	/* Join necessary multicast groups */
 	in6m_sol = NULL;
 	if ((ifp->if_flags & IFF_MULTICAST) != 0) {
 		struct sockaddr_in6 mltaddr, mltmask;
 		struct in6_addr llsol;
 
 		/* join solicited multicast addr for new host id */
 		bzero(&llsol, sizeof(struct in6_addr));
 		llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
 		llsol.s6_addr32[1] = 0;
 		llsol.s6_addr32[2] = htonl(1);
 		llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3];
 		llsol.s6_addr8[12] = 0xff;
 		if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) {
 			/* XXX: should not happen */
 			log(LOG_ERR, "in6_update_ifa: "
 			    "in6_setscope failed\n");
 			goto cleanup;
 		}
 		delay = 0;
 		if ((flags & IN6_IFAUPDATE_DADDELAY)) {
 			/*
 			 * We need a random delay for DAD on the address
 			 * being configured.  It also means delaying
 			 * transmission of the corresponding MLD report to
 			 * avoid report collision.
 			 * [draft-ietf-ipv6-rfc2462bis-02.txt]
 			 */
 			delay = arc4random() %
 			    (MAX_RTR_SOLICITATION_DELAY * hz);
 		}
 		imm = in6_joingroup(ifp, &llsol, &error, delay);
 		if (imm == NULL) {
 			nd6log((LOG_WARNING,
 			    "in6_update_ifa: addmulti failed for "
 			    "%s on %s (errno=%d)\n",
 			    ip6_sprintf(ip6buf, &llsol), if_name(ifp),
 			    error));
 			in6_purgeaddr((struct ifaddr *)ia);
 			return (error);
 		}
 		LIST_INSERT_HEAD(&ia->ia6_memberships,
 		    imm, i6mm_chain);
 		in6m_sol = imm->i6mm_maddr;
 
 		bzero(&mltmask, sizeof(mltmask));
 		mltmask.sin6_len = sizeof(struct sockaddr_in6);
 		mltmask.sin6_family = AF_INET6;
 		mltmask.sin6_addr = in6mask32;
 #define	MLTMASK_LEN  4	/* mltmask's masklen (=32bit=4octet) */
 
 		/*
 		 * join link-local all-nodes address
 		 */
 		bzero(&mltaddr, sizeof(mltaddr));
 		mltaddr.sin6_len = sizeof(struct sockaddr_in6);
 		mltaddr.sin6_family = AF_INET6;
 		mltaddr.sin6_addr = in6addr_linklocal_allnodes;
 		if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) !=
 		    0)
 			goto cleanup; /* XXX: should not fail */
 
 		/*
 		 * XXX: do we really need this automatic routes?
 		 * We should probably reconsider this stuff.  Most applications
 		 * actually do not need the routes, since they usually specify
 		 * the outgoing interface.
 		 */
 		rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL);
 		if (rt) {
 			/* XXX: only works in !SCOPEDROUTING case. */
 			if (memcmp(&mltaddr.sin6_addr,
 			    &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
 			    MLTMASK_LEN)) {
 				RTFREE_LOCKED(rt);
 				rt = NULL;
 			}
 		}
 		if (!rt) {
 			/* XXX: we need RTF_CLONING to fake nd6_rtrequest */
 			error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr,
 			    (struct sockaddr *)&ia->ia_addr,
 			    (struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING,
 			    (struct rtentry **)0);
 			if (error)
 				goto cleanup;
 		} else {
 			RTFREE_LOCKED(rt);
 		}
 
 		imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0);
 		if (!imm) {
 			nd6log((LOG_WARNING,
 			    "in6_update_ifa: addmulti failed for "
 			    "%s on %s (errno=%d)\n",
 			    ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
 			    if_name(ifp), error));
 			goto cleanup;
 		}
 		LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 
 		/*
 		 * join node information group address
 		 */
 #define hostnamelen	strlen(hostname)
 		delay = 0;
 		if ((flags & IN6_IFAUPDATE_DADDELAY)) {
 			/*
 			 * The spec doesn't say anything about delay for this
 			 * group, but the same logic should apply.
 			 */
 			delay = arc4random() %
 			    (MAX_RTR_SOLICITATION_DELAY * hz);
 		}
 		if (in6_nigroup(ifp, hostname, hostnamelen, &mltaddr.sin6_addr)
 		    == 0) {
 			imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error,
 			    delay); /* XXX jinmei */
 			if (!imm) {
 				nd6log((LOG_WARNING, "in6_update_ifa: "
 				    "addmulti failed for %s on %s "
 				    "(errno=%d)\n",
 				    ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
 				    if_name(ifp), error));
 				/* XXX not very fatal, go on... */
 			} else {
 				LIST_INSERT_HEAD(&ia->ia6_memberships,
 				    imm, i6mm_chain);
 			}
 		}
 #undef hostnamelen
 
 		/*
 		 * join interface-local all-nodes address.
 		 * (ff01::1%ifN, and ff01::%ifN/32)
 		 */
 		mltaddr.sin6_addr = in6addr_nodelocal_allnodes;
 		if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL))
 		    != 0)
 			goto cleanup; /* XXX: should not fail */
 		/* XXX: again, do we really need the route? */
 		rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL);
 		if (rt) {
 			if (memcmp(&mltaddr.sin6_addr,
 			    &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
 			    MLTMASK_LEN)) {
 				RTFREE_LOCKED(rt);
 				rt = NULL;
 			}
 		}
 		if (!rt) {
 			error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr,
 			    (struct sockaddr *)&ia->ia_addr,
 			    (struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING,
 			    (struct rtentry **)0);
 			if (error)
 				goto cleanup;
 		} else
 			RTFREE_LOCKED(rt);
 
 		imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0);
 		if (!imm) {
 			nd6log((LOG_WARNING, "in6_update_ifa: "
 			    "addmulti failed for %s on %s "
 			    "(errno=%d)\n",
 			    ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
 			    if_name(ifp), error));
 			goto cleanup;
 		}
 		LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 #undef	MLTMASK_LEN
 	}
 
 	/*
 	 * Perform DAD, if needed.
 	 * XXX It may be of use, if we can administratively
 	 * disable DAD.
 	 */
 	if (hostIsNew && in6if_do_dad(ifp) &&
 	    ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) &&
 	    (ia->ia6_flags & IN6_IFF_TENTATIVE))
 	{
 		int mindelay, maxdelay;
 
 		delay = 0;
 		if ((flags & IN6_IFAUPDATE_DADDELAY)) {
 			/*
 			 * We need to impose a delay before sending an NS
 			 * for DAD.  Check if we also needed a delay for the
 			 * corresponding MLD message.  If we did, the delay
 			 * should be larger than the MLD delay (this could be
 			 * relaxed a bit, but this simple logic is at least
 			 * safe).
 			 */
 			mindelay = 0;
 			if (in6m_sol != NULL &&
 			    in6m_sol->in6m_state == MLD_REPORTPENDING) {
 				mindelay = in6m_sol->in6m_timer;
 			}
 			maxdelay = MAX_RTR_SOLICITATION_DELAY * hz;
 			if (maxdelay - mindelay == 0)
 				delay = 0;
 			else {
 				delay =
 				    (arc4random() % (maxdelay - mindelay)) +
 				    mindelay;
 			}
 		}
 		nd6_dad_start((struct ifaddr *)ia, delay);
 	}
 
 	return (error);
 
   unlink:
 	/*
 	 * XXX: if a change of an existing address failed, keep the entry
 	 * anyway.
 	 */
 	if (hostIsNew)
 		in6_unlink_ifa(ia, ifp);
 	return (error);
 
   cleanup:
 	in6_purgeaddr(&ia->ia_ifa);
 	return error;
 }
 
 /*
  * Tear down an IPv6 address that is configured on an interface.
  *
  * In order: stop DAD for the address, remove the host route to the
  * point-to-point destination (if one was installed), remove the
  * loopback route for the address itself, leave every multicast group
  * recorded in ia6_memberships, and finally unlink the address via
  * in6_unlink_ifa().
  */
 void
 in6_purgeaddr(struct ifaddr *ifa)
 {
 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
 	struct ifnet *ifp = ifa->ifa_ifp;
 	struct in6_multi_mship *mship;
 	char ip6buf[INET6_ADDRSTRLEN];
 	int rterr;
 
 	/* stop DAD processing */
 	nd6_dad_stop(ifa);
 
 	/*
 	 * delete route to the destination of the address being purged.
 	 * The interface must be p2p or loopback in this case.
 	 */
 	if ((ia->ia_flags & IFA_ROUTE) != 0 && ia->ia_dstaddr.sin6_len != 0) {
 		rterr = rtinit(&ia->ia_ifa, (int)RTM_DELETE, RTF_HOST);
 		if (rterr == 0) {
 			ia->ia_flags &= ~IFA_ROUTE;
 		} else {
 			/* Log the failure, but proceed with the purge anyway. */
 			log(LOG_ERR, "in6_purgeaddr: failed to remove "
 			    "a route to the p2p destination: %s on %s, "
 			    "errno=%d\n",
 			    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
 			    if_name(ifp), rterr);
 		}
 	}
 
 	/* Remove ownaddr's loopback rtentry, if it exists. */
 	in6_ifremloop(&ia->ia_ifa);
 
 	/*
 	 * leave from multicast groups we have joined for the interface
 	 */
 	for (;;) {
 		mship = ia->ia6_memberships.lh_first;
 		if (mship == NULL)
 			break;
 		LIST_REMOVE(mship, i6mm_chain);
 		in6_leavegroup(mship);
 	}
 
 	in6_unlink_ifa(ia, ifp);
 }
 
 /*
  * Detach an in6_ifaddr from the interface's address list and from the
  * global in6_ifaddr chain, drop its reference on the base prefix, and
  * release the reference that was held for the global chain.
  */
 static void
 in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
 {
 	struct in6_ifaddr *prev;
 	int	s;
 
 	s = splnet();
 
 	TAILQ_REMOVE(&ifp->if_addrlist, &ia->ia_ifa, ifa_list);
 
 	/* Unhook ia from the singly-linked global in6_ifaddr chain. */
 	if (in6_ifaddr == ia) {
 		in6_ifaddr = ia->ia_next;
 	} else {
 		/* Walk to the node whose successor is ia (or to the tail). */
 		prev = in6_ifaddr;
 		while (prev->ia_next != NULL && prev->ia_next != ia)
 			prev = prev->ia_next;
 		if (prev->ia_next != NULL) {
 			prev->ia_next = ia->ia_next;
 		} else {
 			/* search failed */
 			printf("Couldn't unlink in6_ifaddr from in6_ifaddr\n");
 		}
 	}
 
 	/*
 	 * Release the reference to the base prefix.  There should be a
 	 * positive reference.
 	 */
 	if (ia->ia6_ndpr != NULL) {
 		ia->ia6_ndpr->ndpr_refcnt--;
 		ia->ia6_ndpr = NULL;
 	} else {
 		nd6log((LOG_NOTICE,
 		    "in6_unlink_ifa: autoconf'ed address "
 		    "%p has no prefix\n", ia));
 	}
 
 	/*
 	 * Also, if the address being removed is autoconf'ed, call
 	 * pfxlist_onlink_check() since the release might affect the status of
 	 * other (detached) addresses.
 	 */
 	if ((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0)
 		pfxlist_onlink_check();
 
 	/*
 	 * release another refcnt for the link from in6_ifaddr.
 	 * Note that we should decrement the refcnt at least once for all *BSD.
 	 */
 	IFAFREE(&ia->ia_ifa);
 
 	splx(s);
 }
 
 /*
  * Purge every AF_INET6 address configured on ifp, then detach the
  * interface from IPv6 via in6_ifdetach().
  */
 void
 in6_purgeif(struct ifnet *ifp)
 {
 	struct ifaddr *cur, *next;
 
 	cur = TAILQ_FIRST(&ifp->if_addrlist);
 	while (cur != NULL) {
 		/*
 		 * Fetch the successor before purging: in6_purgeaddr()
 		 * removes cur from if_addrlist.
 		 */
 		next = TAILQ_NEXT(cur, ifa_list);
 		if (cur->ifa_addr->sa_family == AF_INET6)
 			in6_purgeaddr(cur);
 		cur = next;
 	}
 
 	in6_ifdetach(ifp);
 }
 
 /*
  * SIOC[GAD]LIFADDR.
  *	SIOCGLIFADDR: get first address. (?)
  *	SIOCGLIFADDR with IFLR_PREFIX:
  *		get first address that matches the specified prefix.
  *	SIOCALIFADDR: add the specified address.
  *	SIOCALIFADDR with IFLR_PREFIX:
  *		add the specified prefix, filling hostid part from
  *		the first link-local address.  prefixlen must be <= 64.
  *	SIOCDLIFADDR: delete the specified address.
  *	SIOCDLIFADDR with IFLR_PREFIX:
  *		delete the first address that matches the specified prefix.
  * return values:
  *	EINVAL on invalid parameters
  *	EADDRNOTAVAIL on prefix match failed/specified address not found
  *	other values may be returned from in6_ioctl()
  *
  * NOTE: SIOCALIFADDR(with IFLR_PREFIX set) allows prefixlen less than 64.
  * this is to accommodate address naming scheme other than RFC2374,
  * in the future.
  * RFC2373 defines interface id to be 64bit, but it allows non-RFC2374
  * address encoding scheme. (see figure on page 8)
  *
  * Arguments:
  *	so, td	passed through unchanged to in6_control().
  *	cmd	one of SIOCGLIFADDR, SIOCALIFADDR, SIOCDLIFADDR; anything
  *		else yields EOPNOTSUPP.
  *	data	points to a struct if_laddrreq; for SIOCGLIFADDR it is
  *		also filled in with the result.
  *	ifp	interface whose address list is searched or modified.
  */
 static int
 in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
     struct ifnet *ifp, struct thread *td)
 {
 	struct if_laddrreq *iflr = (struct if_laddrreq *)data;
 	struct ifaddr *ifa;
 	struct sockaddr *sa;
 
 	/* sanity checks */
 	if (!data || !ifp) {
 		panic("invalid argument to in6_lifaddr_ioctl");
 		/* NOTREACHED */
 	}
 
 	switch (cmd) {
 	case SIOCGLIFADDR:
 		/* address must be specified on GET with IFLR_PREFIX */
 		if ((iflr->flags & IFLR_PREFIX) == 0)
 			break;
 		/* FALLTHROUGH */
 	case SIOCALIFADDR:
 	case SIOCDLIFADDR:
 		/* address must be specified on ADD and DELETE */
 		sa = (struct sockaddr *)&iflr->addr;
 		if (sa->sa_family != AF_INET6)
 			return EINVAL;
 		if (sa->sa_len != sizeof(struct sockaddr_in6))
 			return EINVAL;
 		/* XXX need improvement */
 		/* dstaddr is optional: zero family/len means "not given". */
 		sa = (struct sockaddr *)&iflr->dstaddr;
 		if (sa->sa_family && sa->sa_family != AF_INET6)
 			return EINVAL;
 		if (sa->sa_len && sa->sa_len != sizeof(struct sockaddr_in6))
 			return EINVAL;
 		break;
 	default: /* shouldn't happen */
 #if 0
 		panic("invalid cmd to in6_lifaddr_ioctl");
 		/* NOTREACHED */
 #else
 		return EOPNOTSUPP;
 #endif
 	}
 	/* A prefix cannot be longer than the address itself (128 bits). */
 	if (sizeof(struct in6_addr) * 8 < iflr->prefixlen)
 		return EINVAL;
 
 	switch (cmd) {
 	case SIOCALIFADDR:
 	    {
 		struct in6_aliasreq ifra;
 		struct in6_addr *hostid = NULL;
 		int prefixlen;
 
 		if ((iflr->flags & IFLR_PREFIX) != 0) {
 			struct sockaddr_in6 *sin6;
 
 			/*
 			 * hostid is to fill in the hostid part of the
 			 * address.  hostid points to the first link-local
 			 * address attached to the interface.
 			 */
 			ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0);
 			if (!ifa)
 				return EADDRNOTAVAIL;
 			hostid = IFA_IN6(ifa);
 
 			/* prefixlen must be <= 64. */
 			if (64 < iflr->prefixlen)
 				return EINVAL;
 			prefixlen = iflr->prefixlen;
 
 			/* hostid part must be zero. */
 			sin6 = (struct sockaddr_in6 *)&iflr->addr;
 			if (sin6->sin6_addr.s6_addr32[2] != 0 ||
 			    sin6->sin6_addr.s6_addr32[3] != 0) {
 				return EINVAL;
 			}
 		} else
 			prefixlen = iflr->prefixlen;
 
 		/* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */
 		bzero(&ifra, sizeof(ifra));
 		bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name));
 
 		bcopy(&iflr->addr, &ifra.ifra_addr,
 		    ((struct sockaddr *)&iflr->addr)->sa_len);
 		if (hostid) {
 			/* fill in hostid part */
 			ifra.ifra_addr.sin6_addr.s6_addr32[2] =
 			    hostid->s6_addr32[2];
 			ifra.ifra_addr.sin6_addr.s6_addr32[3] =
 			    hostid->s6_addr32[3];
 		}
 
 		if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /* XXX */
 			bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
 			    ((struct sockaddr *)&iflr->dstaddr)->sa_len);
 			if (hostid) {
 				ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] =
 				    hostid->s6_addr32[2];
 				ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] =
 				    hostid->s6_addr32[3];
 			}
 		}
 
 		ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 		in6_prefixlen2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen);
 
 		ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX;
 		return in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, td);
 	    }
 	case SIOCGLIFADDR:
 	case SIOCDLIFADDR:
 	    {
 		struct in6_ifaddr *ia;
 		struct in6_addr mask, candidate, match;
 		struct sockaddr_in6 *sin6;
 		int cmp;
 
 		/*
 		 * Set up the search: "match" is the wanted (masked) address,
 		 * "mask" selects the bits to compare, and cmp == 0 means
 		 * "take the first AF_INET6 address without comparing".
 		 */
 		bzero(&mask, sizeof(mask));
 		if (iflr->flags & IFLR_PREFIX) {
 			/* lookup a prefix rather than address. */
 			in6_prefixlen2mask(&mask, iflr->prefixlen);
 
 			sin6 = (struct sockaddr_in6 *)&iflr->addr;
 			bcopy(&sin6->sin6_addr, &match, sizeof(match));
 			match.s6_addr32[0] &= mask.s6_addr32[0];
 			match.s6_addr32[1] &= mask.s6_addr32[1];
 			match.s6_addr32[2] &= mask.s6_addr32[2];
 			match.s6_addr32[3] &= mask.s6_addr32[3];
 
 			/* if you set extra bits, that's wrong */
 			if (bcmp(&match, &sin6->sin6_addr, sizeof(match)))
 				return EINVAL;
 
 			cmp = 1;
 		} else {
 			if (cmd == SIOCGLIFADDR) {
 				/* on getting an address, take the 1st match */
 				cmp = 0;	/* XXX */
 			} else {
 				/* on deleting an address, do exact match */
 				in6_prefixlen2mask(&mask, 128);
 				sin6 = (struct sockaddr_in6 *)&iflr->addr;
 				bcopy(&sin6->sin6_addr, &match, sizeof(match));
 
 				cmp = 1;
 			}
 		}
 
 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
 			if (ifa->ifa_addr->sa_family != AF_INET6)
 				continue;
 			if (!cmp)
 				break;
 
 			/*
 			 * XXX: this is adhoc, but is necessary to allow
 			 * a user to specify fe80::/64 (not /10) for a
 			 * link-local address.
 			 */
 			bcopy(IFA_IN6(ifa), &candidate, sizeof(candidate));
 			in6_clearscope(&candidate);
 			candidate.s6_addr32[0] &= mask.s6_addr32[0];
 			candidate.s6_addr32[1] &= mask.s6_addr32[1];
 			candidate.s6_addr32[2] &= mask.s6_addr32[2];
 			candidate.s6_addr32[3] &= mask.s6_addr32[3];
 			if (IN6_ARE_ADDR_EQUAL(&candidate, &match))
 				break;
 		}
 		if (!ifa)
 			return EADDRNOTAVAIL;
 		ia = ifa2ia6(ifa);
 
 		if (cmd == SIOCGLIFADDR) {
 			int error;
 
 			/* fill in the if_laddrreq structure */
 			bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin6_len);
 			error = sa6_recoverscope(
 			    (struct sockaddr_in6 *)&iflr->addr);
 			if (error != 0)
 				return (error);
 
 			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
 				bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
 				    ia->ia_dstaddr.sin6_len);
 				error = sa6_recoverscope(
 				    (struct sockaddr_in6 *)&iflr->dstaddr);
 				if (error != 0)
 					return (error);
 			} else
 				bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));
 
 			iflr->prefixlen =
 			    in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);
 
 			iflr->flags = ia->ia6_flags;	/* XXX */
 
 			return 0;
 		} else {
 			struct in6_aliasreq ifra;
 
 			/* fill in6_aliasreq and do ioctl(SIOCDIFADDR_IN6) */
 			bzero(&ifra, sizeof(ifra));
 			bcopy(iflr->iflr_name, ifra.ifra_name,
 			    sizeof(ifra.ifra_name));
 
 			bcopy(&ia->ia_addr, &ifra.ifra_addr,
 			    ia->ia_addr.sin6_len);
 			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
 				bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
 				    ia->ia_dstaddr.sin6_len);
 			} else {
 				bzero(&ifra.ifra_dstaddr,
 				    sizeof(ifra.ifra_dstaddr));
 			}
 			/*
 			 * The mask belongs in ifra_prefixmask; copying it
 			 * into ifra_dstaddr would clobber the destination
 			 * set just above and leave the mask zeroed.
 			 */
 			bcopy(&ia->ia_prefixmask, &ifra.ifra_prefixmask,
 			    ia->ia_prefixmask.sin6_len);
 
 			ifra.ifra_flags = ia->ia6_flags;
 			return in6_control(so, SIOCDIFADDR_IN6, (caddr_t)&ifra,
 			    ifp, td);
 		}
 	    }
 	}
 
 	return EOPNOTSUPP;	/* just for safety */
 }
 
 /*
  * Initialize an interface's internet6 address
  * and routing table entry.
  *
  * Copies *sin6 into ia->ia_addr and, when at most one AF_INET6 address
  * is on the interface's list and the driver provides if_ioctl, hands
  * SIOCSIFADDR to the driver.  For a new host address (newhost != 0) the
  * nd6_rtrequest hook is installed and in6_ifaddloop() is called.
  *
  * Returns 0 on success, or the error reported by the driver ioctl or
  * by rtrequest().
  */
 static int
 in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
     struct sockaddr_in6 *sin6, int newhost)
 {
 	int	error = 0, plen, ifacount = 0;
 	int	s = splimp();
 	struct ifaddr *ifa;
 
 	/*
 	 * Give the interface a chance to initialize
 	 * if this is its first address,
 	 * and to validate the address if necessary.
 	 */
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		ifacount++;
 	}
 
 	ia->ia_addr = *sin6;
 
 	if (ifacount <= 1 && ifp->if_ioctl) {
 		IFF_LOCKGIANT(ifp);
 		error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
 		IFF_UNLOCKGIANT(ifp);
 		if (error) {
 			splx(s);
 			return (error);
 		}
 	}
 	splx(s);
 
 	ia->ia_ifa.ifa_metric = ifp->if_metric;
 
 	/* we could do in(6)_socktrim here, but just omit it at this moment. */
 
 	if (newhost) {
 		/*
 		 * set the rtrequest function to create llinfo.  It also
 		 * adjust outgoing interface of the route for the local
 		 * address when called via in6_ifaddloop() below.
 		 */
 		ia->ia_ifa.ifa_rtrequest = nd6_rtrequest;
 	}
 
 	/*
 	 * Special case:
 	 * If a new destination address is specified for a point-to-point
 	 * interface, install a route to the destination as an interface
 	 * direct route.  In addition, if the link is expected to have neighbor
 	 * cache entries, specify RTF_LLINFO so that a cache entry for the
 	 * destination address will be created.
 	 * XXX: the logic below rejects assigning multiple addresses on a p2p
 	 * interface that share the same destination.
 	 */
 	plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
 	if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 &&
 	    ia->ia_dstaddr.sin6_family == AF_INET6) {
 		int rtflags = RTF_UP | RTF_HOST;
 		struct rtentry *rt = NULL, **rtp = NULL;
 
 		/* Only ask rtrequest() for the new rtentry when llinfo is wanted. */
 		if (nd6_need_cache(ifp) != 0) {
 			rtflags |= RTF_LLINFO;
 			rtp = &rt;
 		}
 
-		error = rtrequest(RTM_ADD, (struct sockaddr *)&ia->ia_dstaddr,
+		error = rtrequest(RTM_ADD,
+		    (struct sockaddr *)&ia->ia_dstaddr,
 		    (struct sockaddr *)&ia->ia_addr,
 		    (struct sockaddr *)&ia->ia_prefixmask,
 		    ia->ia_flags | rtflags, rtp);
 		if (error != 0)
 			return (error);
 		if (rt != NULL) {
 			struct llinfo_nd6 *ln;
 
 			RT_LOCK(rt);
 			ln = (struct llinfo_nd6 *)rt->rt_llinfo;
 			if (ln != NULL) {
 				/*
 				 * Set the state to STALE because we don't
 				 * have to perform address resolution on this
 				 * link.
 				 */
 				ln->ln_state = ND6_LLINFO_STALE;
 			}
 			RT_REMREF(rt);
 			RT_UNLOCK(rt);
 		}
 		ia->ia_flags |= IFA_ROUTE;
 	}
 	if (plen < 128) {
 		/*
 		 * The RTF_CLONING flag is necessary for in6_is_ifloop_auto().
 		 */
 		ia->ia_ifa.ifa_flags |= RTF_CLONING;
 	}
 
 	/* Add ownaddr as loopback rtentry, if necessary (ex. on p2p link). */
 	if (newhost)
 		in6_ifaddloop(&(ia->ia_ifa));
 
 	return (error);
 }
 
 /*
  * Join the multicast group addr on ifp and return a freshly allocated
  * membership record that refers to it.
  *
  * Returns NULL on failure with *errorp set: ENOBUFS when the record
  * cannot be allocated, otherwise whatever in6_addmulti() stored there.
  */
 struct in6_multi_mship *
 in6_joingroup(struct ifnet *ifp, struct in6_addr *addr,
     int *errorp, int delay)
 {
 	struct in6_multi_mship *mship;
 
 	mship = malloc(sizeof(*mship), M_IP6MADDR, M_NOWAIT);
 	if (mship == NULL) {
 		*errorp = ENOBUFS;
 		return (NULL);
 	}
 
 	mship->i6mm_maddr = in6_addmulti(addr, ifp, errorp, delay);
 	if (mship->i6mm_maddr != NULL)
 		return (mship);
 
 	/* *errorp is already set */
 	free(mship, M_IP6MADDR);
 	return (NULL);
 }
 
 int
 in6_leavegroup(struct in6_multi_mship *imm)
 {
 
 	if (imm->i6mm_maddr)
 		in6_delmulti(imm->i6mm_maddr);
 	free(imm,  M_IP6MADDR);
 	return 0;
 }
 
 /*
  * Find an IPv6 interface link-local address specific to an interface.
  * Link-local addresses that have any of the bits in ignoreflags set in
  * ia6_flags are skipped.  Returns NULL when nothing qualifies.
  */
 struct in6_ifaddr *
 in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags)
 {
 	struct ifaddr *ifa;
 	struct in6_ifaddr *ia6;
 
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		if (!IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa)))
 			continue;
 		ia6 = (struct in6_ifaddr *)ifa;
 		if ((ia6->ia6_flags & ignoreflags) == 0)
 			return (ia6);
 	}
 
 	return (NULL);
 }
 
 
 /*
  * Find the internet address corresponding to a given interface and
  * address.  Returns NULL when the interface does not carry addr.
  */
 struct in6_ifaddr *
 in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr)
 {
 	struct ifaddr *ifa;
 
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
 		if (ifa->ifa_addr->sa_family == AF_INET6 &&
 		    IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa)))
 			return ((struct in6_ifaddr *)ifa);
 	}
 
 	return (NULL);
 }
 
 /*
  * Convert IP6 address to printable (loggable) representation. Caller
  * has to make sure that ip6buf is at least INET6_ADDRSTRLEN long.
  *
  * The first run of two or more all-zero 16-bit groups is compressed to
  * "::"; leading zero digits within a group are dropped.  Returns ip6buf.
  */
 static char digits[] = "0123456789abcdef";
 char *
 ip6_sprintf(char *ip6buf, const struct in6_addr *addr)
 {
 	int i;
 	char *cp;
 	const u_int16_t *a = (const u_int16_t *)addr;
 	const u_int8_t *d;
 	/* dcolon: 0 = "::" not emitted yet, 1 = inside the run, 2 = done. */
 	int dcolon = 0, zero = 0;
 
 	cp = ip6buf;
 
 	for (i = 0; i < 8; i++) {
 		if (dcolon == 1) {
 			if (*a == 0) {
 				/*
 				 * Run reaches the end of the address: emit a
 				 * sacrificial ':' for the final "*--cp" below.
 				 */
 				if (i == 7)
 					*cp++ = ':';
 				a++;
 				continue;
 			} else
 				dcolon = 2;
 		}
 		if (*a == 0) {
 			/*
 			 * Only look at the next group when there is one;
 			 * at i == 7, *(a + 1) would be past the address.
 			 */
 			if (dcolon == 0 && i < 7 && *(a + 1) == 0) {
 				if (i == 0)
 					*cp++ = ':';
 				*cp++ = ':';
 				dcolon = 1;
 			} else {
 				*cp++ = '0';
 				*cp++ = ':';
 			}
 			a++;
 			continue;
 		}
 		d = (const u_int8_t *)a;
 		/* Try to eliminate leading zeros in printout like in :0001. */
 		zero = 1;
 		*cp = digits[*d >> 4];
 		if (*cp != '0') {
 			zero = 0;
 			cp++;
 		}
 		*cp = digits[*d++ & 0xf];
 		if (zero == 0 || (*cp != '0')) {
 			zero = 0;
 			cp++;
 		}
 		*cp = digits[*d >> 4];
 		if (zero == 0 || (*cp != '0')) {
 			zero = 0;
 			cp++;
 		}
 		/* The last digit of a non-zero group is always printed. */
 		*cp++ = digits[*d & 0xf];
 		*cp++ = ':';
 		a++;
 	}
 	/* Overwrite the trailing ':' with the terminator. */
 	*--cp = '\0';
 	return (ip6buf);
 }
 
 int
 in6_localaddr(struct in6_addr *in6)
 {
 	struct in6_ifaddr *ia;
 
 	if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6))
 		return 1;
 
 	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
 		if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr,
 		    &ia->ia_prefixmask.sin6_addr)) {
 			return 1;
 		}
 	}
 
 	return (0);
 }
 
 int
 in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
 {
 	struct in6_ifaddr *ia;
 
 	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
 		if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
 				       &sa6->sin6_addr) &&
 		    (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0)
 			return (1); /* true */
 
 		/* XXX: do we still have to go thru the rest of the list? */
 	}
 
 	return (0);		/* false */
 }
 
 /*
  * Return the number of leading bits (0..128) that src and dst have in
  * common.
  */
 int
 in6_matchlen(struct in6_addr *src, struct in6_addr *dst)
 {
 	const unsigned char *sp = (const unsigned char *)src;
 	const unsigned char *dp = (const unsigned char *)dst;
 	unsigned char diff, bit;
 	int idx, match = 0;
 
 	for (idx = 0; idx < 16; idx++) {
 		diff = sp[idx] ^ dp[idx];
 		if (diff == 0) {
 			/* Whole byte agrees. */
 			match += 8;
 			continue;
 		}
 		/* Count the equal bits ahead of the first differing one. */
 		for (bit = 0x80; (diff & bit) == 0; bit >>= 1)
 			match++;
 		break;
 	}
 
 	return (match);
 }
 
 /*
  * Return 1 if the first len bits of p1 and p2 are identical, 0 if they
  * differ.  A len outside 0..128 is logged and treated as "not equal".
  */
 /* XXX: to be scope conscious */
 int
 in6_are_prefix_equal(struct in6_addr *p1, struct in6_addr *p2, int len)
 {
 	int nbytes, nbits, shift;
 
 	/* sanity check */
 	if (len < 0 || len > 128) {
 		log(LOG_ERR, "in6_are_prefix_equal: invalid prefix length(%d)\n",
 		    len);
 		return (0);
 	}
 
 	nbytes = len / 8;
 	nbits = len % 8;
 
 	/* Whole bytes first ... */
 	if (bcmp(&p1->s6_addr, &p2->s6_addr, nbytes) != 0)
 		return (0);
 	if (nbits == 0)
 		return (1);
 
 	/* ... then the high nbits of the following byte. */
 	shift = 8 - nbits;
 	if ((p1->s6_addr[nbytes] >> shift) != (p2->s6_addr[nbytes] >> shift))
 		return (0);
 
 	return (1);
 }
 
 /*
  * Fill *maskp with a netmask that has its top len bits set.
  * A len outside 0..128 is logged and *maskp is left untouched.
  */
 void
 in6_prefixlen2mask(struct in6_addr *maskp, int len)
 {
 	int nbytes, nbits, idx;
 
 	/* sanity check */
 	if (len < 0 || len > 128) {
 		log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n",
 		    len);
 		return;
 	}
 
 	nbytes = len / 8;
 	nbits = len % 8;
 
 	bzero(maskp, sizeof(*maskp));
 	idx = 0;
 	while (idx < nbytes)
 		maskp->s6_addr[idx++] = 0xff;
 	/* Partial byte: keep only its nbits most significant bits. */
 	if (nbits != 0)
 		maskp->s6_addr[nbytes] = (0xff << (8 - nbits)) & 0xff;
 }
 
 /*
  * return the best address out of the same scope. if no address was
  * found, return the first valid address from designated IF.
  */
 struct in6_ifaddr *
 in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst)
 {
 	int dst_scope =	in6_addrscope(dst), blen = -1, tlen;
 	struct ifaddr *ifa;
 	struct in6_ifaddr *besta = 0;
 	struct in6_ifaddr *dep[2];	/* last-resort: deprecated */
 
 	dep[0] = dep[1] = NULL;
 
 	/*
 	 * We first look for addresses in the same scope.
 	 * If there is one, return it.
 	 * If two or more, return one which matches the dst longest.
 	 * If none, return one of global addresses assigned other ifs.
 	 */
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST)
 			continue; /* XXX: is there any case to allow anycast? */
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY)
 			continue; /* don't use this interface */
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED)
 			continue;
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
 			if (ip6_use_deprecated)
 				dep[0] = (struct in6_ifaddr *)ifa;
 			continue;
 		}
 
 		if (dst_scope == in6_addrscope(IFA_IN6(ifa))) {
 			/*
 			 * call in6_matchlen() as few as possible
 			 */
 			if (besta) {
 				if (blen == -1)
 					blen = in6_matchlen(&besta->ia_addr.sin6_addr, dst);
 				tlen = in6_matchlen(IFA_IN6(ifa), dst);
 				if (tlen > blen) {
 					blen = tlen;
 					besta = (struct in6_ifaddr *)ifa;
 				}
 			} else
 				besta = (struct in6_ifaddr *)ifa;
 		}
 	}
 	if (besta)
 		return (besta);
 
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST)
 			continue; /* XXX: is there any case to allow anycast? */
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY)
 			continue; /* don't use this interface */
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED)
 			continue;
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
 			if (ip6_use_deprecated)
 				dep[1] = (struct in6_ifaddr *)ifa;
 			continue;
 		}
 
 		return (struct in6_ifaddr *)ifa;
 	}
 
 	/* use the last-resort values, that are, deprecated addresses */
 	if (dep[0])
 		return dep[0];
 	if (dep[1])
 		return dep[1];
 
 	return NULL;
 }
 
 /*
  * perform DAD when interface becomes IFF_UP.
  * Every AF_INET6 address on ifp that is flagged IN6_IFF_TENTATIVE gets
  * DAD started after a random delay; in6_ifattach() is called afterwards.
  */
 void
 in6_if_up(struct ifnet *ifp)
 {
 	struct ifaddr *ifa;
 	struct in6_ifaddr *ia;
 
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		ia = (struct in6_ifaddr *)ifa;
 		if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0)
 			continue;
 
 		/*
 		 * The TENTATIVE flag was likely set by hand
 		 * beforehand, implicitly indicating the need for DAD.
 		 * We may be able to skip the random delay in this
 		 * case, but we impose delays just in case.
 		 */
 		nd6_dad_start(ifa,
 		    arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz));
 	}
 
 	/*
 	 * special cases, like 6to4, are handled in in6_ifattach
 	 */
 	in6_ifattach(ifp, NULL);
 }
 
 /*
  * Decide whether DAD should be performed on ifp.
  * Returns 0 for loopback-like interfaces and for interfaces that are
  * not both up and running, 1 otherwise.
  */
 int
 in6if_do_dad(struct ifnet *ifp)
 {
 	if ((ifp->if_flags & IFF_LOOPBACK) != 0)
 		return (0);
 
 	switch (ifp->if_type) {
 #ifdef IFT_DUMMY
 	case IFT_DUMMY:
 #endif
 	case IFT_FAITH:
 		/*
 		 * These interfaces do not have the IFF_LOOPBACK flag,
 		 * but loop packets back.  We do not have to do DAD on such
 		 * interfaces.  We should even omit it, because loop-backed
 		 * NS would confuse the DAD procedure.
 		 */
 		return (0);
 	default:
 		break;
 	}
 
 	/*
 	 * Our DAD routine requires the interface up and running.
 	 * However, some interfaces can be up before the RUNNING
 	 * status.  Additionally, users may try to assign addresses
 	 * before the interface becomes up (or running).
 	 * We simply skip DAD in such a case as a work around.
 	 * XXX: we should rather mark "tentative" on such addresses,
 	 * and do DAD after the interface becomes ready.
 	 */
 	if ((ifp->if_flags & IFF_UP) == 0 ||
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return (0);
 
 	return (1);
 }
 
 /*
  * Calculate max IPv6 MTU through all the interfaces and store it
  * to in6_maxmtu.
  */
 void
 in6_setmaxmtu(void)
 {
 	unsigned long maxmtu = 0;
 	struct ifnet *ifp;
 
 	IFNET_RLOCK();
 	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
 		/* this function can be called during ifnet initialization */
 		if (!ifp->if_afdata[AF_INET6])
 			continue;
 		if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
 		    IN6_LINKMTU(ifp) > maxmtu)
 			maxmtu = IN6_LINKMTU(ifp);
 	}
 	IFNET_RUNLOCK();
 	if (maxmtu)	     /* update only when maxmtu is positive */
 		in6_maxmtu = maxmtu;
 }
 
 /*
  * Provide the length of interface identifiers to be used for the link attached
  * to the given interface.  The length should be defined in "IPv6 over
  * xxx-link" document.  Note that address architecture might also define
  * the length for a particular set of address prefixes, regardless of the
  * link type.  As clarified in rfc2462bis, those two definitions should be
  * consistent, and those really are as of August 2004.
  *
  * Every link type currently yields 64; unknown types additionally
  * print a notice.
  */
 int
 in6_if2idlen(struct ifnet *ifp)
 {
 	switch (ifp->if_type) {
 	case IFT_ETHER:		/* RFC2464 */
 #ifdef IFT_PROPVIRTUAL
 	case IFT_PROPVIRTUAL:	/* XXX: no RFC. treat it as ether */
 #endif
 #ifdef IFT_L2VLAN
 	case IFT_L2VLAN:	/* ditto */
 #endif
 #ifdef IFT_IEEE80211
 	case IFT_IEEE80211:	/* ditto */
 #endif
 #ifdef IFT_MIP
 	case IFT_MIP:	/* ditto */
 #endif
 	case IFT_FDDI:		/* RFC2467 */
 	case IFT_ISO88025:	/* RFC2470 (IPv6 over Token Ring) */
 	case IFT_PPP:		/* RFC2472 */
 	case IFT_ARCNET:	/* RFC2497 */
 	case IFT_FRELAY:	/* RFC2590 */
 	case IFT_IEEE1394:	/* RFC3146 */
 	case IFT_GIF:		/* draft-ietf-v6ops-mech-v2-07 */
 	case IFT_LOOP:		/* XXX: is this really correct? */
 		break;
 	default:
 		/*
 		 * Unknown link type:
 		 * It might be controversial to use the today's common constant
 		 * of 64 for these cases unconditionally.  For full compliance,
 		 * we should return an error in this case.  On the other hand,
 		 * if we simply miss the standard for the link type or a new
 		 * standard is defined for a new link type, the IFID length
 		 * is very likely to be the common constant.  As a compromise,
 		 * we always use the constant, but make an explicit notice
 		 * indicating the "unknown" case.
 		 */
 		printf("in6_if2idlen: unknown link type (%d)\n", ifp->if_type);
 		break;
 	}
 
 	return (64);
 }
 
 /*
  * Allocate and zero the per-interface IPv6 extension data (struct
  * in6_ifextra with its in6_ifstat and icmp6_ifstat blocks), attach the
  * ND6 and scope6 state for ifp, and return the new structure.
  */
 void *
 in6_domifattach(struct ifnet *ifp)
 {
 	struct in6_ifextra *ext;
 
 	ext = malloc(sizeof(*ext), M_IFADDR, M_WAITOK);
 	bzero(ext, sizeof(*ext));
 
 	ext->in6_ifstat = malloc(sizeof(struct in6_ifstat), M_IFADDR,
 	    M_WAITOK);
 	bzero(ext->in6_ifstat, sizeof(*ext->in6_ifstat));
 
 	ext->icmp6_ifstat = malloc(sizeof(struct icmp6_ifstat), M_IFADDR,
 	    M_WAITOK);
 	bzero(ext->icmp6_ifstat, sizeof(*ext->icmp6_ifstat));
 
 	ext->nd_ifinfo = nd6_ifattach(ifp);
 	ext->scope6_id = scope6_ifattach(ifp);
 
 	return (ext);
 }
 
 /*
  * Release the per-interface IPv6 extension data passed in aux: detach
  * the scope6 and ND6 state, then free both statistics blocks and the
  * container itself.
  */
 void
 in6_domifdetach(struct ifnet *ifp, void *aux)
 {
 	struct in6_ifextra *ext;
 
 	ext = (struct in6_ifextra *)aux;
 
 	scope6_ifdetach(ext->scope6_id);
 	nd6_ifdetach(ext->nd_ifinfo);
 
 	free(ext->in6_ifstat, M_IFADDR);
 	free(ext->icmp6_ifstat, M_IFADDR);
 	free(ext, M_IFADDR);
 }
 
 /*
  * Convert sockaddr_in6 to sockaddr_in.  Original sockaddr_in6 must be
  * v4 mapped addr or v4 compat addr: the port and the low 32 bits of
  * the IPv6 address are carried over.
  */
 void
 in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
 {
 
 	bzero(sin, sizeof(*sin));
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(struct sockaddr_in);
 	sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3];
 	sin->sin_port = sin6->sin6_port;
 }
 
 /*
  * Convert sockaddr_in to sockaddr_in6 in v4 mapped addr format:
  * the port is copied and the IPv4 address lands in the low 32 bits.
  */
 void
 in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
 {
 
 	bzero(sin6, sizeof(*sin6));
 	sin6->sin6_family = AF_INET6;
 	sin6->sin6_len = sizeof(struct sockaddr_in6);
 	sin6->sin6_port = sin->sin_port;
 
 	sin6->sin6_addr.s6_addr32[0] = 0;
 	sin6->sin6_addr.s6_addr32[1] = 0;
 	sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
 	sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr;
 }
 
 /* Convert sockaddr_in6 into sockaddr_in. */
 void
 in6_sin6_2_sin_in_sock(struct sockaddr *nam)
 {
 	struct sockaddr_in *sin_p;
 	struct sockaddr_in6 sin6;
 
 	/*
 	 * Save original sockaddr_in6 addr and convert it
 	 * to sockaddr_in.
 	 */
 	sin6 = *(struct sockaddr_in6 *)nam;
 	sin_p = (struct sockaddr_in *)nam;
 	in6_sin6_2_sin(sin_p, &sin6);
 }
 
 /*
  * Convert sockaddr_in into sockaddr_in6 in v4 mapped addr format.
  * A new sockaddr_in6 is allocated, the old *nam is freed, and *nam is
  * updated to point at the replacement.
  */
 void
 in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam)
 {
 	struct sockaddr_in6 *mapped;
 
 	MALLOC(mapped, struct sockaddr_in6 *, sizeof *mapped, M_SONAME,
 	       M_WAITOK);
 	in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, mapped);
 
 	FREE(*nam, M_SONAME);
 	*nam = (struct sockaddr *)mapped;
 }
Index: head/sys/netinet6/in6_ifattach.c
===================================================================
--- head/sys/netinet6/in6_ifattach.c	(revision 178887)
+++ head/sys/netinet6/in6_ifattach.c	(revision 178888)
@@ -1,905 +1,905 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in6_ifattach.c,v 1.118 2001/05/24 07:44:00 itojun Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 #include <sys/md5.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #include <netinet/in_pcb.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/scope6_var.h>
 
 /* Largest if_mtu seen so far; raised by in6_ifattach(). */
 unsigned long in6_maxmtu = 0;
 
 /*
  * When non-zero, in6_ifattach() assigns a link-local address to an
  * interface that has none.  The default can be overridden at build time
  * through IP6_AUTO_LINKLOCAL.
  */
 #ifdef IP6_AUTO_LINKLOCAL
 int ip6_auto_linklocal = IP6_AUTO_LINKLOCAL;
 #else
 int ip6_auto_linklocal = 1;	/* enable by default */
 #endif
 
 /* Callout re-armed by in6_tmpaddrtimer() on every run. */
 struct callout in6_tmpaddrtimer_ch;
 
 /* PCB tables handed to in6_pcbpurgeif0() from in6_ifdetach(). */
 extern struct inpcbinfo udbinfo;
 extern struct inpcbinfo ripcbinfo;
 
 static int get_rand_ifid(struct ifnet *, struct in6_addr *);
 static int generate_tmp_ifid(u_int8_t *, const u_int8_t *, u_int8_t *);
 static int get_ifid(struct ifnet *, struct ifnet *, struct in6_addr *);
 static int in6_ifattach_linklocal(struct ifnet *, struct ifnet *);
 static int in6_ifattach_loopback(struct ifnet *);
 static void in6_purgemaddrs(struct ifnet *);
 
 /*
  * Bits in the first byte of an EUI64 (s6_addr[8] of the IPv6 address):
  * the "g" bit distinguishes group from individual, the "u" bit local
  * from universal.
  */
 #define EUI64_GBIT	0x01
 #define EUI64_UBIT	0x02
 /* An EUI64 becomes an IPv6 interface identifier by flipping the "u" bit. */
 #define EUI64_TO_IFID(in6)	do {(in6)->s6_addr[8] ^= EUI64_UBIT; } while (0)
 #define EUI64_GROUP(in6)	((in6)->s6_addr[8] & EUI64_GBIT)
 #define EUI64_INDIVIDUAL(in6)	(!EUI64_GROUP(in6))
 #define EUI64_LOCAL(in6)	((in6)->s6_addr[8] & EUI64_UBIT)
 #define EUI64_UNIVERSAL(in6)	(!EUI64_LOCAL(in6))
 
 /*
  * Same tests for a value already converted with EUI64_TO_IFID(): the
  * sense of the "u" bit is inverted with respect to the EUI64 macros.
  */
 #define IFID_LOCAL(in6)		(!EUI64_LOCAL(in6))
 #define IFID_UNIVERSAL(in6)	(!EUI64_UNIVERSAL(in6))
 
 /*
  * Generate a last-resort interface identifier, when the machine has no
  * IEEE802/EUI64 address sources.
  * The goal here is to get an interface identifier that is
  * (1) random enough and (2) does not change across reboot.
  * We currently use MD5(hostname) for it.
  *
  * in6 - upper 64bits are preserved
  */
 static int
 get_rand_ifid(struct ifnet *ifp, struct in6_addr *in6)
 {
 	MD5_CTX ctxt;
 	u_int8_t digest[16];
 	int hostnamelen	= strlen(hostname);
 
 #if 0
 	/* we need at least several letters as seed for ifid */
 	if (hostnamelen < 3)
 		return -1;
 #endif
 
 	/* generate 8 bytes of pseudo-random value. */
 	bzero(&ctxt, sizeof(ctxt));
 	MD5Init(&ctxt);
 	MD5Update(&ctxt, hostname, hostnamelen);
 	MD5Final(digest, &ctxt);
 
 	/* assumes sizeof(digest) > sizeof(ifid) */
 	bcopy(digest, &in6->s6_addr[8], 8);
 
 	/* make sure to set "u" bit to local, and "g" bit to individual. */
 	in6->s6_addr[8] &= ~EUI64_GBIT;	/* g bit to "individual" */
 	in6->s6_addr[8] |= EUI64_UBIT;	/* u bit to "local" */
 
 	/* convert EUI64 into IPv6 interface identifier */
 	EUI64_TO_IFID(in6);
 
 	return 0;
 }
 
 /*
  * Produce the next randomized interface identifier along the lines of
  * RFC 3041 3.2.1.
  *
  * seed0 - 8-byte history value.  When it is all zero a random seed is
  *         used instead.  On return it holds the right-most 64 bits of
  *         the digest, i.e. the history for the next iteration.
  * seed1 - 8 bytes that are appended to the history to form the MD5 input
  * ret   - receives the new 8-byte identifier, which is never all zero
  *
  * Always returns 0.
  */
 static int
 generate_tmp_ifid(u_int8_t *seed0, const u_int8_t *seed1, u_int8_t *ret)
 {
 	MD5_CTX ctxt;
 	u_int8_t seed[16], digest[16], nullbuf[8];
 	u_int32_t val32;
 
 	/* If there's no history, start with a random seed. */
 	bzero(nullbuf, sizeof(nullbuf));
 	if (bcmp(nullbuf, seed0, sizeof(nullbuf)) == 0) {
 		int i;
 
 		/* two 32-bit random words fill the first 8 bytes of seed */
 		for (i = 0; i < 2; i++) {
 			val32 = arc4random();
 			bcopy(&val32, seed + sizeof(val32) * i, sizeof(val32));
 		}
 	} else
 		bcopy(seed0, seed, 8);
 
 	/* copy the right-most 64-bits of the given address */
 	/* XXX assumption on the size of IFID */
 	bcopy(seed1, &seed[8], 8);
 
 	if (0) {		/* for debugging purposes only */
 		int i;
 
 		printf("generate_tmp_ifid: new randomized ID from: ");
 		for (i = 0; i < 16; i++)
 			printf("%02x", seed[i]);
 		printf(" ");
 	}
 
 	/* generate 16 bytes of pseudo-random value. */
 	bzero(&ctxt, sizeof(ctxt));
 	MD5Init(&ctxt);
 	MD5Update(&ctxt, seed, sizeof(seed));
 	MD5Final(digest, &ctxt);
 
 	/*
 	 * RFC 3041 3.2.1. (3)
 	 * Take the left-most 64-bits of the MD5 digest and set bit 6 (the
 	 * left-most bit is numbered 0) to zero.
 	 */
 	bcopy(digest, ret, 8);
 	ret[0] &= ~EUI64_UBIT;
 
 	/*
 	 * XXX: we'd like to ensure that the generated value is not zero
 	 * for simplicity.  If the calculated digest happens to be zero,
 	 * use a random non-zero value as the last resort.
 	 */
 	if (bcmp(nullbuf, ret, sizeof(nullbuf)) == 0) {
 		nd6log((LOG_INFO,
 		    "generate_tmp_ifid: computed MD5 value is zero.\n"));
 
 		/*
 		 * val32 ends up in [1, 0xfffffffe].  It is stored in the
 		 * last four bytes of the identifier so that ret[0], and
 		 * with it the bit cleared above, stays untouched.
 		 */
 		val32 = arc4random();
 		val32 = 1 + (val32 % (0xffffffff - 1));
 		bcopy(&val32, &ret[4], sizeof(val32));
 	}
 
 	/*
 	 * RFC 3041 3.2.1. (4)
 	 * Take the rightmost 64-bits of the MD5 digest and save them in
 	 * stable storage as the history value to be used in the next
 	 * iteration of the algorithm.
 	 */
 	bcopy(&digest[8], seed0, 8);
 
 	if (0) {		/* for debugging purposes only */
 		int i;
 
 		printf("to: ");
 		for (i = 0; i < 16; i++)
 			printf("%02x", digest[i]);
 		printf("\n");
 	}
 
 	return 0;
 }
 
 /*
  * Derive an interface identifier from the link-layer address of ifp.
  * XXX assumes single sockaddr_dl (AF_LINK address) per an interface
  *
  * in6 - upper 64bits are preserved; the lower 64 bits receive the ifid
  *
  * Returns 0 on success and -1 when the interface has no usable
  * link-layer address or is of a type that is not handled here.
  */
 int
 in6_get_hw_ifid(struct ifnet *ifp, struct in6_addr *in6)
 {
 	static u_int8_t allzero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 	static u_int8_t allone[8] =
 		{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 	struct sockaddr_dl *sdl = NULL;
 	struct ifaddr *ifa;
 	u_int8_t *lladdr;
 	size_t lllen;
 
 	/* pick the first AF_LINK address that carries a non-empty lladdr */
 	for (ifa = ifp->if_addrlist.tqh_first; ifa != NULL;
 	     ifa = ifa->ifa_list.tqe_next) {
 		if (ifa->ifa_addr->sa_family != AF_LINK)
 			continue;
 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 		if (sdl != NULL && sdl->sdl_alen != 0)
 			break;
 	}
 	if (ifa == NULL)
 		return (-1);
 
 	lladdr = LLADDR(sdl);
 	lllen = sdl->sdl_alen;
 
 	/* get EUI64 */
 	switch (ifp->if_type) {
 	case IFT_ETHER:
 	case IFT_FDDI:
 	case IFT_ISO88025:
 	case IFT_ATM:
 	case IFT_IEEE1394:
 #ifdef IFT_IEEE80211
 	case IFT_IEEE80211:
 #endif
 		/* IEEE802/EUI64 cases - what others? */
 		/* IEEE1394 uses 16byte length address starting with EUI64 */
 		if (lllen > 8)
 			lllen = 8;
 
 		/* look at IEEE802/EUI64 only */
 		if (lllen != 8 && lllen != 6)
 			return (-1);
 
 		/*
 		 * check for invalid MAC address - on bsdi, we see it a lot
 		 * since wildboar configures all-zero MAC on pccard before
 		 * card insertion.
 		 */
 		if (bcmp(lladdr, allzero, lllen) == 0 ||
 		    bcmp(lladdr, allone, lllen) == 0)
 			return (-1);
 
 		/* make EUI64 address */
 		if (lllen == 8) {
 			bcopy(lladdr, &in6->s6_addr[8], 8);
 		} else {
 			/* 48-bit address: ff:fe goes between the halves */
 			bcopy(lladdr, &in6->s6_addr[8], 3);
 			in6->s6_addr[11] = 0xff;
 			in6->s6_addr[12] = 0xfe;
 			bcopy(lladdr + 3, &in6->s6_addr[13], 3);
 		}
 		break;
 
 	case IFT_ARCNET:
 		/* a single, non-zero address byte is expected */
 		if (lllen != 1 || !lladdr[0])
 			return (-1);
 
 		bzero(&in6->s6_addr[8], 8);
 		in6->s6_addr[15] = lladdr[0];
 
 		/*
 		 * due to insufficient bitwidth, we mark it local.
 		 */
 		in6->s6_addr[8] &= ~EUI64_GBIT;	/* g bit to "individual" */
 		in6->s6_addr[8] |= EUI64_UBIT;	/* u bit to "local" */
 		break;
 
 	case IFT_GIF:
 #ifdef IFT_STF
 	case IFT_STF:
 #endif
 		/*
 		 * RFC2893 says: "SHOULD use IPv4 address as ifid source".
 		 * however, IPv4 address is not very suitable as unique
 		 * identifier source (can be renumbered).
 		 * we don't do this.
 		 */
 		return (-1);
 
 	default:
 		return (-1);
 	}
 
 	/* sanity check: g bit must not indicate "group" */
 	if (EUI64_GROUP(in6))
 		return (-1);
 
 	/* convert EUI64 into IPv6 interface identifier */
 	EUI64_TO_IFID(in6);
 
 	/*
 	 * sanity check: ifid must not be all zero, avoid conflict with
 	 * subnet router anycast
 	 */
 	if ((in6->s6_addr[8] & ~(EUI64_GBIT | EUI64_UBIT)) == 0x00 &&
 	    bcmp(&in6->s6_addr[9], allzero, 7) == 0)
 		return (-1);
 
 	return (0);
 }
 
 /*
  * Get interface identifier for the specified interface.  If it is not
  * available on ifp0, borrow interface identifier from other information
  * sources.  The sources are tried in this order: ifp0 itself, altifp,
  * every other interface on the ifnet list (only a globally unique
  * identifier is accepted from those) and finally get_rand_ifid().
  *
  * ifp0   - interface the identifier is for
  * altifp - secondary EUI64 source, may be NULL
  * in6    - receives the identifier in its lower 64 bits
  *
  * Returns 0 on success, -1 when no source produced an identifier.
  */
 static int
 get_ifid(struct ifnet *ifp0, struct ifnet *altifp,
     struct in6_addr *in6)
 {
 	struct ifnet *ifp;
 
 	/* first, try to get it from the interface itself */
 	if (in6_get_hw_ifid(ifp0, in6) == 0) {
 		nd6log((LOG_DEBUG, "%s: got interface identifier from itself\n",
 		    if_name(ifp0)));
 		goto success;
 	}
 
 	/* try secondary EUI64 source. this basically is for ATM PVC */
 	if (altifp && in6_get_hw_ifid(altifp, in6) == 0) {
 		nd6log((LOG_DEBUG, "%s: got interface identifier from %s\n",
 		    if_name(ifp0), if_name(altifp)));
 		goto success;
 	}
 
 	/* next, try to get it from some other hardware interface */
 	IFNET_RLOCK();
 	for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_list.tqe_next) {
 		if (ifp == ifp0)
 			continue;
 		if (in6_get_hw_ifid(ifp, in6) != 0)
 			continue;
 
 		/*
 		 * to borrow ifid from other interface, ifid needs to be
 		 * globally unique
 		 */
 		if (IFID_UNIVERSAL(in6)) {
 			nd6log((LOG_DEBUG,
 			    "%s: borrow interface identifier from %s\n",
 			    if_name(ifp0), if_name(ifp)));
 			IFNET_RUNLOCK();
 			goto success;
 		}
 	}
 	IFNET_RUNLOCK();
 
 	/*
 	 * last resort: get from random number source.
 	 * The scan above always leaves ifp == NULL when it falls through,
 	 * so hand over the interface we are actually working for.
 	 */
 	if (get_rand_ifid(ifp0, in6) == 0) {
 		nd6log((LOG_DEBUG,
 		    "%s: interface identifier generated by random number\n",
 		    if_name(ifp0)));
 		goto success;
 	}
 
 	printf("%s: failed to get interface identifier\n", if_name(ifp0));
 	return -1;
 
 success:
 	nd6log((LOG_INFO, "%s: ifid: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
 	    if_name(ifp0), in6->s6_addr[8], in6->s6_addr[9], in6->s6_addr[10],
 	    in6->s6_addr[11], in6->s6_addr[12], in6->s6_addr[13],
 	    in6->s6_addr[14], in6->s6_addr[15]));
 	return 0;
 }
 
 /*
  * Configure a link-local address (fe80::/64 plus interface identifier) on
  * ifp and register the link-local prefix with the ND6 prefix list.
  *
  * ifp    - interface to configure
  * altifp - secondary EUI64 source
  *
  * Returns 0 on success.  Failures before the prefix step return -1; a
  * failure of nd6_prelist_add() returns that function's error value.
  */
 static int
 in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp)
 {
 	struct in6_ifaddr *ia;
 	struct in6_aliasreq ifra;
 	struct nd_prefixctl pr0;
 	int i, error;
 
 	/*
 	 * configure link-local address.
 	 */
 	bzero(&ifra, sizeof(ifra));
 
 	/*
 	 * in6_update_ifa() does not use ifra_name, but we accurately set it
 	 * for safety.
 	 */
 	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
 
 	ifra.ifra_addr.sin6_family = AF_INET6;
 	ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
 	/* upper 64 bits: fe80:0000:0000:0000 */
 	ifra.ifra_addr.sin6_addr.s6_addr32[0] = htonl(0xfe800000);
 	ifra.ifra_addr.sin6_addr.s6_addr32[1] = 0;
 	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
 		/* loopback interfaces get the fixed identifier ::1 */
 		ifra.ifra_addr.sin6_addr.s6_addr32[2] = 0;
 		ifra.ifra_addr.sin6_addr.s6_addr32[3] = htonl(1);
 	} else {
 		/* everything else gets its lower 64 bits from get_ifid() */
 		if (get_ifid(ifp, altifp, &ifra.ifra_addr.sin6_addr) != 0) {
 			nd6log((LOG_ERR,
 			    "%s: no ifid available\n", if_name(ifp)));
 			return (-1);
 		}
 	}
 	if (in6_setscope(&ifra.ifra_addr.sin6_addr, ifp, NULL))
 		return (-1);
 
 	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_prefixmask.sin6_family = AF_INET6;
 	ifra.ifra_prefixmask.sin6_addr = in6mask64;
 	/* link-local addresses should NEVER expire. */
 	ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
 	ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
 
 	/*
 	 * Now call in6_update_ifa() to do a bunch of procedures to configure
 	 * a link-local address. We can set the 3rd argument to NULL, because
 	 * we know there's no other link-local address on the interface
 	 * and therefore we are adding one (instead of updating one).
 	 */
 	if ((error = in6_update_ifa(ifp, &ifra, NULL,
 				    IN6_IFAUPDATE_DADDELAY)) != 0) {
 		/*
 		 * XXX: When the interface does not support IPv6, this call
 		 * would fail in the SIOCSIFADDR ioctl.  I believe the
 		 * notification is rather confusing in this case, so just
 		 * suppress it.  (jinmei@kame.net 20010130)
 		 */
 		if (error != EAFNOSUPPORT)
 			nd6log((LOG_NOTICE, "in6_ifattach_linklocal: failed to "
 			    "configure a link-local address on %s "
 			    "(errno=%d)\n",
 			    if_name(ifp), error));
 		return (-1);
 	}
 
 	/* the result is only inspected when DIAGNOSTIC is defined */
 	ia = in6ifa_ifpforlinklocal(ifp, 0); /* ia must not be NULL */
 #ifdef DIAGNOSTIC
 	if (!ia) {
 		panic("ia == NULL in in6_ifattach_linklocal");
 		/* NOTREACHED */
 	}
 #endif
 
 	/*
 	 * Make the link-local prefix (fe80::%link/64) as on-link.
 	 * Since we'd like to manage prefixes separately from addresses,
 	 * we make an ND6 prefix structure for the link-local prefix,
 	 * and add it to the prefix list as a never-expire prefix.
 	 * XXX: this change might affect some existing code base...
 	 */
 	bzero(&pr0, sizeof(pr0));
 	pr0.ndpr_ifp = ifp;
 	/* this should be 64 at this moment. */
 	pr0.ndpr_plen = in6_mask2len(&ifra.ifra_prefixmask.sin6_addr, NULL);
 	pr0.ndpr_prefix = ifra.ifra_addr;
 	/* apply the mask for safety. (nd6_prelist_add will apply it again) */
 	for (i = 0; i < 4; i++) {
 		pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &=
 		    in6mask64.s6_addr32[i];
 	}
 	/*
 	 * Initialize parameters.  The link-local prefix must always be
 	 * on-link, and its lifetimes never expire.
 	 */
 	pr0.ndpr_raf_onlink = 1;
 	pr0.ndpr_raf_auto = 1;	/* probably meaningless */
 	pr0.ndpr_vltime = ND6_INFINITE_LIFETIME;
 	pr0.ndpr_pltime = ND6_INFINITE_LIFETIME;
 	/*
 	 * Since there is no other link-local addresses, nd6_prefix_lookup()
 	 * probably returns NULL.  However, we cannot always expect the result.
 	 * For example, if we first remove the (only) existing link-local
 	 * address, and then reconfigure another one, the prefix is still
 	 * valid with referring to the old link-local address.
 	 */
 	if (nd6_prefix_lookup(&pr0) == NULL) {
 		/* NOTE(review): this path returns error, not -1 like above */
 		if ((error = nd6_prelist_add(&pr0, NULL, NULL)) != 0)
 			return (error);
 	}
 
 	return 0;
 }
 
 /*
  * ifp - must be IFT_LOOP
  */
 static int
 in6_ifattach_loopback(struct ifnet *ifp)
 {
 	struct in6_aliasreq ifra;
 	int error;
 
 	bzero(&ifra, sizeof(ifra));
 
 	/*
 	 * in6_update_ifa() does not use ifra_name, but we accurately set it
 	 * for safety.
 	 */
 	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
 
 	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_prefixmask.sin6_family = AF_INET6;
 	ifra.ifra_prefixmask.sin6_addr = in6mask128;
 
 	/*
 	 * Always initialize ia_dstaddr (= broadcast address) to loopback
 	 * address.  Follows IPv4 practice - see in_ifinit().
 	 */
 	ifra.ifra_dstaddr.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_dstaddr.sin6_family = AF_INET6;
 	ifra.ifra_dstaddr.sin6_addr = in6addr_loopback;
 
 	ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_addr.sin6_family = AF_INET6;
 	ifra.ifra_addr.sin6_addr = in6addr_loopback;
 
 	/* the loopback  address should NEVER expire. */
 	ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
 	ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
 
 	/* we don't need to perform DAD on loopback interfaces. */
 	ifra.ifra_flags |= IN6_IFF_NODAD;
 
 	/* skip registration to the prefix list. XXX should be temporary. */
 	ifra.ifra_flags |= IN6_IFF_NOPFX;
 
 	/*
 	 * We are sure that this is a newly assigned address, so we can set
 	 * NULL to the 3rd arg.
 	 */
 	if ((error = in6_update_ifa(ifp, &ifra, NULL, 0)) != 0) {
 		nd6log((LOG_ERR, "in6_ifattach_loopback: failed to configure "
 		    "the loopback address on %s (errno=%d)\n",
 		    if_name(ifp), error));
 		return (-1);
 	}
 
 	return 0;
 }
 
 /*
  * compute NI group address, based on the current hostname setting.
  * see draft-ietf-ipngwg-icmp-name-lookup-* (04 and later).
  *
  * when ifp == NULL, the caller is responsible for filling scopeid.
  */
 int
 in6_nigroup(struct ifnet *ifp, const char *name, int namelen,
     struct in6_addr *in6)
 {
 	const char *p;
 	u_char *q;
 	MD5_CTX ctxt;
 	u_int8_t digest[16];
 	char l;
 	char n[64];	/* a single label must not exceed 63 chars */
 
 	if (!namelen || !name)
 		return -1;
 
 	p = name;
 	while (p && *p && *p != '.' && p - name < namelen)
 		p++;
 	if (p - name > sizeof(n) - 1)
 		return -1;	/* label too long */
 	l = p - name;
 	strncpy(n, name, l);
 	n[(int)l] = '\0';
 	for (q = n; *q; q++) {
 		if ('A' <= *q && *q <= 'Z')
 			*q = *q - 'A' + 'a';
 	}
 
 	/* generate 8 bytes of pseudo-random value. */
 	bzero(&ctxt, sizeof(ctxt));
 	MD5Init(&ctxt);
 	MD5Update(&ctxt, &l, sizeof(l));
 	MD5Update(&ctxt, n, l);
 	MD5Final(digest, &ctxt);
 
 	bzero(in6, sizeof(*in6));
 	in6->s6_addr16[0] = IPV6_ADDR_INT16_MLL;
 	in6->s6_addr8[11] = 2;
 	bcopy(digest, &in6->s6_addr32[3], sizeof(in6->s6_addr32[3]));
 	if (in6_setscope(in6, ifp, NULL))
 		return (-1); /* XXX: should not fail */
 
 	return 0;
 }
 
 /*
  * XXX multiple loopback interface needs more care.  for instance,
  * nodelocal address needs to be configured onto only one of them.
  * XXX multiple link-local address case
  *
  * altifp - secondary EUI64 source
  */
 void
 in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
 {
 	struct in6_ifaddr *ia;
 	struct in6_addr in6;
 
 	/* some of the interfaces are inherently not IPv6 capable */
 	switch (ifp->if_type) {
 	case IFT_PFLOG:
 	case IFT_PFSYNC:
 	case IFT_CARP:
 		return;
 	}
 
 	/*
 	 * quirks based on interface type
 	 */
 	switch (ifp->if_type) {
 #ifdef IFT_STF
 	case IFT_STF:
 		/*
 		 * 6to4 interface is a very special kind of beast.
 		 * no multicast, no linklocal.  RFC2529 specifies how to make
 		 * linklocals for 6to4 interface, but there's no use and
 		 * it is rather harmful to have one.
 		 */
 		goto statinit;
 #endif
 	default:
 		break;
 	}
 
 	/*
 	 * usually, we require multicast capability to the interface
 	 */
 	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 		nd6log((LOG_INFO, "in6_ifattach: "
 		    "%s is not multicast capable, IPv6 not enabled\n",
 		    if_name(ifp)));
 		return;
 	}
 
 	/*
 	 * assign loopback address for loopback interface.
 	 * XXX multiple loopback interface case.
 	 */
 	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
 		in6 = in6addr_loopback;
 		if (in6ifa_ifpwithaddr(ifp, &in6) == NULL) {
 			if (in6_ifattach_loopback(ifp) != 0)
 				return;
 		}
 	}
 
 	/*
 	 * assign a link-local address, if there's none.
 	 */
 	if (ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) {
 		ia = in6ifa_ifpforlinklocal(ifp, 0);
 		if (ia == NULL) {
 			if (in6_ifattach_linklocal(ifp, altifp) == 0) {
 				/* linklocal address assigned */
 			} else {
 				/* failed to assign linklocal address. bark? */
 			}
 		}
 	}
 
 #ifdef IFT_STF			/* XXX */
 statinit:
 #endif
 
 	/* update dynamically. */
 	if (in6_maxmtu < ifp->if_mtu)
 		in6_maxmtu = ifp->if_mtu;
 }
 
 /*
  * Tear down the IPv6 state of an interface that is being detached: its
  * IPv6 addresses, multicast memberships, neighbor management table and
  * the route to the link-local allnodes multicast group.
  *
  * NOTE: in6_ifdetach() does not support loopback if at this moment.
  * We don't need this function in bsdi, because interfaces are never removed
  * from the ifnet list in bsdi.
  */
 void
 in6_ifdetach(struct ifnet *ifp)
 {
 	struct in6_ifaddr *ia, *oia;
 	struct ifaddr *ifa, *next;
 	struct rtentry *rt;
 	short rtflags;
 	struct sockaddr_in6 sin6;
 	struct in6_multi_mship *imm;
 
 	/* remove neighbor management table */
 	nd6_purge(ifp);
 
 	/* nuke any of IPv6 addresses we have */
 	for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = next) {
 		/* fetch the successor first: ifa may be gone after the purge */
 		next = ifa->ifa_list.tqe_next;
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		in6_purgeaddr(ifa);
 	}
 
 	/* undo everything done by in6_ifattach(), just in case */
 	for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = next) {
 		next = ifa->ifa_list.tqe_next;
 
 		/*
 		 * Only link-local IPv6 addresses are handled below.
 		 * ifa_addr already is the sockaddr pointer, so it must be
 		 * converted as it is, not through its own address.
 		 */
 		if (ifa->ifa_addr->sa_family != AF_INET6
 		 || !IN6_IS_ADDR_LINKLOCAL(&satosin6(ifa->ifa_addr)->sin6_addr)) {
 			continue;
 		}
 
 		ia = (struct in6_ifaddr *)ifa;
 
 		/*
 		 * leave from multicast groups we have joined for the interface
 		 */
 		while ((imm = ia->ia6_memberships.lh_first) != NULL) {
 			LIST_REMOVE(imm, i6mm_chain);
 			in6_leavegroup(imm);
 		}
 
 		/* remove from the routing table */
 		if ((ia->ia_flags & IFA_ROUTE) &&
 		    (rt = rtalloc1((struct sockaddr *)&ia->ia_addr, 0, 0UL))) {
 			rtflags = rt->rt_flags;
 			rtfree(rt);
 			rtrequest(RTM_DELETE, (struct sockaddr *)&ia->ia_addr,
 			    (struct sockaddr *)&ia->ia_addr,
 			    (struct sockaddr *)&ia->ia_prefixmask,
 			    rtflags, (struct rtentry **)0);
 		}
 
 		/* remove from the linked list */
 		TAILQ_REMOVE(&ifp->if_addrlist, (struct ifaddr *)ia, ifa_list);
 		IFAFREE(&ia->ia_ifa);
 
 		/* also remove from the IPv6 address chain(itojun&jinmei) */
 		oia = ia;
 		if (oia == (ia = in6_ifaddr))
 			in6_ifaddr = ia->ia_next;
 		else {
 			/* walk the chain up to the entry preceding oia */
 			while (ia->ia_next && (ia->ia_next != oia))
 				ia = ia->ia_next;
 			if (ia->ia_next)
 				ia->ia_next = oia->ia_next;
 			else {
 				nd6log((LOG_ERR,
 				    "%s: didn't unlink in6ifaddr from list\n",
 				    if_name(ifp)));
 			}
 		}
 
 		IFAFREE(&oia->ia_ifa);
 	}
 
 	in6_pcbpurgeif0(&udbinfo, ifp);
 	in6_pcbpurgeif0(&ripcbinfo, ifp);
 	/* leave from all multicast groups joined */
 	in6_purgemaddrs(ifp);
 
 	/*
 	 * remove neighbor management table.  we call it twice just to make
 	 * sure we nuke everything.  maybe we need just one call.
 	 * XXX: since the first call did not release addresses, some prefixes
 	 * might remain.  We should call nd6_purge() again to release the
 	 * prefixes after removing all addresses above.
 	 * (Or can we just delay calling nd6_purge until at this point?)
 	 */
 	nd6_purge(ifp);
 
 	/* remove route to link-local allnodes multicast (ff02::1) */
 	bzero(&sin6, sizeof(sin6));
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_addr = in6addr_linklocal_allnodes;
 	if (in6_setscope(&sin6.sin6_addr, ifp, NULL))
 		/* XXX: should not fail */
 		return;
 	/* XXX grab lock first to avoid LOR */
-	if (rt_tables[AF_INET6] != NULL) {
-		RADIX_NODE_HEAD_LOCK(rt_tables[AF_INET6]);
+	if (rt_tables[0][AF_INET6] != NULL) {
+		RADIX_NODE_HEAD_LOCK(rt_tables[0][AF_INET6]);
 		rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
 		if (rt) {
 			/* only a route through this interface is removed */
 			if (rt->rt_ifp == ifp)
 				rtexpunge(rt);
 			RTFREE_LOCKED(rt);
 		}
-		RADIX_NODE_HEAD_UNLOCK(rt_tables[AF_INET6]);
+		RADIX_NODE_HEAD_UNLOCK(rt_tables[0][AF_INET6]);
 	}
 }
 
 /*
  * Hand out the randomized interface identifier kept in the ND info of
  * ifp, creating a new one first when asked to or when none exists yet.
  *
  * retbuf   - receives the 8-byte identifier
  * baseid   - copied into randomseed1 before a new identifier is generated
  * generate - non-zero forces generation of a new identifier
  *
  * Always returns 0.
  */
 int
 in6_get_tmpifid(struct ifnet *ifp, u_int8_t *retbuf,
     const u_int8_t *baseid, int generate)
 {
 	struct nd_ifinfo *ndi = ND_IFINFO(ifp);
 	u_int8_t zeroid[8];
 
 	bzero(zeroid, sizeof(zeroid));
 
 	/* an all-zero randomid means we've never created a random ID */
 	if (generate || bcmp(ndi->randomid, zeroid, sizeof(zeroid)) == 0) {
 		bcopy(baseid, ndi->randomseed1, sizeof(ndi->randomseed1));
 
 		/* generate_tmp_ifid will update seedn and buf */
 		(void)generate_tmp_ifid(ndi->randomseed0, ndi->randomseed1,
 		    ndi->randomid);
 	}
 
 	bcopy(ndi->randomid, retbuf, 8);
 	return (0);
 }
 
 void
 in6_tmpaddrtimer(void *ignored_arg)
 {
 	struct nd_ifinfo *ndi;
 	u_int8_t nullbuf[8];
 	struct ifnet *ifp;
 	int s = splnet();
 
 	callout_reset(&in6_tmpaddrtimer_ch,
 	    (ip6_temp_preferred_lifetime - ip6_desync_factor -
 	    ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, NULL);
 
 	bzero(nullbuf, sizeof(nullbuf));
 	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
 		ndi = ND_IFINFO(ifp);
 		if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) != 0) {
 			/*
 			 * We've been generating a random ID on this interface.
 			 * Create a new one.
 			 */
 			(void)generate_tmp_ifid(ndi->randomseed0,
 			    ndi->randomseed1, ndi->randomid);
 		}
 	}
 
 	splx(s);
 }
 
 /*
  * Call in6_delmulti() on every entry of in6_multihead that belongs to
  * ifp.
  */
 static void
 in6_purgemaddrs(struct ifnet *ifp)
 {
 	struct in6_multi *mc, *mc_next;
 
 #ifdef DIAGNOSTIC
 	printf("%s: purging ifp %p\n", __func__, ifp);
 #endif
 
 	IFF_LOCKGIANT(ifp);
 	/* the _SAFE variant keeps the successor in mc_next while mc goes */
 	LIST_FOREACH_SAFE(mc, &in6_multihead, in6m_entry, mc_next) {
 		if (mc->in6m_ifp != ifp)
 			continue;
 		in6_delmulti(mc);
 	}
 	IFF_UNLOCKGIANT(ifp);
 }
Index: head/sys/netinet6/in6_rmx.c
===================================================================
--- head/sys/netinet6/in6_rmx.c	(revision 178887)
+++ head/sys/netinet6/in6_rmx.c	(revision 178888)
@@ -1,473 +1,477 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in6_rmx.c,v 1.11 2001/07/26 06:53:16 jinmei Exp $
  */
 
 /*-
  * Copyright 1994, 1995 Massachusetts Institute of Technology
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby
  * granted, provided that both the above copyright notice and this
  * permission notice appear in all copies, that both the above
  * copyright notice and this permission notice appear in all
  * supporting documentation, and that the name of M.I.T. not be used
  * in advertising or publicity pertaining to distribution of the
  * software without specific, written prior permission.  M.I.T. makes
  * no representations about the suitability of this software for any
  * purpose.  It is provided "as is" without express or implied
  * warranty.
  *
  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 /*
  * This code does two things necessary for the enhanced TCP metrics to
  * function in a useful manner:
  *  1) It marks all non-host routes as `cloning', thus ensuring that
  *     every actual reference to such a route actually gets turned
  *     into a reference to a host route to the specific destination
  *     requested.
  *  2) When such routes lose all their references, it arranges for them
  *     to be deleted in some random collection of circumstances, so that
  *     a large quantity of stale routing data is not kept in kernel memory
  *     indefinitely.  See in6_rtqtimo() below for the exact mechanism.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/mbuf.h>
 #include <sys/syslog.h>
 #include <sys/callout.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/ip_var.h>
 #include <netinet/in_var.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 
 #include <netinet/icmp6.h>
 #include <netinet6/nd6.h>
 
 #include <netinet/tcp.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 
 extern int	in6_inithead(void **head, int off);
 
 #define RTPRF_OURS		RTF_PROTO3	/* set on routes we manage */
 
 /*
  * Do what we need to do when inserting a route.
  *
  * Installed as the rnh_addaddr hook by in6_inithead().  Tags the new
  * entry (RTF_MULTICAST, RTF_LOCAL), fills in a default MTU when none is
  * set, and then hands the entry to rn_addroute().  If that fails, two
  * special cases are retried/tolerated (see the comments below).
  *
  * Returns the radix node for the route, or NULL if it could not be added.
  */
 static struct radix_node *
 in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
     struct radix_node *treenodes)
 {
 	struct rtentry *rt = (struct rtentry *)treenodes;
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt);
 	struct radix_node *ret;
 
 	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 		rt->rt_flags |= RTF_MULTICAST;
 
 	/*
 	 * A little bit of help for both IPv6 output and input:
 	 *   For local addresses, we make sure that RTF_LOCAL is set,
 	 *   with the thought that this might one day be used to speed up
 	 *   ip_input().
 	 *
 	 * We also mark routes to multicast addresses as such, because
 	 * it's easy to do and might be useful (but this is much more
 	 * dubious since it's so easy to inspect the address).  (This
 	 * is done above.)
 	 *
 	 * XXX
 	 * should elaborate the code.
 	 */
 	if (rt->rt_flags & RTF_HOST) {
 		if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr)
 					->sin6_addr,
 				       &sin6->sin6_addr)) {
 			rt->rt_flags |= RTF_LOCAL;
 		}
 	}
 
 	/* No MTU recorded yet: default to the outgoing interface's link MTU. */
 	if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
 		rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
 
 	ret = rn_addroute(v_arg, n_arg, head, treenodes);
 	if (ret == NULL && rt->rt_flags & RTF_HOST) {
 		struct rtentry *rt2;
 		/*
 		 * We are trying to add a host route, but can't.
 		 * Find out if it is because of an existing link-layer
 		 * (RTF_LLINFO, AF_LINK gateway) host entry and delete it
 		 * if so, then retry the insertion once.
 		 */
 		rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_CLONING);
 		if (rt2) {
 			if (rt2->rt_flags & RTF_LLINFO &&
 				rt2->rt_flags & RTF_HOST &&
 				rt2->rt_gateway &&
 				rt2->rt_gateway->sa_family == AF_LINK) {
 				rtexpunge(rt2);
 				RTFREE_LOCKED(rt2);
 				ret = rn_addroute(v_arg, n_arg, head,
 					treenodes);
 			} else
 				RTFREE_LOCKED(rt2);
 		}
 	} else if (ret == NULL && rt->rt_flags & RTF_CLONING) {
 		struct rtentry *rt2;
 		/*
 		 * We are trying to add a net route, but can't.
 		 * The following case should be allowed, so we'll make a
 		 * special check for this:
 		 *	Two IPv6 addresses with the same prefix are assigned
 		 *	to a single interface.
 		 *	# ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1)
 		 *	# ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2)
 		 *	In this case, (*1) and (*2) want to add the same
 		 *	net route entry, 3ffe:0501:: -> if0.
 		 *	This case should not raise an error.
 		 */
 		rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_CLONING);
 		if (rt2) {
 			/*
 			 * Accept the existing entry only if it is a pure
 			 * cloning route (not host, not gateway) with a
 			 * link-layer gateway on the same interface.
 			 */
 			if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|RTF_GATEWAY))
 					== RTF_CLONING
 			 && rt2->rt_gateway
 			 && rt2->rt_gateway->sa_family == AF_LINK
 			 && rt2->rt_ifp == rt->rt_ifp) {
 				ret = rt2->rt_nodes;
 			}
 			RTFREE_LOCKED(rt2);
 		}
 	}
 	return ret;
 }
 
 /*
  * This code is the inverse of in6_clsroute: on first reference, if we
  * were managing the route, stop doing so and set the expiration timer
  * back off again.
  */
 static struct radix_node *
 in6_matroute(void *v_arg, struct radix_node_head *head)
 {
 	struct radix_node *rn = rn_match(v_arg, head);
 	struct rtentry *rt = (struct rtentry *)rn;
 
 	if (rt && rt->rt_refcnt == 0) { /* this is first reference */
 		if (rt->rt_flags & RTPRF_OURS) {
 			rt->rt_flags &= ~RTPRF_OURS;
 			rt->rt_rmx.rmx_expire = 0;
 		}
 	}
 	return rn;
 }
 
 SYSCTL_DECL(_net_inet6_ip6);
 
 /*
  * Tunables for the cached-route expiry machinery below.  Each is
  * exported read/write under net.inet6.ip6.
  */
 
 /* Added to time_uptime to form the expiry of a route we take over. */
 static int rtq_reallyold = 60*60;
 	/* one hour is ``really old'' */
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire,
 	CTLFLAG_RW, &rtq_reallyold , 0, "");
 
 /* Floor for rtq_reallyold when in6_rtqtimo() shrinks it. */
 static int rtq_minreallyold = 10;
 	/* never automatically crank down to less */
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire,
 	CTLFLAG_RW, &rtq_minreallyold , 0, "");
 
 /* Surviving-route count above which in6_rtqtimo() shrinks rtq_reallyold. */
 static int rtq_toomany = 128;
 	/* 128 cached routes is ``too many'' */
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache,
 	CTLFLAG_RW, &rtq_toomany , 0, "");
 
 
 /*
  * On last reference drop, mark the route as belonging to us so that it
  * can be timed out (or expunge it at once when rtq_reallyold is 0).
  */
 static void
 in6_clsroute(struct radix_node *rn, struct radix_node_head *head)
 {
 	struct rtentry *rt;
 
 	rt = (struct rtentry *)rn;
 	RT_LOCK_ASSERT(rt);
 
 	/* prophylactic measures: ignore routes that are not up */
 	if ((rt->rt_flags & RTF_UP) == 0)
 		return;
 
 	/* Only host routes without RTF_LLINFO are candidates. */
 	if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
 		return;
 
 	/* Must be a cloned route that is not already marked as ours. */
 	if ((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) != RTF_WASCLONED)
 		return;
 
 	/*
 	 * As requested by David Greenman:
 	 * If rtq_reallyold is 0, just delete the route without
 	 * waiting for a timeout cycle to kill it.
 	 */
 	if (rtq_reallyold == 0) {
 		rtexpunge(rt);
 		return;
 	}
 
 	rt->rt_flags |= RTPRF_OURS;
 	rt->rt_rmx.rmx_expire = time_uptime + rtq_reallyold;
 }
 
 /*
  * Walker state shared between in6_rtqtimo()/in6_rtqdrain() and the
  * per-node callback in6_rtqkill().
  */
 struct rtqk_arg {
 	struct radix_node_head *rnh;	/* tree being walked */
 	int mode;			/* not referenced in the code shown here */
 	int updating;			/* clamp expiries to rtq_reallyold */
 	int draining;			/* delete even if not yet expired */
 	int killed;			/* routes successfully deleted */
 	int found;			/* RTPRF_OURS routes seen */
 	time_t nextstop;		/* earliest remaining expiry seen */
 };
 
 /*
  * Get rid of old routes.  When draining, this deletes everything, even when
  * the timeout is not expired yet.  When updating, this makes sure that
  * nothing has a timeout longer than the current value of rtq_reallyold.
  */
 static int
 in6_rtqkill(struct radix_node *rn, void *rock)
 {
 	struct rtqk_arg *ap = rock;
 	struct rtentry *rt = (struct rtentry *)rn;
 	int err;
 
 	if (rt->rt_flags & RTPRF_OURS) {
 		ap->found++;
 
 		if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) {
 			if (rt->rt_refcnt > 0)
 				panic("rtqkill route really not free");
 
 			err = rtrequest(RTM_DELETE,
 					(struct sockaddr *)rt_key(rt),
 					rt->rt_gateway, rt_mask(rt),
 					rt->rt_flags, 0);
 			if (err) {
 				log(LOG_WARNING, "in6_rtqkill: error %d", err);
 			} else {
 				ap->killed++;
 			}
 		} else {
 			if (ap->updating
 			   && (rt->rt_rmx.rmx_expire - time_uptime
 			       > rtq_reallyold)) {
 				rt->rt_rmx.rmx_expire = time_uptime
 					+ rtq_reallyold;
 			}
 			ap->nextstop = lmin(ap->nextstop,
 					    rt->rt_rmx.rmx_expire);
 		}
 	}
 
 	return 0;
 }
 
 /*
  * Parenthesized so the macro expands as a single value inside larger
  * expressions (e.g. x / RTQ_TIMEOUT), matching MTUTIMO_DEFAULT.
  */
 #define RTQ_TIMEOUT	(60*10)	/* run no less than once every ten minutes */
 /* Default interval, in seconds, until the next in6_rtqtimo() run. */
 static int rtq_timeout = RTQ_TIMEOUT;
 /* Callout that drives in6_rtqtimo(). */
 static struct callout rtq_timer;
 
 /*
  * Periodic reaper for routes flagged RTPRF_OURS.
  *
  * rock is the radix_node_head to walk (in6_inithead() passes the table
  * head).  Each run walks the tree with in6_rtqkill(), possibly shrinks
  * rtq_reallyold when too many such routes survive, and then re-arms
  * rtq_timer for the earliest pending expiry (at most rtq_timeout seconds
  * from the start of the run).
  */
 static void
 in6_rtqtimo(void *rock)
 {
 	struct radix_node_head *rnh = rock;
 	struct rtqk_arg arg;
 	struct timeval atv;
 	static time_t last_adjusted_timeout = 0;
 
 	arg.found = arg.killed = 0;
 	arg.rnh = rnh;
 	arg.nextstop = time_uptime + rtq_timeout;
 	arg.draining = arg.updating = 0;
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
 
 	/*
 	 * Attempt to be somewhat dynamic about this:
 	 * If there are ``too many'' routes sitting around taking up space,
 	 * then crank down the timeout, and see if we can't make some more
 	 * go away.  However, we make sure that we will never adjust more
 	 * than once in rtq_timeout seconds, to keep from cranking down too
 	 * hard.
 	 */
 	if ((arg.found - arg.killed > rtq_toomany)
 	   && (time_uptime - last_adjusted_timeout >= rtq_timeout)
 	   && rtq_reallyold > rtq_minreallyold) {
 		/* Shrink by a third, but never below the configured floor. */
 		rtq_reallyold = 2*rtq_reallyold / 3;
 		if (rtq_reallyold < rtq_minreallyold) {
 			rtq_reallyold = rtq_minreallyold;
 		}
 
 		last_adjusted_timeout = time_uptime;
 #ifdef DIAGNOSTIC
 		log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold to %d\n",
 		    rtq_reallyold);
 #endif
 		/* Second pass: clamp existing expiries to the new limit. */
 		arg.found = arg.killed = 0;
 		arg.updating = 1;
 		RADIX_NODE_HEAD_LOCK(rnh);
 		rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 	}
 
 	/* Re-arm for the earliest expiry recorded by the walk(s). */
 	atv.tv_usec = 0;
 	atv.tv_sec = arg.nextstop - time_uptime;
 	callout_reset(&rtq_timer, tvtohz(&atv), in6_rtqtimo, rock);
 }
 
 /*
  * Age old PMTUs.
  *
  * Walker state handed from in6_mtutimo() to in6_mtuexpire().
  */
 struct mtuex_arg {
 	struct radix_node_head *rnh;	/* tree being walked */
 	time_t nextstop;		/* earliest pending expiry seen */
 };
 /* Callout that drives in6_mtutimo(). */
 static struct callout rtq_mtutimer;
 
 static int
 in6_mtuexpire(struct radix_node *rn, void *rock)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 	struct mtuex_arg *ap = rock;
 
 	/* sanity */
 	if (!rt)
 		panic("rt == NULL in in6_mtuexpire");
 
 	if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
 		if (rt->rt_rmx.rmx_expire <= time_uptime) {
 			rt->rt_flags |= RTF_PROBEMTU;
 		} else {
 			ap->nextstop = lmin(ap->nextstop,
 					rt->rt_rmx.rmx_expire);
 		}
 	}
 
 	return 0;
 }
 
 #define	MTUTIMO_DEFAULT	(60*1)
 
 static void
 in6_mtutimo(void *rock)
 {
 	struct radix_node_head *rnh = rock;
 	struct mtuex_arg arg;
 	struct timeval atv;
 
 	arg.rnh = rnh;
 	arg.nextstop = time_uptime + MTUTIMO_DEFAULT;
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rnh->rnh_walktree(rnh, in6_mtuexpire, &arg);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
 
 	atv.tv_usec = 0;
 	atv.tv_sec = arg.nextstop - time_uptime;
 	if (atv.tv_sec < 0) {
 		printf("invalid mtu expiration time on routing table\n");
 		arg.nextstop = time_uptime + 30;	/* last resort */
 		atv.tv_sec = 30;
 	}
 	callout_reset(&rtq_mtutimer, tvtohz(&atv), in6_mtutimo, rock);
 }
 
 /*
  * Compiled out.  Would walk the IPv6 table once in draining mode so that
  * in6_rtqkill() deletes every RTPRF_OURS route regardless of its expiry.
  * NOTE(review): this still indexes rt_tables[AF_INET6]; other code in this
  * change uses rt_tables[0][AF_INET6], so this presumably needs the same
  * update before it can be re-enabled -- confirm.
  */
 #if 0
 void
 in6_rtqdrain(void)
 {
 	struct radix_node_head *rnh = rt_tables[AF_INET6];
 	struct rtqk_arg arg;
 
 	arg.found = arg.killed = 0;
 	arg.rnh = rnh;
 	arg.nextstop = 0;
 	arg.draining = 1;
 	arg.updating = 0;
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
 }
 #endif
 
 /*
  * Initialize our routing tree.
+ * XXX MRT When off == 0, we are being called from vfs_export.c
+ * so just set up their table and leave. (we know what the correct
+ * value should be so just use that).. FIX AFTER RELENG_7 is MFC'd
+ * see also comments in in_inithead() vfs_export.c and domain.h
  */
 int
 in6_inithead(void **head, int off)
 {
 	struct radix_node_head *rnh;
 
-	if (!rn_inithead(head, off))
-		return 0;
+	if (!rn_inithead(head, offsetof(struct sockaddr_in6, sin6_addr) << 3))
+		return 0;		/* See above */
 
-	if (head != (void **)&rt_tables[AF_INET6]) /* BOGUS! */
-		return 1;	/* only do this for the real routing table */
+	if (off == 0)		/* See above */
+		return 1;	/* only do the rest for the real thing */
 
 	/*
 	 * Real routing table: install the IPv6-specific add/match/close
 	 * hooks and start both periodic timers by calling them once.
 	 */
 	rnh = *head;
 	rnh->rnh_addaddr = in6_addroute;
 	rnh->rnh_matchaddr = in6_matroute;
 	rnh->rnh_close = in6_clsroute;
 	callout_init(&rtq_timer, CALLOUT_MPSAFE);
 	in6_rtqtimo(rnh);	/* kick off timeout first time */
 	callout_init(&rtq_mtutimer, CALLOUT_MPSAFE);
 	in6_mtutimo(rnh);	/* kick off timeout first time */
 	return 1;
 }
Index: head/sys/netinet6/nd6_rtr.c
===================================================================
--- head/sys/netinet6/nd6_rtr.c	(revision 178887)
+++ head/sys/netinet6/nd6_rtr.c	(revision 178888)
@@ -1,2097 +1,2098 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/errno.h>
 #include <sys/syslog.h>
 #include <sys/queue.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/radix.h>
 
 #include <netinet/in.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/scope6_var.h>
 
 /* Cast a generic pointer to a link-level socket address pointer. */
 #define SDL(s)	((struct sockaddr_dl *)s)
 
 /* Forward declarations of the file-local helpers. */
 static int rtpref(struct nd_defrouter *);
 static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
 static int prelist_update __P((struct nd_prefixctl *, struct nd_defrouter *,
     struct mbuf *, int));
 static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *,	int);
 static struct nd_pfxrouter *pfxrtr_lookup __P((struct nd_prefix *,
 	struct nd_defrouter *));
 static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *);
 static void pfxrtr_del(struct nd_pfxrouter *);
 static struct nd_pfxrouter *find_pfxlist_reachable_router
 (struct nd_prefix *);
 static void defrouter_delreq(struct nd_defrouter *);
 static void nd6_rtmsg(int, struct rtentry *);
 
 static int in6_init_prefix_ltimes(struct nd_prefix *);
 static void in6_init_address_ltimes __P((struct nd_prefix *,
 	struct in6_addrlifetime *));
 
 static int rt6_deleteroute(struct radix_node *, void *);
 
 /* Defined elsewhere; nd6_ra_input() uses it to reset ndi->recalctm. */
 extern int nd6_recalc_reachtm_interval;
 
 static struct ifnet *nd6_defifp;
 int nd6_defifindex;
 
 int ip6_use_tempaddr = 0;
 
 int ip6_desync_factor;
 u_int32_t ip6_temp_preferred_lifetime = DEF_TEMP_PREFERRED_LIFETIME;
 u_int32_t ip6_temp_valid_lifetime = DEF_TEMP_VALID_LIFETIME;
 /*
  * shorter lifetimes for debugging purposes.
 int ip6_temp_preferred_lifetime = 800;
 static int ip6_temp_valid_lifetime = 1800;
 */
 int ip6_temp_regen_advance = TEMPADDR_REGEN_ADVANCE;
 
 /*
  * Router preference levels (presumably the values rtpref() yields --
  * confirm against its definition).
  */
 /* RTPREF_MEDIUM has to be 0! */
 #define RTPREF_HIGH	1
 #define RTPREF_MEDIUM	0
 #define RTPREF_LOW	(-1)
 #define RTPREF_RESERVED	(-2)
 #define RTPREF_INVALID	(-3)	/* internal */
 
 /*
  * Receive Router Solicitation Message - just for routers.
  * Router solicitation/advertisement is mostly managed by userland program
  * (rtadvd) so here we have no function like nd6_ra_output().
  *
  * Based on RFC 2461
  *
  * m is the received packet, off the offset of the ICMPv6 message within
  * it and icmp6len the length of that message.  The only effect besides
  * validation is a call to nd6_cache_lladdr() for the sender.  The mbuf
  * is freed on the freeit/bad exits; "bad" also bumps icp6s_badrs.
  */
 void
 nd6_rs_input(struct mbuf *m, int off, int icmp6len)
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_router_solicit *nd_rs;
 	struct in6_addr saddr6 = ip6->ip6_src;
 	char *lladdr = NULL;
 	int lladdrlen = 0;
 	union nd_opts ndopts;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 
 	/* If I'm not a router, ignore it. */
 	if (ip6_accept_rtadv != 0 || ip6_forwarding != 1)
 		goto freeit;
 
 	/* Sanity checks */
 	if (ip6->ip6_hlim != 255) {
 		nd6log((LOG_ERR,
 		    "nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n",
 		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
 		goto bad;
 	}
 
 	/*
 	 * Don't update the neighbor cache, if src = ::.
 	 * This indicates that the src has no IP address assigned yet.
 	 */
 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
 		goto freeit;
 
 	/*
 	 * Locate the RS header in the mbuf.
 	 * NOTE(review): both macros can presumably leave this function on
 	 * failure (the PULLDOWN_TEST branch returns without m_freem()) --
 	 * confirm their mbuf ownership rules.
 	 */
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, icmp6len,);
 	nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len);
 	if (nd_rs == NULL) {
 		icmp6stat.icp6s_tooshort++;
 		return;
 	}
 #endif
 
 	/* The ND options follow the fixed RS header. */
 	icmp6len -= sizeof(*nd_rs);
 	nd6_option_init(nd_rs + 1, icmp6len, &ndopts);
 	if (nd6_options(&ndopts) < 0) {
 		nd6log((LOG_INFO,
 		    "nd6_rs_input: invalid ND option, ignored\n"));
 		/* nd6_options have incremented stats */
 		goto freeit;
 	}
 
 	/* nd_opt_len counts 8-byte units; the address follows the header. */
 	if (ndopts.nd_opts_src_lladdr) {
 		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
 		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
 	}
 
 	/*
 	 * The option length must equal the interface's address length plus
 	 * the 2-byte option header, rounded up to a multiple of 8.
 	 */
 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
 		nd6log((LOG_INFO,
 		    "nd6_rs_input: lladdrlen mismatch for %s "
 		    "(if %d, RS packet %d)\n",
 		    ip6_sprintf(ip6bufs, &saddr6),
 		    ifp->if_addrlen, lladdrlen - 2));
 		goto bad;
 	}
 
 	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0);
 
  freeit:
 	m_freem(m);
 	return;
 
  bad:
 	icmp6stat.icp6s_badrs++;
 	m_freem(m);
 }
 
 /*
  * Receive Router Advertisement Message.
  *
  * Based on RFC 2461
  * TODO: on-link bit on prefix information
  * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing
  *
  * m is the received packet, off the offset of the ICMPv6 message within
  * it and icmp6len the length of that message.  After validation the RA
  * updates the per-interface ND parameters, the default router list, the
  * prefix list, the link MTU and the sender's neighbor cache entry, in
  * that order.  The mbuf is freed on the freeit/bad exits; "bad" also
  * bumps icp6s_badra.
  */
 void
 nd6_ra_input(struct mbuf *m, int off, int icmp6len)
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct nd_ifinfo *ndi = ND_IFINFO(ifp);
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_router_advert *nd_ra;
 	struct in6_addr saddr6 = ip6->ip6_src;
 	int mcast = 0;
 	union nd_opts ndopts;
 	struct nd_defrouter *dr;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 
 	/*
 	 * We accept RAs only when
 	 * the system-wide variable allows the acceptance, and
 	 * per-interface variable allows RAs on the receiving interface.
 	 */
 	if (ip6_accept_rtadv == 0)
 		goto freeit;
 	if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV))
 		goto freeit;
 
 	if (ip6->ip6_hlim != 255) {
 		nd6log((LOG_ERR,
 		    "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n",
 		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
 		goto bad;
 	}
 
 	if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) {
 		nd6log((LOG_ERR,
 		    "nd6_ra_input: src %s is not link-local\n",
 		    ip6_sprintf(ip6bufs, &saddr6)));
 		goto bad;
 	}
 
 	/*
 	 * Locate the RA header in the mbuf.
 	 * NOTE(review): both macros can presumably leave this function on
 	 * failure (the PULLDOWN_TEST branch returns without m_freem()) --
 	 * confirm their mbuf ownership rules.
 	 */
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, icmp6len,);
 	nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len);
 	if (nd_ra == NULL) {
 		icmp6stat.icp6s_tooshort++;
 		return;
 	}
 #endif
 
 	/* The ND options follow the fixed RA header. */
 	icmp6len -= sizeof(*nd_ra);
 	nd6_option_init(nd_ra + 1, icmp6len, &ndopts);
 	if (nd6_options(&ndopts) < 0) {
 		nd6log((LOG_INFO,
 		    "nd6_ra_input: invalid ND option, ignored\n"));
 		/* nd6_options have incremented stats */
 		goto freeit;
 	}
 
 	/*
 	 * Per-interface parameters and default router list entry
 	 */
     {
 	struct nd_defrouter dr0;
 	u_int32_t advreachable = nd_ra->nd_ra_reachable;
 
 	/* remember if this is a multicasted advertisement */
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
 		mcast = 1;
 
 	/* Template entry handed to defrtrlist_update() below. */
 	bzero(&dr0, sizeof(dr0));
 	dr0.rtaddr = saddr6;
 	dr0.flags  = nd_ra->nd_ra_flags_reserved;
 	dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
 	dr0.expire = time_second + dr0.rtlifetime;
 	dr0.ifp = ifp;
 	/* unspecified or not? (RFC 2461 6.3.4) */
 	if (advreachable) {
 		advreachable = ntohl(advreachable);
 		if (advreachable <= MAX_REACHABLE_TIME &&
 		    ndi->basereachable != advreachable) {
 			ndi->basereachable = advreachable;
 			ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
 			ndi->recalctm = nd6_recalc_reachtm_interval; /* reset */
 		}
 	}
 	/* A zero field leaves the corresponding current value untouched. */
 	if (nd_ra->nd_ra_retransmit)
 		ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
 	if (nd_ra->nd_ra_curhoplimit)
 		ndi->chlim = nd_ra->nd_ra_curhoplimit;
 	dr = defrtrlist_update(&dr0);
     }
 
 	/*
 	 * prefix
 	 */
 	if (ndopts.nd_opts_pi) {
 		struct nd_opt_hdr *pt;
 		struct nd_opt_prefix_info *pi = NULL;
 		struct nd_prefixctl pr;
 
 		/*
 		 * Step through the options from the first to the last
 		 * prefix information option; nd_opt_len is in 8-byte units.
 		 */
 		for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi;
 		     pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end;
 		     pt = (struct nd_opt_hdr *)((caddr_t)pt +
 						(pt->nd_opt_len << 3))) {
 			if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION)
 				continue;
 			pi = (struct nd_opt_prefix_info *)pt;
 
 			/* Only options of exactly 4 units are accepted. */
 			if (pi->nd_opt_pi_len != 4) {
 				nd6log((LOG_INFO,
 				    "nd6_ra_input: invalid option "
 				    "len %d for prefix information option, "
 				    "ignored\n", pi->nd_opt_pi_len));
 				continue;
 			}
 
 			if (128 < pi->nd_opt_pi_prefix_len) {
 				nd6log((LOG_INFO,
 				    "nd6_ra_input: invalid prefix "
 				    "len %d for prefix information option, "
 				    "ignored\n", pi->nd_opt_pi_prefix_len));
 				continue;
 			}
 
 			/* Multicast and link-local prefixes are rejected. */
 			if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix)
 			 || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) {
 				nd6log((LOG_INFO,
 				    "nd6_ra_input: invalid prefix "
 				    "%s, ignored\n",
 				    ip6_sprintf(ip6bufs,
 					&pi->nd_opt_pi_prefix)));
 				continue;
 			}
 
 			/* Build the control block for prelist_update(). */
 			bzero(&pr, sizeof(pr));
 			pr.ndpr_prefix.sin6_family = AF_INET6;
 			pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix);
 			pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix;
 			pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif;
 
 			pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved &
 			    ND_OPT_PI_FLAG_ONLINK) ? 1 : 0;
 			pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved &
 			    ND_OPT_PI_FLAG_AUTO) ? 1 : 0;
 			pr.ndpr_plen = pi->nd_opt_pi_prefix_len;
 			pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time);
 			pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time);
 			(void)prelist_update(&pr, dr, m, mcast);
 		}
 	}
 
 	/*
 	 * MTU
 	 */
 	if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) {
 		u_long mtu;
 		u_long maxmtu;
 
 		mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu);
 
 		/* lower bound */
 		if (mtu < IPV6_MMTU) {
 			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option "
 			    "mtu=%lu sent from %s, ignoring\n",
 			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src)));
 			goto skip;
 		}
 
 		/* upper bound */
 		/* ... is the smaller of ndi->maxmtu (if set) and if_mtu. */
 		maxmtu = (ndi->maxmtu && ndi->maxmtu < ifp->if_mtu)
 		    ? ndi->maxmtu : ifp->if_mtu;
 		if (mtu <= maxmtu) {
 			int change = (ndi->linkmtu != mtu);
 
 			ndi->linkmtu = mtu;
 			if (change) /* in6_maxmtu may change */
 				in6_setmaxmtu();
 		} else {
 			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu "
 			    "mtu=%lu sent from %s; "
 			    "exceeds maxmtu %lu, ignoring\n",
 			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu));
 		}
 	}
 
  skip:
 
 	/*
 	 * Source link layer address
 	 */
     {
 	char *lladdr = NULL;
 	int lladdrlen = 0;
 
 	/* nd_opt_len counts 8-byte units; the address follows the header. */
 	if (ndopts.nd_opts_src_lladdr) {
 		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
 		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
 	}
 
 	/*
 	 * The option length must equal the interface's address length plus
 	 * the 2-byte option header, rounded up to a multiple of 8.
 	 */
 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
 		nd6log((LOG_INFO,
 		    "nd6_ra_input: lladdrlen mismatch for %s "
 		    "(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6),
 		    ifp->if_addrlen, lladdrlen - 2));
 		goto bad;
 	}
 
 	nd6_cache_lladdr(ifp, &saddr6, lladdr,
 	    lladdrlen, ND_ROUTER_ADVERT, 0);
 
 	/*
 	 * Installing a link-layer address might change the state of the
 	 * router's neighbor cache, which might also affect our on-link
 	 * detection of advertised prefixes.
 	 */
 	pfxlist_onlink_check();
     }
 
  freeit:
 	m_freem(m);
 	return;
 
  bad:
 	icmp6stat.icp6s_badra++;
 	m_freem(m);
 }
 
 /*
  * default router list processing subroutines
  */
 
 /* tell the change to user processes watching the routing socket. */
 static void
 nd6_rtmsg(int cmd, struct rtentry *rt)
 {
 	struct rt_addrinfo info;
 
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_DST] = rt_key(rt);
 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
 	if (rt->rt_ifp) {
 		info.rti_info[RTAX_IFP] =
 		    TAILQ_FIRST(&rt->rt_ifp->if_addrlist)->ifa_addr;
 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 	}
 
 	rt_missmsg(cmd, &info, rt->rt_flags, 0);
 }
 
 void
 defrouter_addreq(struct nd_defrouter *new)
 {
 	struct sockaddr_in6 def, mask, gate;
 	struct rtentry *newrt = NULL;
 	int s;
 	int error;
 
 	bzero(&def, sizeof(def));
 	bzero(&mask, sizeof(mask));
 	bzero(&gate, sizeof(gate));
 
 	def.sin6_len = mask.sin6_len = gate.sin6_len =
 	    sizeof(struct sockaddr_in6);
 	def.sin6_family = gate.sin6_family = AF_INET6;
 	gate.sin6_addr = new->rtaddr;
 
 	s = splnet();
 	error = rtrequest(RTM_ADD, (struct sockaddr *)&def,
 	    (struct sockaddr *)&gate, (struct sockaddr *)&mask,
 	    RTF_GATEWAY, &newrt);
 	if (newrt) {
 		RT_LOCK(newrt);
 		nd6_rtmsg(RTM_ADD, newrt); /* tell user process */
 		RT_REMREF(newrt);
 		RT_UNLOCK(newrt);
 	}
 	if (error == 0)
 		new->installed = 1;
 	splx(s);
 	return;
 }
 
 /*
  * Find the default router list entry for address addr on interface ifp.
  * Returns the entry, or NULL when there is none.
  */
 struct nd_defrouter *
 defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
 {
 	struct nd_defrouter *cand;
 
 	cand = TAILQ_FIRST(&nd_defrouter);
 	while (cand != NULL) {
 		if (cand->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &cand->rtaddr))
 			break;
 		cand = TAILQ_NEXT(cand, dr_entry);
 	}
 
 	return (cand);		/* NULL when the search failed */
 }
 
 /*
  * Remove the default route for a given router.
  * This is just a subroutine function for defrouter_select(), and should
  * not be called from anywhere else.
  */
 static void
 defrouter_delreq(struct nd_defrouter *dr)
 {
 	struct sockaddr_in6 def, mask, gate;
 	struct rtentry *oldrt = NULL;
 
 	bzero(&def, sizeof(def));
 	bzero(&mask, sizeof(mask));
 	bzero(&gate, sizeof(gate));
 
 	def.sin6_len = mask.sin6_len = gate.sin6_len =
 	    sizeof(struct sockaddr_in6);
 	def.sin6_family = gate.sin6_family = AF_INET6;
 	gate.sin6_addr = dr->rtaddr;
 
 	rtrequest(RTM_DELETE, (struct sockaddr *)&def,
 	    (struct sockaddr *)&gate,
 	    (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt);
 	if (oldrt) {
 		nd6_rtmsg(RTM_DELETE, oldrt);
 		RTFREE(oldrt);
 	}
 
 	dr->installed = 0;
 }
 
 /*
  * remove all default routes from default router list
  */
 void
 defrouter_reset(void)
 {
 	struct nd_defrouter *dr;
 
 	for (dr = TAILQ_FIRST(&nd_defrouter); dr;
 	     dr = TAILQ_NEXT(dr, dr_entry))
 		defrouter_delreq(dr);
 
 	/*
 	 * XXX should we also nuke any default routers in the kernel, by
 	 * going through them by rtalloc1()?
 	 */
 }
 
 /*
  * Remove router dr from the default router list, drop every reference
  * to it from the prefix list, pick a new default router if dr was the
  * installed one, and free dr.
  */
 void
 defrtrlist_del(struct nd_defrouter *dr)
 {
 	struct nd_prefix *pr;
 	int was_installed;
 
 	/*
 	 * Flush all the routing table entries that use the router
 	 * as a next hop.
 	 */
 	if (!ip6_forwarding && ip6_accept_rtadv) /* XXX: better condition? */
 		rt6_flush(&dr->rtaddr, dr->ifp);
 
 	/* Remember whether dr carried the default route before removing it. */
 	was_installed = (dr->installed != 0);
 	if (was_installed)
 		defrouter_delreq(dr);
 	TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
 
 	/*
 	 * Also delete all the pointers to the router in each prefix lists.
 	 */
 	for (pr = nd_prefix.lh_first; pr != NULL; pr = pr->ndpr_next) {
 		struct nd_pfxrouter *pfxrtr = pfxrtr_lookup(pr, dr);
 
 		if (pfxrtr != NULL)
 			pfxrtr_del(pfxrtr);
 	}
 	pfxlist_onlink_check();
 
 	/*
 	 * If the router is the primary one, choose a new one.
 	 * Note that defrouter_select() will remove the current gateway
 	 * from the routing table.
 	 */
 	if (was_installed)
 		defrouter_select();
 
 	free(dr, M_IP6NDP);
 }
 
 /*
  * Pick the router from the default router list that should carry the
  * default route and, if it differs from the one currently installed,
  * replace the installed route.  Takes no arguments and returns nothing;
  * the whole body runs between splnet() and splx().
  *
  * Default Router Selection according to Section 6.3.6 of RFC 2461 and
  * draft-ietf-ipngwg-router-selection:
  * 1) Routers that are reachable or probably reachable should be preferred.
  *    If we have more than one (probably) reachable router, prefer ones
  *    with the highest router preference.
  * 2) When no routers on the list are known to be reachable or
  *    probably reachable, routers SHOULD be selected in a round-robin
  *    fashion, regardless of router preference values.
  * 3) If the Default Router List is empty, assume that all
  *    destinations are on-link.
  *
  * We assume nd_defrouter is sorted by router preference value.
  * Since the code below covers both with and without router preference cases,
  * we do not need to classify the cases by ifdef.
  *
  * At this moment, we do not try to install more than one default router,
  * even when the multipath routing is available, because we're not sure about
  * the benefits for stub hosts comparing to the risk of making the code
  * complicated and the possibility of introducing bugs.
  */
 void
 defrouter_select(void)
 {
 	int s = splnet();
 	struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
 	struct rtentry *rt = NULL;
 	struct llinfo_nd6 *ln = NULL;
 
 	/*
 	 * This function should be called only when acting as an autoconfigured
 	 * host.  Although the remaining part of this function is not effective
 	 * if the node is not an autoconfigured host, we explicitly exclude
 	 * such cases here for safety.
 	 */
 	if (ip6_forwarding || !ip6_accept_rtadv) {
 		nd6log((LOG_WARNING,
 		    "defrouter_select: called unexpectedly (forwarding=%d, "
 		    "accept_rtadv=%d)\n", ip6_forwarding, ip6_accept_rtadv));
 		splx(s);
 		return;
 	}
 
 	/*
 	 * Let's handle easy case (3) first:
 	 * If default router list is empty, there's nothing to be done.
 	 */
 	if (!TAILQ_FIRST(&nd_defrouter)) {
 		splx(s);
 		return;
 	}
 
 	/*
 	 * Search for a (probably) reachable router from the list.
 	 * We just pick up the first reachable one (if any), assuming that
 	 * the ordering rule of the list described in defrtrlist_update().
 	 */
 	for (dr = TAILQ_FIRST(&nd_defrouter); dr;
 	     dr = TAILQ_NEXT(dr, dr_entry)) {
 		/*
 		 * Only the first match is kept; once selected_dr is set the
 		 * neighbor lookup is skipped for the remaining entries.
 		 */
 		if (selected_dr == NULL &&
 		    (rt = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
 		    (ln = (struct llinfo_nd6 *)rt->rt_llinfo) &&
 		    ND6_IS_LLINFO_PROBREACH(ln)) {
 			selected_dr = dr;
 		}
 
 		/*
 		 * Remember the first entry flagged as installed; any further
 		 * one is only reported, installed_dr is left unchanged.
 		 */
 		if (dr->installed && installed_dr == NULL)
 			installed_dr = dr;
 		else if (dr->installed && installed_dr) {
 			/* this should not happen.  warn for diagnosis. */
 			log(LOG_ERR, "defrouter_select: more than one router"
 			    " is installed\n");
 		}
 	}
 	/*
 	 * If none of the default routers was found to be reachable,
 	 * round-robin the list regardless of preference.
 	 * Otherwise, if we have an installed router, check if the selected
 	 * (reachable) router should really be preferred to the installed one.
 	 * We only prefer the new router when the old one is not reachable
 	 * or when the new one has a really higher preference value.
 	 */
 	if (selected_dr == NULL) {
 		/*
 		 * Round-robin step: advance to the entry after the installed
 		 * one, wrapping to the head of the list when there is no
 		 * installed router or it is the last entry.
 		 */
 		if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry))
 			selected_dr = TAILQ_FIRST(&nd_defrouter);
 		else
 			selected_dr = TAILQ_NEXT(installed_dr, dr_entry);
 	} else if (installed_dr &&
 	    (rt = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) &&
 	    (ln = (struct llinfo_nd6 *)rt->rt_llinfo) &&
 	    ND6_IS_LLINFO_PROBREACH(ln) &&
 	    rtpref(selected_dr) <= rtpref(installed_dr)) {
 		/* Keep the installed router: reachable and not less preferred. */
 		selected_dr = installed_dr;
 	}
 
 	/*
 	 * If the selected router is different than the installed one,
 	 * remove the installed router and install the selected one.
 	 * Note that the selected router is never NULL here.
 	 */
 	if (installed_dr != selected_dr) {
 		if (installed_dr)
 			defrouter_delreq(installed_dr);
 		defrouter_addreq(selected_dr);
 	}
 
 	splx(s);
 	return;
 }
 
 /*
  * Translate the router-preference bits of a default router's flags into
  * one of the RTPREF_* values used for default router selection.
  * The reserved encoding is treated the same as medium preference.
  * Any other bit pattern is logged and reported as RTPREF_INVALID.
  */
 static int
 rtpref(struct nd_defrouter *dr)
 {
 	const int prefbits = dr->flags & ND_RA_FLAG_RTPREF_MASK;
 
 	if (prefbits == ND_RA_FLAG_RTPREF_HIGH)
 		return (RTPREF_HIGH);
 	if (prefbits == ND_RA_FLAG_RTPREF_MEDIUM ||
 	    prefbits == ND_RA_FLAG_RTPREF_RSV)
 		return (RTPREF_MEDIUM);
 	if (prefbits == ND_RA_FLAG_RTPREF_LOW)
 		return (RTPREF_LOW);
 
 	/*
 	 * This case should never happen.  If it did, it would mean a
 	 * serious bug of kernel internal.  We thus always bark here.
 	 * Or, can we even panic?
 	 */
 	log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags);
 	return (RTPREF_INVALID);
 }
 
 static struct nd_defrouter *
 defrtrlist_update(struct nd_defrouter *new)
 {
 	struct nd_defrouter *dr, *n;
 	int s = splnet();
 
 	if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) {
 		/* entry exists */
 		if (new->rtlifetime == 0) {
 			defrtrlist_del(dr);
 			dr = NULL;
 		} else {
 			int oldpref = rtpref(dr);
 
 			/* override */
 			dr->flags = new->flags; /* xxx flag check */
 			dr->rtlifetime = new->rtlifetime;
 			dr->expire = new->expire;
 
 			/*
 			 * If the preference does not change, there's no need
 			 * to sort the entries.
 			 */
 			if (rtpref(new) == oldpref) {
 				splx(s);
 				return (dr);
 			}
 
 			/*
 			 * preferred router may be changed, so relocate
 			 * this router.
 			 * XXX: calling TAILQ_REMOVE directly is a bad manner.
 			 * However, since defrtrlist_del() has many side
 			 * effects, we intentionally do so here.
 			 * defrouter_select() below will handle routing
 			 * changes later.
 			 */
 			TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
 			n = dr;
 			goto insert;
 		}
 		splx(s);
 		return (dr);
 	}
 
 	/* entry does not exist */
 	if (new->rtlifetime == 0) {
 		splx(s);
 		return (NULL);
 	}
 
 	n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT);
 	if (n == NULL) {
 		splx(s);
 		return (NULL);
 	}
 	bzero(n, sizeof(*n));
 	*n = *new;
 
 insert:
 	/*
 	 * Insert the new router in the Default Router List;
 	 * The Default Router List should be in the descending order
 	 * of router-preferece.  Routers with the same preference are
 	 * sorted in the arriving time order.
 	 */
 
 	/* insert at the end of the group */
 	for (dr = TAILQ_FIRST(&nd_defrouter); dr;
 	     dr = TAILQ_NEXT(dr, dr_entry)) {
 		if (rtpref(n) > rtpref(dr))
 			break;
 	}
 	if (dr)
 		TAILQ_INSERT_BEFORE(dr, n, dr_entry);
 	else
 		TAILQ_INSERT_TAIL(&nd_defrouter, n, dr_entry);
 
 	defrouter_select();
 
 	splx(s);
 
 	return (n);
 }
 
 static struct nd_pfxrouter *
 pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr)
 {
 	struct nd_pfxrouter *search;
 
 	for (search = pr->ndpr_advrtrs.lh_first; search; search = search->pfr_next) {
 		if (search->router == dr)
 			break;
 	}
 
 	return (search);
 }
 
 /*
  * Record dr as a router advertising prefix pr by putting a new reference
  * at the head of pr's advertising-router list, then re-run the on-link
  * check.  An allocation failure is silently ignored.
  */
 static void
 pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
 {
 	struct nd_pfxrouter *ref;
 
 	ref = malloc(sizeof(*ref), M_IP6NDP, M_NOWAIT);
 	if (ref != NULL) {
 		bzero(ref, sizeof(*ref));
 		ref->router = dr;
 
 		LIST_INSERT_HEAD(&pr->ndpr_advrtrs, ref, pfr_entry);
 
 		pfxlist_onlink_check();
 	}
 }
 
 /*
  * Unlink a prefix-router reference from the list it is on and free it.
  * The router it points to is not touched.
  */
 static void
 pfxrtr_del(struct nd_pfxrouter *pfr)
 {
 	LIST_REMOVE(pfr, pfr_entry);
 	free(pfr, M_IP6NDP);
 }
 
 struct nd_prefix *
 nd6_prefix_lookup(struct nd_prefixctl *key)
 {
 	struct nd_prefix *search;
 
 	for (search = nd_prefix.lh_first; search; search = search->ndpr_next) {
 		if (key->ndpr_ifp == search->ndpr_ifp &&
 		    key->ndpr_plen == search->ndpr_plen &&
 		    in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr,
 		    &search->ndpr_prefix.sin6_addr, key->ndpr_plen)) {
 			break;
 		}
 	}
 
 	return (search);
 }
 
 int
 nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
     struct nd_prefix **newp)
 {
 	struct nd_prefix *new = NULL;
 	int error = 0;
 	int i, s;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
 	if (new == NULL)
 		return(ENOMEM);
 	bzero(new, sizeof(*new));
 	new->ndpr_ifp = pr->ndpr_ifp;
 	new->ndpr_prefix = pr->ndpr_prefix;
 	new->ndpr_plen = pr->ndpr_plen;
 	new->ndpr_vltime = pr->ndpr_vltime;
 	new->ndpr_pltime = pr->ndpr_pltime;
 	new->ndpr_flags = pr->ndpr_flags;
 	if ((error = in6_init_prefix_ltimes(new)) != 0) {
 		free(new, M_IP6NDP);
 		return(error);
 	}
 	new->ndpr_lastupdate = time_second;
 	if (newp != NULL)
 		*newp = new;
 
 	/* initialization */
 	LIST_INIT(&new->ndpr_advrtrs);
 	in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen);
 	/* make prefix in the canonical form */
 	for (i = 0; i < 4; i++)
 		new->ndpr_prefix.sin6_addr.s6_addr32[i] &=
 		    new->ndpr_mask.s6_addr32[i];
 
 	s = splnet();
 	/* link ndpr_entry to nd_prefix list */
 	LIST_INSERT_HEAD(&nd_prefix, new, ndpr_entry);
 	splx(s);
 
 	/* ND_OPT_PI_FLAG_ONLINK processing */
 	if (new->ndpr_raf_onlink) {
 		int e;
 
 		if ((e = nd6_prefix_onlink(new)) != 0) {
 			nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
 			    "the prefix %s/%d on-link on %s (errno=%d)\n",
 			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 			    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
 			/* proceed anyway. XXX: is it correct? */
 		}
 	}
 
 	if (dr)
 		pfxrtr_add(new, dr);
 
 	return 0;
 }
 
 void
 prelist_remove(struct nd_prefix *pr)
 {
 	struct nd_pfxrouter *pfr, *next;
 	int e, s;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/* make sure to invalidate the prefix until it is really freed. */
 	pr->ndpr_vltime = 0;
 	pr->ndpr_pltime = 0;
 
 	/*
 	 * Though these flags are now meaningless, we'd rather keep the value
 	 * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users
 	 * when executing "ndp -p".
 	 */
 
 	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 &&
 	    (e = nd6_prefix_offlink(pr)) != 0) {
 		nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink "
 		    "on %s, errno=%d\n",
 		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
 		/* what should we do? */
 	}
 
 	if (pr->ndpr_refcnt > 0)
 		return;		/* notice here? */
 
 	s = splnet();
 
 	/* unlink ndpr_entry from nd_prefix list */
 	LIST_REMOVE(pr, ndpr_entry);
 
 	/* free list of routers that adversed the prefix */
 	for (pfr = pr->ndpr_advrtrs.lh_first; pfr; pfr = next) {
 		next = pfr->pfr_next;
 
 		free(pfr, M_IP6NDP);
 	}
 	splx(s);
 
 	free(pr, M_IP6NDP);
 
 	pfxlist_onlink_check();
 }
 
 /*
  * Process one advertised prefix: update or create its prefix list entry,
  * then refresh or create the autoconfigured addresses derived from it.
  *
  * new   - the advertised prefix (interface, prefix, length, lifetimes, flags)
  * dr    - advertising router; may be NULL
  * m     - packet the prefix arrived in; only consulted for the
  *         M_AUTHIPHDR/M_AUTHIPDGM flags (when both are defined) to decide
  *         whether the advertisement counts as authenticated.  May be NULL.
  * mcast - passed through unchanged to in6_ifadd() when an address is created
  *
  * Returns 0, or an error code: the one from nd6_prelist_add(), EINVAL when
  * the preferred lifetime exceeds the valid lifetime, or EADDRNOTAVAIL when
  * in6_ifadd() fails.  The whole body runs between splnet() and splx().
  */
 
 static int
 prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
     struct mbuf *m, int mcast)
 {
 	struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL;
 	struct ifaddr *ifa;
 	struct ifnet *ifp = new->ndpr_ifp;
 	struct nd_prefix *pr;
 	int s = splnet();
 	int error = 0;
 	int newprefix = 0;
 	int auth;
 	struct in6_addrlifetime lt6_tmp;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	auth = 0;
 	if (m) {
 		/*
 		 * Authenticity for NA consists authentication for
 		 * both IP header and IP datagrams, doesn't it ?
 		 */
 #if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM)
 		auth = ((m->m_flags & M_AUTHIPHDR) &&
 		    (m->m_flags & M_AUTHIPDGM));
 #endif
 	}
 
 	if ((pr = nd6_prefix_lookup(new)) != NULL) {
 		/*
 		 * nd6_prefix_lookup() ensures that pr and new have the same
 		 * prefix on a same interface.
 		 */
 
 		/*
 		 * Update prefix information.  Note that the on-link (L) bit
 		 * and the autonomous (A) bit should NOT be changed from 1
 		 * to 0.
 		 */
 		if (new->ndpr_raf_onlink == 1)
 			pr->ndpr_raf_onlink = 1;
 		if (new->ndpr_raf_auto == 1)
 			pr->ndpr_raf_auto = 1;
 		/* The prefix lifetimes are refreshed only when the L bit is set. */
 		if (new->ndpr_raf_onlink) {
 			pr->ndpr_vltime = new->ndpr_vltime;
 			pr->ndpr_pltime = new->ndpr_pltime;
 			(void)in6_init_prefix_ltimes(pr); /* XXX error case? */
 			pr->ndpr_lastupdate = time_second;
 		}
 
 		if (new->ndpr_raf_onlink &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
 			int e;
 
 			if ((e = nd6_prefix_onlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "prelist_update: failed to make "
 				    "the prefix %s/%d on-link on %s "
 				    "(errno=%d)\n",
 				    ip6_sprintf(ip6buf,
 					    &pr->ndpr_prefix.sin6_addr),
 				    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
 				/* proceed anyway. XXX: is it correct? */
 			}
 		}
 
 		if (dr && pfxrtr_lookup(pr, dr) == NULL)
 			pfxrtr_add(pr, dr);
 	} else {
 		struct nd_prefix *newpr = NULL;
 
 		/* NOTE(review): newprefix is set here but never read below. */
 		newprefix = 1;
 
 		/* Unknown prefixes that are expired or carry neither bit are ignored. */
 		if (new->ndpr_vltime == 0)
 			goto end;
 		if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
 			goto end;
 
 		error = nd6_prelist_add(new, dr, &newpr);
 		if (error != 0 || newpr == NULL) {
 			nd6log((LOG_NOTICE, "prelist_update: "
 			    "nd6_prelist_add failed for %s/%d on %s "
 			    "errno=%d, returnpr=%p\n",
 			    ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
 			    new->ndpr_plen, if_name(new->ndpr_ifp),
 			    error, newpr));
 			goto end; /* we should just give up in this case. */
 		}
 
 		/*
 		 * XXX: from the ND point of view, we can ignore a prefix
 		 * with the on-link bit being zero.  However, we need a
 		 * prefix structure for references from autoconfigured
 		 * addresses.  Thus, we explicitly make sure that the prefix
 		 * itself expires now.
 		 */
 		if (newpr->ndpr_raf_onlink == 0) {
 			newpr->ndpr_vltime = 0;
 			newpr->ndpr_pltime = 0;
 			in6_init_prefix_ltimes(newpr);
 		}
 
 		pr = newpr;
 	}
 
 	/*
 	 * Address autoconfiguration based on Section 5.5.3 of RFC 2462.
 	 * Note that pr must be non NULL at this point.
 	 */
 
 	/* 5.5.3 (a). Ignore the prefix without the A bit set. */
 	if (!new->ndpr_raf_auto)
 		goto end;
 
 	/*
 	 * 5.5.3 (b). the link-local prefix should have been ignored in
 	 * nd6_ra_input.
 	 */
 
 	/* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */
 	if (new->ndpr_pltime > new->ndpr_vltime) {
 		error = EINVAL;	/* XXX: won't be used */
 		goto end;
 	}
 
 	/*
 	 * 5.5.3 (d).  If the prefix advertised is not equal to the prefix of
 	 * an address configured by stateless autoconfiguration already in the
 	 * list of addresses associated with the interface, and the Valid
 	 * Lifetime is not 0, form an address.  We first check if we have
 	 * a matching prefix.
 	 * Note: we apply a clarification in rfc2462bis-02 here.  We only
 	 * consider autoconfigured addresses while RFC2462 simply said
 	 * "address".
 	 */
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
 		struct in6_ifaddr *ifa6;
 		u_int32_t remaininglifetime;
 
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 
 		ifa6 = (struct in6_ifaddr *)ifa;
 
 		/*
 		 * We only consider autoconfigured addresses as per rfc2462bis.
 		 */
 		if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF))
 			continue;
 
 		/*
 		 * Spec is not clear here, but I believe we should concentrate
 		 * on unicast (i.e. not anycast) addresses.
 		 * XXX: other ia6_flags? detached or duplicated?
 		 */
 		if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0)
 			continue;
 
 		/*
 		 * Ignore the address if it is not associated with a prefix
 		 * or is associated with a prefix that is different from this
 		 * one.  (pr is never NULL here)
 		 */
 		if (ifa6->ia6_ndpr != pr)
 			continue;
 
 		if (ia6_match == NULL) /* remember the first one */
 			ia6_match = ifa6;
 
 		/*
 		 * An already autoconfigured address matched.  Now that we
 		 * are sure there is at least one matched address, we can
 		 * proceed to 5.5.3. (e): update the lifetimes according to the
 		 * "two hours" rule and the privacy extension.
 		 * We apply some clarifications in rfc2462bis:
 		 * - use remaininglifetime instead of storedlifetime as a
 		 *   variable name
 		 * - remove the dead code in the "two-hour" rule
 		 */
 #define TWOHOUR		(120*60)
 		lt6_tmp = ifa6->ia6_lifetime;
 
 		/*
 		 * remaininglifetime = stored valid lifetime minus the time
 		 * elapsed since the address was last updated, clamped at 0.
 		 */
 		if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
 			remaininglifetime = ND6_INFINITE_LIFETIME;
 		else if (time_second - ifa6->ia6_updatetime >
 			 lt6_tmp.ia6t_vltime) {
 			/*
 			 * The case of "invalid" address.  We should usually
 			 * not see this case.
 			 */
 			remaininglifetime = 0;
 		} else
 			remaininglifetime = lt6_tmp.ia6t_vltime -
 			    (time_second - ifa6->ia6_updatetime);
 
 		/* when not updating, keep the current stored lifetime. */
 		lt6_tmp.ia6t_vltime = remaininglifetime;
 
 		/*
 		 * Two-hour rule: accept the advertised valid lifetime when it
 		 * exceeds two hours or the remaining lifetime; when the
 		 * remaining lifetime is already within two hours accept it
 		 * only if authenticated; otherwise cap at two hours.
 		 */
 		if (TWOHOUR < new->ndpr_vltime ||
 		    remaininglifetime < new->ndpr_vltime) {
 			lt6_tmp.ia6t_vltime = new->ndpr_vltime;
 		} else if (remaininglifetime <= TWOHOUR) {
 			if (auth) {
 				lt6_tmp.ia6t_vltime = new->ndpr_vltime;
 			}
 		} else {
 			/*
 			 * new->ndpr_vltime <= TWOHOUR &&
 			 * TWOHOUR < remaininglifetime
 			 */
 			lt6_tmp.ia6t_vltime = TWOHOUR;
 		}
 
 		/* The 2 hour rule is not imposed for preferred lifetime. */
 		lt6_tmp.ia6t_pltime = new->ndpr_pltime;
 
 		in6_init_address_ltimes(pr, &lt6_tmp);
 
 		/*
 		 * We need to treat lifetimes for temporary addresses
 		 * differently, according to
 		 * draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1);
 		 * we only update the lifetimes when they are in the maximum
 		 * intervals.
 		 */
 		if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
 			u_int32_t maxvltime, maxpltime;
 
 			/*
 			 * Upper bounds: the configured temporary lifetime
 			 * minus the address age and the desync factor, or 0
 			 * if that would not be positive.
 			 */
 			if (ip6_temp_valid_lifetime >
 			    (u_int32_t)((time_second - ifa6->ia6_createtime) +
 			    ip6_desync_factor)) {
 				maxvltime = ip6_temp_valid_lifetime -
 				    (time_second - ifa6->ia6_createtime) -
 				    ip6_desync_factor;
 			} else
 				maxvltime = 0;
 			if (ip6_temp_preferred_lifetime >
 			    (u_int32_t)((time_second - ifa6->ia6_createtime) +
 			    ip6_desync_factor)) {
 				maxpltime = ip6_temp_preferred_lifetime -
 				    (time_second - ifa6->ia6_createtime) -
 				    ip6_desync_factor;
 			} else
 				maxpltime = 0;
 
 			if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME ||
 			    lt6_tmp.ia6t_vltime > maxvltime) {
 				lt6_tmp.ia6t_vltime = maxvltime;
 			}
 			if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME ||
 			    lt6_tmp.ia6t_pltime > maxpltime) {
 				lt6_tmp.ia6t_pltime = maxpltime;
 			}
 		}
 		ifa6->ia6_lifetime = lt6_tmp;
 		ifa6->ia6_updatetime = time_second;
 	}
 	if (ia6_match == NULL && new->ndpr_vltime) {
 		int ifidlen;
 
 		/*
 		 * 5.5.3 (d) (continued)
 		 * No address matched and the valid lifetime is non-zero.
 		 * Create a new address.
 		 */
 
 		/*
 		 * Prefix Length check:
 		 * If the sum of the prefix length and interface identifier
 		 * length does not equal 128 bits, the Prefix Information
 		 * option MUST be ignored.  The length of the interface
 		 * identifier is defined in a separate link-type specific
 		 * document.
 		 */
 		ifidlen = in6_if2idlen(ifp);
 		if (ifidlen < 0) {
 			/* this should not happen, so we always log it. */
 			log(LOG_ERR, "prelist_update: IFID undefined (%s)\n",
 			    if_name(ifp));
 			goto end;
 		}
 		if (ifidlen + pr->ndpr_plen != 128) {
 			nd6log((LOG_INFO,
 			    "prelist_update: invalid prefixlen "
 			    "%d for %s, ignored\n",
 			    pr->ndpr_plen, if_name(ifp)));
 			goto end;
 		}
 
 		if ((ia6 = in6_ifadd(new, mcast)) != NULL) {
 			/*
 			 * note that we should use pr (not new) for reference.
 			 */
 			pr->ndpr_refcnt++;
 			ia6->ia6_ndpr = pr;
 
 			/*
 			 * RFC 3041 3.3 (2).
 			 * When a new public address is created as described
 			 * in RFC2462, also create a new temporary address.
 			 *
 			 * RFC 3041 3.5.
 			 * When an interface connects to a new link, a new
 			 * randomized interface identifier should be generated
 			 * immediately together with a new set of temporary
 			 * addresses.  Thus, we specify 1 as the 2nd arg of
 			 * in6_tmpifadd().
 			 */
 			if (ip6_use_tempaddr) {
 				int e;
 				if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) {
 					nd6log((LOG_NOTICE, "prelist_update: "
 					    "failed to create a temporary "
 					    "address, errno=%d\n",
 					    e));
 				}
 			}
 
 			/*
 			 * A newly added address might affect the status
 			 * of other addresses, so we check and update it.
 			 * XXX: what if address duplication happens?
 			 */
 			pfxlist_onlink_check();
 		} else {
 			/* just set an error. do not bark here. */
 			error = EADDRNOTAVAIL; /* XXX: might be unused. */
 		}
 	}
 
  end:
 	splx(s);
 	return error;
 }
 
 /*
  * A supplement function used in the on-link detection below;
  * detect if a given prefix has a (probably) reachable advertising router.
  * XXX: lengthy function name...
  */
 static struct nd_pfxrouter *
 find_pfxlist_reachable_router(struct nd_prefix *pr)
 {
 	struct nd_pfxrouter *pfxrtr;
 	struct rtentry *rt;
 	struct llinfo_nd6 *ln;
 
 	for (pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); pfxrtr;
 	     pfxrtr = LIST_NEXT(pfxrtr, pfr_entry)) {
 		if ((rt = nd6_lookup(&pfxrtr->router->rtaddr, 0,
 		    pfxrtr->router->ifp)) &&
 		    (ln = (struct llinfo_nd6 *)rt->rt_llinfo) &&
 		    ND6_IS_LLINFO_PROBREACH(ln))
 			break;	/* found */
 	}
 
 	return (pfxrtr);
 }
 
 /*
  * Check if each prefix in the prefix list has at least one available router
  * that advertised the prefix (a router is "available" if its neighbor cache
  * entry is reachable or probably reachable).
  * If the check fails, the prefix may be off-link, because, for example,
  * we have moved from the network but the lifetime of the prefix has not
  * expired yet.  So we should not use the prefix if there is another prefix
  * that has an available router.
  * But, if there is no prefix that has an available router, we still regard
  * all the prefixes as on-link.  This is because we can't tell if all the
  * routers are simply dead or if we really moved from the network and there
  * is no router around us.
  *
  * The function works in three passes:
  *  1. set or clear NDPRF_DETACHED on every non-link-local prefix that has
  *     the L bit set;
  *  2. remove the interface route of prefixes that became detached and
  *     (re)install it for prefixes that became attached;
  *  3. set or clear IN6_IFF_DETACHED on autoconfigured addresses to match,
  *     restarting DAD on addresses that become attached again.
  */
 void
 pfxlist_onlink_check(void)
 {
 	struct nd_prefix *pr;
 	struct in6_ifaddr *ifa;
 	struct nd_defrouter *dr;
 	struct nd_pfxrouter *pfxrtr = NULL;
 	int reachable;
 
 	/*
 	 * Check if there is a prefix that has a reachable advertising
 	 * router.
 	 */
 	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
 		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
 			break;
 	}
 
 	/*
 	 * If we have no such prefix, check whether we still have a router
 	 * that does not advertise any prefixes.
 	 */
 	if (pr == NULL) {
 		for (dr = TAILQ_FIRST(&nd_defrouter); dr;
 		    dr = TAILQ_NEXT(dr, dr_entry)) {
 			struct nd_prefix *pr0;
 
 			for (pr0 = nd_prefix.lh_first; pr0;
 			    pr0 = pr0->ndpr_next) {
 				if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL)
 					break;
 			}
 			if (pfxrtr != NULL)
 				break;
 		}
 	}
 	if (pr != NULL || (TAILQ_FIRST(&nd_defrouter) && pfxrtr == NULL)) {
 		/*
 		 * There is at least one prefix that has a reachable router,
 		 * or at least a router which probably does not advertise
 		 * any prefixes.  The latter would be the case when we move
 		 * to a new link where we have a router that does not provide
 		 * prefixes and we configure an address by hand.
 		 * Detach prefixes which have no reachable advertising
 		 * router, and attach other prefixes.
 		 */
 		for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
 			/* XXX: a link-local prefix should never be detached */
 			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 				continue;
 
 			/*
 			 * we aren't interested in prefixes without the L bit
 			 * set.
 			 */
 			if (pr->ndpr_raf_onlink == 0)
 				continue;
 
 			/*
 			 * Probe reachability once per prefix; the probe
 			 * involves a neighbor lookup for every advertising
 			 * router, so avoid repeating it.
 			 */
 			reachable =
 			    (find_pfxlist_reachable_router(pr) != NULL);
 			if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0) {
 				if (!reachable)
 					pr->ndpr_stateflags |= NDPRF_DETACHED;
 			} else if (reachable)
 				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
 		}
 	} else {
 		/* there is no prefix that has a reachable router */
 		for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
 			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 				continue;
 
 			if (pr->ndpr_raf_onlink == 0)
 				continue;
 
 			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0)
 				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
 		}
 	}
 
 	/*
 	 * Remove each interface route associated with a (just) detached
 	 * prefix, and reinstall the interface route for a (just) attached
 	 * prefix.  Note that not every reinstallation attempt necessarily
 	 * succeeds when the same prefix is shared among multiple
 	 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
 	 * so we don't have to care about them.
 	 */
 	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
 		int e;
 		char ip6buf[INET6_ADDRSTRLEN];
 
 		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 			continue;
 
 		if (pr->ndpr_raf_onlink == 0)
 			continue;
 
 		if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
 			if ((e = nd6_prefix_offlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "pfxlist_onlink_check: failed to "
 				    "make %s/%d offlink, errno=%d\n",
 				    ip6_sprintf(ip6buf,
 					    &pr->ndpr_prefix.sin6_addr),
 					    pr->ndpr_plen, e));
 			}
 		}
 		if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 &&
 		    pr->ndpr_raf_onlink) {
 			if ((e = nd6_prefix_onlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "pfxlist_onlink_check: failed to "
 				    "make %s/%d onlink, errno=%d\n",
 				    ip6_sprintf(ip6buf,
 					    &pr->ndpr_prefix.sin6_addr),
 					    pr->ndpr_plen, e));
 			}
 		}
 	}
 
 	/*
 	 * Changes on the prefix status might affect address status as well.
 	 * Make sure that all addresses derived from an attached prefix are
 	 * attached, and that all addresses derived from a detached prefix are
 	 * detached.  Note, however, that a manually configured address should
 	 * always be attached.
 	 * The precise detection logic is same as the one for prefixes.
 	 */
 	for (ifa = in6_ifaddr; ifa; ifa = ifa->ia_next) {
 		if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
 			continue;
 
 		if (ifa->ia6_ndpr == NULL) {
 			/*
 			 * This can happen when we first configure the address
 			 * (i.e. the address exists, but the prefix does not).
 			 * XXX: complicated relationships...
 			 */
 			continue;
 		}
 
 		if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
 			break;
 	}
 	if (ifa) {
 		/*
 		 * At least one autoconfigured address has a reachable
 		 * router: attach addresses whose prefix has one, detach
 		 * the others.
 		 */
 		for (ifa = in6_ifaddr; ifa; ifa = ifa->ia_next) {
 			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 				continue;
 
 			if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
 				continue;
 
 			if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) {
 				if (ifa->ia6_flags & IN6_IFF_DETACHED) {
 					ifa->ia6_flags &= ~IN6_IFF_DETACHED;
 					ifa->ia6_flags |= IN6_IFF_TENTATIVE;
 					nd6_dad_start((struct ifaddr *)ifa, 0);
 				}
 			} else {
 				ifa->ia6_flags |= IN6_IFF_DETACHED;
 			}
 		}
 	} else {
 		/* No reachable router at all: attach every address. */
 		for (ifa = in6_ifaddr; ifa; ifa = ifa->ia_next) {
 			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 				continue;
 
 			if (ifa->ia6_flags & IN6_IFF_DETACHED) {
 				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
 				ifa->ia6_flags |= IN6_IFF_TENTATIVE;
 				/* Do we need a delay in this case? */
 				nd6_dad_start((struct ifaddr *)ifa, 0);
 			}
 		}
 	}
 }
 
 /*
  * Install the interface route for prefix pr and mark it NDPRF_ONLINK.
  *
  * Returns EEXIST if pr is already flagged on-link.  Returns 0 without
  * installing anything (and without setting NDPRF_ONLINK) when another
  * on-link prefix with the same length and bits exists, or when the
  * interface has no IPv6 address to use as the route's gateway.  Otherwise
  * returns the result of rtrequest(RTM_ADD); the flag is set only on
  * success, and a failure is logged.
  */
 int
 nd6_prefix_onlink(struct nd_prefix *pr)
 {
 	struct ifaddr *ifa;
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct sockaddr_in6 mask6;
 	struct nd_prefix *opr;
 	u_long rtflags;
 	int error = 0;
 	struct rtentry *rt = NULL;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/* sanity check */
 	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
 		nd6log((LOG_ERR,
 		    "nd6_prefix_onlink: %s/%d is already on-link\n",
 		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen));
 		return (EEXIST);
 	}
 
 	/*
 	 * Add the interface route associated with the prefix.  Before
 	 * installing the route, check if there's the same prefix on another
 	 * interface, and the prefix has already installed the interface route.
 	 * Although such a configuration is expected to be rare, we explicitly
 	 * allow it.
 	 */
 	for (opr = nd_prefix.lh_first; opr; opr = opr->ndpr_next) {
 		if (opr == pr)
 			continue;
 
 		if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0)
 			continue;
 
 		if (opr->ndpr_plen == pr->ndpr_plen &&
 		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
 		    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen))
 			return (0);
 	}
 
 	/*
 	 * We prefer link-local addresses as the associated interface address.
 	 */
 	/* search for a link-local addr */
 	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
 	    IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
 	if (ifa == NULL) {
 		/* Fall back to the first AF_INET6 address on the interface. */
 		/* XXX: freebsd does not have ifa_ifwithaf */
 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
 			if (ifa->ifa_addr->sa_family == AF_INET6)
 				break;
 		}
 		/* should we care about ia6_flags? */
 	}
 	if (ifa == NULL) {
 		/*
 		 * This can still happen, when, for example, we receive an RA
 		 * containing a prefix with the L bit set and the A bit clear,
 		 * after removing all IPv6 addresses on the receiving
 		 * interface.  This should, of course, be rare though.
 		 */
 		nd6log((LOG_NOTICE,
 		    "nd6_prefix_onlink: failed to find any ifaddr"
 		    " to add route for a prefix(%s/%d) on %s\n",
 		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen, if_name(ifp)));
 		return (0);
 	}
 
 	/*
 	 * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs.
 	 * ifa->ifa_rtrequest = nd6_rtrequest;
 	 */
 	/* NOTE(review): sin6_family of mask6 is left zero here; only len and addr are set. */
 	bzero(&mask6, sizeof(mask6));
 	mask6.sin6_len = sizeof(mask6);
 	mask6.sin6_addr = pr->ndpr_mask;
 	rtflags = ifa->ifa_flags | RTF_CLONING | RTF_UP;
 	/* The final state of RTF_CLONING is decided by nd6_need_cache(). */
 	if (nd6_need_cache(ifp)) {
 		/* explicitly set in case ifa_flags does not set the flag. */
 		rtflags |= RTF_CLONING;
 	} else {
 		/*
 		 * explicitly clear the cloning bit in case ifa_flags sets it.
 		 */
 		rtflags &= ~RTF_CLONING;
 	}
 	error = rtrequest(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix,
 	    ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt);
 	if (error == 0) {
 		if (rt != NULL) /* this should be non NULL, though */
 			nd6_rtmsg(RTM_ADD, rt);
 		pr->ndpr_stateflags |= NDPRF_ONLINK;
 	} else {
 		char ip6bufg[INET6_ADDRSTRLEN], ip6bufm[INET6_ADDRSTRLEN];
 		nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add route for a"
 		    " prefix (%s/%d) on %s, gw=%s, mask=%s, flags=%lx "
 		    "errno = %d\n",
 		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen, if_name(ifp),
 		    ip6_sprintf(ip6bufg, &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr),
 		    ip6_sprintf(ip6bufm, &mask6.sin6_addr), rtflags, error));
 	}
 
 	/* Drop the reference handed back through &rt, if any. */
 	if (rt != NULL) {
 		RT_LOCK(rt);
 		RT_REMREF(rt);
 		RT_UNLOCK(rt);
 	}
 
 	return (error);
 }
 
 /*
  * Delete the interface route of prefix pr and clear NDPRF_ONLINK.
  *
  * Returns EEXIST if pr is not flagged on-link (note: the same code the
  * on-link counterpart uses for "already on-link").  Otherwise returns the
  * result of rtrequest(RTM_DELETE).  On success the deletion is reported to
  * the routing socket and any other attached, not-yet-on-link prefix with
  * the same length and bits is made on-link in its place; on failure the
  * flag is left set and the error is logged.
  */
 int
 nd6_prefix_offlink(struct nd_prefix *pr)
 {
 	int error = 0;
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct nd_prefix *opr;
 	struct sockaddr_in6 sa6, mask6;
 	struct rtentry *rt = NULL;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/* sanity check */
 	if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
 		nd6log((LOG_ERR,
 		    "nd6_prefix_offlink: %s/%d is already off-link\n",
 		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen));
 		return (EEXIST);
 	}
 
 	/* Build the destination and netmask sockaddrs for the route to delete. */
 	bzero(&sa6, sizeof(sa6));
 	sa6.sin6_family = AF_INET6;
 	sa6.sin6_len = sizeof(sa6);
 	bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr,
 	    sizeof(struct in6_addr));
 	bzero(&mask6, sizeof(mask6));
 	mask6.sin6_family = AF_INET6;
 	/* sizeof(sa6) equals sizeof(mask6): both are struct sockaddr_in6. */
 	mask6.sin6_len = sizeof(sa6);
 	bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));
 	error = rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
 	    (struct sockaddr *)&mask6, 0, &rt);
 	if (error == 0) {
 		pr->ndpr_stateflags &= ~NDPRF_ONLINK;
 
 		/* report the route deletion to the routing socket. */
 		if (rt != NULL)
 			nd6_rtmsg(RTM_DELETE, rt);
 
 		/*
 		 * There might be the same prefix on another interface,
 		 * the prefix which could not be on-link just because we have
 		 * the interface route (see comments in nd6_prefix_onlink).
 		 * If there's one, try to make the prefix on-link on the
 		 * interface.
 		 */
 		for (opr = nd_prefix.lh_first; opr; opr = opr->ndpr_next) {
 			if (opr == pr)
 				continue;
 
 			if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0)
 				continue;
 
 			/*
 			 * KAME specific: detached prefixes should not be
 			 * on-link.
 			 */
 			if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0)
 				continue;
 
 			if (opr->ndpr_plen == pr->ndpr_plen &&
 			    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
 			    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
 				int e;
 
 				if ((e = nd6_prefix_onlink(opr)) != 0) {
 					nd6log((LOG_ERR,
 					    "nd6_prefix_offlink: failed to "
 					    "recover a prefix %s/%d from %s "
 					    "to %s (errno = %d)\n",
 					    ip6_sprintf(ip6buf,
 						&opr->ndpr_prefix.sin6_addr),
 					    opr->ndpr_plen, if_name(ifp),
 					    if_name(opr->ndpr_ifp), e));
 				}
 			}
 		}
 	} else {
 		/* XXX: can we still set the NDPRF_ONLINK flag? */
 		nd6log((LOG_ERR,
 		    "nd6_prefix_offlink: failed to delete route: "
 		    "%s/%d on %s (errno = %d)\n",
 		    ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
 		    if_name(ifp), error));
 	}
 
 	/* Release the deleted route entry handed back through &rt, if any. */
 	if (rt != NULL) {
 		RTFREE(rt);
 	}
 
 	return (error);
 }
 
 static struct in6_ifaddr *
 in6_ifadd(struct nd_prefixctl *pr, int mcast)
 {
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct ifaddr *ifa;
 	struct in6_aliasreq ifra;
 	struct in6_ifaddr *ia, *ib;
 	int error, plen0;
 	struct in6_addr mask;
 	int prefixlen = pr->ndpr_plen;
 	int updateflags;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	in6_prefixlen2mask(&mask, prefixlen);
 
 	/*
 	 * find a link-local address (will be interface ID).
 	 * Is it really mandatory? Theoretically, a global or a site-local
 	 * address can be configured without a link-local address, if we
 	 * have a unique interface identifier...
 	 *
 	 * it is not mandatory to have a link-local address, we can generate
 	 * interface identifier on the fly.  we do this because:
 	 * (1) it should be the easiest way to find interface identifier.
 	 * (2) RFC2462 5.4 suggesting the use of the same interface identifier
 	 * for multiple addresses on a single interface, and possible shortcut
 	 * of DAD.  we omitted DAD for this reason in the past.
 	 * (3) a user can prevent autoconfiguration of global address
 	 * by removing link-local address by hand (this is partly because we
 	 * don't have other way to control the use of IPv6 on an interface.
 	 * this has been our design choice - cf. NRL's "ifconfig auto").
 	 * (4) it is easier to manage when an interface has addresses
 	 * with the same interface identifier, than to have multiple addresses
 	 * with different interface identifiers.
 	 */
 	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
 	if (ifa)
 		ib = (struct in6_ifaddr *)ifa;
 	else
 		return NULL;
 
 	/* prefixlen + ifidlen must be equal to 128 */
 	plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
 	if (prefixlen != plen0) {
 		nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
 		    "(prefix=%d ifid=%d)\n",
 		    if_name(ifp), prefixlen, 128 - plen0));
 		return NULL;
 	}
 
 	/* make ifaddr */
 
 	bzero(&ifra, sizeof(ifra));
 	/*
 	 * in6_update_ifa() does not use ifra_name, but we accurately set it
 	 * for safety.
 	 */
 	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
 	ifra.ifra_addr.sin6_family = AF_INET6;
 	ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
 	/* prefix */
 	ifra.ifra_addr.sin6_addr = pr->ndpr_prefix.sin6_addr;
 	ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
 	ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
 	ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
 	ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
 
 	/* interface ID */
 	ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
 	    (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
 	ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
 	    (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
 	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
 	    (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
 	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
 	    (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
 
 	/* new prefix mask. */
 	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_prefixmask.sin6_family = AF_INET6;
 	bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr,
 	    sizeof(ifra.ifra_prefixmask.sin6_addr));
 
 	/* lifetimes. */
 	ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
 	ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
 
 	/* XXX: scope zone ID? */
 
 	ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
 
 	/*
 	 * Make sure that we do not have this address already.  This should
 	 * usually not happen, but we can still see this case, e.g., if we
 	 * have manually configured the exact address to be configured.
 	 */
 	if (in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr) != NULL) {
 		/* this should be rare enough to make an explicit log */
 		log(LOG_INFO, "in6_ifadd: %s is already configured\n",
 		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
 		return (NULL);
 	}
 
 	/*
 	 * Allocate ifaddr structure, link into chain, etc.
 	 * If we are going to create a new address upon receiving a multicasted
 	 * RA, we need to impose a random delay before starting DAD.
 	 * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
 	 */
 	updateflags = 0;
 	if (mcast)
 		updateflags |= IN6_IFAUPDATE_DADDELAY;
 	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
 		nd6log((LOG_ERR,
 		    "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
 		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
 		    if_name(ifp), error));
 		return (NULL);	/* ifaddr must not have been allocated. */
 	}
 
 	ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
 
 	return (ia);		/* this is always non-NULL */
 }
 
 /*
  * Create a temporary address derived from the public address ia0.
  *
  * ia0      - corresponding public address
  * forcegen - handed to in6_get_tmpifid(); set to 1 on a collision retry
  * delay    - non-zero requests IN6_IFAUPDATE_DADDELAY for the new address
  *
  * Returns 0 on success, and also when no address is created because the
  * computed preferred lifetime is not greater than ip6_temp_regen_advance.
  * Otherwise returns EINVAL (no usable random IFID, or the new address
  * could not be looked up), EEXIST (no unique IFID within the retry limit),
  * or the error from in6_update_ifa().
  */
 int
 in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
 {
 	struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
 	struct in6_ifaddr *newia, *ia;
 	struct in6_aliasreq ifra;
 	int i, error;
 	int trylimit = 3;	/* XXX: adhoc value */
 	int updateflags;
 	u_int32_t randid[2];
 	time_t vltime0, pltime0;
 
 	bzero(&ifra, sizeof(ifra));
 	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
 	ifra.ifra_addr = ia0->ia_addr;
 	/* copy prefix mask */
 	ifra.ifra_prefixmask = ia0->ia_prefixmask;
 
   again:
 	/*
 	 * Clear the IFID bits on every attempt.  The random ID is OR-ed in
 	 * below, so bits left over from the public address or from an
 	 * earlier colliding attempt would otherwise be merged into the new
 	 * ID.
 	 */
 	for (i = 0; i < 4; i++) {
 		ifra.ifra_addr.sin6_addr.s6_addr32[i] &=
 		    ifra.ifra_prefixmask.sin6_addr.s6_addr32[i];
 	}
 
 	if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
 	    (const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) {
 		nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find a good "
 		    "random IFID\n"));
 		return (EINVAL);
 	}
 	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
 	    (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
 	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
 	    (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));
 
 	/*
 	 * in6_get_tmpifid() quite likely provided a unique interface ID.
 	 * However, we may still have a chance to see collision, because
 	 * there may be a time lag between generation of the ID and generation
 	 * of the address.  So, we'll do one more sanity check.
 	 */
 	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
 		if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
 		    &ifra.ifra_addr.sin6_addr)) {
 			if (trylimit-- == 0) {
 				/*
 				 * Give up.  Something strange should have
 				 * happened.
 				 */
 				nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
 				    "find a unique random IFID\n"));
 				return (EEXIST);
 			}
 			/* Ask for a freshly generated ID and start over. */
 			forcegen = 1;
 			goto again;
 		}
 	}
 
 	/*
 	 * The Valid Lifetime is the lower of the Valid Lifetime of the
 	 * public address or TEMP_VALID_LIFETIME.
 	 * The Preferred Lifetime is the lower of the Preferred Lifetime
 	 * of the public address or TEMP_PREFERRED_LIFETIME -
 	 * DESYNC_FACTOR.
 	 * For a finite lifetime, the time already elapsed since
 	 * ia6_updatetime is subtracted first.
 	 */
 	if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 		vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
 		    (ia0->ia6_lifetime.ia6t_vltime -
 		    (time_second - ia0->ia6_updatetime));
 		if (vltime0 > ip6_temp_valid_lifetime)
 			vltime0 = ip6_temp_valid_lifetime;
 	} else
 		vltime0 = ip6_temp_valid_lifetime;
 	if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 		pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
 		    (ia0->ia6_lifetime.ia6t_pltime -
 		    (time_second - ia0->ia6_updatetime));
 		if (pltime0 > ip6_temp_preferred_lifetime - ip6_desync_factor){
 			pltime0 = ip6_temp_preferred_lifetime -
 			    ip6_desync_factor;
 		}
 	} else
 		pltime0 = ip6_temp_preferred_lifetime - ip6_desync_factor;
 	ifra.ifra_lifetime.ia6t_vltime = vltime0;
 	ifra.ifra_lifetime.ia6t_pltime = pltime0;
 
 	/*
 	 * A temporary address is created only if this calculated Preferred
 	 * Lifetime is greater than REGEN_ADVANCE time units.
 	 */
 	if (ifra.ifra_lifetime.ia6t_pltime <= ip6_temp_regen_advance)
 		return (0);
 
 	/* XXX: scope zone ID? */
 
 	ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY);
 
 	/* allocate ifaddr structure, link into chain, etc. */
 	updateflags = 0;
 	if (delay)
 		updateflags |= IN6_IFAUPDATE_DADDELAY;
 	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0)
 		return (error);
 
 	newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
 	if (newia == NULL) {	/* XXX: can it happen? */
 		nd6log((LOG_ERR,
 		    "in6_tmpifadd: ifa update succeeded, but we got "
 		    "no ifaddr\n"));
 		return (EINVAL); /* XXX */
 	}
 	/* The new address shares the public address's prefix; take a ref. */
 	newia->ia6_ndpr = ia0->ia6_ndpr;
 	newia->ia6_ndpr->ndpr_refcnt++;
 
 	/*
 	 * A newly added address might affect the status of other addresses.
 	 * XXX: when the temporary address is generated with a new public
 	 * address, the onlink check is redundant.  However, it would be safe
 	 * to do the check explicitly everywhere a new address is generated,
 	 * and, in fact, we surely need the check when we create a new
 	 * temporary address due to deprecation of an old temporary address.
 	 */
 	pfxlist_onlink_check();
 
 	return (0);
 }
 
 /*
  * Turn a prefix's relative preferred/valid lifetimes into the absolute
  * times ndpr_preferred and ndpr_expire.  An infinite lifetime is recorded
  * as 0; any other value becomes time_second plus the lifetime.
  * Always returns 0.
  */
 static int
 in6_init_prefix_ltimes(struct nd_prefix *ndpr)
 {
 	ndpr->ndpr_preferred =
 	    (ndpr->ndpr_pltime != ND6_INFINITE_LIFETIME) ?
 	    time_second + ndpr->ndpr_pltime : 0;
 
 	ndpr->ndpr_expire =
 	    (ndpr->ndpr_vltime != ND6_INFINITE_LIFETIME) ?
 	    time_second + ndpr->ndpr_vltime : 0;
 
 	return 0;
 }
 
 /*
  * Fill in the absolute ia6t_expire and ia6t_preferred fields of lt6 from
  * its relative ia6t_vltime and ia6t_pltime.  An infinite lifetime maps to
  * 0; otherwise the result is time_second plus the lifetime.
  * The "new" argument is not used.
  */
 static void
 in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
 {
 	/* valid lifetime -> expiration time */
 	lt6->ia6t_expire = (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME) ?
 	    0 : time_second + lt6->ia6t_vltime;
 
 	/* preferred lifetime -> deprecation time */
 	lt6->ia6t_preferred = (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME) ?
 	    0 : time_second + lt6->ia6t_pltime;
 }
 
 /*
  * Delete all the routing table entries that use the specified gateway.
  * Only link-local gateways are handled; for any other address the function
  * returns without touching the table.  The walk covers the single radix
  * tree rt_tables[0][AF_INET6] and hands every node to rt6_deleteroute()
  * with the gateway address as its argument.  The ifp argument is not used
  * here.
  * XXX: this function causes search through all entries of routing table, so
  * it shouldn't be called when acting as a router.
  */
 void
 rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
 {
-	struct radix_node_head *rnh = rt_tables[AF_INET6];
+
+	struct radix_node_head *rnh = rt_tables[0][AF_INET6];
 	int s = splnet();
 
 	/* We'll care only link-local addresses */
 	if (!IN6_IS_ADDR_LINKLOCAL(gateway)) {
 		splx(s);
 		return;
 	}
 
 	/* The tree is walked with the radix head lock held. */
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
 	splx(s);
 }
 
 /*
  * Tree-walk callback used by rt6_flush().  arg points to the in6_addr of
  * the gateway being flushed.  A route is deleted only when its gateway is
  * an AF_INET6 address equal to arg, it is not static, and it is a host
  * route; every other entry is skipped with a return value of 0.
  * For a matching entry the result of rtrequest(RTM_DELETE, ...) is returned.
  */
 static int
 rt6_deleteroute(struct radix_node *rn, void *arg)
 {
 #define SIN6(s)	((struct sockaddr_in6 *)s)
 	struct rtentry *entry = (struct rtentry *)rn;
 	struct in6_addr *wanted = (struct in6_addr *)arg;
 
 	/* The route must have an IPv6 gateway... */
 	if (entry->rt_gateway == NULL)
 		return (0);
 	if (entry->rt_gateway->sa_family != AF_INET6)
 		return (0);
 
 	/* ...and that gateway must be the one being flushed. */
 	if (!IN6_ARE_ADDR_EQUAL(wanted, &SIN6(entry->rt_gateway)->sin6_addr))
 		return (0);
 
 	/*
 	 * Static routes are left alone (XXX: a bit ad-hoc; should the
 	 * 'cloned' bit be considered instead?), and so is anything that is
 	 * not a host route, which in particular keeps the default route.
 	 */
 	if ((entry->rt_flags & RTF_STATIC) != 0 ||
 	    (entry->rt_flags & RTF_HOST) == 0)
 		return (0);
 
 	return (rtrequest(RTM_DELETE, rt_key(entry), entry->rt_gateway,
 	    rt_mask(entry), entry->rt_flags, 0));
 #undef SIN6
 }
 
 /*
  * Select the default interface by index.  Index 0 means "no default
  * interface".  Returns EINVAL when ifindex is negative, larger than
  * if_index, or non-zero without a matching ifnet; returns 0 otherwise,
  * including when the index is unchanged.
  */
 int
 nd6_setdefaultiface(int ifindex)
 {
 	if (ifindex < 0 || ifindex > if_index)
 		return (EINVAL);
 	if (ifindex != 0 && ifnet_byindex(ifindex) == NULL)
 		return (EINVAL);
 
 	/* Nothing to do when the default is already the requested one. */
 	if (nd6_defifindex == ifindex)
 		return (0);
 
 	nd6_defifindex = ifindex;
 	nd6_defifp = (nd6_defifindex > 0) ?
 	    ifnet_byindex(nd6_defifindex) : NULL;
 
 	/*
 	 * The current implementation assumes a one-to-one mapping between
 	 * interfaces and links, so the default interface is also used as
 	 * the default link.
 	 */
 	scope6_setdefault(nd6_defifp);
 
 	return (0);
 }
Index: head/sys/netipx/ipx_proto.c
===================================================================
--- head/sys/netipx/ipx_proto.c	(revision 178887)
+++ head/sys/netipx/ipx_proto.c	(revision 178888)
@@ -1,149 +1,159 @@
 /*-
  * Copyright (c) 1984, 1985, 1986, 1987, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Copyright (c) 1995, Mike Mitchell
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ipx_proto.c
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipx.h"
 
 #include <sys/param.h>
 #include <sys/socket.h>
 #include <sys/protosw.h>
 #include <sys/domain.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 #include <sys/sysctl.h>
 
 #include <net/radix.h>
 
 #include <netipx/ipx.h>
 #include <netipx/ipx_var.h>
 #include <netipx/spx.h>
 
 static	struct pr_usrreqs nousrreqs;
 
 /*
  * IPX protocol family: IPX, ERR, PXP, SPX, ROUTE.
  */
 
 static	struct domain ipxdomain;
 
 /*
  * Protocol switch table for the IPX domain.  Every entry points back at
  * ipxdomain; members not named in an initializer are left zero.
  */
 static struct protosw ipxsw[] = {
 /* No socket type or protocol: carries ipx_init and the nousrreqs table. */
 {
 	.pr_domain =		&ipxdomain,
 	.pr_init =		ipx_init,
 	.pr_usrreqs =		&nousrreqs
 },
 /* SOCK_DGRAM sockets, handled by the ipx_* routines. */
 {
 	.pr_type =		SOCK_DGRAM,
 	.pr_domain =		&ipxdomain,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_ctlinput =		ipx_ctlinput,
 	.pr_ctloutput =		ipx_ctloutput,
 	.pr_usrreqs =		&ipx_usrreqs
 },
 /* SOCK_STREAM over SPX; this entry also carries spx_init and the timers. */
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&ipxdomain,
 	.pr_protocol =		IPXPROTO_SPX,
 	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD,
 	.pr_ctlinput =		spx_ctlinput,
 	.pr_ctloutput =		spx_ctloutput,
 	.pr_init =		spx_init,
 	.pr_fasttimo =		spx_fasttimo,
 	.pr_slowtimo =		spx_slowtimo,
 	.pr_usrreqs =		&spx_usrreqs
 },
 /* SOCK_SEQPACKET over SPX: same control hooks, separate usrreqs table. */
 {
 	.pr_type =		SOCK_SEQPACKET,
 	.pr_domain =		&ipxdomain,
 	.pr_protocol =		IPXPROTO_SPX,
 	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD|PR_ATOMIC,
 	.pr_ctlinput =		spx_ctlinput,
 	.pr_ctloutput =		spx_ctloutput,
 	.pr_usrreqs =		&spx_usrreq_sps
 },
 /* SOCK_RAW sockets (IPXPROTO_RAW), served by ripx_usrreqs. */
 {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&ipxdomain,
 	.pr_protocol =		IPXPROTO_RAW,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_ctloutput =		ipx_ctloutput,
 	.pr_usrreqs =		&ripx_usrreqs
 },
 };
 
+extern int ipx_inithead(void **, int);
+
 /*
  * Domain descriptor for AF_IPX.  dom_protosw/dom_protoswNPROTOSW bracket
  * the ipxsw table (the latter points one past its last entry), and
  * dom_rtattach names the routine used to attach the routing table head.
  */
 static struct	domain ipxdomain = {
 	.dom_family =		AF_IPX,
 	.dom_name =		"network systems",
 	.dom_protosw =		ipxsw,
 	.dom_protoswNPROTOSW =	&ipxsw[sizeof(ipxsw)/sizeof(ipxsw[0])],
-	.dom_rtattach =		rn_inithead,
+	.dom_rtattach =		ipx_inithead,
 	.dom_rtoffset =		16,
 	.dom_maxrtkey =		sizeof(struct sockaddr_ipx)
 };
+
+
+/*
+ * shim to adapt arguments: gives ipxdomain.dom_rtattach a function with the
+ * (void **, int) signature declared above and forwards both arguments to
+ * rn_inithead() unchanged, returning its result.
+ */
+int
+ipx_inithead(void **head, int offset)
+{
+	return rn_inithead(head, offset);
+}
 
 DOMAIN_SET(ipx);
 SYSCTL_NODE(_net,	PF_IPX,		ipx,	CTLFLAG_RW, 0,
 	"IPX/SPX");
 
 SYSCTL_NODE(_net_ipx,	IPXPROTO_RAW,	ipx,	CTLFLAG_RW, 0, "IPX");
 SYSCTL_NODE(_net_ipx,	IPXPROTO_SPX,	spx,	CTLFLAG_RW, 0, "SPX");
Index: head/sys/nfs4client/nfs4_vfsops.c
===================================================================
--- head/sys/nfs4client/nfs4_vfsops.c	(revision 178887)
+++ head/sys/nfs4client/nfs4_vfsops.c	(revision 178888)
@@ -1,878 +1,879 @@
 /* $Id: nfs_vfsops.c,v 1.38 2003/11/05 14:59:01 rees Exp $ */
 
 /*-
  * copyright (c) 2003
  * the regents of the university of michigan
  * all rights reserved
  * 
  * permission is granted to use, copy, create derivative works and redistribute
  * this software and such derivative works for any purpose, so long as the name
  * of the university of michigan is not used in any advertising or publicity
  * pertaining to the use or distribution of this software without specific,
  * written prior authorization.  if the above copyright notice or any other
  * identification of the university of michigan is included in any copy of any
  * portion of this software, then the disclaimer below must also be included.
  * 
  * this software is provided as is, without representation from the university
  * of michigan as to its fitness for any purpose, and without warranty by the
  * university of michigan of any kind, either express or implied, including
  * without limitation the implied warranties of merchantability and fitness for
  * a particular purpose. the regents of the university of michigan shall not be
  * liable for any damages, including special, indirect, incidental, or
  * consequential damages, with respect to any claim arising out of or in
  * connection with the use of the software, even if it has been or is hereafter
  * advised of the possibility of such damages.
  */
 
 /*-
  * Copyright (c) 1989, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)nfs_vfsops.c	8.12 (Berkeley) 5/20/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_bootp.h"
 #include "opt_nfsroot.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/mount.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/signalvar.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 
 #include <rpc/rpcclnt.h>
 
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
 #include <nfsclient/nfs.h>
 #include <nfs4client/nfs4.h>
 #include <nfsclient/nfsnode.h>
 #include <nfsclient/nfsmount.h>
 #include <nfs/xdr_subs.h>
 #include <nfsclient/nfsm_subs.h>
 #include <nfsclient/nfsdiskless.h>
 
 #include <nfs4client/nfs4m_subs.h>
 #include <nfs4client/nfs4_vfs.h>
 
 #include <nfs4client/nfs4_dev.h>
 #include <nfs4client/nfs4_idmap.h>
 
 SYSCTL_NODE(_vfs, OID_AUTO, nfs4, CTLFLAG_RW, 0, "NFS4 filesystem");
 SYSCTL_STRUCT(_vfs_nfs4, NFS_NFSSTATS, nfsstats, CTLFLAG_RD,
 	&nfsstats, nfsstats, "S,nfsstats");
 
 static void	nfs4_decode_args(struct nfsmount *nmp, struct nfs_args *argp);
 static void	nfs4_daemon(void *arg);
 static int	mountnfs(struct nfs_args *, struct mount *,
 		    struct sockaddr *, char *, struct vnode **,
 		    struct ucred *cred);
 static int	nfs4_do_setclientid(struct nfsmount *nmp, struct ucred *cred);
 static vfs_mount_t nfs4_mount;
 static vfs_cmount_t nfs4_cmount;
 static vfs_unmount_t nfs4_unmount;
 static vfs_root_t nfs4_root;
 static vfs_statfs_t nfs4_statfs;
 static vfs_sync_t nfs4_sync;
 
 /*
  * nfs vfs operations.
  * Table of VFS entry points for this filesystem; VFS_SET() below registers
  * it under the name "nfs4" with the VFCF_NETWORK flag.
  */
 static struct vfsops nfs4_vfsops = {
 	.vfs_init =		nfs4_init,
 	.vfs_mount =		nfs4_mount,
 	.vfs_cmount =		nfs4_cmount,
 	.vfs_root =		nfs4_root,
 	.vfs_statfs =		nfs4_statfs,
 	.vfs_sync =		nfs4_sync,
 	.vfs_uninit =		nfs4_uninit,
 	.vfs_unmount =		nfs4_unmount,
 };
 VFS_SET(nfs4_vfsops, nfs4, VFCF_NETWORK);
 
 /*
  * NFSv4 implementations of the RPC operations in struct nfs_rpcops.
  * The initializer is positional, so the order of the entries must follow
  * the member order of struct nfs_rpcops (declared outside this file).
  */
 static struct nfs_rpcops nfs4_rpcops = {
 	nfs4_readrpc,
 	nfs4_writerpc,
 	nfs4_writebp,
 	nfs4_readlinkrpc,
 	nfs4_invaldir,
 	nfs4_commit,
 };
 
 /* So that loader and kldload(2) can find us, wherever we are.. */
 MODULE_VERSION(nfs4, 1);
 
 void		nfsargs_ntoh(struct nfs_args *);
 
 /*
  * vfs_init hook for nfs4: runs the initializers of the RPC client layer,
  * the nfs4 device, the id mapper and the NFSv4 mbuf helpers, in that order.
  * vfsp is not used.  Always returns 0.
  */
 int
 nfs4_init(struct vfsconf *vfsp)
 {
 
 	rpcclnt_init();
 	nfs4dev_init();
 	idmap_init();
 	nfsm_v4init();
 
 	return (0);
 }
 
 /*
  * vfs_uninit hook for nfs4: tears down the RPC client layer, the nfs4
  * device and the id mapper.  vfsp is not used.  Always returns 0.
  * NOTE(review): nfs4_init() also calls nfsm_v4init(); no counterpart is
  * called here -- confirm none is needed.
  */
 int
 nfs4_uninit(struct vfsconf *vfsp)
 {
 
 	rpcclnt_uninit();
 	nfs4dev_uninit();
 	idmap_uninit();
 
 	return (0);
 }
 
 /*
  * nfs statfs call
  *
  * Looks up the node for the mount's file handle, sends a COMPOUND request
  * made of PUTFH + GETATTR (using the nfsv4_fsattrbm bitmap) and hands the
  * decoded attributes to nfs4_vfsop_statfs() to fill in sbp.
  * Returns the nfs_nget() error if the node cannot be obtained, otherwise
  * the value produced by nfs_v4postop().
  *
  * The nfsm_* macros are defined elsewhere; they presumably use the locals
  * mreq/mrep/md/mb/bpos/dpos/error and jump to nfsmout on failure, so the
  * declarations and the label must keep these names -- TODO confirm against
  * the macro definitions.
  */
 static int
 nfs4_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
 {
 	struct vnode *vp;
 	struct nfs_statfs *sfp;
 	caddr_t bpos, dpos;
 	struct nfsmount *nmp = VFSTONFS(mp);
 	int error = 0;
 	struct mbuf *mreq, *mrep = NULL, *md, *mb;
 	struct nfsnode *np;
 	struct nfs4_compound cp;
 	struct nfs4_oparg_getattr ga;
 	struct nfsv4_fattr *fap = &ga.fa;
 
 	/* sfp is otherwise unused in this function. */
 #ifndef nolint
 	sfp = NULL;
 #endif
 	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
 	if (error)
 		return (error);
 	vp = NFSTOV(np);
 	nfsstats.rpccnt[NFSPROC_FSSTAT]++;
 	mreq = nfsm_reqhead(vp, NFSV4PROC_COMPOUND, NFSX_FH(1));
 	mb = mreq;
 	bpos = mtod(mb, caddr_t);
 
 	ga.bm = &nfsv4_fsattrbm;
 	nfs_v4initcompound(&cp);
 
 	/* Build the request: COMPOUND { PUTFH, GETATTR }. */
 	nfsm_v4build_compound(&cp, "statfs()");
 	nfsm_v4build_putfh(&cp, vp);
 	nfsm_v4build_getattr(&cp, &ga);
 	nfsm_v4build_finalize(&cp);
 
 	nfsm_request(vp, NFSV4PROC_COMPOUND, td, td->td_ucred);
 	if (error != 0)
 		goto nfsmout;
 
 	/* Decode the reply in the same order the request was built. */
 	nfsm_v4dissect_compound(&cp);
 	nfsm_v4dissect_putfh(&cp);
 	nfsm_v4dissect_getattr(&cp, &ga);
 
 	nfs4_vfsop_statfs(fap, sbp, mp);
 
 	/* Shared exit: success falls through to the same cleanup. */
 nfsmout:
 	error = nfs_v4postop(&cp, error);
 
 	vput(vp);
 	if (mrep != NULL)
 		m_freem(mrep);
 
 	return (error);
 }
 
 /*
  * Apply the mount arguments in argp to the mount nmp.
  *
  * In order: adjust TCP-specific settings, merge the flags, then set and
  * clamp the timeout, retransmit count, write/read/readdir sizes, attribute
  * cache times, group list size, readahead and dead-server threshold, and
  * finally the socket type and protocol.  If the mount already has a socket
  * (nm_rpcclnt.rc_so) and a socket-affecting setting changed, the socket is
  * disconnected; for SOCK_DGRAM it is then reconnected, retrying until
  * nfs4_connect() succeeds.
  */
 static void
 nfs4_decode_args(struct nfsmount *nmp, struct nfs_args *argp)
 {
 	int s;
 	int adjsock;
 	int maxio;
 
 	s = splnet();
 
 	/*
 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
 	 * no sense in that context.  Also, set appropriate retransmit
 	 * and soft timeout behavior.
 	 */
 	if (argp->sotype == SOCK_STREAM) {
 		nmp->nm_flag &= ~NFSMNT_NOCONN;
 		nmp->nm_flag |= NFSMNT_DUMBTIMR;
 		nmp->nm_timeo = NFS_MAXTIMEO;
 		nmp->nm_retry = NFS_RETRANS_TCP;
 	}
 
 	/* NFSMNT_RDIRPLUS is always cleared on the mount here. */
 	nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
 
 	/*
 	 * adjsock is computed against the mount's flags as they were before
 	 * the merge below, so these tests must stay ahead of it.
 	 */
 	/* Re-bind if rsrvd port requested and wasn't on one */
 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
 		  && (argp->flags & NFSMNT_RESVPORT);
 	/* Also re-bind if we're switching to/from a connected UDP socket */
 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
 		    (argp->flags & NFSMNT_NOCONN));
 
 	/*
 	 * Update flags atomically.  Don't change the lock bits.
 	 * The new flags are OR-ed into the existing ones, so this step can
 	 * set bits on the mount but never clears any.
 	 */
 	nmp->nm_flag = argp->flags | nmp->nm_flag;
 	splx(s);
 
 	/* Timeout: scaled from argp->timeo, kept within NFS_MIN/MAXTIMEO. */
 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
 		if (nmp->nm_timeo < NFS_MINTIMEO)
 			nmp->nm_timeo = NFS_MINTIMEO;
 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
 			nmp->nm_timeo = NFS_MAXTIMEO;
 	}
 
 	/* Retransmit count: only values above 1 are taken, capped at max. */
 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
 		nmp->nm_retry = argp->retrans;
 		if (nmp->nm_retry > NFS_MAXREXMIT)
 			nmp->nm_retry = NFS_MAXREXMIT;
 	}
 
 	/* Upper bound for the I/O sizes below, chosen by version/transport. */
 	if (argp->flags & NFSMNT_NFSV3) {
 		if (argp->sotype == SOCK_DGRAM)
 			maxio = NFS_MAXDGRAMDATA;
 		else
 			maxio = NFS_MAXDATA;
 	} else
 		maxio = NFS_V2MAXDATA;
 
 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
 		nmp->nm_wsize = argp->wsize;
 		/* Round down to multiple of blocksize */
 		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
 		if (nmp->nm_wsize <= 0)
 			nmp->nm_wsize = NFS_FABLKSIZE;
 	}
 	/* These caps apply even when no new wsize was supplied. */
 	if (nmp->nm_wsize > maxio)
 		nmp->nm_wsize = maxio;
 	if (nmp->nm_wsize > MAXBSIZE)
 		nmp->nm_wsize = MAXBSIZE;
 
 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
 		nmp->nm_rsize = argp->rsize;
 		/* Round down to multiple of blocksize */
 		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
 		if (nmp->nm_rsize <= 0)
 			nmp->nm_rsize = NFS_FABLKSIZE;
 	}
 	if (nmp->nm_rsize > maxio)
 		nmp->nm_rsize = maxio;
 	if (nmp->nm_rsize > MAXBSIZE)
 		nmp->nm_rsize = MAXBSIZE;
 
 	/* readdirsize is capped by maxio and by the (already capped) rsize. */
 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
 		nmp->nm_readdirsize = argp->readdirsize;
 	}
 	if (nmp->nm_readdirsize > maxio)
 		nmp->nm_readdirsize = maxio;
 	if (nmp->nm_readdirsize > nmp->nm_rsize)
 		nmp->nm_readdirsize = nmp->nm_rsize;
 
 	/*
 	 * Attribute cache times: each one is taken from argp when its flag
 	 * is set and the value is non-negative, otherwise reset to the
 	 * compiled-in default.  Afterwards each min is lowered to its max
 	 * if it exceeds it.
 	 */
 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
 		nmp->nm_acregmin = argp->acregmin;
 	else
 		nmp->nm_acregmin = NFS_MINATTRTIMO;
 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
 		nmp->nm_acregmax = argp->acregmax;
 	else
 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
 		nmp->nm_acdirmin = argp->acdirmin;
 	else
 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
 		nmp->nm_acdirmax = argp->acdirmax;
 	else
 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
 		nmp->nm_acdirmin = nmp->nm_acdirmax;
 	if (nmp->nm_acregmin > nmp->nm_acregmax)
 		nmp->nm_acregmin = nmp->nm_acregmax;
 
 	/* The remaining limits are taken from argp but capped at their max. */
 	if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
 		if (argp->maxgrouplist <= NFS_MAXGRPS)
 			nmp->nm_numgrps = argp->maxgrouplist;
 		else
 			nmp->nm_numgrps = NFS_MAXGRPS;
 	}
 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
 		if (argp->readahead <= NFS_MAXRAHEAD)
 			nmp->nm_readahead = argp->readahead;
 		else
 			nmp->nm_readahead = NFS_MAXRAHEAD;
 	}
 	if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
 		if (argp->deadthresh <= NFS_MAXDEADTHRESH)
 			nmp->nm_deadthresh = argp->deadthresh;
 		else
 			nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
 	}
 
 	/* A change of socket type or protocol also requires a new socket. */
 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
 		    (nmp->nm_soproto != argp->proto));
 	nmp->nm_sotype = argp->sotype;
 	nmp->nm_soproto = argp->proto;
 
 	/*
 	 * Only an existing socket is torn down.  A datagram socket is
 	 * reconnected right away, sleeping on lbolt between attempts;
 	 * other socket types are not reconnected here.
 	 */
 	if (nmp->nm_rpcclnt.rc_so && adjsock) {
 		nfs_safedisconnect(nmp);
 		if (nmp->nm_sotype == SOCK_DGRAM) {
 			while (nfs4_connect(nmp)) {
 				printf("nfs4_decode_args: retrying connect\n");
 				(void)tsleep(&lbolt, PSOCK, "nfscon", 0);
 			}
 		}
 	}
 }
 
 /*
  * VFS Operations.
  *
  * mount system call
  * It seems a bit dumb to copyinstr() the host and path here and then
  * bcopy() them in mountnfs(), but I wanted to detect errors before
  * doing the sockargs() call because sockargs() allocates an mbuf and
  * an error after that means that I have to release the mbuf.
  */
 /* ARGSUSED */
 static int
 nfs4_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
 {
 	struct nfs_args args;
 	int error;
 
 	error = copyin(data, &args, sizeof(struct nfs_args));
 	if (error)
 		return (error);
 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
 	error = kernel_mount(ma, flags);
 	return (error);
 }
 
 /*
  * VFS mount entry point for nfs4.
  *
  * Fetches the "nfs_args" mount option into a local struct nfs_args, checks
  * its version and then either updates an existing mount (MNT_UPDATE) or
  * creates a new one through mountnfs().  Root mounts (MNT_ROOTFS) are
  * rejected.
  *
  * Returns 0 on success, otherwise an errno value: EINVAL for a root mount,
  * EPROGMISMATCH when args.version is not NFS_ARGSVERSION, EIO when an
  * update finds no nfsmount attached to mp, or the error reported by
  * vfs_copyopt(), copyinstr(), getsockaddr() or mountnfs().
  */
 static int
 nfs4_mount(struct mount *mp, struct thread *td)
 {
 	int error;
 	struct nfs_args args;
 	struct sockaddr *nam;
 	struct vnode *vp;
 	char hst[MNAMELEN];
 	size_t len;
 
 	if (mp->mnt_flag & MNT_ROOTFS) {
 		printf("nfs4_mountroot not supported\n");
 		return (EINVAL);
 	}
 	error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof args);
 	if (error)
 		return (error);
 
 	if (args.version != NFS_ARGSVERSION)
 		return (EPROGMISMATCH);
 	if (mp->mnt_flag & MNT_UPDATE) {
 		struct nfsmount *nmp = VFSTONFS(mp);
 
 		if (nmp == NULL)
 			return (EIO);
 		/*
 		 * When doing an update, we can't change from or to
 		 * v3, switch lockd strategies or change cookie translation
 		 */
 		/*
 		 * Concretely: the NFSV3, NFSV4 and NOLOCKD bits of the new
 		 * flags are replaced by the values the mount already has.
 		 */
 		args.flags = (args.flags &
 		    ~(NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_NOLOCKD)) |
 		    (nmp->nm_flag &
 			(NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_NOLOCKD));
 		nfs4_decode_args(nmp, &args);
 		return (0);
 	}
 
 	/* Copy in the server name, then zero hst from offset len to its end. */
 	error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
 	if (error)
 		return (error);
 	bzero(&hst[len], MNAMELEN - len);
 	/* sockargs() call must be after above copyin() calls */
 	/*
 	 * getsockaddr() hands back an allocated sockaddr in nam; from here
 	 * on mountnfs() owns it and frees it on its failure paths.
 	 */
 	error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen);
 	if (error)
 		return (error);
 	error = mountnfs(&args, mp, nam, hst, &vp, td->td_ucred);
 	return (error);
 }
 
 /*
  * renew should be done async
  * should re-scan mount queue each time
  */
 struct proc *nfs4_daemonproc;
 
 /*
  * Send an NFSv4 RENEW compound for the mount's client id.
  *
  * nmp  - mount whose nm_clientid is renewed; nm_last_renewal is set to
  *        time_second when the reply has been dissected.
  * cred - credentials passed to the request.
  *
  * Returns 0 on success, otherwise the error as filtered through
  * nfs_v4postop().
  *
  * NOTE(review): the nfsm_* macros are not visible here; they appear to
  * operate on the locals mreq/mrep/md/mb/bpos/dpos/error implicitly and
  * presumably jump to nfsmout on failure - confirm against their
  * definitions.
  */
 static int
 nfs4_do_renew(struct nfsmount *nmp, struct ucred *cred)
 {
 	struct nfs4_compound cp;
 	struct mbuf *mreq, *mrep = NULL, *md, *mb;
 	caddr_t bpos, dpos;	
 	int error;
 
 	/* Start a COMPOUND request and point the build cursor at its data. */
 	mreq = nfsm_reqhead(NULL, NFSV4PROC_COMPOUND, sizeof(uint64_t));
 	mb = mreq;
 	bpos = mtod(mb, caddr_t);
 
 	nfs_v4initcompound(&cp);
 
 	nfsm_v4build_compound(&cp, "nfs4_do_renew()");
 	nfsm_v4build_renew(&cp, nmp->nm_clientid);
 	nfsm_v4build_finalize(&cp);
 
 	nfsm_request_mnt(nmp, NFSV4PROC_COMPOUND, curthread, cred);
 	if (error != 0)
 		goto nfsmout;
 
 	nfsm_v4dissect_compound(&cp);
 	nfsm_v4dissect_renew(&cp);
 	nmp->nm_last_renewal = time_second;
 	/*
 	 * NOTE(review): this return bypasses the m_freem(mrep) below; confirm
 	 * whether the reply chain is released elsewhere on success.
 	 */
 	return (0);
 
  nfsmout:
 	error = nfs_v4postop(&cp, error);
 
 	/* XXX */
 	if (mrep != NULL)
 		m_freem(mrep);
 	return (error);
 }
 
 /*
  * Body of the lease renewal kernel process.
  *
  * Repeatedly walks the global mount list, and for every "nfs4" mount whose
  * lease is within 4 seconds of nm_last_renewal + nm_lease_time calls
  * nfs4_do_renew().  'arg' is used as the struct ucred for those requests.
  * When a pass finds no nfs4 mounts at all the process clears
  * nfs4_daemonproc and exits; otherwise it sleeps on &nfs4_daemonproc for
  * up to 2 * hz ticks before the next pass.
  */
 static void
 nfs4_daemon(void *arg)
 {
 	struct mount *mp;
 	struct nfsmount *nmp;
 	int nmounts;
 
 	while (1) {
 		nmounts = 0;
 		mtx_lock(&mountlist_mtx);
 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 			if (strcmp(mp->mnt_vfc->vfc_name, "nfs4") != 0)
 				continue;
 			nmounts++;
 			nmp = VFSTONFS(mp);
 			/* Lease not close to expiring yet: nothing to do. */
 			if (time_second < nmp->nm_last_renewal + nmp->nm_lease_time - 4)
 				continue;
 			/*
 			 * The renew RPC runs under Giant with the mount list
 			 * lock dropped.
 			 * NOTE(review): the list can presumably change while
 			 * the lock is released, yet iteration resumes from
 			 * the same mp - confirm this is safe.
 			 */
 			mtx_unlock(&mountlist_mtx);
 			mtx_lock(&Giant);
 			nfs4_do_renew(nmp, (struct ucred *) arg);
 			mtx_unlock(&Giant);
 			mtx_lock(&mountlist_mtx);
 		}
 		mtx_unlock(&mountlist_mtx);
 
 		/* Must kill the daemon here, or module unload will cause a panic */
 		if (nmounts == 0) {
 			mtx_lock(&Giant);
 			nfs4_daemonproc = NULL;
 			mtx_unlock(&Giant);
 			/*printf("nfsv4 renewd exiting\n");*/
 			kproc_exit(0);
 		}
 		/* nfs4_unmount() does a wakeup() on this channel. */
 		tsleep(&nfs4_daemonproc, PVFS, "nfs4", 2 * hz);
 	}
 }
 
 /*
  * Common code for mount and mountroot
  *
  * argp - mount arguments; NFSMNT_NFSV3 and NFSMNT_NFSV4 are forced on in
  *        argp->flags before they are decoded.
  * mp   - mount point; on a new mount mp->mnt_data is set to the nfsmount.
  * nam  - server address; stored in nm_nam on success and freed here on
  *        every other path (including MNT_UPDATE).
  * hst  - "host:path" string; recorded as the mounted-from name and then
  *        split in place at the first ':' to obtain the remote path.
  * vpp  - NOTE(review): never written by this function.
  * cred - credentials for the RPCs and for the renewal daemon.
  *
  * Returns 0 on success or an error from nfs4_connect(), the COMPOUND
  * request, or nfs4_do_setclientid().
  *
  * NOTE(review): the nfsm_* macros are not visible here; they appear to
  * use the locals mreq/mrep/md/mb/bpos/dpos/error implicitly and
  * presumably jump to nfsmout on failure - confirm.
  */
 static int
 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
     char *hst, struct vnode **vpp, struct ucred *cred)
 {
 	struct nfsmount *nmp;
 	char *rpth, *cp1, *cp2;
 	int nlkup = 0, error;
 	struct nfs4_compound cp;
 	struct mbuf *mreq, *mrep = NULL, *md, *mb;
 	caddr_t bpos, dpos;	
 	struct nfs4_oparg_lookup lkup;
 	struct nfs4_oparg_getfh gfh;
 	struct nfs4_oparg_getattr ga;
 	struct thread *td = curthread; /* XXX */
 
 	if (mp->mnt_flag & MNT_UPDATE) {
 		nmp = VFSTONFS(mp);
 		/* update paths, file handles, etc, here	XXX */
 		FREE(nam, M_SONAME);
 		return (0);
 	} else {
 		/* Fresh mount: allocate a zeroed nfsmount and attach it. */
 		nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
 		bzero((caddr_t)nmp, sizeof (struct nfsmount));
 		TAILQ_INIT(&nmp->nm_bufq);
 		mp->mnt_data = nmp;
 	}
 
 	vfs_getnewfsid(mp);
 	nmp->nm_mountp = mp;
 	mtx_init(&nmp->nm_mtx, "NFS4mount lock", NULL, MTX_DEF);			
 
 	/* Defaults; nfs4_decode_args() below is given the caller's args. */
 	nmp->nm_maxfilesize = 0xffffffffLL;
 	nmp->nm_timeo = NFS_TIMEO;
 	nmp->nm_retry = NFS_RETRANS;
 	nmp->nm_wsize = NFS_WSIZE;
 	nmp->nm_rsize = NFS_RSIZE;
 	nmp->nm_readdirsize = NFS_READDIRSIZE;
 	nmp->nm_numgrps = NFS_MAXGRPS;
 	nmp->nm_readahead = NFS_DEFRAHEAD;
 	nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
 	vfs_mountedfrom(mp, hst);
 	nmp->nm_nam = nam;
 	/* Set up the sockets and per-host congestion */
 	nmp->nm_sotype = argp->sotype;
 	nmp->nm_soproto = argp->proto;
 	nmp->nm_rpcops = &nfs4_rpcops;
 	/* XXX */
         mp->mnt_stat.f_iosize = PAGE_SIZE;
 
 	argp->flags |= (NFSMNT_NFSV3 | NFSMNT_NFSV4);
 
 	nfs4_decode_args(nmp, argp);
 
 	if ((error = nfs4_connect(nmp)))
 		goto bad;
 
 	mreq = nfsm_reqhead(NULL, NFSV4PROC_COMPOUND, NFSX_FH(1));
 	mb = mreq;
 	bpos = mtod(mb, caddr_t);
 
 	ga.bm = &nfsv4_fsinfobm;
 	nfs_v4initcompound(&cp);
 
 	/* Get remote path */
 	/*
 	 * strsep() overwrites the first ':' in hst with a NUL and leaves
 	 * rpth pointing just past it (or NULL when hst has no ':').
 	 */
 	rpth = hst;
 	strsep(&rpth, ":");
 
 	/*
 	 * Build PUTROOTFH, one LOOKUP per non-empty '/'-separated path
 	 * component, then GETFH and GETATTR.  nlkup counts the LOOKUPs so
 	 * the same number of replies can be dissected below.
 	 */
 	nfsm_v4build_compound(&cp, "mountnfs()");
 	nfsm_v4build_putrootfh(&cp);
 	for (cp1 = rpth; cp1 && *cp1; cp1 = cp2)  {
 		while (*cp1 == '/')
 			cp1++;
 		if (!*cp1)
 			break;
 		for (cp2 = cp1; *cp2 && *cp2 != '/'; cp2++)
 			;
 		lkup.name = cp1;
 		lkup.namelen = cp2 - cp1;
 		nfsm_v4build_lookup(&cp, &lkup);
 		nlkup++;
 	}
 	nfsm_v4build_getfh(&cp, &gfh);
 	nfsm_v4build_getattr(&cp, &ga);
 	nfsm_v4build_finalize(&cp);
 
 	nfsm_request_mnt(nmp, NFSV4PROC_COMPOUND, td, cred);
 	if (error != 0)
 		goto nfsmout;
 
 	nfsm_v4dissect_compound(&cp);
 	nfsm_v4dissect_putrootfh(&cp);
 	while (nlkup--)
 		nfsm_v4dissect_lookup(&cp);
 	nfsm_v4dissect_getfh(&cp, &gfh);
 	nfsm_v4dissect_getattr(&cp, &ga);
 
 	nfs4_vfsop_fsinfo(&ga.fa, nmp);
 	nmp->nm_state |= NFSSTA_GOTFSINFO;
 
 	/* Copy root fh into nfsmount. */
 	nmp->nm_fhsize = gfh.fh_len;
 	bcopy(&gfh.fh_val, nmp->nm_fh, nmp->nm_fhsize);
 	nmp->nm_last_renewal = time_second;
 
 	if ((error = nfs4_do_setclientid(nmp, cred)) != 0)
 		goto nfsmout;
 
 	/* Start renewd if it isn't already running */
 	/* The daemon gets its own duplicate of cred as its argument. */
 	if (nfs4_daemonproc == NULL)
 		kproc_create(nfs4_daemon, crdup(cred), &nfs4_daemonproc,
 			       (RFPROC|RFMEM), 0, "nfs4rd");
 
 	return (0);
  nfsmout:
 	error = nfs_v4postop(&cp, error);
 
 	/* XXX */
 	if (mrep != NULL)
 		m_freem(mrep);
 bad:
 	/*
 	 * Tear down everything set up above.
 	 * NOTE(review): mp->mnt_data still points at nmp after it is freed
 	 * here - confirm no caller looks at it on failure.
 	 */
 	mtx_destroy(&nmp->nm_mtx);
 	nfs4_disconnect(nmp);
 	uma_zfree(nfsmount_zone, nmp);
 	FREE(nam, M_SONAME);
 
 	return (error);
 }
 
 /*
  * unmount system call
  *
  * mp       - mount being removed.
  * mntflags - MNT_FORCE selects a forced unmount (FORCECLOSE for vflush()).
  * td       - calling thread, passed to vflush().
  *
  * Returns 0 once the mount has been torn down, or the error from
  * nfs_nmcancelreqs() / vflush(), in which case nothing has been freed.
  */
 static int
 nfs4_unmount(struct mount *mp, int mntflags, struct thread *td)
 {
 	struct nfsmount *nmp;
 	int error, flags = 0;
 
 	if (mntflags & MNT_FORCE)
 		flags |= FORCECLOSE;
 	nmp = VFSTONFS(mp);
 	/*
 	 * Goes something like this..
 	 * - Call vflush(, td) to clear out vnodes for this filesystem
 	 * - Close the socket
 	 * - Free up the data structures
 	 */
 	/* In the forced case, cancel any outstanding requests. */
 	if (flags & FORCECLOSE) {
 		error = nfs_nmcancelreqs(nmp);
 		if (error)
 			return (error);
 		nfs4dev_purge();
 	}
 
 	error = vflush(mp, 0, flags, td);
 	if (error)
 		return (error);
 
 	/*
 	 * We are now committed to the unmount.
 	 */
 	nfs4_disconnect(nmp);
 	FREE(nmp->nm_nam, M_SONAME);
 
 	/* XXX there's a race condition here for SMP */
 	/* Wake the renewal daemon, which sleeps on &nfs4_daemonproc. */
 	wakeup(&nfs4_daemonproc);
 
 	mtx_destroy(&nmp->nm_mtx);
 	uma_zfree(nfsmount_zone, nmp);
 	return (0);
 }
 
 /*
  * Return root of a filesystem
  *
  * Looks up the nfsnode for the root file handle stored in the nfsmount
  * (locked LK_EXCLUSIVE), marks its vnode as the root and stores it in
  * *vpp.  Returns 0, or the error from nfs_nget() with *vpp untouched.
  */
 static int
 nfs4_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
 {
 	struct nfsmount *mnt;
 	struct nfsnode *rootnode;
 	struct vnode *rootvp;
 	int rv;
 
 	mnt = VFSTONFS(mp);
 	rv = nfs_nget(mp, (nfsfh_t *)mnt->nm_fh, mnt->nm_fhsize, &rootnode,
 	    LK_EXCLUSIVE);
 	if (rv != 0)
 		return (rv);
 
 	rootvp = NFSTOV(rootnode);
 	/* A root vnode that has no type yet is made a directory. */
 	if (rootvp->v_type == VNON)
 		rootvp->v_type = VDIR;
 	rootvp->v_vflag |= VV_ROOT;
 
 	*vpp = rootvp;
 	return (0);
 }
 
 /*
  * Flush out the buffer cache
  *
  * Walks every vnode of the mount and calls VOP_FSYNC() on those that are
  * not currently locked and have dirty buffers.  With waitfor == MNT_LAZY
  * every vnode is skipped.
  *
  * Returns 0, or the error of the most recent VOP_FSYNC() that failed
  * (earlier errors are overwritten).
  */
 static int
 nfs4_sync(struct mount *mp, int waitfor, struct thread *td)
 {
 	struct vnode *vp, *mvp;
 	int error, allerror = 0;
 
 	/*
 	 * Force stale buffer cache information to be flushed.
 	 */
 	MNT_ILOCK(mp);
 loop:
 	MNT_VNODE_FOREACH(vp, mp, mvp) {
 		/* Swap the mount interlock for the vnode interlock. */
 		VI_LOCK(vp);
 		MNT_IUNLOCK(mp);
 		/* XXX racy bv_cnt check. */
 		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
 		    waitfor == MNT_LAZY) {
 			VI_UNLOCK(vp);
 			MNT_ILOCK(mp);
 			continue;
 		}
 		/* vget() failed: abandon this iteration and rescan from the top. */
 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
 			MNT_ILOCK(mp);
 			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
 			goto loop;
 		}
 		error = VOP_FSYNC(vp, waitfor, td);
 		if (error)
 			allerror = error;
 		VOP_UNLOCK(vp, 0);
 		vrele(vp);
 
 		/* The loop macro is re-entered with the mount interlock held. */
 		MNT_ILOCK(mp);
 	}
 	MNT_IUNLOCK(mp);
 	return (allerror);
 }
 
 /*
  * Obtain and confirm an NFSv4 client id for the mount.
  *
  * When NFS4_USE_RPCCLNT is not defined this returns 0 immediately and the
  * rest of the function is not executed.  Otherwise it looks up the route
  * to the server to learn the local source address, builds a callback
  * address and a client name from it, sends SETCLIENTID, stores the
  * returned id in nmp->nm_clientid and then sends SETCLIENTID_CONFIRM.  On
  * NFSERR_CLID_INUSE the sequence is retried with a different name, up to
  * NFS4_SETCLIENTID_MAXTRIES attempts.
  *
  * Returns 0 on success, EHOSTUNREACH when no route is found, or the error
  * as filtered through nfs_v4postop().
  *
  * NOTE(review): the nfsm_* macros are not visible here; they appear to
  * use the locals mreq/mrep/md/mb/bpos/dpos/error implicitly and
  * presumably jump to nfsmout on failure - confirm.
  */
 static int
 nfs4_do_setclientid(struct nfsmount *nmp, struct ucred *cred)
 {
 	struct nfs4_oparg_setclientid scid;
 	struct nfs4_compound cp;
 	struct mbuf *mreq, *mrep = NULL, *md, *mb;
 	caddr_t bpos, dpos;	
 	struct route ro;
 	char *ipsrc = NULL, uaddr[24], name[24];
 	int try = 0;
 	static unsigned long seq;
 	int error;
 
 #ifndef NFS4_USE_RPCCLNT
 	return (0);
 #endif
 	/*
 	 * NOTE(review): both early 'goto nfsmout' paths below reach
 	 * nfs_v4postop(&cp, ...) before nfs_v4initcompound(&cp) has run -
 	 * confirm that is harmless.
 	 */
 	if (nmp->nm_clientid) {
 		printf("nfs4_do_setclientid: already have clientid!\n");
 		error = 0;
 		goto nfsmout;
 	}
 
 	/* Try not to re-use clientids */
 	if (seq == 0)
 		seq = time_second;
 
 #ifdef NFS4_USE_RPCCLNT
 	scid.cb_netid = (nmp->nm_rpcclnt.rc_sotype == SOCK_STREAM) ? "tcp" : "udp";
 #endif
 	/* Whatever was chosen above is overridden: the netid is always "tcp". */
 	scid.cb_netid = "tcp";
 	scid.cb_netidlen = 3;
 	scid.cb_prog = 0x1234; /* XXX */
 
 	/* Do a route lookup to find our source address for talking to this server */
 	bzero(&ro, sizeof ro);
 
 #ifdef NFS4_USE_RPCCLNT
 	ro.ro_dst = *nmp->nm_rpcclnt.rc_name;
 #endif
-	rtalloc(&ro);
+/* XXX MRT NFS uses table 0 */
+	in_rtalloc(&ro, 0);
 	if (ro.ro_rt == NULL) {
 		error = EHOSTUNREACH;
 		goto nfsmout;
 	}
 	/*
 	 * The callback address is the source address with ".12.48" appended.
 	 * ipsrc is still used after the route is released; presumably
 	 * inet_ntoa() returns storage that outlives it - verify.
 	 */
 	ipsrc = inet_ntoa(IA_SIN(ifatoia(ro.ro_rt->rt_ifa))->sin_addr);
 	sprintf(uaddr, "%s.12.48", ipsrc);
 	scid.cb_univaddr = uaddr;
 	scid.cb_univaddrlen = strlen(uaddr);
 	RTFREE(ro.ro_rt);
 
  try_again:
 	/* Client name: "<source address>-<(seq + try) mod 1000000>". */
 	sprintf(name, "%s-%d", ipsrc, (int) ((seq + try) % 1000000L));
 	scid.namelen = strlen(name);
 	scid.name = name;
 	nfs_v4initcompound(&cp);
 
 	mreq = nfsm_reqhead(NULL, NFSV4PROC_COMPOUND, NFSX_FH(1));
 	mb = mreq;
 	bpos = mtod(mb, caddr_t);
 
 	nfsm_v4build_compound(&cp, "nfs4_do_setclientid()");
 	nfsm_v4build_setclientid(&cp, &scid);
 	nfsm_v4build_finalize(&cp);
 
 	nfsm_request_mnt(nmp, NFSV4PROC_COMPOUND, curthread, cred);
 	if (error != 0)
 		goto nfsmout;
 
 	nfsm_v4dissect_compound(&cp);
 	nfsm_v4dissect_setclientid(&cp, &scid);
 	nmp->nm_clientid = scid.clientid;
 
 	error = nfs_v4postop(&cp, error);
 
 	/* Confirm */
 	/* Release the first reply, then build the confirm request. */
 	m_freem(mrep);
 	mreq = nfsm_reqhead(NULL, NFSV4PROC_COMPOUND, NFSX_FH(1));
 	mb = mreq;
 	bpos = mtod(mb, caddr_t);
 
 	nfs_v4initcompound(&cp);
 
 	nfsm_v4build_compound(&cp, "nfs4_do_setclientid() (confirm)");
 	nfsm_v4build_setclientid_confirm(&cp, &scid);
 	nfsm_v4build_finalize(&cp);
 
 	nfsm_request_mnt(nmp, NFSV4PROC_COMPOUND, curthread, cred);
 	if (error != 0)
 		goto nfsmout;
 
 	nfsm_v4dissect_compound(&cp);
 	nfsm_v4dissect_setclientid_confirm(&cp);
 
  nfsmout:
 	error = nfs_v4postop(&cp, error);
 
 	if (mrep)
 		m_freem(mrep);
 	/* Name collision on the server: try again with the next suffix. */
 	if (error == NFSERR_CLID_INUSE && (++try < NFS4_SETCLIENTID_MAXTRIES))
 		goto try_again;
 
 	return (error);
 }
Index: head/sys/nfsclient/bootp_subr.c
===================================================================
--- head/sys/nfsclient/bootp_subr.c	(revision 178887)
+++ head/sys/nfsclient/bootp_subr.c	(revision 178888)
@@ -1,1861 +1,1862 @@
 /*-
  * Copyright (c) 1995 Gordon Ross, Adam Glass
  * Copyright (c) 1992 Regents of the University of California.
  * All rights reserved.
  *
  * This software was developed by the Computer Systems Engineering group
  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
  * contributed to Berkeley.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Lawrence Berkeley Laboratory and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * based on:
  *      nfs/krpc_subr.c
  *	$NetBSD: krpc_subr.c,v 1.10 1995/08/08 20:43:43 gwr Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_bootp.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sockio.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 
 #include <net/if.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <net/if_types.h>
 #include <net/if_dl.h>
 
 #include <rpc/rpcclnt.h>
 
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
 #include <nfsclient/nfs.h>
 #include <nfsclient/nfsdiskless.h>
 #include <nfsclient/krpc.h>
 #include <nfs/xdr_subs.h>
 
 
 #define BOOTP_MIN_LEN		300	/* Minimum size of bootp udp packet */
 
 #ifndef BOOTP_SETTLE_DELAY
 #define BOOTP_SETTLE_DELAY 3
 #endif
 
 /*
  * What is the longest we will wait before re-sending a request?
  * Note this is also the frequency of "RPC timeout" messages.
  * The re-send loop counts up linearly to this maximum, so the
  * first complaint will happen after (1+2+3+4+5)=15 seconds.
  */
 #define	MAX_RESEND_DELAY 5	/* seconds */
 
 /* Definitions from RFC951 */
 /*
  * BOOTP/DHCP packet.  The same layout is used for the request that is
  * sent and for the reply that is received.
  */
 struct bootp_packet {
 	u_int8_t op;		/* compared against BOOTP_REPLY on receive */
 	u_int8_t htype;
 	u_int8_t hlen;		/* number of chaddr bytes that are compared */
 	u_int8_t hops;
 	u_int32_t xid;		/* replies are matched to a request by xid */
 	u_int16_t secs;		/* elapsed seconds, stored with htons() */
 	u_int16_t flags;
 	struct in_addr ciaddr;
 	struct in_addr yiaddr;
 	struct in_addr siaddr;	/* printed as the address a reply came from */
 	struct in_addr giaddr;	/* printed as "via" when not INADDR_ANY */
 	unsigned char chaddr[16];	/* hardware address */
 	char sname[64];
 	char file[128];
 	unsigned char vend[1222];
 };
 
 /*
  * Per-interface BOOTP/DHCP state.  Contexts are allocated zeroed by
  * allocifctx() and chained through 'next'.
  */
 struct bootpc_ifcontext {
 	struct bootpc_ifcontext *next;	/* next context in the list */
 	struct bootp_packet call;	/* request sent on this interface */
 	struct bootp_packet reply;	/* last reply accepted for it */
 	int replylen;			/* length of 'reply' */
 	int overload;
 	struct socket *so;		/* socket used for interface ioctls */
 	struct ifreq ireq;		/* request block for those ioctls */
 	struct ifnet *ifp;
 	struct sockaddr_dl *sdl;	/* link-level address of the interface */
 	struct sockaddr_in myaddr;
 	struct sockaddr_in netmask;
 	struct sockaddr_in gw;
 	struct sockaddr_in broadcast;	/* Different for each interface */
 	int gotgw;			/* reply carried TAG_ROUTERS */
 	int gotnetmask;			/* reply carried TAG_SUBNETMASK */
 	int gotrootpath;		/* reply carried TAG_ROOT */
 	int outstanding;		/* a reply is still awaited */
 	int sentmsg;			/* "Sending ..." message was printed */
 	u_int32_t xid;			/* taken from the global context */
 	/* Progress of the negotiation on this interface. */
 	enum {
 		IF_BOOTP_UNRESOLVED,
 		IF_BOOTP_RESOLVED,
 		IF_BOOTP_FAILED,
 		IF_DHCP_UNRESOLVED,
 		IF_DHCP_OFFERED,
 		IF_DHCP_RESOLVED,
 		IF_DHCP_FAILED,
 	} state;
 	int dhcpquerytype;		/* dhcp type sent */
 	struct in_addr dhcpserver;	/* from TAG_DHCP_SERVERID */
 	int gotdhcpserver;		/* dhcpserver is valid */
 };
 
 #define TAG_MAXLEN 1024
 /*
  * Scratch state handed to bootpc_tag() when searching a packet for an
  * option.
  */
 struct bootpc_tagcontext {
 	char buf[TAG_MAXLEN + 1];
 	int overload;
 	int badopt;	/* nonzero makes bootpc_received() ignore the packet */
 	int badtag;
 	int foundopt;
 	int taglen;	/* length of the option that was found */
 };
 
 /*
  * State shared by all interfaces during a BOOTP/DHCP exchange.
  */
 struct bootpc_globalcontext {
 	struct bootpc_ifcontext *interfaces;	/* head of the context list */
 	struct bootpc_ifcontext *lastinterface;	/* tail of the context list */
 	u_int32_t xid;		/* next xid; advanced by 0x100 per interface */
 	int gotrootpath;
 	int gotgw;
 	int ifnum;
 	int secs;		/* time_second - starttime */
 	int starttime;
 	struct bootp_packet reply;	/* buffer incoming packets are read into */
 	int replylen;			/* length of 'reply' */
 	struct bootpc_ifcontext *setrootfs;
 	struct bootpc_ifcontext *sethostname;
 	struct bootpc_tagcontext tmptag;	/* scratch for bootpc_tag() lookups */
 	struct bootpc_tagcontext tag;
 };
 
 #define IPPORT_BOOTPC 68
 #define IPPORT_BOOTPS 67
 
 #define BOOTP_REQUEST 1
 #define BOOTP_REPLY 2
 
 /* Common tags */
 #define TAG_PAD		  0  /* Pad option, implicit length 1 */
 #define TAG_SUBNETMASK	  1  /* RFC 950 subnet mask */
 #define TAG_ROUTERS	  3  /* Routers (in order of preference) */
 #define TAG_HOSTNAME	 12  /* Client host name */
 #define TAG_ROOT	 17  /* Root path */
 
 /* DHCP specific tags */
 #define TAG_OVERLOAD	 52  /* Option Overload */
 #define TAG_MAXMSGSIZE   57  /* Maximum DHCP Message Size */
 
 #define TAG_END		255  /* End Option (i.e. no more options) */
 
 /* Overload values */
 #define OVERLOAD_FILE     1
 #define OVERLOAD_SNAME    2
 
 /* Site specific tags: */
 #define TAG_ROOTOPTS	130
 #define TAG_COOKIE	134	/* ascii info for userland, via sysctl */
 
 #define TAG_DHCP_MSGTYPE 53
 #define TAG_DHCP_REQ_ADDR 50
 #define TAG_DHCP_SERVERID 54
 #define TAG_DHCP_LEASETIME 51
 
 #define TAG_VENDOR_INDENTIFIER 60
 
 #define DHCP_NOMSG    0
 #define DHCP_DISCOVER 1
 #define DHCP_OFFER    2
 #define DHCP_REQUEST  3
 #define DHCP_ACK      5
 
 static char bootp_cookie[128];
 SYSCTL_STRING(_kern, OID_AUTO, bootp_cookie, CTLFLAG_RD,
 	bootp_cookie, 0, "Cookie (T134) supplied by bootp server");
 
 /* mountd RPC */
 static int	md_mount(struct sockaddr_in *mdsin, char *path, u_char *fhp,
 		    int *fhsizep, struct nfs_args *args, struct thread *td);
 static int	setfs(struct sockaddr_in *addr, char *path, char *p,
 		    const struct in_addr *siaddr);
 static int	getdec(char **ptr);
 static int	getip(char **ptr, struct in_addr *ip);
 static void	mountopts(struct nfs_args *args, char *p);
 static int	xdr_opaque_decode(struct mbuf **ptr, u_char *buf, int len);
 static int	xdr_int_decode(struct mbuf **ptr, int *iptr);
 static void	print_in_addr(struct in_addr addr);
 static void	print_sin_addr(struct sockaddr_in *addr);
 static void	clear_sinaddr(struct sockaddr_in *sin);
 static void	allocifctx(struct bootpc_globalcontext *gctx);
 static void	bootpc_compose_query(struct bootpc_ifcontext *ifctx,
 		    struct bootpc_globalcontext *gctx, struct thread *td);
 static unsigned char *bootpc_tag(struct bootpc_tagcontext *tctx,
 		    struct bootp_packet *bp, int len, int tag);
 static void bootpc_tag_helper(struct bootpc_tagcontext *tctx,
 		    unsigned char *start, int len, int tag);
 
 #ifdef BOOTP_DEBUG
 void bootpboot_p_sa(struct sockaddr *sa, struct sockaddr *ma);
 void bootpboot_p_ma(struct sockaddr *ma);
 void bootpboot_p_rtentry(struct rtentry *rt);
 void bootpboot_p_tree(struct radix_node *rn);
 void bootpboot_p_rtlist(void);
 void bootpboot_p_if(struct ifnet *ifp, struct ifaddr *ifa);
 void bootpboot_p_iflist(void);
 #endif
 
 static int	bootpc_call(struct bootpc_globalcontext *gctx,
 		    struct thread *td);
 
 static int	bootpc_fakeup_interface(struct bootpc_ifcontext *ifctx,
 		    struct bootpc_globalcontext *gctx, struct thread *td);
 
 static int	bootpc_adjust_interface(struct bootpc_ifcontext *ifctx,
 		    struct bootpc_globalcontext *gctx, struct thread *td);
 
 static void	bootpc_decode_reply(struct nfsv3_diskless *nd,
 		    struct bootpc_ifcontext *ifctx,
 		    struct bootpc_globalcontext *gctx);
 
 static int	bootpc_received(struct bootpc_globalcontext *gctx,
 		    struct bootpc_ifcontext *ifctx);
 
 static __inline int bootpc_ifctx_isresolved(struct bootpc_ifcontext *ifctx);
 static __inline int bootpc_ifctx_isunresolved(struct bootpc_ifcontext *ifctx);
 static __inline int bootpc_ifctx_isfailed(struct bootpc_ifcontext *ifctx);
 
 /*
  * In order to have multiple active interfaces with address 0.0.0.0
  * and be able to send data to a selected interface, we perform
  * some tricks:
  *
  *  - The 'broadcast' address is different for each interface.
  *
  *  - We temporarily add routing pointing 255.255.255.255 to the
  *    selected interface broadcast address, thus the packet sent
  *    goes to that interface.
  */
 
 #ifdef BOOTP_DEBUG
 /*
  * Debug helper: print a socket address and, for AF_INET, an optional mask.
  *
  * sa - address to print; NULL prints "(sockaddr *) <null>".
  * ma - mask printed after an AF_INET address when not NULL; ignored for
  *      every other family.
  *
  * AF_LINK addresses are printed as the interface name followed by the
  * link-level address bytes in hex, separated by ':'.  Any other family is
  * printed as "af<number>".
  */
 void
 bootpboot_p_sa(struct sockaddr *sa, struct sockaddr *ma)
 {
 
 	if (sa == NULL) {
 		printf("(sockaddr *) <null>");
 		return;
 	}
 	switch (sa->sa_family) {
 	case AF_INET:
 	{
 		struct sockaddr_in *sin;
 
 		sin = (struct sockaddr_in *) sa;
 		printf("inet ");
 		print_sin_addr(sin);
 		if (ma != NULL) {
 			sin = (struct sockaddr_in *) ma;
 			printf(" mask ");
 			print_sin_addr(sin);
 		}
 	}
 	break;
 	case AF_LINK:
 	{
 		struct sockaddr_dl *sli;
 		int i;
 
 		sli = (struct sockaddr_dl *) sa;
 		/* sdl_data starts with sdl_nlen bytes of interface name. */
 		printf("link %.*s ", sli->sdl_nlen, sli->sdl_data);
 		/* Bytes are printed with "%x", i.e. without zero padding. */
 		for (i = 0; i < sli->sdl_alen; i++) {
 			if (i > 0)
 				printf(":");
 			printf("%x", ((unsigned char *) LLADDR(sli))[i]);
 		}
 	}
 	break;
 	default:
 		printf("af%d", sa->sa_family);
 	}
 }
 
 /*
  * Debug helper: print the first int-sized word of a mask in hex, or
  * "<null>" when no mask is given.
  */
 void
 bootpboot_p_ma(struct sockaddr *ma)
 {
 
 	if (ma != NULL)
 		printf("%x", *(int *)ma);
 	else
 		printf("<null>");
 }
 
 /*
  * Debug helper: print one routing table entry on a single line - key and
  * mask, genmask, gateway, flags, expiry value and interface name.
  */
 void
 bootpboot_p_rtentry(struct rtentry *rt)
 {
 
 	bootpboot_p_sa(rt_key(rt), rt_mask(rt));
 	printf(" ");
 	bootpboot_p_ma(rt->rt_genmask);
 	printf(" ");
 	bootpboot_p_sa(rt->rt_gateway, NULL);
 	printf(" ");
 	/* Only the low 16 bits of rt_flags are shown. */
 	printf("flags %x", (unsigned short) rt->rt_flags);
 	printf(" %d", (int) rt->rt_rmx.rmx_expire);
 	printf(" %s\n", rt->rt_ifp->if_xname);
 }
 
 /*
  * Debug helper: print every route reachable from radix node 'rn'.
  *
  * A node with rn_bit >= 0 is descended into through rn_left and rn_right.
  * A node with rn_bit < 0 is printed as a struct rtentry unless it carries
  * RNF_ROOT, and the walk then continues along its rn_dupedkey chain.
  */
 void
 bootpboot_p_tree(struct radix_node *rn)
 {
 
 	for (; rn != NULL; rn = rn->rn_dupedkey) {
 		if (rn->rn_bit >= 0) {
 			bootpboot_p_tree(rn->rn_left);
 			bootpboot_p_tree(rn->rn_right);
 			return;
 		}
 		if ((rn->rn_flags & RNF_ROOT) == 0)
 			bootpboot_p_rtentry((struct rtentry *) rn);
 	}
 }
 
 /*
  * Debug helper: print the AF_INET routing table, holding its radix node
  * lock for the duration of the walk.
  */
 void
 bootpboot_p_rtlist(void)
 {
 
 	printf("Routing table:\n");
 	RADIX_NODE_LOCK(rt_tables[AF_INET]);	/* could sleep XXX */
 	bootpboot_p_tree(rt_tables[AF_INET]->rnh_treetop);
 	RADIX_NODE_UNLOCK(rt_tables[AF_INET]);
 }
 
 /*
  * Debug helper: print one interface address on a single line - interface
  * name and flags, then the address, ifa_dstaddr (labelled "broadcast")
  * and netmask.  All three sockaddrs are printed as struct sockaddr_in;
  * the visible caller only passes AF_INET addresses.
  */
 void
 bootpboot_p_if(struct ifnet *ifp, struct ifaddr *ifa)
 {
 
 	printf("%s flags %x, addr ",
 	       ifp->if_xname, ifp->if_flags);
 	print_sin_addr((struct sockaddr_in *) ifa->ifa_addr);
 	printf(", broadcast ");
 	print_sin_addr((struct sockaddr_in *) ifa->ifa_dstaddr);
 	printf(", netmask ");
 	print_sin_addr((struct sockaddr_in *) ifa->ifa_netmask);
 	printf("\n");
 }
 
 /*
  * Debug helper: print every AF_INET address of every interface, with the
  * interface list read-locked for the whole walk.
  */
 void
 bootpboot_p_iflist(void)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 
 	printf("Interface list:\n");
 	IFNET_RLOCK(); /* could sleep, but okay for debugging XXX */
 	ifp = TAILQ_FIRST(&ifnet);
 	while (ifp != NULL) {
 		ifa = TAILQ_FIRST(&ifp->if_addrhead);
 		while (ifa != NULL) {
 			if (ifa->ifa_addr->sa_family == AF_INET)
 				bootpboot_p_if(ifp, ifa);
 			ifa = TAILQ_NEXT(ifa, ifa_link);
 		}
 		ifp = TAILQ_NEXT(ifp, if_link);
 	}
 	IFNET_RUNLOCK();
 }
 #endif /* defined(BOOTP_DEBUG) */
 
 /*
  * Reset *sin to an AF_INET address of INADDR_ANY with port 0 and the
  * length field filled in.
  */
 static void
 clear_sinaddr(struct sockaddr_in *sin)
 {
 
 	bzero(sin, sizeof(*sin));
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(*sin);
 	sin->sin_port = 0;
 	sin->sin_addr.s_addr = INADDR_ANY; /* XXX: htonl(INADDR_ANY) ? */
 }
 
 /*
  * Allocate a zeroed per-interface context and append it to the list kept
  * in gctx.
  *
  * The new context takes the current gctx->xid, which is then advanced by
  * 0x100 for the next interface.  Its initial state is IF_BOOTP_UNRESOLVED
  * when BOOTP_NO_DHCP is defined and IF_DHCP_UNRESOLVED otherwise.
  */
 static void
 allocifctx(struct bootpc_globalcontext *gctx)
 {
 	struct bootpc_ifcontext *ifctx;
 
 	/*
 	 * An M_WAITOK allocation sleeps until it can be satisfied and never
 	 * returns NULL, so the result needs no check.  M_ZERO leaves every
 	 * field, including ->next, cleared.
 	 */
 	ifctx = (struct bootpc_ifcontext *) malloc(sizeof(*ifctx),
 						   M_TEMP, M_WAITOK | M_ZERO);
 
 	ifctx->xid = gctx->xid;
 #ifdef BOOTP_NO_DHCP
 	ifctx->state = IF_BOOTP_UNRESOLVED;
 #else
 	ifctx->state = IF_DHCP_UNRESOLVED;
 #endif
 	gctx->xid += 0x100;
 
 	/* Append: link after the current tail, or become the head. */
 	if (gctx->interfaces != NULL)
 		gctx->lastinterface->next = ifctx;
 	else
 		gctx->interfaces = ifctx;
 	gctx->lastinterface = ifctx;
 }
 
 /* Return 1 when the interface finished via BOOTP or DHCP, else 0. */
 static __inline int
 bootpc_ifctx_isresolved(struct bootpc_ifcontext *ifctx)
 {
 
 	return (ifctx->state == IF_BOOTP_RESOLVED ||
 	    ifctx->state == IF_DHCP_RESOLVED);
 }
 
 static __inline int
 bootpc_ifctx_isunresolved(struct bootpc_ifcontext *ifctx)
 {
 
 	if (ifctx->state == IF_BOOTP_UNRESOLVED ||
 	    ifctx->state == IF_DHCP_UNRESOLVED)
 		return 1;
 	return 0;
 }
 
 /* Return 1 when the interface is in a FAILED state, else 0. */
 static __inline int
 bootpc_ifctx_isfailed(struct bootpc_ifcontext *ifctx)
 {
 
 	return (ifctx->state == IF_BOOTP_FAILED ||
 	    ifctx->state == IF_DHCP_FAILED);
 }
 
 /*
  * Decide whether the packet in gctx->reply is acceptable for interface
  * ifctx and, if so, record it.
  *
  * On acceptance the packet is copied into ifctx->reply, ifctx->state is
  * advanced, and either the DHCP server id (while still negotiating DHCP)
  * or the gotrootpath/gotgw/gotnetmask flags are updated.
  *
  * Returns 1 when the packet was accepted, 0 when it was ignored.
  */
 static int
 bootpc_received(struct bootpc_globalcontext *gctx,
     struct bootpc_ifcontext *ifctx)
 {
 	unsigned char dhcpreplytype;
 	char *p;
 
 	/*
 	 * Need timeout for fallback to less
 	 * desirable alternative.
 	 */
 
 	/* This call used for the side effect (badopt flag) */
 	(void) bootpc_tag(&gctx->tmptag, &gctx->reply,
 			  gctx->replylen,
 			  TAG_END);
 
 	/* If packet is invalid, ignore it */
 	if (gctx->tmptag.badopt != 0)
 		return 0;
 
 	/* No message type option means this is a plain BOOTP reply. */
 	p = bootpc_tag(&gctx->tmptag, &gctx->reply,
 		       gctx->replylen, TAG_DHCP_MSGTYPE);
 	if (p != NULL)
 		dhcpreplytype = *p;
 	else
 		dhcpreplytype = DHCP_NOMSG;
 
 	/* The reply type has to fit the query that was last sent. */
 	switch (ifctx->dhcpquerytype) {
 	case DHCP_DISCOVER:
 		if (dhcpreplytype != DHCP_OFFER 	/* Normal DHCP offer */
 #ifndef BOOTP_FORCE_DHCP
 		    && dhcpreplytype != DHCP_NOMSG	/* Fallback to BOOTP */
 #endif
 			)
 			return 0;
 		break;
 	case DHCP_REQUEST:
 		if (dhcpreplytype != DHCP_ACK)
 			return 0;
 		/* FALLTHROUGH */
 	case DHCP_NOMSG:
 		break;
 	}
 
 	/* Ignore packet unless it gives us a root tag we didn't have */
 	/*
 	 * That is: when this interface already holds a reply, drop the new
 	 * packet if the stored reply has a root path or the new one lacks it.
 	 */
 
 	if ((ifctx->state == IF_BOOTP_RESOLVED ||
 	     (ifctx->dhcpquerytype == DHCP_DISCOVER &&
 	      (ifctx->state == IF_DHCP_OFFERED ||
 	       ifctx->state == IF_DHCP_RESOLVED))) &&
 	    (bootpc_tag(&gctx->tmptag, &ifctx->reply,
 			ifctx->replylen,
 			TAG_ROOT) != NULL ||
 	     bootpc_tag(&gctx->tmptag, &gctx->reply,
 			gctx->replylen,
 			TAG_ROOT) == NULL))
 		return 0;
 
 	bcopy(&gctx->reply, &ifctx->reply, gctx->replylen);
 	ifctx->replylen = gctx->replylen;
 
 	/* XXX: Only reset if 'perfect' response */
 	if (ifctx->state == IF_BOOTP_UNRESOLVED)
 		ifctx->state = IF_BOOTP_RESOLVED;
 	else if (ifctx->state == IF_DHCP_UNRESOLVED &&
 		 ifctx->dhcpquerytype == DHCP_DISCOVER) {
 		if (dhcpreplytype == DHCP_OFFER)
 			ifctx->state = IF_DHCP_OFFERED;
 		else
 			ifctx->state = IF_BOOTP_RESOLVED;	/* Fallback */
 	} else if (ifctx->state == IF_DHCP_OFFERED &&
 		   ifctx->dhcpquerytype == DHCP_REQUEST)
 		ifctx->state = IF_DHCP_RESOLVED;
 
 
 	/*
 	 * Still in the DHCP discover phase: only remember which server
 	 * answered (a 4-byte server id option) and stop here.
 	 */
 	if (ifctx->dhcpquerytype == DHCP_DISCOVER &&
 	    ifctx->state != IF_BOOTP_RESOLVED) {
 		p = bootpc_tag(&gctx->tmptag, &ifctx->reply,
 			       ifctx->replylen, TAG_DHCP_SERVERID);
 		if (p != NULL && gctx->tmptag.taglen == 4) {
 			memcpy(&ifctx->dhcpserver, p, 4);
 			ifctx->gotdhcpserver = 1;
 		} else
 			ifctx->gotdhcpserver = 0;
 		return 1;
 	}
 
 	/* Record which of the interesting options the stored reply has. */
 	ifctx->gotrootpath = (bootpc_tag(&gctx->tmptag, &ifctx->reply,
 					 ifctx->replylen,
 					 TAG_ROOT) != NULL);
 	ifctx->gotgw = (bootpc_tag(&gctx->tmptag, &ifctx->reply,
 				   ifctx->replylen,
 				   TAG_ROUTERS) != NULL);
 	ifctx->gotnetmask = (bootpc_tag(&gctx->tmptag, &ifctx->reply,
 					ifctx->replylen,
 					TAG_SUBNETMASK) != NULL);
 	return 1;
 }
 
 static int
 bootpc_call(struct bootpc_globalcontext *gctx, struct thread *td)
 {
 	struct socket *so;
 	struct sockaddr_in *sin, dst;
 	struct uio auio;
 	struct sockopt sopt;
 	struct iovec aio;
 	int error, on, rcvflg, timo, len;
 	time_t atimo;
 	time_t rtimo;
 	struct timeval tv;
 	struct bootpc_ifcontext *ifctx;
 	int outstanding;
 	int gotrootpath;
 	int retry;
 	const char *s;
 
 	/*
 	 * Create socket and set its receive timeout.
 	 */
 	error = socreate(AF_INET, &so, SOCK_DGRAM, 0, td->td_ucred, td);
 	if (error != 0)
 		goto out0;
 
 	tv.tv_sec = 1;
 	tv.tv_usec = 0;
 	bzero(&sopt, sizeof(sopt));
 	sopt.sopt_dir = SOPT_SET;
 	sopt.sopt_level = SOL_SOCKET;
 	sopt.sopt_name = SO_RCVTIMEO;
 	sopt.sopt_val = &tv;
 	sopt.sopt_valsize = sizeof tv;
 
 	error = sosetopt(so, &sopt);
 	if (error != 0)
 		goto out;
 
 	/*
 	 * Enable broadcast.
 	 */
 	on = 1;
 	sopt.sopt_name = SO_BROADCAST;
 	sopt.sopt_val = &on;
 	sopt.sopt_valsize = sizeof on;
 
 	error = sosetopt(so, &sopt);
 	if (error != 0)
 		goto out;
 
 	/*
 	 * Disable routing.
 	 */
 
 	on = 1;
 	sopt.sopt_name = SO_DONTROUTE;
 	sopt.sopt_val = &on;
 	sopt.sopt_valsize = sizeof on;
 
 	error = sosetopt(so, &sopt);
 	if (error != 0)
 		goto out;
 
 	/*
 	 * Bind the local endpoint to a bootp client port.
 	 */
 	sin = &dst;
 	clear_sinaddr(sin);
 	sin->sin_port = htons(IPPORT_BOOTPC);
 	error = sobind(so, (struct sockaddr *)sin, td);
 	if (error != 0) {
 		printf("bind failed\n");
 		goto out;
 	}
 
 	/*
 	 * Setup socket address for the server.
 	 */
 	sin = &dst;
 	clear_sinaddr(sin);
 	sin->sin_addr.s_addr = INADDR_BROADCAST;
 	sin->sin_port = htons(IPPORT_BOOTPS);
 
 	/*
 	 * Send it, repeatedly, until a reply is received,
 	 * but delay each re-send by an increasing amount.
 	 * If the delay hits the maximum, start complaining.
 	 */
 	timo = 0;
 	rtimo = 0;
 	for (;;) {
 
 		outstanding = 0;
 		gotrootpath = 0;
 
 		for (ifctx = gctx->interfaces;
 		     ifctx != NULL;
 		     ifctx = ifctx->next) {
 			if (bootpc_ifctx_isresolved(ifctx) != 0 &&
 			    bootpc_tag(&gctx->tmptag, &ifctx->reply,
 				       ifctx->replylen,
 				       TAG_ROOT) != NULL)
 				gotrootpath = 1;
 		}
 
 		for (ifctx = gctx->interfaces;
 		     ifctx != NULL;
 		     ifctx = ifctx->next) {
 			ifctx->outstanding = 0;
 			if (bootpc_ifctx_isresolved(ifctx)  != 0 &&
 			    gotrootpath != 0) {
 				continue;
 			}
 			if (bootpc_ifctx_isfailed(ifctx) != 0)
 				continue;
 
 			outstanding++;
 			ifctx->outstanding = 1;
 
 			/* Proceed to next step in DHCP negotiation */
 			if ((ifctx->state == IF_DHCP_OFFERED &&
 			     ifctx->dhcpquerytype != DHCP_REQUEST) ||
 			    (ifctx->state == IF_DHCP_UNRESOLVED &&
 			     ifctx->dhcpquerytype != DHCP_DISCOVER) ||
 			    (ifctx->state == IF_BOOTP_UNRESOLVED &&
 			     ifctx->dhcpquerytype != DHCP_NOMSG)) {
 				ifctx->sentmsg = 0;
 				bootpc_compose_query(ifctx, gctx, td);
 			}
 
 			/* Send BOOTP request (or re-send). */
 
 			if (ifctx->sentmsg == 0) {
 				switch(ifctx->dhcpquerytype) {
 				case DHCP_DISCOVER:
 					s = "DHCP Discover";
 					break;
 				case DHCP_REQUEST:
 					s = "DHCP Request";
 					break;
 				case DHCP_NOMSG:
 				default:
 					s = "BOOTP Query";
 					break;
 				}
 				printf("Sending %s packet from "
 				       "interface %s (%*D)\n",
 				       s,
 				       ifctx->ireq.ifr_name,
 				       ifctx->sdl->sdl_alen,
 				       (unsigned char *) LLADDR(ifctx->sdl),
 				       ":");
 				ifctx->sentmsg = 1;
 			}
 
 			aio.iov_base = (caddr_t) &ifctx->call;
 			aio.iov_len = sizeof(ifctx->call);
 
 			auio.uio_iov = &aio;
 			auio.uio_iovcnt = 1;
 			auio.uio_segflg = UIO_SYSSPACE;
 			auio.uio_rw = UIO_WRITE;
 			auio.uio_offset = 0;
 			auio.uio_resid = sizeof(ifctx->call);
 			auio.uio_td = td;
 
 			/* Set netmask to 0.0.0.0 */
 
 			sin = (struct sockaddr_in *) &ifctx->ireq.ifr_addr;
 			clear_sinaddr(sin);
 			error = ifioctl(ifctx->so, SIOCSIFNETMASK,
 					(caddr_t) &ifctx->ireq, td);
 			if (error != 0)
 				panic("bootpc_call:"
 				      "set if netmask, error=%d",
 				      error);
 
 			error = sosend(so, (struct sockaddr *) &dst,
 				       &auio, NULL, NULL, 0, td);
 			if (error != 0) {
 				printf("bootpc_call: sosend: %d state %08x\n",
 				       error, (int) so->so_state);
 			}
 
 			/* XXX: Is this needed ? */
 			pause("bootpw", hz/10);
 
 			/* Set netmask to 255.0.0.0 */
 
 			sin = (struct sockaddr_in *) &ifctx->ireq.ifr_addr;
 			clear_sinaddr(sin);
 			sin->sin_addr.s_addr = htonl(0xff000000u);
 			error = ifioctl(ifctx->so, SIOCSIFNETMASK,
 					(caddr_t) &ifctx->ireq, td);
 			if (error != 0)
 				panic("bootpc_call:"
 				      "set if netmask, error=%d",
 				      error);
 
 		}
 
 		if (outstanding == 0 &&
 		    (rtimo == 0 || time_second >= rtimo)) {
 			error = 0;
 			goto gotreply;
 		}
 
 		/* Determine new timeout. */
 		if (timo < MAX_RESEND_DELAY)
 			timo++;
 		else {
 			printf("DHCP/BOOTP timeout for server ");
 			print_sin_addr(&dst);
 			printf("\n");
 		}
 
 		/*
 		 * Wait for up to timo seconds for a reply.
 		 * The socket receive timeout was set to 1 second.
 		 */
 		atimo = timo + time_second;
 		while (time_second < atimo) {
 			aio.iov_base = (caddr_t) &gctx->reply;
 			aio.iov_len = sizeof(gctx->reply);
 
 			auio.uio_iov = &aio;
 			auio.uio_iovcnt = 1;
 			auio.uio_segflg = UIO_SYSSPACE;
 			auio.uio_rw = UIO_READ;
 			auio.uio_offset = 0;
 			auio.uio_resid = sizeof(gctx->reply);
 			auio.uio_td = td;
 
 			rcvflg = 0;
 			error = soreceive(so, NULL, &auio,
 					  NULL, NULL, &rcvflg);
 			gctx->secs = time_second - gctx->starttime;
 			for (ifctx = gctx->interfaces;
 			     ifctx != NULL;
 			     ifctx = ifctx->next) {
 				if (bootpc_ifctx_isresolved(ifctx) != 0 ||
 				    bootpc_ifctx_isfailed(ifctx) != 0)
 					continue;
 
 				ifctx->call.secs = htons(gctx->secs);
 			}
 			if (error == EWOULDBLOCK)
 				continue;
 			if (error != 0)
 				goto out;
 			len = sizeof(gctx->reply) - auio.uio_resid;
 
 			/* Do we have the required number of bytes ? */
 			if (len < BOOTP_MIN_LEN)
 				continue;
 			gctx->replylen = len;
 
 			/* Is it a reply? */
 			if (gctx->reply.op != BOOTP_REPLY)
 				continue;
 
 			/* Is this an answer to our query */
 			for (ifctx = gctx->interfaces;
 			     ifctx != NULL;
 			     ifctx = ifctx->next) {
 				if (gctx->reply.xid != ifctx->call.xid)
 					continue;
 
 				/* Same HW address size ? */
 				if (gctx->reply.hlen != ifctx->call.hlen)
 					continue;
 
 				/* Correct HW address ? */
 				if (bcmp(gctx->reply.chaddr,
 					 ifctx->call.chaddr,
 					 ifctx->call.hlen) != 0)
 					continue;
 
 				break;
 			}
 
 			if (ifctx != NULL) {
 				s =  bootpc_tag(&gctx->tmptag,
 						&gctx->reply,
 						gctx->replylen,
 						TAG_DHCP_MSGTYPE);
 				if (s != NULL) {
 					switch (*s) {
 					case DHCP_OFFER:
 						s = "DHCP Offer";
 						break;
 					case DHCP_ACK:
 						s = "DHCP Ack";
 						break;
 					default:
 						s = "DHCP (unexpected)";
 						break;
 					}
 				} else
 					s = "BOOTP Reply";
 
 				printf("Received %s packet"
 				       " on %s from ",
 				       s,
 				       ifctx->ireq.ifr_name);
 				print_in_addr(gctx->reply.siaddr);
 				if (gctx->reply.giaddr.s_addr !=
 				    htonl(INADDR_ANY)) {
 					printf(" via ");
 					print_in_addr(gctx->reply.giaddr);
 				}
 				if (bootpc_received(gctx, ifctx) != 0) {
 					printf(" (accepted)");
 					if (ifctx->outstanding) {
 						ifctx->outstanding = 0;
 						outstanding--;
 					}
 					/* Network settle delay */
 					if (outstanding == 0)
 						atimo = time_second +
 							BOOTP_SETTLE_DELAY;
 				} else
 					printf(" (ignored)");
 				if (ifctx->gotrootpath) {
 					gotrootpath = 1;
 					rtimo = time_second +
 						BOOTP_SETTLE_DELAY;
 					printf(" (got root path)");
 				} else
 					printf(" (no root path)");
 				printf("\n");
 			}
 		} /* while secs */
 #ifdef BOOTP_TIMEOUT
 		if (gctx->secs > BOOTP_TIMEOUT && BOOTP_TIMEOUT > 0)
 			break;
 #endif
 		/* Force a retry if halfway in DHCP negotiation */
 		retry = 0;
 		for (ifctx = gctx->interfaces; ifctx != NULL;
 		     ifctx = ifctx->next) {
 			if (ifctx->state == IF_DHCP_OFFERED) {
 				if (ifctx->dhcpquerytype == DHCP_DISCOVER)
 					retry = 1;
 				else
 					ifctx->state = IF_DHCP_UNRESOLVED;
 			}
 		}
 
 		if (retry != 0)
 			continue;
 
 		if (gotrootpath != 0) {
 			gctx->gotrootpath = gotrootpath;
 			if (rtimo != 0 && time_second >= rtimo)
 				break;
 		}
 	} /* forever send/receive */
 
 	/*
 	 * XXX: These are errors of varying seriousness being silently
 	 * ignored
 	 */
 
 	for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) {
 		if (bootpc_ifctx_isresolved(ifctx) == 0) {
 			printf("%s timeout for interface %s\n",
 			       ifctx->dhcpquerytype != DHCP_NOMSG ?
 			       "DHCP" : "BOOTP",
 			       ifctx->ireq.ifr_name);
 		}
 	}
 	if (gctx->gotrootpath != 0) {
 #if 0
 		printf("Got a root path, ignoring remaining timeout\n");
 #endif
 		error = 0;
 		goto out;
 	}
 #ifndef BOOTP_NFSROOT
 	for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) {
 		if (bootpc_ifctx_isresolved(ifctx) != 0) {
 			error = 0;
 			goto out;
 		}
 	}
 #endif
 	error = ETIMEDOUT;
 	goto out;
 
 gotreply:
 out:
 	soclose(so);
 out0:
 	return error;
 }
 
 /*
  * Give an interface a temporary IPv4 configuration so that BOOTP/DHCP
  * requests can be sent on it.
  *
  * Creates an AF_INET/SOCK_DGRAM socket (stored in ifctx->so), ORs IFF_UP
  * into the interface flags, then sets the address, a 255.0.0.0 netmask
  * and a 255.255.255.255 broadcast address through ifioctl().  Finally the
  * interface's link-level address is located and saved in ifctx->sdl.
  *
  * Every failure panics, so the value returned is the (zero) result of the
  * last ifioctl() call.
  */
 static int
 bootpc_fakeup_interface(struct bootpc_ifcontext *ifctx,
     struct bootpc_globalcontext *gctx, struct thread *td)
 {
 	struct sockaddr_in *sin;
 	int error;
 	struct ifreq *ireq;
 	struct socket *so;
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 
 	error = socreate(AF_INET, &ifctx->so, SOCK_DGRAM, 0, td->td_ucred, td);
 	if (error != 0)
 		panic("nfs_boot: socreate, error=%d", error);
 
 	ireq = &ifctx->ireq;
 	so = ifctx->so;
 
 	/*
 	 * Bring up the interface.
 	 *
 	 * Get the old interface flags and or IFF_UP into them; if
 	 * IFF_UP set blindly, interface selection can be clobbered.
 	 */
 	error = ifioctl(so, SIOCGIFFLAGS, (caddr_t)ireq, td);
 	if (error != 0)
 		panic("bootpc_fakeup_interface: GIFFLAGS, error=%d", error);
 	ireq->ifr_flags |= IFF_UP;
 	error = ifioctl(so, SIOCSIFFLAGS, (caddr_t)ireq, td);
 	if (error != 0)
 		panic("bootpc_fakeup_interface: SIFFLAGS, error=%d", error);
 
 	/*
 	 * Do enough of ifconfig(8) so that the chosen interface
 	 * can talk to the servers.  (just set the address)
 	 */
 
 	/* addr is 0.0.0.0 */
 
 	sin = (struct sockaddr_in *) &ireq->ifr_addr;
 	clear_sinaddr(sin);
 	error = ifioctl(so, SIOCSIFADDR, (caddr_t) ireq, td);
 	/* EEXIST is tolerated, except on the first interface in the list. */
 	if (error != 0 && (error != EEXIST || ifctx == gctx->interfaces))
 		panic("bootpc_fakeup_interface: "
 		      "set if addr, error=%d", error);
 
 	/* netmask is 255.0.0.0 */
 
 	sin = (struct sockaddr_in *) &ireq->ifr_addr;
 	clear_sinaddr(sin);
 	sin->sin_addr.s_addr = htonl(0xff000000u);
 	error = ifioctl(so, SIOCSIFNETMASK, (caddr_t)ireq, td);
 	if (error != 0)
 		panic("bootpc_fakeup_interface: set if netmask, error=%d",
 		      error);
 
 	/* Broadcast is 255.255.255.255 */
 
 	sin = (struct sockaddr_in *)&ireq->ifr_addr;
 	clear_sinaddr(sin);
 	clear_sinaddr(&ifctx->broadcast);
 	sin->sin_addr.s_addr = htonl(INADDR_BROADCAST);
 	/* Keep a copy of the broadcast address in the interface context. */
 	ifctx->broadcast.sin_addr.s_addr = sin->sin_addr.s_addr;
 
 	error = ifioctl(so, SIOCSIFBRDADDR, (caddr_t)ireq, td);
 	if (error != 0)
 		panic("bootpc_fakeup_interface: "
 		      "set if broadcast addr, error=%d",
 		      error);
 
 	/* Get HW address */
 
 	/*
 	 * Stop at the first AF_LINK address of type IFT_ETHER; if none has
 	 * that type, sdl is left pointing at the last AF_LINK address seen.
 	 */
 	sdl = NULL;
 	TAILQ_FOREACH(ifa, &ifctx->ifp->if_addrhead, ifa_link)
 		if (ifa->ifa_addr->sa_family == AF_LINK) {
 			sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 			if (sdl->sdl_type == IFT_ETHER)
 				break;
 		}
 
 	if (sdl == NULL)
 		panic("bootpc: Unable to find HW address for %s",
 		      ifctx->ireq.ifr_name);
 	ifctx->sdl = sdl;
 
 	return error;
 }
 
 
 /*
  * Apply the outcome of the BOOTP/DHCP exchange to one interface.
  *
  * If the interface was not resolved, IFF_UP is cleared, its address is
  * removed with SIOCDIFADDR and 0 is returned.  Otherwise the netmask,
  * broadcast address and address from the interface context are
  * installed, and a default route through ifctx->gw is added when this
  * interface got a gateway or no interface did (gctx->gotgw == 0).
  *
  * ifioctl() failures panic; a failure to add the route is printed and
  * returned to the caller.  Returns 0 on success.
  */
 static int
 bootpc_adjust_interface(struct bootpc_ifcontext *ifctx,
     struct bootpc_globalcontext *gctx, struct thread *td)
 {
 	int error;
 	struct sockaddr_in defdst;
 	struct sockaddr_in defmask;
 	struct sockaddr_in *sin;
 	struct ifreq *ireq;
 	struct socket *so;
 	struct sockaddr_in *myaddr;
 	struct sockaddr_in *netmask;
 	struct sockaddr_in *gw;
 
 	ireq = &ifctx->ireq;
 	so = ifctx->so;
 	myaddr = &ifctx->myaddr;
 	netmask = &ifctx->netmask;
 	gw = &ifctx->gw;
 
 	if (bootpc_ifctx_isresolved(ifctx) == 0) {
 
 		/* Shutdown interfaces where BOOTP failed */
 
 		printf("Shutdown interface %s\n", ifctx->ireq.ifr_name);
 		error = ifioctl(so, SIOCGIFFLAGS, (caddr_t)ireq, td);
 		if (error != 0)
 			panic("bootpc_adjust_interface: "
 			      "SIOCGIFFLAGS, error=%d", error);
 		ireq->ifr_flags &= ~IFF_UP;
 		error = ifioctl(so, SIOCSIFFLAGS, (caddr_t)ireq, td);
 		if (error != 0)
 			panic("bootpc_adjust_interface: "
 			      "SIOCSIFFLAGS, error=%d", error);
 
 		/* Remove the temporary address set while probing. */
 		sin = (struct sockaddr_in *) &ireq->ifr_addr;
 		clear_sinaddr(sin);
 		error = ifioctl(so, SIOCDIFADDR, (caddr_t) ireq, td);
 		if (error != 0 && (error != EEXIST ||
 				   ifctx == gctx->interfaces))
 			panic("bootpc_adjust_interface: "
 			      "SIOCDIFADDR, error=%d", error);
 
 		return 0;
 	}
 
 	printf("Adjusted interface %s\n", ifctx->ireq.ifr_name);
 	/*
 	 * Do enough of ifconfig(8) so that the chosen interface
 	 * can talk to the servers.  (just set the address)
 	 */
 	bcopy(netmask, &ireq->ifr_addr, sizeof(*netmask));
 	error = ifioctl(so, SIOCSIFNETMASK, (caddr_t) ireq, td);
 	if (error != 0)
 		panic("bootpc_adjust_interface: "
 		      "set if netmask, error=%d", error);
 
 	/* Broadcast is with host part of IP address all 1's */
 
 	sin = (struct sockaddr_in *) &ireq->ifr_addr;
 	clear_sinaddr(sin);
 	sin->sin_addr.s_addr = myaddr->sin_addr.s_addr |
 		~ netmask->sin_addr.s_addr;
 	error = ifioctl(so, SIOCSIFBRDADDR, (caddr_t) ireq, td);
 	if (error != 0)
 		panic("bootpc_adjust_interface: "
 		      "set if broadcast addr, error=%d", error);
 
 	bcopy(myaddr, &ireq->ifr_addr, sizeof(*myaddr));
 	error = ifioctl(so, SIOCSIFADDR, (caddr_t) ireq, td);
 	/* EEXIST is tolerated, except on the first interface in the list. */
 	if (error != 0 && (error != EEXIST || ifctx == gctx->interfaces))
 		panic("bootpc_adjust_interface: "
 		      "set if addr, error=%d", error);
 
 	/* Add new default route */
 
 	if (ifctx->gotgw != 0 || gctx->gotgw == 0) {
 		clear_sinaddr(&defdst);
 		clear_sinaddr(&defmask);
-		error = rtrequest(RTM_ADD,
+		/* XXX MRT just table 0 */
+		error = rtrequest_fib(RTM_ADD,
 				  (struct sockaddr *) &defdst,
 				  (struct sockaddr *) gw,
 				  (struct sockaddr *) &defmask,
-				  (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL);
+				  (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL, 0);
 		if (error != 0) {
 			printf("bootpc_adjust_interface: "
 			       "add net route, error=%d\n", error);
 			return error;
 		}
 	}
 
 	return 0;
 }
 
 /*
  * Split a root path specification into a server address and a path.
  *
  * Accepts "a.b.c.d:path", or a string starting with '/' when a fallback
  * server address (siaddr) is supplied.  On success addr is filled in as
  * an AF_INET sockaddr, up to MNAMELEN bytes of the path part are copied
  * into path, and 1 is returned.  Returns 0 if the string has neither form.
  */
 static int
 setfs(struct sockaddr_in *addr, char *path, char *p,
     const struct in_addr *siaddr)
 {
 
 	if (getip(&p, &addr->sin_addr) != 0) {
 		/* A literal address must be followed by ':'. */
 		if (*p != ':')
 			return 0;
 		p++;
 	} else if (siaddr == NULL || *p != '/') {
 		/* No address in the string and nothing to fall back on. */
 		return 0;
 	} else {
 		bcopy(siaddr, &addr->sin_addr, sizeof(struct in_addr));
 	}
 
 	addr->sin_family = AF_INET;
 	addr->sin_len = sizeof(struct sockaddr_in);
 
 	strlcpy(path, p, MNAMELEN);
 	return 1;
 }
 
 /*
  * Parse a dotted-quad IPv4 address ("a.b.c.d") at *ptr.
  *
  * Each of the four components must be a decimal number in 0..255 and
  * consecutive components must be separated by a single '.'.  On success
  * the address is stored in addr in network byte order, *ptr is advanced
  * past the last digit and 1 is returned.  On failure 0 is returned and
  * neither *ptr nor *addr is modified.
  */
 static int
 getip(char **ptr, struct in_addr *addr)
 {
 	char *p;
 	unsigned int ip;
 	int i;
 	int val;
 
 	p = *ptr;
 	ip = 0;
 	for (i = 0; i < 4; i++) {
 		/* Every component after the first is preceded by a dot. */
 		if (i > 0) {
 			if (*p != '.')
 				return 0;
 			p++;
 		}
 		val = getdec(&p);
 		if (val < 0 || val > 255)
 			return 0;
 		/*
 		 * Accumulate in unsigned arithmetic: shifting the signed
 		 * octet left by 24 overflows int for values >= 128.
 		 */
 		ip = (ip << 8) | (unsigned int)val;
 	}
 
 	addr->s_addr = htonl(ip);
 	*ptr = p;
 	return 1;
 }
 
 /*
  * Parse an unsigned decimal number at *ptr.
  *
  * Returns -1, leaving *ptr untouched, if the first character is not a
  * digit.  Otherwise consumes every consecutive digit, advances *ptr past
  * them and returns the value.  A value too large for an int saturates at
  * the largest int instead of overflowing.
  */
 static int
 getdec(char **ptr)
 {
 	const int maxval = (int)(~0U >> 1);	/* largest positive int */
 	char *p;
 	int digit;
 	int ret;
 
 	p = *ptr;
 	ret = 0;
 	if ((*p < '0') || (*p > '9'))
 		return -1;
 	while ((*p >= '0') && (*p <= '9')) {
 		digit = *p - '0';
 		/* Check before multiplying so signed overflow never occurs. */
 		if (ret > (maxval - digit) / 10)
 			ret = maxval;
 		else
 			ret = ret * 10 + digit;
 		p++;
 	}
 	*ptr = p;
 	return ret;
 }
 
 /*
  * Fill in the default NFS mount arguments (datagram socket, 8192 byte
  * read and write sizes, reserved port) and, when an option string is
  * supplied, hand it to nfs_parse_options() to adjust them.
  */
 static void
 mountopts(struct nfs_args *args, char *p)
 {
 	args->version = NFS_ARGSVERSION;
 	args->sotype = SOCK_DGRAM;
 	args->flags = NFSMNT_RSIZE | NFSMNT_WSIZE | NFSMNT_RESVPORT;
 	args->rsize = 8192;
 	args->wsize = 8192;
 
 	/* Without an option string the defaults stand as they are. */
 	if (p == NULL)
 		return;
 	nfs_parse_options(p, args);
 }
 
 /*
  * Copy len bytes from the front of the mbuf chain *mptr into buf and
  * trim them, together with the padding up to the next multiple of four
  * bytes, off the chain.
  *
  * Returns 0 on success.  If the chain cannot be made contiguous for the
  * padded length, *mptr is set to NULL and EBADRPC is returned.
  */
 static int
 xdr_opaque_decode(struct mbuf **mptr, u_char *buf, int len)
 {
 	struct mbuf *mb;
 	int padded;
 
 	mb = *mptr;
 	/* Round the length up to a multiple of four bytes. */
 	padded = (len + 3) & ~3;
 
 	/* Make sure the first mbuf holds the whole padded item. */
 	if (mb->m_len < padded) {
 		mb = m_pullup(mb, padded);
 		if (mb == NULL) {
 			*mptr = NULL;
 			return EBADRPC;
 		}
 	}
 
 	bcopy(mtod(mb, u_char *), buf, len);
 	m_adj(mb, padded);
 	*mptr = mb;
 	return 0;
 }
 
 /*
  * Decode one 32-bit integer from the front of the mbuf chain *mptr into
  * *iptr.  Returns 0 on success or EBADRPC if the bytes cannot be read.
  */
 static int
 xdr_int_decode(struct mbuf **mptr, int *iptr)
 {
 	u_int32_t raw;
 	int rc;
 
 	rc = xdr_opaque_decode(mptr, (u_char *) &raw, sizeof(u_int32_t));
 	if (rc != 0)
 		return EBADRPC;
 
 	/* Convert from the on-the-wire representation. */
 	*iptr = fxdr_unsigned(u_int32_t, raw);
 	return 0;
 }
 
 /*
  * Print the IPv4 address held in a sockaddr_in; thin wrapper around
  * print_in_addr().
  */
 static void
 print_sin_addr(struct sockaddr_in *sin)
 {
 
 	print_in_addr(sin->sin_addr);
 }
 
 /*
  * Print an IPv4 address (given in network byte order) in dotted-quad
  * form, without a trailing newline.
  */
 static void
 print_in_addr(struct in_addr addr)
 {
 	unsigned int hostorder;
 	unsigned int o1, o2, o3, o4;
 
 	hostorder = ntohl(addr.s_addr);
 
 	/* Split into octets, most significant first. */
 	o1 = hostorder >> 24;
 	o2 = (hostorder >> 16) & 255;
 	o3 = (hostorder >> 8) & 255;
 	o4 = hostorder & 255;
 
 	printf("%d.%d.%d.%d", o1, o2, o3, o4);
 }
 
 static void
 bootpc_compose_query(struct bootpc_ifcontext *ifctx,
     struct bootpc_globalcontext *gctx, struct thread *td)
 {
 	unsigned char *vendp;
 	unsigned char vendor_client[64];
 	uint32_t leasetime;
 	uint8_t vendor_client_len;
 
 	ifctx->gotrootpath = 0;
 
 	bzero((caddr_t) &ifctx->call, sizeof(ifctx->call));
 
 	/* bootpc part */
 	ifctx->call.op = BOOTP_REQUEST; 	/* BOOTREQUEST */
 	ifctx->call.htype = 1;			/* 10mb ethernet */
 	ifctx->call.hlen = ifctx->sdl->sdl_alen;/* Hardware address length */
 	ifctx->call.hops = 0;
 	if (bootpc_ifctx_isunresolved(ifctx) != 0)
 		ifctx->xid++;
 	ifctx->call.xid = txdr_unsigned(ifctx->xid);
 	bcopy(LLADDR(ifctx->sdl), &ifctx->call.chaddr, ifctx->sdl->sdl_alen);
 
 	vendp = ifctx->call.vend;
 	*vendp++ = 99;		/* RFC1048 cookie */
 	*vendp++ = 130;
 	*vendp++ = 83;
 	*vendp++ = 99;
 	*vendp++ = TAG_MAXMSGSIZE;
 	*vendp++ = 2;
 	*vendp++ = (sizeof(struct bootp_packet) >> 8) & 255;
 	*vendp++ = sizeof(struct bootp_packet) & 255;
 
 	snprintf(vendor_client, sizeof(vendor_client), "%s:%s:%s",
 		ostype, MACHINE, osrelease);
 	vendor_client_len = strlen(vendor_client);
 	*vendp++ = TAG_VENDOR_INDENTIFIER;
 	*vendp++ = vendor_client_len;
 	memcpy(vendp, vendor_client, vendor_client_len);
 	vendp += vendor_client_len;;
 	ifctx->dhcpquerytype = DHCP_NOMSG;
 	switch (ifctx->state) {
 	case IF_DHCP_UNRESOLVED:
 		*vendp++ = TAG_DHCP_MSGTYPE;
 		*vendp++ = 1;
 		*vendp++ = DHCP_DISCOVER;
 		ifctx->dhcpquerytype = DHCP_DISCOVER;
 		ifctx->gotdhcpserver = 0;
 		break;
 	case IF_DHCP_OFFERED:
 		*vendp++ = TAG_DHCP_MSGTYPE;
 		*vendp++ = 1;
 		*vendp++ = DHCP_REQUEST;
 		ifctx->dhcpquerytype = DHCP_REQUEST;
 		*vendp++ = TAG_DHCP_REQ_ADDR;
 		*vendp++ = 4;
 		memcpy(vendp, &ifctx->reply.yiaddr, 4);
 		vendp += 4;
 		if (ifctx->gotdhcpserver != 0) {
 			*vendp++ = TAG_DHCP_SERVERID;
 			*vendp++ = 4;
 			memcpy(vendp, &ifctx->dhcpserver, 4);
 			vendp += 4;
 		}
 		*vendp++ = TAG_DHCP_LEASETIME;
 		*vendp++ = 4;
 		leasetime = htonl(300);
 		memcpy(vendp, &leasetime, 4);
 		vendp += 4;
 		break;
 	default:
 		break;
 	}
 	*vendp = TAG_END;
 
 	ifctx->call.secs = 0;
 	ifctx->call.flags = htons(0x8000); /* We need a broadcast answer */
 }
 
 /*
  * Return 1 if the vendor area of the packet begins with the four byte
  * cookie 99.130.83.99, 0 otherwise.
  */
 static int
 bootpc_hascookie(struct bootp_packet *bp)
 {
 	static const int cookie[4] = { 99, 130, 83, 99 };
 	int i;
 
 	for (i = 0; i < 4; i++) {
 		if (bp->vend[i] != cookie[i])
 			return 0;
 	}
 	return 1;
 }
 
 /*
  * Scan one option region (len bytes at start) for every occurrence of
  * the option "tag" and append the payloads to tctx->buf.
  *
  * Does nothing if an earlier scan already flagged an error in tctx.
  * TAG_PAD bytes are skipped and TAG_END stops the scan.  An option whose
  * length byte or payload would run past the region sets tctx->badopt;
  * accumulated payloads exceeding TAG_MAXLEN set tctx->badtag.  The value
  * of a TAG_OVERLOAD option is recorded in tctx->overload whatever tag is
  * being searched for.
  */
 static void
 bootpc_tag_helper(struct bootpc_tagcontext *tctx,
     unsigned char *start, int len, int tag)
 {
 	unsigned char *j;
 	unsigned char *ej;
 	unsigned char code;
 	int optlen;
 
 	if (tctx->badtag != 0 || tctx->badopt != 0)
 		return;
 
 	j = start;
 	ej = j + len;
 
 	while (j < ej) {
 		code = *j++;
 		if (code == TAG_PAD)
 			continue;
 		if (code == TAG_END)
 			return;
 		/* The length byte and the whole payload must be in bounds. */
 		if (j >= ej || j + *j + 1 > ej) {
 			tctx->badopt = 1;
 			return;
 		}
 		optlen = *j++;
 		if (code == tag) {
 			if (tctx->taglen + optlen > TAG_MAXLEN) {
 				tctx->badtag = 1;
 				return;
 			}
 			tctx->foundopt = 1;
 			if (optlen > 0)
 				memcpy(tctx->buf + tctx->taglen,
 				       j, optlen);
 			tctx->taglen += optlen;
 		}
 		/*
 		 * Only read the overload value when the option carries a
 		 * payload: with a zero length, j already points past the
 		 * option and possibly past the end of the region.
 		 */
 		if (code == TAG_OVERLOAD && optlen > 0)
 			tctx->overload = *j;
 
 		j += optlen;
 	}
 }
 
 /*
  * Look up option "tag" in a received packet of len bytes.
  *
  * The vendor area after the four byte cookie is scanned first; the file
  * and sname fields are scanned as well when the overload value found
  * there has the corresponding bit set.  Returns a pointer to the
  * NUL-terminated concatenation of the option's payloads in tctx->buf
  * (length in tctx->taglen), or NULL if the packet has no cookie, the
  * option is absent, or an option was malformed or too long.
  */
 static unsigned char *
 bootpc_tag(struct bootpc_tagcontext *tctx,
     struct bootp_packet *bp, int len, int tag)
 {
 	unsigned char *opts;
 	int failed;
 
 	/* Reset the per-lookup state. */
 	tctx->taglen = 0;
 	tctx->foundopt = 0;
 	tctx->badtag = 0;
 	tctx->badopt = 0;
 	tctx->overload = 0;
 
 	if (bootpc_hascookie(bp) == 0)
 		return NULL;
 
 	/* Options start right after the cookie and run to the packet end. */
 	opts = &bp->vend[4];
 	bootpc_tag_helper(tctx, opts, (unsigned char *) bp + len - opts, tag);
 
 	if ((tctx->overload & OVERLOAD_FILE) != 0)
 		bootpc_tag_helper(tctx, (unsigned char *) bp->file,
 				  sizeof(bp->file), tag);
 	if ((tctx->overload & OVERLOAD_SNAME) != 0)
 		bootpc_tag_helper(tctx, (unsigned char *) bp->sname,
 				  sizeof(bp->sname), tag);
 
 	failed = (tctx->badopt != 0 || tctx->badtag != 0);
 	if (failed || tctx->foundopt == 0)
 		return NULL;
 
 	tctx->buf[tctx->taglen] = '\0';
 	return tctx->buf;
 }
 
 /*
  * Extract the configuration carried by the reply stored for an interface.
  *
  * Records the interface address (yiaddr), gateway, netmask, root path and
  * root mount options, hostname and BOOTP cookie, printing each item as it
  * is found.  The root path and the hostname are taken only from the first
  * interface that offers them; later offers are printed as "(ignored)".
  * When no netmask option is present a classful mask is derived from the
  * address, and when no router option is present the interface's own
  * address is used as gateway.
  *
  * Panics on a subnet mask whose length is not 4, a router list whose
  * length is not a multiple of 4, an unparsable root path or a hostname
  * of MAXHOSTNAMELEN bytes or more.
  */
 static void
 bootpc_decode_reply(struct nfsv3_diskless *nd, struct bootpc_ifcontext *ifctx,
     struct bootpc_globalcontext *gctx)
 {
 	char *p;
 
 	ifctx->gotgw = 0;
 	ifctx->gotnetmask = 0;
 
 	clear_sinaddr(&ifctx->myaddr);
 	clear_sinaddr(&ifctx->netmask);
 	clear_sinaddr(&ifctx->gw);
 
 	ifctx->myaddr.sin_addr = ifctx->reply.yiaddr;
 
 	printf("%s at ", ifctx->ireq.ifr_name);
 	print_sin_addr(&ifctx->myaddr);
 	printf(" server ");
 	print_in_addr(ifctx->reply.siaddr);
 
 	ifctx->gw.sin_addr = ifctx->reply.giaddr;
 	if (ifctx->reply.giaddr.s_addr != htonl(INADDR_ANY)) {
 		printf(" via gateway ");
 		print_in_addr(ifctx->reply.giaddr);
 	}
 
 	/* This call used for the side effect (overload flag) */
 	(void) bootpc_tag(&gctx->tmptag,
 			  &ifctx->reply, ifctx->replylen, TAG_END);
 
 	/* sname/file are only text when they are not overloaded by options. */
 	if ((gctx->tmptag.overload & OVERLOAD_SNAME) == 0)
 		if (ifctx->reply.sname[0] != '\0')
 			printf(" server name %s", ifctx->reply.sname);
 	if ((gctx->tmptag.overload & OVERLOAD_FILE) == 0)
 		if (ifctx->reply.file[0] != '\0')
 			printf(" boot file %s", ifctx->reply.file);
 
 	printf("\n");
 
 	p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen,
 		       TAG_SUBNETMASK);
 	if (p != NULL) {
 		if (gctx->tag.taglen != 4)
 			panic("bootpc: subnet mask len is %d",
 			      gctx->tag.taglen);
 		bcopy(p, &ifctx->netmask.sin_addr, 4);
 		ifctx->gotnetmask = 1;
 		printf("subnet mask ");
 		print_sin_addr(&ifctx->netmask);
 		printf(" ");
 	}
 
 	p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen,
 		       TAG_ROUTERS);
 	if (p != NULL) {
 		/* Routers */
 		if (gctx->tag.taglen % 4)
 			panic("bootpc: Router Len is %d", gctx->tag.taglen);
 		if (gctx->tag.taglen > 0) {
 			/* Only the first router in the list is used. */
 			bcopy(p, &ifctx->gw.sin_addr, 4);
 			printf("router ");
 			print_sin_addr(&ifctx->gw);
 			printf(" ");
 			ifctx->gotgw = 1;
 			gctx->gotgw = 1;
 		}
 	}
 
 	p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen,
 		       TAG_ROOT);
 	if (p != NULL) {
 		if (gctx->setrootfs != NULL) {
 			printf("rootfs %s (ignored) ", p);
 		} else 	if (setfs(&nd->root_saddr,
 				  nd->root_hostnam, p, &ifctx->reply.siaddr)) {
 			/* A bare path means the server address was siaddr. */
 			if (*p == '/') {
 				printf("root_server ");
 				print_sin_addr(&nd->root_saddr);
 				printf(" ");
 			}
 			printf("rootfs %s ", p);
 			gctx->gotrootpath = 1;
 			ifctx->gotrootpath = 1;
 			gctx->setrootfs = ifctx;
 
 			p = bootpc_tag(&gctx->tag, &ifctx->reply,
 				       ifctx->replylen,
 				       TAG_ROOTOPTS);
 			if (p != NULL) {
 				mountopts(&nd->root_args, p);
 				printf("rootopts %s ", p);
 			}
 		} else
 			panic("Failed to set rootfs to %s", p);
 	}
 
 	p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen,
 		       TAG_HOSTNAME);
 	if (p != NULL) {
 		if (gctx->tag.taglen >= MAXHOSTNAMELEN)
 			panic("bootpc: hostname >= %d bytes",
 			      MAXHOSTNAMELEN);
 		if (gctx->sethostname != NULL) {
 			printf("hostname %s (ignored) ", p);
 		} else {
 			strcpy(nd->my_hostnam, p);
 			strcpy(hostname, p);
 			printf("hostname %s ", hostname);
 			gctx->sethostname = ifctx;
 		}
 	}
 	p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen,
 			TAG_COOKIE);
 	if (p != NULL) {        /* store in a sysctl variable */
 		int i, l = sizeof(bootp_cookie) - 1;
 		for (i = 0; i < l && p[i] != '\0'; i++)
 			bootp_cookie[i] = p[i];
 		/*
 		 * Terminate the copy itself.  i is at most
 		 * sizeof(bootp_cookie) - 1, so the index is in bounds.
 		 */
 		bootp_cookie[i] = '\0';
 	}
 
 
 	printf("\n");
 
 	if (ifctx->gotnetmask == 0) {
 		/* No netmask option: fall back to the classful mask. */
 		if (IN_CLASSA(ntohl(ifctx->myaddr.sin_addr.s_addr)))
 			ifctx->netmask.sin_addr.s_addr = htonl(IN_CLASSA_NET);
 		else if (IN_CLASSB(ntohl(ifctx->myaddr.sin_addr.s_addr)))
 			ifctx->netmask.sin_addr.s_addr = htonl(IN_CLASSB_NET);
 		else
 			ifctx->netmask.sin_addr.s_addr = htonl(IN_CLASSC_NET);
 	}
 	if (ifctx->gotgw == 0) {
 		/* Use proxyarp */
 		ifctx->gw.sin_addr.s_addr = ifctx->myaddr.sin_addr.s_addr;
 	}
 }
 
 /*
  * Configure networking, and optionally the NFS root, through BOOTP/DHCP.
  *
  * Does nothing if nfs_diskless_valid is already non-zero.  Otherwise one
  * interface context is allocated per candidate interface (a single one
  * when BOOTP_WIRED_TO is defined), each interface is brought up, the
  * queries are composed and sent, the replies are decoded and the
  * interfaces are reconfigured accordingly.  When a root path was
  * obtained, the root file handle is fetched with md_mount() and
  * nfs_diskless_valid is set to 3.  The address settings of the chosen
  * interface are stored in nfsv3_diskless.myif.
  */
 void
 bootpc_init(void)
 {
 	struct bootpc_ifcontext *ifctx, *nctx;	/* Interface BOOTP contexts */
 	struct bootpc_globalcontext *gctx; 	/* Global BOOTP context */
 	struct ifnet *ifp;
 	int error;
 #ifndef BOOTP_WIRED_TO
 	int ifcnt;
 #endif
 	struct nfsv3_diskless *nd;
 	struct thread *td;
 
 	nd = &nfsv3_diskless;
 	td = curthread;
 
 	/*
 	 * If already filled in, don't touch it here
 	 */
 	if (nfs_diskless_valid != 0)
 		return;
 
 	gctx = malloc(sizeof(*gctx), M_TEMP, M_WAITOK | M_ZERO);
 	if (gctx == NULL)
 		panic("Failed to allocate bootp global context structure");
 
 	gctx->xid = ~0xFFFF;
 	gctx->starttime = time_second;
 
 	/*
 	 * Find a network interface.
 	 */
 #ifdef BOOTP_WIRED_TO
 	printf("bootpc_init: wired to interface '%s'\n",
 	       __XSTRING(BOOTP_WIRED_TO));
 	allocifctx(gctx);
 #else
 	/*
 	 * Preallocate interface context storage, if another interface
 	 * attaches and wins the race, it won't be eligible for bootp.
 	 */
 	IFNET_RLOCK();
 	/* Count broadcast interfaces that are neither loopback nor p2p. */
 	for (ifp = TAILQ_FIRST(&ifnet), ifcnt = 0;
 	     ifp != NULL;
 	     ifp = TAILQ_NEXT(ifp, if_link)) {
 		if ((ifp->if_flags &
 		     (IFF_LOOPBACK | IFF_POINTOPOINT | IFF_BROADCAST)) !=
 		    IFF_BROADCAST)
 			continue;
 		ifcnt++;
 	}
 	IFNET_RUNLOCK();
 	if (ifcnt == 0)
 		panic("bootpc_init: no eligible interfaces");
 	for (; ifcnt > 0; ifcnt--)
 		allocifctx(gctx);
 #endif
 
 	/*
 	 * Bind the preallocated contexts to interfaces.  The name is copied
 	 * into the context before the eligibility test, so a skipped
 	 * interface's name is simply overwritten on the next iteration.
 	 */
 	IFNET_RLOCK();
 	for (ifp = TAILQ_FIRST(&ifnet), ifctx = gctx->interfaces;
 	     ifp != NULL && ifctx != NULL;
 	     ifp = TAILQ_NEXT(ifp, if_link)) {
 		strlcpy(ifctx->ireq.ifr_name, ifp->if_xname,
 		    sizeof(ifctx->ireq.ifr_name));
 #ifdef BOOTP_WIRED_TO
 		if (strcmp(ifctx->ireq.ifr_name,
 			   __XSTRING(BOOTP_WIRED_TO)) != 0)
 			continue;
 #else
 		if ((ifp->if_flags &
 		     (IFF_LOOPBACK | IFF_POINTOPOINT | IFF_BROADCAST)) !=
 		    IFF_BROADCAST)
 			continue;
 #endif
 		ifctx->ifp = ifp;
 		ifctx = ifctx->next;
 	}
 	IFNET_RUNLOCK();
 
 	if (gctx->interfaces == NULL || gctx->interfaces->ifp == NULL) {
 #ifdef BOOTP_WIRED_TO
 		panic("bootpc_init: Could not find interface specified "
 		      "by BOOTP_WIRED_TO: "
 		      __XSTRING(BOOTP_WIRED_TO));
 #else
 		panic("bootpc_init: no suitable interface");
 #endif
 	}
 
 	for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next)
 		bootpc_fakeup_interface(ifctx, gctx, td);
 
 	for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next)
 		bootpc_compose_query(ifctx, gctx, td);
 
 	error = bootpc_call(gctx, td);
 
 	/* A failed exchange is fatal only when an NFS root is mandatory. */
 	if (error != 0) {
 #ifdef BOOTP_NFSROOT
 		panic("BOOTP call failed");
 #else
 		printf("BOOTP call failed\n");
 #endif
 	}
 
 	rootdevnames[0] = "nfs:";
 	mountopts(&nd->root_args, NULL);
 
 	for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next)
 		if (bootpc_ifctx_isresolved(ifctx) != 0)
 			bootpc_decode_reply(nd, ifctx, gctx);
 
 #ifdef BOOTP_NFSROOT
 	if (gctx->gotrootpath == 0)
 		panic("bootpc: No root path offered");
 #endif
 
 	for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) {
 		bootpc_adjust_interface(ifctx, gctx, td);
 
 		soclose(ifctx->so);
 	}
 
 	/*
 	 * Choose the interface to report: the one that supplied the root
 	 * path if there is one, otherwise the first resolved interface.
 	 */
 	for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next)
 		if (ifctx->gotrootpath != 0)
 			break;
 	if (ifctx == NULL) {
 		for (ifctx = gctx->interfaces;
 		     ifctx != NULL;
 		     ifctx = ifctx->next)
 			if (bootpc_ifctx_isresolved(ifctx) != 0)
 				break;
 	}
 	if (ifctx == NULL)
 		goto out;
 
 	if (gctx->gotrootpath != 0) {
 
 		setenv("boot.netif.name", ifctx->ifp->if_xname);
 
 		error = md_mount(&nd->root_saddr, nd->root_hostnam,
 				 nd->root_fh, &nd->root_fhsize,
 				 &nd->root_args, td);
 		if (error != 0)
 			panic("nfs_boot: mountd root, error=%d", error);
 
 		nfs_diskless_valid = 3;
 	}
 
 	/* Record name, address, broadcast address and mask in nd->myif. */
 	strcpy(nd->myif.ifra_name, ifctx->ireq.ifr_name);
 	bcopy(&ifctx->myaddr, &nd->myif.ifra_addr, sizeof(ifctx->myaddr));
 	bcopy(&ifctx->myaddr, &nd->myif.ifra_broadaddr, sizeof(ifctx->myaddr));
 	((struct sockaddr_in *) &nd->myif.ifra_broadaddr)->sin_addr.s_addr =
 		ifctx->myaddr.sin_addr.s_addr |
 		~ ifctx->netmask.sin_addr.s_addr;
 	bcopy(&ifctx->netmask, &nd->myif.ifra_mask, sizeof(ifctx->netmask));
 
 out:
 	/* Release every interface context, then the global context. */
 	for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = nctx) {
 		nctx = ifctx->next;
 		free(ifctx, M_TEMP);
 	}
 	free(gctx, M_TEMP);
 }
 
 /*
  * RPC: mountd/mount
  * Given a server pathname, get an NFS file handle.
  * Also, sets mdsin->sin_port to the NFS service port.
  *
  * With BOOTP_NFSV3 the version 3 mount protocol is tried first and
  * NFSMNT_NFSV3 is set in args->flags when it succeeds; otherwise, or on
  * failure, version 1 of the mount protocol is used.  The file handle is
  * stored in fhp and its size in *fhsizep.  For version 3 the reply must
  * list RPCAUTH_UNIX among its authentication flavors.
  *
  * Returns 0 on success, the error from the RPC layer, or EBADRPC when
  * the reply reports a failure or cannot be decoded.
  */
 static int
 md_mount(struct sockaddr_in *mdsin, char *path, u_char *fhp, int *fhsizep,
     struct nfs_args *args, struct thread *td)
 {
 	struct mbuf *m;
 	int error;
 	int authunixok;
 	int authcount;
 	int authver;
 
 	/* XXX honor v2/v3 flags in args->flags? */
 #ifdef BOOTP_NFSV3
 	/* First try NFS v3 */
 	/* Get port number for MOUNTD. */
 	error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER3,
 			     &mdsin->sin_port, td);
 	if (error == 0) {
 		m = xdr_string_encode(path, strlen(path));
 
 		/* Do RPC to mountd. */
 		error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER3,
 				  RPCMNT_MOUNT, &m, NULL, td);
 	}
 	if (error == 0) {
 		args->flags |= NFSMNT_NFSV3;
 	} else {
 #endif
 		/* Fallback to NFS v2 */
 
 		/* Get port number for MOUNTD. */
 		error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER1,
 				     &mdsin->sin_port, td);
 		if (error != 0)
 			return error;
 
 		m = xdr_string_encode(path, strlen(path));
 
 		/* Do RPC to mountd. */
 		error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER1,
 				  RPCMNT_MOUNT, &m, NULL, td);
 		if (error != 0)
 			return error;	/* message already freed */
 
 #ifdef BOOTP_NFSV3
 	}
 #endif
 
 	/* The reply starts with a status word; non-zero means failure. */
 	if (xdr_int_decode(&m, &error) != 0 || error != 0)
 		goto bad;
 
 	/* v3 replies carry the handle size; v2 handles have a fixed size. */
 	if ((args->flags & NFSMNT_NFSV3) != 0) {
 		if (xdr_int_decode(&m, fhsizep) != 0 ||
 		    *fhsizep > NFSX_V3FHMAX ||
 		    *fhsizep <= 0)
 			goto bad;
 	} else
 		*fhsizep = NFSX_V2FH;
 
 	if (xdr_opaque_decode(&m, fhp, *fhsizep) != 0)
 		goto bad;
 
 	if (args->flags & NFSMNT_NFSV3) {
 		/* Walk the flavor list (at most 100 entries accepted). */
 		if (xdr_int_decode(&m, &authcount) != 0)
 			goto bad;
 		authunixok = 0;
 		if (authcount < 0 || authcount > 100)
 			goto bad;
 		while (authcount > 0) {
 			if (xdr_int_decode(&m, &authver) != 0)
 				goto bad;
 			if (authver == RPCAUTH_UNIX)
 				authunixok = 1;
 			authcount--;
 		}
 		if (authunixok == 0)
 			goto bad;
 	}
 
 	/* Set port number for NFS use. */
 	error = krpc_portmap(mdsin, NFS_PROG,
 			     (args->flags &
 			      NFSMNT_NFSV3) ? NFS_VER3 : NFS_VER2,
 			     &mdsin->sin_port, td);
 
 	goto out;
 
 bad:
 	error = EBADRPC;
 
 out:
 	m_freem(m);
 	return error;
 }
 
 /* Register bootpc_init() with SYSINIT at SI_SUB_ROOT_CONF, SI_ORDER_FIRST. */
 SYSINIT(bootp_rootconf, SI_SUB_ROOT_CONF, SI_ORDER_FIRST, bootpc_init, NULL);
Index: head/sys/nfsclient/nfs_vfsops.c
===================================================================
--- head/sys/nfsclient/nfs_vfsops.c	(revision 178887)
+++ head/sys/nfsclient/nfs_vfsops.c	(revision 178888)
@@ -1,1163 +1,1164 @@
 /*-
  * Copyright (c) 1989, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)nfs_vfsops.c	8.12 (Berkeley) 5/20/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 
 #include "opt_bootp.h"
 #include "opt_nfsroot.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/mount.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
 #include <sys/signalvar.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <netinet/in.h>
 
 #include <rpc/rpcclnt.h>
 
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
 #include <nfsclient/nfs.h>
 #include <nfsclient/nfsnode.h>
 #include <nfsclient/nfsmount.h>
 #include <nfs/xdr_subs.h>
 #include <nfsclient/nfsm_subs.h>
 #include <nfsclient/nfsdiskless.h>
 
 /* malloc(9) types used by the NFS client. */
 MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header");
 MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle");
 MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data");
 MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables");
 MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state");
 
 /* UMA zone that struct nfsmount is allocated from (see mountnfs()). */
 uma_zone_t nfsmount_zone;
 
 /* RPC counters; exported read/write as vfs.nfs.nfsstats below. */
 struct nfsstats	nfsstats;
 
 SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
 SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
 	&nfsstats, nfsstats, "S,nfsstats");
 /*
  * Defaults to 1 (paranoid).  When set to 0, nfs_mount() adds NFSMNT_NOCONN
  * to the flags of new mounts.
  */
 static int nfs_ip_paranoia = 1;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
     &nfs_ip_paranoia, 0, "");
 #ifdef NFS_DEBUG
 int nfs_debug;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, "");
 #endif
 /*
  * Default for a mount's nm_tprintf_initial_delay; mountnfs() copies it
  * into each new mount and clamps negative values to 0.
  */
 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
 /*
  * How long between console messages "nfs server foo not responding".
  * Copied into nm_tprintf_delay by mountnfs(), clamped to be non-negative.
  */
 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
 
 /* Forward declarations of the file-local helpers and VFS entry points. */
 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
 		    struct nfs_args *argp);
 static int	mountnfs(struct nfs_args *, struct mount *,
 		    struct sockaddr *, char *, struct vnode **,
 		    struct ucred *cred);
 static vfs_mount_t nfs_mount;
 static vfs_cmount_t nfs_cmount;
 static vfs_unmount_t nfs_unmount;
 static vfs_root_t nfs_root;
 static vfs_statfs_t nfs_statfs;
 static vfs_sync_t nfs_sync;
 static vfs_sysctl_t nfs_sysctl;
 
 /*
  * nfs vfs operations.
  *
  * nfs_init and nfs_uninit are not defined in this file.
  */
 static struct vfsops nfs_vfsops = {
 	.vfs_init =		nfs_init,
 	.vfs_mount =		nfs_mount,
 	.vfs_cmount =		nfs_cmount,
 	.vfs_root =		nfs_root,
 	.vfs_statfs =		nfs_statfs,
 	.vfs_sync =		nfs_sync,
 	.vfs_uninit =		nfs_uninit,
 	.vfs_unmount =		nfs_unmount,
 	.vfs_sysctl =		nfs_sysctl,
 };
 /* Register the operations vector under the name "nfs" as a network fs. */
 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
 
 /* So that loader and kldload(2) can find us, wherever we are.. */
 MODULE_VERSION(nfs, 1);
 
 /*
  * RPC operations table; mountnfs() stores its address in nm_rpcops.
  * The initializers are positional, so their order must match the member
  * order of struct nfs_rpcops (declared outside this file).
  */
 static struct nfs_rpcops nfs_rpcops = {
 	nfs_readrpc,
 	nfs_writerpc,
 	nfs_writebp,
 	nfs_readlinkrpc,
 	nfs_invaldir,
 	nfs_commit,
 };
 
 /*
  * This structure must be filled in by a primary bootstrap or bootstrap
  * server for a diskless/dataless machine. It is initialized below just
  * to ensure that it is allocated to initialized data (.data not .bss).
  *
  * nfs_diskless_valid tells nfs_mountroot() what has been filled in:
  * 0 means nothing (the root mount is refused), 1 means only the old
  * nfs_diskless structure, which nfs_convert_diskless() then converts
  * into nfsv3_diskless, setting the value to 3.
  */
 struct nfs_diskless nfs_diskless = { { { 0 } } };
 struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
 int nfs_diskless_valid = 0;
 
 /* Read-only views of the diskless boot state under vfs.nfs. */
 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
 	&nfs_diskless_valid, 0, "");
 
 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
 	nfsv3_diskless.root_hostnam, 0, "");
 
 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
 	&nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
 	"%Ssockaddr_in", "");
 
 
 /* nfsargs_ntoh() is declared here but not defined in this file. */
 void		nfsargs_ntoh(struct nfs_args *);
 static int	nfs_mountdiskless(char *,
 		    struct sockaddr_in *, struct nfs_args *,
 		    struct thread *, struct vnode **, struct mount *);
 static void	nfs_convert_diskless(void);
 static void	nfs_convert_oargs(struct nfs_args *args,
 		    struct onfs_args *oargs);
 
 /*
  * Return the size to use for I/O buffers on the given mount.
  */
 int
 nfs_iosize(struct nfsmount *nmp)
 {
 
 	/*
 	 * Take the larger of the read and write transfer sizes so that
 	 * as few NFS requests as possible are issued, but never go below
 	 * one VM page, which would waste buffer space.
 	 */
 	return (imax(imax(nmp->nm_rsize, nmp->nm_wsize), PAGE_SIZE));
 }
 
 /*
  * Build a current-version struct nfs_args from an old-format
  * struct onfs_args.  Only the fields assigned below are written;
  * every other member of *args is left untouched.
  */
 static void
 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
 {
 
 	/* The result is always stamped with the current layout version. */
 	args->version = NFS_ARGSVERSION;
 
 	/* Server address, transport and host name. */
 	args->addr = oargs->addr;
 	args->addrlen = oargs->addrlen;
 	args->sotype = oargs->sotype;
 	args->proto = oargs->proto;
 	args->hostname = oargs->hostname;
 
 	/* Root file handle. */
 	args->fh = oargs->fh;
 	args->fhsize = oargs->fhsize;
 
 	/* Mount flags and tunables. */
 	args->flags = oargs->flags;
 	args->wsize = oargs->wsize;
 	args->rsize = oargs->rsize;
 	args->readdirsize = oargs->readdirsize;
 	args->timeo = oargs->timeo;
 	args->retrans = oargs->retrans;
 	args->maxgrouplist = oargs->maxgrouplist;
 	args->readahead = oargs->readahead;
 	args->deadthresh = oargs->deadthresh;
 }
 
 static void
 nfs_convert_diskless(void)
 {
 
 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
 		sizeof(struct ifaliasreq));
 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
 		sizeof(struct sockaddr_in));
 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
 		nfsv3_diskless.root_fhsize = NFSX_V3FH;
 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH);
 	} else {
 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
 	}
 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
 		sizeof(struct sockaddr_in));
 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
 	nfsv3_diskless.root_time = nfs_diskless.root_time;
 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
 		MAXHOSTNAMELEN);
 	nfs_diskless_valid = 3;
 }
 
 /*
  * nfs statfs call
  *
  * Issue an FSSTAT RPC against the root file handle of the mount and
  * fill in the block and file counts of *sbp from the reply.  Returns 0
  * on success or an error from vfs_busy(), nfs_nget() or the RPC.
  *
  * NOTE(review): the nfsm_* macros used below are defined elsewhere; they
  * presumably operate on the locals mreq/mrep/md/mb/bpos/dpos/error and
  * may jump to the nfsmout label on failure -- confirm in nfsm_subs.h
  * before renaming or reordering anything here.
  */
 static int
 nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
 {
 	struct vnode *vp;
 	struct nfs_statfs *sfp;
 	caddr_t bpos, dpos;
 	struct nfsmount *nmp = VFSTONFS(mp);
 	int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
 	struct mbuf *mreq, *mrep, *md, *mb;
 	struct nfsnode *np;
 	u_quad_t tquad;
 
 #ifndef nolint
 	sfp = NULL;
 #endif
 	/* Do not sleep for a busy mount point; just report the failure. */
 	error = vfs_busy(mp, LK_NOWAIT, NULL, td);
 	if (error)
 		return (error);
 	/* Get the (exclusively locked) node for the mount's root handle. */
 	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
 	if (error) {
 		vfs_unbusy(mp, td);
 		return (error);
 	}
 	vp = NFSTOV(np);
 	/*
 	 * For a v3 mount that has not fetched FSINFO yet, do so now.  The
 	 * state is tested under nm_mtx, but the mutex is dropped before the
 	 * RPC is issued.  The result of nfs_fsinfo() is deliberately ignored.
 	 */
 	mtx_lock(&nmp->nm_mtx);
 	if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
 		mtx_unlock(&nmp->nm_mtx);		
 		(void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
 	} else
 		mtx_unlock(&nmp->nm_mtx);
 	/* Build and send the FSSTAT request. */
 	nfsstats.rpccnt[NFSPROC_FSSTAT]++;
 	mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3));
 	mb = mreq;
 	bpos = mtod(mb, caddr_t);
 	nfsm_fhtom(vp, v3);
 	nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred);
 	if (v3)
 		nfsm_postop_attr(vp, retattr);
 	if (error) {
 		if (mrep != NULL)
 			m_freem(mrep);
 		goto nfsmout;
 	}
 	sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
 	mtx_lock(&nmp->nm_mtx);
 	sbp->f_iosize = nfs_iosize(nmp);
 	mtx_unlock(&nmp->nm_mtx);
 	if (v3) {
 		/*
 		 * The v3 reply carries 64-bit quantities that are divided by
 		 * NFS_FABLKSIZE to give block counts.  For the file counts
 		 * only word [1] of each 64-bit value is used, masked to 31
 		 * bits.
 		 */
 		sbp->f_bsize = NFS_FABLKSIZE;
 		tquad = fxdr_hyper(&sfp->sf_tbytes);
 		sbp->f_blocks = tquad / NFS_FABLKSIZE;
 		tquad = fxdr_hyper(&sfp->sf_fbytes);
 		sbp->f_bfree = tquad / NFS_FABLKSIZE;
 		tquad = fxdr_hyper(&sfp->sf_abytes);
 		sbp->f_bavail = tquad / NFS_FABLKSIZE;
 		sbp->f_files = (fxdr_unsigned(int32_t,
 		    sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
 		sbp->f_ffree = (fxdr_unsigned(int32_t,
 		    sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
 	} else {
 		/* The v2 branch takes block size and counts as given; file counts are set to 0. */
 		sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
 		sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
 		sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
 		sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
 		sbp->f_files = 0;
 		sbp->f_ffree = 0;
 	}
 	m_freem(mrep);
 nfsmout:
 	/* Common exit: unlock and release the root vnode, unbusy the mount. */
 	vput(vp);
 	vfs_unbusy(mp, td);
 	return (error);
 }
 
 /*
  * nfs version 3 fsinfo rpc call
  *
  * Send an FSINFO request for vp and use the reply to lower the mount's
  * write, read and readdir transfer sizes and its maximum file size.
  * The values are only ever reduced, never raised.  On success the
  * NFSSTA_GOTFSINFO state bit is set.  Returns 0 or the RPC error.
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they presumably
  * use the locals mreq/mrep/md/mb/bpos/dpos/error and may jump to nfsmout
  * on failure -- confirm in nfsm_subs.h.
  */
 int
 nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
     struct thread *td)
 {
 	struct nfsv3_fsinfo *fsp;
 	u_int32_t pref, max;
 	caddr_t bpos, dpos;
 	int error = 0, retattr;
 	struct mbuf *mreq, *mrep, *md, *mb;
 	u_int64_t maxfsize;
 	
 	nfsstats.rpccnt[NFSPROC_FSINFO]++;
 	mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1));
 	mb = mreq;
 	bpos = mtod(mb, caddr_t);
 	nfsm_fhtom(vp, 1);
 	nfsm_request(vp, NFSPROC_FSINFO, td, cred);
 	nfsm_postop_attr(vp, retattr);
 	if (!error) {
 		fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
 		pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
 		/* All mount parameters below are updated under nm_mtx. */
 		mtx_lock(&nmp->nm_mtx);
 		/*
 		 * Write size: adopt the server's preferred size if it is
 		 * smaller than ours (rounded up to NFS_FABLKSIZE), then clamp
 		 * to the server's maximum (rounded down, or used as is when
 		 * rounding down would give 0).
 		 */
 		if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
 			nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
 				~(NFS_FABLKSIZE - 1);
 		max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
 		if (max < nmp->nm_wsize && max > 0) {
 			nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
 			if (nmp->nm_wsize == 0)
 				nmp->nm_wsize = max;
 		}
 		/* Read size: same treatment using fs_rtpref / fs_rtmax. */
 		pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
 		if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
 			nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
 				~(NFS_FABLKSIZE - 1);
 		max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
 		if (max < nmp->nm_rsize && max > 0) {
 			nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
 			if (nmp->nm_rsize == 0)
 				nmp->nm_rsize = max;
 		}
 		/*
 		 * Readdir size: preferred size from fs_dtpref, in units of
 		 * NFS_DIRBLKSIZ.  "max" still holds fs_rtmax from above, so
 		 * the readdir size is clamped to the maximum read size.
 		 */
 		pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
 		if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
 			nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
 				~(NFS_DIRBLKSIZ - 1);
 		if (max < nmp->nm_readdirsize && max > 0) {
 			nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
 			if (nmp->nm_readdirsize == 0)
 				nmp->nm_readdirsize = max;
 		}
 		/* Lower the file size limit if the server's is smaller. */
 		maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
 		if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
 			nmp->nm_maxfilesize = maxfsize;
 		/* The transfer sizes may have changed; refresh f_iosize. */
 		nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
 		nmp->nm_state |= NFSSTA_GOTFSINFO;
 		mtx_unlock(&nmp->nm_mtx);
 	}
 	m_freem(mrep);
 nfsmout:
 	return (error);
 }
 
 /*
  * Mount a remote root fs via. nfs. This depends on the info in the
  * nfs_diskless structure that has been filled in properly by some primary
  * bootstrap.
  * It goes something like this:
  * - do enough of "ifconfig" by calling ifioctl() so that the system
  *   can talk to the server
  * - If nfs_diskless.mygateway is filled in, use that address as
  *   a default gateway.
  * - build the rootfs mount point and call mountnfs() to do the rest.
  *
  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
  * structure, as well as other global NFS client variables here, as
  * nfs_mountroot() will be called once in the boot before any other NFS
  * client activity occurs.
  */
 int
 nfs_mountroot(struct mount *mp, struct thread *td)
 {
 	struct nfsv3_diskless *nd = &nfsv3_diskless;
 	struct socket *so;
 	struct vnode *vp;
 	struct ifreq ir;
 	int error, i;
 	u_long l;
 	char buf[128];
 	char *cp;
 
 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
 #elif defined(NFS_ROOT)
 	nfs_setup_diskless();
 #endif
 
 	if (nfs_diskless_valid == 0)
 		return (-1);
 	if (nfs_diskless_valid == 1)
 		nfs_convert_diskless();
 
 	/*
 	 * XXX splnet, so networks will receive...
 	 */
 	splnet();
 
 	/*
 	 * Do enough of ifconfig(8) so that the critical net interface can
 	 * talk to the server.
 	 */
 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
 	    td->td_ucred, td);
 	if (error)
 		panic("nfs_mountroot: socreate(%04x): %d",
 			nd->myif.ifra_addr.sa_family, error);
 
 #if 0 /* XXX Bad idea */
 	/*
 	 * We might not have been told the right interface, so we pass
 	 * over the first ten interfaces of the same kind, until we get
 	 * one of them configured.
 	 */
 
 	for (i = strlen(nd->myif.ifra_name) - 1;
 		nd->myif.ifra_name[i] >= '0' &&
 		nd->myif.ifra_name[i] <= '9';
 		nd->myif.ifra_name[i] ++) {
 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
 		if(!error)
 			break;
 	}
 #endif
 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
 	if (error)
 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
 	if ((cp = getenv("boot.netif.mtu")) != NULL) {
 		ir.ifr_mtu = strtol(cp, NULL, 10);
 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
 		freeenv(cp);
 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
 		if (error)
 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
 	}
 	soclose(so);
 
 	/*
 	 * If the gateway field is filled in, set it as the default route.
 	 * Note that pxeboot will set a default route of 0 if the route
 	 * is not set by the DHCP server.  Check also for a value of 0
 	 * to avoid panicking inappropriately in that situation.
 	 */
 	if (nd->mygateway.sin_len != 0 &&
 	    nd->mygateway.sin_addr.s_addr != 0) {
 		struct sockaddr_in mask, sin;
 
 		bzero((caddr_t)&mask, sizeof(mask));
 		sin = mask;
 		sin.sin_family = AF_INET;
 		sin.sin_len = sizeof(sin);
+                /* XXX MRT use table 0 for this sort of thing */
 		error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
 		    (struct sockaddr *)&nd->mygateway,
 		    (struct sockaddr *)&mask,
 		    RTF_UP | RTF_GATEWAY, NULL);
 		if (error)
 			panic("nfs_mountroot: RTM_ADD: %d", error);
 	}
 
 	/*
 	 * Create the rootfs mount point.
 	 */
 	nd->root_args.fh = nd->root_fh;
 	nd->root_args.fhsize = nd->root_fhsize;
 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
 		(l >> 24) & 0xff, (l >> 16) & 0xff,
 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
 	printf("NFS ROOT: %s\n", buf);
 	if ((error = nfs_mountdiskless(buf,
 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
 		return (error);
 	}
 
 	/*
 	 * This is not really an nfs issue, but it is much easier to
 	 * set hostname here and then let the "/etc/rc.xxx" files
 	 * mount the right /var based upon its preset value.
 	 */
 	bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN);
 	hostname[MAXHOSTNAMELEN - 1] = '\0';
 	for (i = 0; i < MAXHOSTNAMELEN; i++)
 		if (hostname[i] == '\0')
 			break;
 	inittodr(ntohl(nd->root_time));
 	return (0);
 }
 
 /*
  * Internal version of mount system call for diskless setup.
  */
 static int
 nfs_mountdiskless(char *path,
     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
     struct vnode **vpp, struct mount *mp)
 {
 	struct sockaddr *nam;
 	int error;
 
 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
 	if ((error = mountnfs(args, mp, nam, path, vpp,
 	    td->td_ucred)) != 0) {
 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
 		return (error);
 	}
 	return (0);
 }
 
 /*
  * Apply the mount arguments in *argp to the mount state in *nmp.
  *
  * Called for new mounts from mountnfs() and for updates from nfs_mount().
  * Flags are merged into nm_flag; transfer sizes, timeouts, attribute
  * cache times and the other tunables are range-checked before being
  * stored.  If the transport parameters changed while a socket exists,
  * the socket is torn down (and, for datagram sockets, reconnected).
  */
 static void
 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp)
 {
 	int s;
 	int adjsock;
 	int maxio;
 
 	s = splnet();
 
 	/*
 	 * Set read-only flag if requested; otherwise, clear it if this is
 	 * an update.  If this is not an update, then either the read-only
 	 * flag is already clear, or this is a root mount and it was set
 	 * intentionally at some previous point.
 	 */
 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
 		MNT_ILOCK(mp);
 		mp->mnt_flag |= MNT_RDONLY;
 		MNT_IUNLOCK(mp);
 	} else if (mp->mnt_flag & MNT_UPDATE) {
 		MNT_ILOCK(mp);
 		mp->mnt_flag &= ~MNT_RDONLY;
 		MNT_IUNLOCK(mp);
 	}
 
 	/*
 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
 	 * no sense in that context.  Also, set up appropriate retransmit
 	 * and soft timeout behavior.
 	 */
 	if (argp->sotype == SOCK_STREAM) {
 		nmp->nm_flag &= ~NFSMNT_NOCONN;
 		nmp->nm_flag |= NFSMNT_DUMBTIMR;
 		nmp->nm_timeo = NFS_MAXTIMEO;
 		nmp->nm_retry = NFS_RETRANS_TCP;
 	}
 
 	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
 	if ((argp->flags & NFSMNT_NFSV3) == 0)
 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
 
 	/* Re-bind if rsrvd port requested and wasn't on one */
 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
 		  && (argp->flags & NFSMNT_RESVPORT);
 	/* Also re-bind if we're switching to/from a connected UDP socket */
 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
 		    (argp->flags & NFSMNT_NOCONN));
 
 	/*
 	 * Update flags atomically.  Don't change the lock bits.
 	 *
 	 * NOTE(review): this is a plain OR, so bits cleared from nm_flag
 	 * above (NOCONN, RDIRPLUS) are set again if they are present in
 	 * argp->flags -- confirm that this is intended.
 	 */
 	nmp->nm_flag = argp->flags | nmp->nm_flag;
 	splx(s);
 
 	/*
 	 * Request timeout: converted from tenths of a second to NFS_HZ
 	 * ticks (rounded) and clamped to [NFS_MINTIMEO, NFS_MAXTIMEO].
 	 */
 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
 		if (nmp->nm_timeo < NFS_MINTIMEO)
 			nmp->nm_timeo = NFS_MINTIMEO;
 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
 			nmp->nm_timeo = NFS_MAXTIMEO;
 	}
 
 	/* Retransmit count: honoured only if > 1, capped at NFS_MAXREXMIT. */
 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
 		nmp->nm_retry = argp->retrans;
 		if (nmp->nm_retry > NFS_MAXREXMIT)
 			nmp->nm_retry = NFS_MAXREXMIT;
 	}
 
 	/* Largest transfer allowed for this protocol version/transport. */
 	if (argp->flags & NFSMNT_NFSV3) {
 		if (argp->sotype == SOCK_DGRAM)
 			maxio = NFS_MAXDGRAMDATA;
 		else
 			maxio = NFS_MAXDATA;
 	} else
 		maxio = NFS_V2MAXDATA;
 
 	/*
 	 * Write size.  The caps against maxio and MAXBSIZE apply even when
 	 * the caller did not supply a size.
 	 */
 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
 		nmp->nm_wsize = argp->wsize;
 		/* Round down to multiple of blocksize */
 		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
 		if (nmp->nm_wsize <= 0)
 			nmp->nm_wsize = NFS_FABLKSIZE;
 	}
 	if (nmp->nm_wsize > maxio)
 		nmp->nm_wsize = maxio;
 	if (nmp->nm_wsize > MAXBSIZE)
 		nmp->nm_wsize = MAXBSIZE;
 
 	/* Read size: handled the same way as the write size. */
 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
 		nmp->nm_rsize = argp->rsize;
 		/* Round down to multiple of blocksize */
 		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
 		if (nmp->nm_rsize <= 0)
 			nmp->nm_rsize = NFS_FABLKSIZE;
 	}
 	if (nmp->nm_rsize > maxio)
 		nmp->nm_rsize = maxio;
 	if (nmp->nm_rsize > MAXBSIZE)
 		nmp->nm_rsize = MAXBSIZE;
 
 	/* Readdir size: never larger than maxio or the read size. */
 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
 		nmp->nm_readdirsize = argp->readdirsize;
 	}
 	if (nmp->nm_readdirsize > maxio)
 		nmp->nm_readdirsize = maxio;
 	if (nmp->nm_readdirsize > nmp->nm_rsize)
 		nmp->nm_readdirsize = nmp->nm_rsize;
 
 	/*
 	 * Attribute cache timeouts.  Each one falls back to its compiled-in
 	 * default when not supplied, so an update that omits a value resets
 	 * it.  Afterwards each minimum is forced to be <= its maximum.
 	 */
 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
 		nmp->nm_acregmin = argp->acregmin;
 	else
 		nmp->nm_acregmin = NFS_MINATTRTIMO;
 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
 		nmp->nm_acregmax = argp->acregmax;
 	else
 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
 		nmp->nm_acdirmin = argp->acdirmin;
 	else
 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
 		nmp->nm_acdirmax = argp->acdirmax;
 	else
 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
 		nmp->nm_acdirmin = nmp->nm_acdirmax;
 	if (nmp->nm_acregmin > nmp->nm_acregmax)
 		nmp->nm_acregmin = nmp->nm_acregmax;
 
 	/* Remaining tunables: accepted if non-negative, capped at a limit. */
 	if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
 		if (argp->maxgrouplist <= NFS_MAXGRPS)
 			nmp->nm_numgrps = argp->maxgrouplist;
 		else
 			nmp->nm_numgrps = NFS_MAXGRPS;
 	}
 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
 		if (argp->readahead <= NFS_MAXRAHEAD)
 			nmp->nm_readahead = argp->readahead;
 		else
 			nmp->nm_readahead = NFS_MAXRAHEAD;
 	}
 	/* The write commit size is never smaller than one write. */
 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
 		if (argp->wcommitsize < nmp->nm_wsize)
 			nmp->nm_wcommitsize = nmp->nm_wsize;
 		else
 			nmp->nm_wcommitsize = argp->wcommitsize;
 	}
 	if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
 		if (argp->deadthresh <= NFS_MAXDEADTHRESH)
 			nmp->nm_deadthresh = argp->deadthresh;
 		else
 			nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
 	}
 
 	/* A change of socket type or protocol also requires a new socket. */
 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
 		    (nmp->nm_soproto != argp->proto));
 	nmp->nm_sotype = argp->sotype;
 	nmp->nm_soproto = argp->proto;
 
 	/*
 	 * Drop the existing socket if it no longer matches.  Datagram
 	 * sockets are reconnected right away, retrying until nfs_connect()
 	 * succeeds.
 	 */
 	if (nmp->nm_so && adjsock) {
 		nfs_safedisconnect(nmp);
 		if (nmp->nm_sotype == SOCK_DGRAM)
 			while (nfs_connect(nmp, NULL)) {
 				printf("nfs_args: retrying connect\n");
 				(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
 			}
 	}
 }
 
 /*
  * NULL-terminated list of mount option names passed to vfs_filteropt()
  * by nfs_mount(), which fails with EINVAL when vfs_filteropt() reports
  * a problem with the supplied options.
  */
 static const char *nfs_opts[] = { "from", "nfs_args",
     "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
     "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
     "async", "dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport",
     "readdirsize", "soft", "hard", "mntudp", "tcp", "wsize", "rsize",
     "retrans", "acregmin", "acregmax", "acdirmin", "acdirmax", 
     NULL };
 
 /*
  * VFS Operations.
  *
  * mount system call
  * It seems a bit dumb to copyinstr() the host and path here and then
  * bcopy() them in mountnfs(), but I wanted to detect errors before
  * doing the sockargs() call because sockargs() allocates an mbuf and
  * an error after that means that I have to release the mbuf.
  *
  * Three cases are handled:
  * - a root mount that is not an update is passed to nfs_mountroot();
  * - an update of an existing mount only re-applies the arguments via
  *   nfs_decode_args();
  * - otherwise the file handle, host name and server address are copied
  *   in from user space and mountnfs() does the actual mount.
  * On success, in all three cases, the mount is flagged MNTK_MPSAFE and
  * MNTK_LOOKUP_SHARED.
  */
 /* ARGSUSED */
 static int
 nfs_mount(struct mount *mp, struct thread *td)
 {
 	/*
 	 * Defaults; overwritten by the "nfs_args" mount option supplied
 	 * by the caller, which vfs_copyopt() copies over this structure.
 	 */
 	struct nfs_args args = {
 	    .version = NFS_ARGSVERSION,
 	    .addr = NULL,
 	    .addrlen = sizeof (struct sockaddr_in),
 	    .sotype = SOCK_STREAM,
 	    .proto = 0,
 	    .fh = NULL,
 	    .fhsize = 0,
 	    .flags = NFSMNT_RESVPORT,
 	    .wsize = NFS_WSIZE,
 	    .rsize = NFS_RSIZE,
 	    .readdirsize = NFS_READDIRSIZE,
 	    .timeo = 10,
 	    .retrans = NFS_RETRANS,
 	    .maxgrouplist = NFS_MAXGRPS,
 	    .readahead = NFS_DEFRAHEAD,
 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
 	    .deadthresh = NFS_MAXDEADTHRESH,	/* was: NQ_DEADTHRESH */
 	    .hostname = NULL,
 	    /* args version 4 */
 	    .acregmin = NFS_MINATTRTIMO,
 	    .acregmax = NFS_MAXATTRTIMO,
 	    .acdirmin = NFS_MINDIRATTRTIMO,
 	    .acdirmax = NFS_MAXDIRATTRTIMO,
 	};
 	int error;
 	struct sockaddr *nam;
 	struct vnode *vp;
 	char hst[MNAMELEN];
 	size_t len;
 	u_char nfh[NFSX_V3FHMAX];
 
 	/* Fail with EINVAL if vfs_filteropt() rejects the options against nfs_opts[]. */
 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/* Initial root mount: everything comes from the diskless setup. */
 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
 		error = nfs_mountroot(mp, td);
 		goto out;
 	}
 
 	error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof args);
 	if (error)
 		goto out;
 
 	if (args.version != NFS_ARGSVERSION) {
 		error = EPROGMISMATCH;
 		goto out;
 	}
 
 	if (mp->mnt_flag & MNT_UPDATE) {
 		struct nfsmount *nmp = VFSTONFS(mp);
 
 		if (nmp == NULL) {
 			error = EIO;
 			goto out;
 		}
 		/*
 		 * When doing an update, we can't change from or to
 		 * v3, switch lockd strategies or change cookie translation
 		 */
 		args.flags = (args.flags &
 		    ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
 		    (nmp->nm_flag &
 			(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
 		nfs_decode_args(mp, nmp, &args);
 		goto out;
 	}
 
 	/*
 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
 	 * or no-connection mode for those protocols that support
 	 * no-connection mode (the flag will be cleared later for protocols
 	 * that do not support no-connection mode).  This will allow a client
 	 * to receive replies from a different IP than the request was
 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
 	 * not 0.
 	 */
 	if (nfs_ip_paranoia == 0)
 		args.flags |= NFSMNT_NOCONN;
 	/* The handle is copied into nfh[], so bound its size first. */
 	if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
 		error = EINVAL;
 		goto out;
 	}
 	error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize);
 	if (error)
 		goto out;
 	error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
 	if (error)
 		goto out;
 	/* Zero the remainder of the buffer after the copied string. */
 	bzero(&hst[len], MNAMELEN - len);
 	/* sockargs() call must be after above copyin() calls */
 	error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen);
 	if (error)
 		goto out;
 	/* From here on args.fh refers to the kernel copy of the handle. */
 	args.fh = nfh;
 	error = mountnfs(&args, mp, nam, hst, &vp, td->td_ucred);
 out:
 	if (!error) {
 		MNT_ILOCK(mp);
 		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
 		MNT_IUNLOCK(mp);
 	}
 	return (error);
 }
 
 
 /*
  * VFS Operations.
  *
  * mount system call
  * It seems a bit dumb to copyinstr() the host and path here and then
  * bcopy() them in mountnfs(), but I wanted to detect errors before
  * doing the sockargs() call because sockargs() allocates an mbuf and
  * an error after that means that I have to release the mbuf.
  */
 /* ARGSUSED */
 static int
 nfs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
 {
 	int error;
 	struct nfs_args args;
 
 	error = copyin(data, &args, sizeof (struct nfs_args));
 	if (error)
 		return error;
 
 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
 
 	error = kernel_mount(ma, flags);
 	return (error);
 }
 
 /*
  * Common code for mount and mountroot
  *
  * Allocate and initialise the nfsmount for mp, apply the arguments in
  * *argp, connect datagram sockets, and return the locked-then-unlocked
  * but still referenced root vnode in *vpp.
  *
  * "nam" (the server address) is consumed: it is stored in the nfsmount
  * on success and freed on failure, so the caller must not free it.
  * Returns 0 on success or the error from nfs_connect() or nfs_nget().
  */
 static int
 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
     char *hst, struct vnode **vpp, struct ucred *cred)
 {
 	struct nfsmount *nmp;
 	struct nfsnode *np;
 	int error;
 	struct vattr attrs;
 
 	if (mp->mnt_flag & MNT_UPDATE) {
 		/* Updates are handled by nfs_mount(); just release nam. */
 		nmp = VFSTONFS(mp);
 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
 		FREE(nam, M_SONAME);
 		return (0);
 	} else {
 		nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
 		bzero((caddr_t)nmp, sizeof (struct nfsmount));
 		TAILQ_INIT(&nmp->nm_bufq);
 		mp->mnt_data = nmp;
 	}
 	vfs_getnewfsid(mp);
 	nmp->nm_mountp = mp;
 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);			
 
 	/*
 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
 	 * high, depending on whether we end up with negative offsets in
 	 * the client or server somewhere.  2GB-1 may be safer.
 	 *
 	 * For V3, nfs_fsinfo will adjust this as necessary.  Assume maximum
 	 * that we can handle until we find out otherwise.
 	 * XXX Our "safe" limit on the client is what we can store in our
 	 * buffer cache using signed(!) block numbers.
 	 */
 	if ((argp->flags & NFSMNT_NFSV3) == 0)
 		nmp->nm_maxfilesize = 0xffffffffLL;
 	else
 		nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1;
 
 	/*
 	 * Start from the compiled-in defaults; nfs_decode_args() below
 	 * then overrides them with whatever the caller supplied.
 	 */
 	nmp->nm_timeo = NFS_TIMEO;
 	nmp->nm_retry = NFS_RETRANS;
 	if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) {
 		nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA;
 	} else {
 		nmp->nm_wsize = NFS_WSIZE;
 		nmp->nm_rsize = NFS_RSIZE;
 	}
 	/*
 	 * NOTE(review): the divisor is zero if desiredvnodes is below
 	 * 1000 -- confirm that the tunable can never be that small.
 	 */
 	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
 	nmp->nm_readdirsize = NFS_READDIRSIZE;
 	nmp->nm_numgrps = NFS_MAXGRPS;
 	nmp->nm_readahead = NFS_DEFRAHEAD;
 	nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
 	/* Per-mount copies of the sysctl defaults, clamped to >= 0. */
 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
 	if (nmp->nm_tprintf_delay < 0)
 		nmp->nm_tprintf_delay = 0;
 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
 	if (nmp->nm_tprintf_initial_delay < 0)
 		nmp->nm_tprintf_initial_delay = 0;
 	nmp->nm_fhsize = argp->fhsize;
 	bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
 	/* Copies a full MNAMELEN bytes, so hst must be at least that big. */
 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
 	nmp->nm_nam = nam;
 	/* Set up the sockets and per-host congestion */
 	nmp->nm_sotype = argp->sotype;
 	nmp->nm_soproto = argp->proto;
 	nmp->nm_rpcops = &nfs_rpcops;
 
 	nfs_decode_args(mp, nmp, argp);
 
 	/*
 	 * For Connection based sockets (TCP,...) defer the connect until
 	 * the first request, in case the server is not responding.
 	 */
 	if (nmp->nm_sotype == SOCK_DGRAM &&
 		(error = nfs_connect(nmp, NULL)))
 		goto bad;
 
 	/*
 	 * This is silly, but it has to be set so that vinifod() works.
 	 * We do not want to do an nfs_statfs() here since we can get
 	 * stuck on a dead server and we are holding a lock on the mount
 	 * point.
 	 */
 	mtx_lock(&nmp->nm_mtx);
 	mp->mnt_stat.f_iosize = nfs_iosize(nmp);
 	mtx_unlock(&nmp->nm_mtx);
 	/*
 	 * A reference count is needed on the nfsnode representing the
 	 * remote root.  If this object is not persistent, then backward
 	 * traversals of the mount point (i.e. "..") will not work if
 	 * the nfsnode gets flushed out of the cache. Ufs does not have
 	 * this problem, because one can identify root inodes by their
 	 * number == ROOTINO (2).
 	 */
 	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
 	if (error)
 		goto bad;
 	*vpp = NFSTOV(np);
 
 	/*
 	 * Get file attributes and transfer parameters for the
 	 * mountpoint.  This has the side effect of filling in
 	 * (*vpp)->v_type with the correct value.
 	 *
 	 * The return values of both calls are ignored, so a failure
 	 * here does not fail the mount.
 	 */
 	if (argp->flags & NFSMNT_NFSV3)
 		nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread);
 	else
 		VOP_GETATTR(*vpp, &attrs, curthread->td_ucred, curthread);
 
 	/*
 	 * Lose the lock but keep the ref.
 	 */
 	VOP_UNLOCK(*vpp, 0);
 
 	return (0);
 bad:
 	/*
 	 * Undo everything done above.
 	 * NOTE(review): mp->mnt_data still points at the freed nfsmount
 	 * after this -- confirm that the caller never looks at it on error.
 	 */
 	nfs_disconnect(nmp);
 	mtx_destroy(&nmp->nm_mtx);
 	uma_zfree(nfsmount_zone, nmp);
 	FREE(nam, M_SONAME);
 	return (error);
 }
 
 /*
  * unmount system call
  *
  * The sequence is:
  * - for a forced unmount, cancel any outstanding requests
  * - call vflush() to clear out the vnodes of this filesystem
  * - close the socket
  * - free the data structures
  * Returns 0 on success, or the error from nfs_nmcancelreqs() or
  * vflush(), in which case the mount is left intact.
  */
 static int
 nfs_unmount(struct mount *mp, int mntflags, struct thread *td)
 {
 	struct nfsmount *nmp;
 	int error, flags;
 
 	flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
 	nmp = VFSTONFS(mp);
 
 	/* In the forced case, cancel any outstanding requests. */
 	if (flags & FORCECLOSE) {
 		error = nfs_nmcancelreqs(nmp);
 		if (error)
 			return (error);
 	}
 
 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
 	error = vflush(mp, 1, flags, td);
 	if (error)
 		return (error);
 
 	/*
 	 * We are now committed to the unmount.
 	 */
 	nfs_disconnect(nmp);
 	FREE(nmp->nm_nam, M_SONAME);
 	mtx_destroy(&nmp->nm_mtx);
 	uma_zfree(nfsmount_zone, nmp);
 	return (0);
 }
 
 /*
  * Return root of a filesystem
  *
  * Look up the node for the mount's root file handle using the lock
  * flags supplied by the caller, and return its vnode in *vpp marked
  * with VV_ROOT.  Returns 0, or the error from nfs_nget().
  */
 static int
 nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
 {
 	struct nfsmount *nmp = VFSTONFS(mp);
 	struct nfsnode *np;
 	struct vnode *rootvp;
 	int error, need_fsinfo;
 
 	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
 	if (error)
 		return error;
 	rootvp = NFSTOV(np);
 
 	/*
 	 * Get transfer parameters and attributes for root vnode once.
 	 * The decision is made under the mount mutex; the RPC itself is
 	 * issued after the mutex has been released.
 	 */
 	mtx_lock(&nmp->nm_mtx);
 	need_fsinfo = (nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
 	    (nmp->nm_flag & NFSMNT_NFSV3) != 0;
 	mtx_unlock(&nmp->nm_mtx);
 	if (need_fsinfo)
 		nfs_fsinfo(nmp, rootvp, curthread->td_ucred, curthread);
 
 	/* A root whose type is still unknown is assumed to be a directory. */
 	if (rootvp->v_type == VNON)
 		rootvp->v_type = VDIR;
 	rootvp->v_vflag |= VV_ROOT;
 	*vpp = rootvp;
 	return (0);
 }
 
 /*
  * Flush out the buffer cache
  *
  * Walk the vnodes of the mount and VOP_FSYNC() each one that has dirty
  * buffers.  Nothing is flushed when waitfor is MNT_LAZY.  Returns 0, or
  * the error of the last VOP_FSYNC() call that failed.
  */
 /* ARGSUSED */
 static int
 nfs_sync(struct mount *mp, int waitfor, struct thread *td)
 {
 	struct vnode *vp, *mvp;
 	int error, allerror = 0;
 
 	/*
 	 * Force stale buffer cache information to be flushed.
 	 */
 	MNT_ILOCK(mp);
 loop:
 	MNT_VNODE_FOREACH(vp, mp, mvp) {
 		/*
 		 * The mount interlock is held at the top of each iteration;
 		 * it is exchanged for the vnode interlock while the vnode is
 		 * examined and re-taken before moving on.
 		 */
 		VI_LOCK(vp);
 		MNT_IUNLOCK(mp);
 		/* XXX Racy bv_cnt check. */
 		/* Skip vnodes that are locked, clean, or not wanted (lazy). */
 		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
 		    waitfor == MNT_LAZY) {
 			VI_UNLOCK(vp);
 			MNT_ILOCK(mp);
 			continue;
 		}
 		/*
 		 * If the vnode cannot be referenced and locked, abandon this
 		 * pass and restart the scan from the beginning.
 		 */
 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
 			MNT_ILOCK(mp);
 			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
 			goto loop;
 		}
 		error = VOP_FSYNC(vp, waitfor, td);
 		if (error)
 			allerror = error;
 		VOP_UNLOCK(vp, 0);
 		vrele(vp);
 
 		MNT_ILOCK(mp);
 	}
 	MNT_IUNLOCK(mp);
 	return (allerror);
 }
 
 /*
  * Per-mount sysctl handler.
  *
  * VFS_CTL_QUERY reports whether the server is currently not responding
  * (VQ_NOTRESP, derived from NFSSTA_TIMEO).  VFS_CTL_TIMEO reads and/or
  * sets the mount's nm_tprintf_initial_delay; setting it requires
  * vfs_suser() to succeed and negative values are clamped to 0.
  *
  * Returns 0 on success, ENOTSUP for any other operation, or the error
  * from SYSCTL_IN()/SYSCTL_OUT()/vfs_suser().
  */
 static int
 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
 {
 	struct nfsmount *nmp = VFSTONFS(mp);
 	struct vfsquery vq;
 	int error = 0;
 
 	bzero(&vq, sizeof(vq));
 	switch (op) {
 #if 0
 	case VFS_CTL_NOLOCKS:
 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
 		if (req->oldptr != NULL) {
 			error = SYSCTL_OUT(req, &val, sizeof(val));
 			if (error)
 				return (error);
 		}
 		if (req->newptr != NULL) {
 			error = SYSCTL_IN(req, &val, sizeof(val));
 			if (error)
 				return (error);
 			if (val)
 				nmp->nm_flag |= NFSMNT_NOLOCKS;
 			else
 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
 		}
 		break;
 #endif
 	case VFS_CTL_QUERY:
 		mtx_lock(&nmp->nm_mtx);
 		if (nmp->nm_state & NFSSTA_TIMEO)
 			vq.vq_flags |= VQ_NOTRESP;
 		mtx_unlock(&nmp->nm_mtx);
 #if 0
 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
 			vq.vq_flags |= VQ_NOTRESPLOCK;
 #endif
 		/* A copy-out failure is reported through the final return. */
 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
 		break;
 	case VFS_CTL_TIMEO:
 		if (req->oldptr != NULL) {
 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
 			    sizeof(nmp->nm_tprintf_initial_delay));
 			if (error)
 				return (error);
 		}
 		if (req->newptr != NULL) {
 			/* Only a suitably privileged caller may change it. */
 			error = vfs_suser(mp, req->td);
 			if (error)
 				return (error);
 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
 			    sizeof(nmp->nm_tprintf_initial_delay));
 			if (error)
 				return (error);
 			if (nmp->nm_tprintf_initial_delay < 0)
 				nmp->nm_tprintf_initial_delay = 0;
 		}
 		break;
 	default:
 		return (ENOTSUP);
 	}
 	return (error);
 }
Index: head/sys/sys/domain.h
===================================================================
--- head/sys/sys/domain.h	(revision 178887)
+++ head/sys/sys/domain.h	(revision 178888)
@@ -1,76 +1,82 @@
 /*-
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)domain.h	8.1 (Berkeley) 6/2/93
  * $FreeBSD$
  */
 
 #ifndef _SYS_DOMAIN_H_
 #define _SYS_DOMAIN_H_
 
 /*
  * Structure per communications domain.
  */
 
 /*
  * Forward structure declarations for function prototypes [sic].
  */
 struct	mbuf;
 struct	ifnet;
 
 struct domain {
 	int	dom_family;		/* AF_xxx */
 	char	*dom_name;
 	void	(*dom_init)		/* initialize domain data structures */
 		(void);
 	int	(*dom_externalize)	/* externalize access rights */
 		(struct mbuf *, struct mbuf **);
 	void	(*dom_dispose)		/* dispose of internalized rights */
 		(struct mbuf *);
 	struct	protosw *dom_protosw, *dom_protoswNPROTOSW;
 	struct	domain *dom_next;
 	int	(*dom_rtattach)		/* initialize routing table */
 		(void **, int);
 	int	dom_rtoffset;		/* an arg to rtattach, in bits */
+		/* XXX MRT.
+		 * rtoffset may be 0 if the domain supplies its own rtattach(),
+		 * in which case a 0 indicates it is being called from
+		 * vfs_export.c (HACK).  Only for AF_INET{,6} at this time.
+		 * Temporary ABI compat hack; fix post RELENG_7.
+		 */
 	int	dom_maxrtkey;		/* for routing layer */
 	void	*(*dom_ifattach)(struct ifnet *);
 	void	(*dom_ifdetach)(struct ifnet *, void *);
 					/* af-dependent data on ifnet */
 };
 
 #ifdef _KERNEL
 extern int	domain_init_status;
 extern struct	domain *domains;
 extern void	net_add_domain(void *);
 
 #define DOMAIN_SET(name) \
 	SYSINIT(domain_ ## name, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, net_add_domain, & name ## domain)
 
 #endif
 
 #endif
Index: head/sys/sys/mbuf.h
===================================================================
--- head/sys/sys/mbuf.h	(revision 178887)
+++ head/sys/sys/mbuf.h	(revision 178888)
@@ -1,970 +1,988 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)mbuf.h	8.5 (Berkeley) 2/19/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_MBUF_H_
 #define	_SYS_MBUF_H_
 
 /* XXX: These includes suck. Sorry! */
 #include <sys/queue.h>
 #ifdef _KERNEL
 #include <sys/systm.h>
 #include <vm/uma.h>
 #ifdef WITNESS
 #include <sys/lock.h>
 #endif
 #endif
 
 /*
  * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead.
  * An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in
  * sys/param.h), which has no additional overhead and is used instead of the
  * internal data area; this is done when at least MINCLSIZE of data must be
  * stored.  Additionally, it is possible to allocate a separate buffer
  * externally and attach it to the mbuf in a way similar to that of mbuf
  * clusters.
  */
 #define	MLEN		(MSIZE - sizeof(struct m_hdr))	/* normal data len */
 #define	MHLEN		(MLEN - sizeof(struct pkthdr))	/* data len w/pkthdr */
 #define	MINCLSIZE	(MHLEN + 1)	/* smallest amount to put in cluster */
 #define	M_MAXCOMPRESS	(MHLEN / 2)	/* max amount to copy for compression */
 
 #ifdef _KERNEL
 /*-
  * Macros for type conversion:
  * mtod(m, t)	-- Convert mbuf pointer to data pointer of correct type.
  * dtom(x)	-- Convert data pointer within mbuf to mbuf pointer (XXX).
  */
 #define	mtod(m, t)	((t)((m)->m_data))
 #define	dtom(x)		((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1)))
 
 /*
  * Argument structure passed to UMA routines during mbuf and packet
  * allocations.
  */
 struct mb_args {
 	int	flags;	/* Flags for mbuf being allocated */
 	short	type;	/* Type of mbuf being allocated */
 };
 #endif /* _KERNEL */
 
 #if defined(__LP64__)
 #define M_HDR_PAD    6
 #else
 #define M_HDR_PAD    2
 #endif
 
 /*
  * Header present at the beginning of every mbuf.
  */
 struct m_hdr {
 	struct mbuf	*mh_next;	/* next buffer in chain */
 	struct mbuf	*mh_nextpkt;	/* next chain in queue/record */
 	caddr_t		 mh_data;	/* location of data */
 	int		 mh_len;	/* amount of data in this mbuf */
 	int		 mh_flags;	/* flags; see below */
 	short		 mh_type;	/* type of data in this mbuf */
 	uint8_t          pad[M_HDR_PAD];/* word align                  */
 };
 
 /*
  * Packet tag structure (see below for details).
  */
 struct m_tag {
 	SLIST_ENTRY(m_tag)	m_tag_link;	/* List of packet tags */
 	u_int16_t		m_tag_id;	/* Tag ID */
 	u_int16_t		m_tag_len;	/* Length of data */
 	u_int32_t		m_tag_cookie;	/* ABI/Module ID */
 	void			(*m_tag_free)(struct m_tag *);
 };
 
 /*
  * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set.
  */
 struct pkthdr {
 	struct ifnet	*rcvif;		/* rcv interface */
 	/* variables for ip and tcp reassembly */
 	void		*header;	/* pointer to packet header */
 	int		 len;		/* total packet length */
 	/* variables for hardware checksum */
 	int		 csum_flags;	/* flags regarding checksum */
 	int		 csum_data;	/* data field used by csum routines */
 	u_int16_t	 tso_segsz;	/* TSO segment size */
 	u_int16_t	 ether_vtag;	/* Ethernet 802.1p+q vlan tag */
 	SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
 };
 
 /*
  * Description of external storage mapped into mbuf; valid only if M_EXT is
  * set.
  */
 struct m_ext {
 	caddr_t		 ext_buf;	/* start of buffer */
 	void		(*ext_free)	/* free routine if not the usual */
 			    (void *, void *);
 	void		*ext_arg1;	/* optional argument pointer */
 	void		*ext_arg2;	/* optional argument pointer */
 	u_int		 ext_size;	/* size of buffer, for ext_free */
 	volatile u_int	*ref_cnt;	/* pointer to ref count info */
 	int		 ext_type;	/* type of external storage */
 };
 
 /*
  * The core of the mbuf object along with some shortcut defines for practical
  * purposes.
  */
 struct mbuf {
 	struct m_hdr	m_hdr;
 	union {
 		struct {
 			struct pkthdr	MH_pkthdr;	/* M_PKTHDR set */
 			union {
 				struct m_ext	MH_ext;	/* M_EXT set */
 				char		MH_databuf[MHLEN];
 			} MH_dat;
 		} MH;
 		char	M_databuf[MLEN];		/* !M_PKTHDR, !M_EXT */
 	} M_dat;
 };
 #define	m_next		m_hdr.mh_next
 #define	m_len		m_hdr.mh_len
 #define	m_data		m_hdr.mh_data
 #define	m_type		m_hdr.mh_type
 #define	m_flags		m_hdr.mh_flags
 #define	m_nextpkt	m_hdr.mh_nextpkt
 #define	m_act		m_nextpkt
 #define	m_pkthdr	M_dat.MH.MH_pkthdr
 #define	m_ext		M_dat.MH.MH_dat.MH_ext
 #define	m_pktdat	M_dat.MH.MH_dat.MH_databuf
 #define	m_dat		M_dat.M_databuf
 
 /*
  * mbuf flags.
  */
 #define	M_EXT		0x00000001 /* has associated external storage */
 #define	M_PKTHDR	0x00000002 /* start of record */
 #define	M_EOR		0x00000004 /* end of record */
 #define	M_RDONLY	0x00000008 /* associated data is marked read-only */
 #define	M_PROTO1	0x00000010 /* protocol-specific */
 #define	M_PROTO2	0x00000020 /* protocol-specific */
 #define	M_PROTO3	0x00000040 /* protocol-specific */
 #define	M_PROTO4	0x00000080 /* protocol-specific */
 #define	M_PROTO5	0x00000100 /* protocol-specific */
 #define	M_BCAST		0x00000200 /* send/received as link-level broadcast */
 #define	M_MCAST		0x00000400 /* send/received as link-level multicast */
 #define	M_FRAG		0x00000800 /* packet is a fragment of a larger packet */
 #define	M_FIRSTFRAG	0x00001000 /* packet is first fragment */
 #define	M_LASTFRAG	0x00002000 /* packet is last fragment */
 #define	M_SKIP_FIREWALL	0x00004000 /* skip firewall processing */
 #define	M_FREELIST	0x00008000 /* mbuf is on the free list */
 #define	M_VLANTAG	0x00010000 /* ether_vtag is valid */
 #define	M_PROMISC	0x00020000 /* packet was not for us */
 #define	M_NOFREE	0x00040000 /* do not free mbuf, embedded in cluster */
 #define	M_PROTO6	0x00080000 /* protocol-specific */
 #define	M_PROTO7	0x00100000 /* protocol-specific */
 #define	M_PROTO8	0x00200000 /* protocol-specific */
+/*
+ * For RELENG_{6,7} steal these flags for limited multiple routing table
+ * support. In RELENG_8 and beyond, use just one flag and a tag.
+ */
+#define	M_FIB		0xF0000000 /* steal some bits to store fib number. */
 
 #define	M_NOTIFICATION	M_PROTO5    /* SCTP notification */
 
 /*
  * Flags to purge when crossing layers.
  */
 #define	M_PROTOFLAGS \
     (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8)
 
 /*
  * Flags preserved when copying m_pkthdr.
  */
 #define	M_COPYFLAGS \
     (M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_SKIP_FIREWALL|M_BCAST|M_MCAST|\
-     M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC)
+     M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_FIB)
 
 /*
  * External buffer types: identify ext_buf type.
  */
 #define	EXT_CLUSTER	1	/* mbuf cluster */
 #define	EXT_SFBUF	2	/* sendfile(2)'s sf_bufs */
 #define	EXT_JUMBOP	3	/* jumbo cluster 4096 bytes */
 #define	EXT_JUMBO9	4	/* jumbo cluster 9216 bytes */
 #define	EXT_JUMBO16	5	/* jumbo cluster 16184 bytes */
 #define	EXT_PACKET	6	/* mbuf+cluster from packet zone */
 #define	EXT_MBUF	7	/* external mbuf reference (M_IOVEC) */
 #define	EXT_NET_DRV	100	/* custom ext_buf provided by net driver(s) */
 #define	EXT_MOD_TYPE	200	/* custom module's ext_buf type */
 #define	EXT_DISPOSABLE	300	/* can throw this buffer away w/page flipping */
 #define	EXT_EXTREF	400	/* has externally maintained ref_cnt ptr */
 
 /*
  * Flags indicating hw checksum support and sw checksum requirements.  This
  * field can be directly tested against if_data.ifi_hwassist.
  */
 #define	CSUM_IP			0x0001		/* will csum IP */
 #define	CSUM_TCP		0x0002		/* will csum TCP */
 #define	CSUM_UDP		0x0004		/* will csum UDP */
 #define	CSUM_IP_FRAGS		0x0008		/* will csum IP fragments */
 #define	CSUM_FRAGMENT		0x0010		/* will do IP fragmentation */
 #define	CSUM_TSO		0x0020		/* will do TSO */
 
 #define	CSUM_IP_CHECKED		0x0100		/* did csum IP */
 #define	CSUM_IP_VALID		0x0200		/*   ... the csum is valid */
 #define	CSUM_DATA_VALID		0x0400		/* csum_data field is valid */
 #define	CSUM_PSEUDO_HDR		0x0800		/* csum_data has pseudo hdr */
 
 #define	CSUM_DELAY_DATA		(CSUM_TCP | CSUM_UDP)
 #define	CSUM_DELAY_IP		(CSUM_IP)	/* XXX add ipv6 here too? */
 
 /*
  * mbuf types.
  */
 #define	MT_NOTMBUF	0	/* USED INTERNALLY ONLY! Object is not mbuf */
 #define	MT_DATA		1	/* dynamic (data) allocation */
 #define	MT_HEADER	MT_DATA	/* packet header, use M_PKTHDR instead */
 #define	MT_SONAME	8	/* socket name */
 #define	MT_CONTROL	14	/* extra-data protocol message */
 #define	MT_OOBDATA	15	/* expedited data  */
 #define	MT_NTYPES	16	/* number of mbuf types for mbtypes[] */
 
 #define	MT_NOINIT	255	/* Not a type but a flag to allocate
 				   a non-initialized mbuf */
 
 #define MB_NOTAGS	0x1UL	/* no tags attached to mbuf */
 
 /*
  * General mbuf allocator statistics structure.
  *
  * Many of these statistics are no longer used; we instead track many
  * allocator statistics through UMA's built in statistics mechanism.
  */
 struct mbstat {
 	u_long	m_mbufs;	/* XXX */
 	u_long	m_mclusts;	/* XXX */
 
 	u_long	m_drain;	/* times drained protocols for space */
 	u_long	m_mcfail;	/* XXX: times m_copym failed */
 	u_long	m_mpfail;	/* XXX: times m_pullup failed */
 	u_long	m_msize;	/* length of an mbuf */
 	u_long	m_mclbytes;	/* length of an mbuf cluster */
 	u_long	m_minclsize;	/* min length of data to allocate a cluster */
 	u_long	m_mlen;		/* length of data in an mbuf */
 	u_long	m_mhlen;	/* length of data in a header mbuf */
 
-	/* Number of mbtypes (gives # elems in mbtypes[] array: */
+	/* Number of mbtypes (gives # elems in mbtypes[] array) */
 	short	m_numtypes;
 
 	/* XXX: Sendfile stats should eventually move to their own struct */
 	u_long	sf_iocnt;	/* times sendfile had to do disk I/O */
 	u_long	sf_allocfail;	/* times sfbuf allocation failed */
 	u_long	sf_allocwait;	/* times sfbuf allocation had to wait */
 };
 
 /*
  * Flags specifying how an allocation should be made.
  *
  * The flag to use is as follows:
  * - M_DONTWAIT or M_NOWAIT from an interrupt handler to not block allocation.
  * - M_WAIT or M_WAITOK from wherever it is safe to block.
  *
  * M_DONTWAIT/M_NOWAIT means that we will not block the thread explicitly and
  * if we cannot allocate immediately we may return NULL, whereas
  * M_WAIT/M_WAITOK means that if we cannot allocate resources we
  * will block until they are available, and thus never return NULL.
  *
  * XXX Eventually just phase this out to use M_WAITOK/M_NOWAIT.
  */
 #define	MBTOM(how)	(how)
 #define	M_DONTWAIT	M_NOWAIT
 #define	M_TRYWAIT	M_WAITOK
 #define	M_WAIT		M_WAITOK
 
 /*
  * String names of mbuf-related UMA(9) and malloc(9) types.  Exposed to
  * !_KERNEL so that monitoring tools can look up the zones with
  * libmemstat(3).
  */
 #define	MBUF_MEM_NAME		"mbuf"
 #define	MBUF_CLUSTER_MEM_NAME	"mbuf_cluster"
 #define	MBUF_PACKET_MEM_NAME	"mbuf_packet"
 #define	MBUF_JUMBOP_MEM_NAME	"mbuf_jumbo_page"
 #define	MBUF_JUMBO9_MEM_NAME	"mbuf_jumbo_9k"
 #define	MBUF_JUMBO16_MEM_NAME	"mbuf_jumbo_16k"
 #define	MBUF_TAG_MEM_NAME	"mbuf_tag"
 #define	MBUF_EXTREFCNT_MEM_NAME	"mbuf_ext_refcnt"
 
 #ifdef _KERNEL
 
 #ifdef WITNESS
 #define	MBUF_CHECKSLEEP(how) do {					\
 	if (how == M_WAITOK)						\
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,		\
 		    "Sleeping in \"%s\"", __func__);			\
 } while (0)
 #else
 #define	MBUF_CHECKSLEEP(how)
 #endif
 
 /*
  * Network buffer allocation API
  *
  * The rest of it is defined in kern/kern_mbuf.c
  */
 
 extern uma_zone_t	zone_mbuf;
 extern uma_zone_t	zone_clust;
 extern uma_zone_t	zone_pack;
 extern uma_zone_t	zone_jumbop;
 extern uma_zone_t	zone_jumbo9;
 extern uma_zone_t	zone_jumbo16;
 extern uma_zone_t	zone_ext_refcnt;
 
 static __inline struct mbuf	*m_getcl(int how, short type, int flags);
 static __inline struct mbuf	*m_get(int how, short type);
 static __inline struct mbuf	*m_gethdr(int how, short type);
 static __inline struct mbuf	*m_getjcl(int how, short type, int flags,
 				    int size);
 static __inline struct mbuf	*m_getclr(int how, short type);	/* XXX */
 static __inline struct mbuf	*m_free(struct mbuf *m);
 static __inline void		 m_clget(struct mbuf *m, int how);
 static __inline void		*m_cljget(struct mbuf *m, int how, int size);
 static __inline void		 m_chtype(struct mbuf *m, short new_type);
 void				 mb_free_ext(struct mbuf *);
 static __inline struct mbuf	*m_last(struct mbuf *m);
 
 static __inline int
 m_gettype(int size)
 {
 	int type;
 	
 	switch (size) {
 	case MSIZE:
 		type = EXT_MBUF;
 		break;
 	case MCLBYTES:
 		type = EXT_CLUSTER;
 		break;
 #if MJUMPAGESIZE != MCLBYTES
 	case MJUMPAGESIZE:
 		type = EXT_JUMBOP;
 		break;
 #endif
 	case MJUM9BYTES:
 		type = EXT_JUMBO9;
 		break;
 	case MJUM16BYTES:
 		type = EXT_JUMBO16;
 		break;
 	default:
 		panic("%s: m_getjcl: invalid cluster size", __func__);
 	}
 
 	return (type);
 }
 
 static __inline uma_zone_t
 m_getzone(int size)
 {
 	uma_zone_t zone;
 	
 	switch (size) {
 	case MSIZE:
 		zone = zone_mbuf;
 		break;
 	case MCLBYTES:
 		zone = zone_clust;
 		break;
 #if MJUMPAGESIZE != MCLBYTES
 	case MJUMPAGESIZE:
 		zone = zone_jumbop;
 		break;
 #endif
 	case MJUM9BYTES:
 		zone = zone_jumbo9;
 		break;
 	case MJUM16BYTES:
 		zone = zone_jumbo16;
 		break;
 	default:
 		panic("%s: m_getjcl: invalid cluster type", __func__);
 	}
 
 	return (zone);
 }
 
 static __inline struct mbuf *
 m_get(int how, short type)
 {
 	struct mb_args args;
 
 	args.flags = 0;
 	args.type = type;
 	return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how)));
 }
 
 /*
  * XXX This should be deprecated, very little use.
  */
 static __inline struct mbuf *
 m_getclr(int how, short type)
 {
 	struct mbuf *m;
 	struct mb_args args;
 
 	args.flags = 0;
 	args.type = type;
 	m = uma_zalloc_arg(zone_mbuf, &args, how);
 	if (m != NULL)
 		bzero(m->m_data, MLEN);
 	return (m);
 }
 
 static __inline struct mbuf *
 m_gethdr(int how, short type)
 {
 	struct mb_args args;
 
 	args.flags = M_PKTHDR;
 	args.type = type;
 	return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how)));
 }
 
 static __inline struct mbuf *
 m_getcl(int how, short type, int flags)
 {
 	struct mb_args args;
 
 	args.flags = flags;
 	args.type = type;
 	return ((struct mbuf *)(uma_zalloc_arg(zone_pack, &args, how)));
 }
 
 /*
  * m_getjcl() returns an mbuf with a cluster of the specified size attached.
  * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
  *
  * XXX: This is rather large, should be real function maybe.
  */
 static __inline struct mbuf *
 m_getjcl(int how, short type, int flags, int size)
 {
 	struct mb_args args;
 	struct mbuf *m, *n;
 	uma_zone_t zone;
 
 	args.flags = flags;
 	args.type = type;
 
 	m = uma_zalloc_arg(zone_mbuf, &args, how);
 	if (m == NULL)
 		return (NULL);
 
 	zone = m_getzone(size);
 	n = uma_zalloc_arg(zone, m, how);
 	if (n == NULL) {
 		uma_zfree(zone_mbuf, m);
 		return (NULL);
 	}
 	return (m);
 }
 
 static __inline void
 m_free_fast(struct mbuf *m)
 {
 #ifdef INVARIANTS
 	if (m->m_flags & M_PKTHDR)
 		KASSERT(SLIST_EMPTY(&m->m_pkthdr.tags), ("doing fast free of mbuf with tags"));
 #endif
 	
 	uma_zfree_arg(zone_mbuf, m, (void *)MB_NOTAGS);
 }
 
 static __inline struct mbuf *
 m_free(struct mbuf *m)
 {
 	struct mbuf *n = m->m_next;
 
 	if (m->m_flags & M_EXT)
 		mb_free_ext(m);
 	else if ((m->m_flags & M_NOFREE) == 0)
 		uma_zfree(zone_mbuf, m);
 	return (n);
 }
 
 static __inline void
 m_clget(struct mbuf *m, int how)
 {
 
 	if (m->m_flags & M_EXT)
 		printf("%s: %p mbuf already has cluster\n", __func__, m);
 	m->m_ext.ext_buf = (char *)NULL;
 	uma_zalloc_arg(zone_clust, m, how);
 	/*
 	 * On a cluster allocation failure, drain the packet zone and retry,
 	 * we might be able to loosen a few clusters up on the drain.
 	 */
 	if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
 		zone_drain(zone_pack);
 		uma_zalloc_arg(zone_clust, m, how);
 	}
 }
 
 /*
  * m_cljget() is different from m_clget() as it can allocate clusters without
  * attaching them to an mbuf.  In that case the return value is the pointer
  * to the cluster of the requested size.  If an mbuf was specified, it gets
  * the cluster attached to it and the return value can be safely ignored.
  * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
  */
 static __inline void *
 m_cljget(struct mbuf *m, int how, int size)
 {
 	uma_zone_t zone;
 
 	if (m && m->m_flags & M_EXT)
 		printf("%s: %p mbuf already has cluster\n", __func__, m);
 	if (m != NULL)
 		m->m_ext.ext_buf = NULL;
 
 	zone = m_getzone(size);
 	return (uma_zalloc_arg(zone, m, how));
 }
 
 static __inline void
 m_cljset(struct mbuf *m, void *cl, int type)
 {
 	uma_zone_t zone;
 	int size;
 	
 	switch (type) {
 	case EXT_CLUSTER:
 		size = MCLBYTES;
 		zone = zone_clust;
 		break;
 #if MJUMPAGESIZE != MCLBYTES
 	case EXT_JUMBOP:
 		size = MJUMPAGESIZE;
 		zone = zone_jumbop;
 		break;
 #endif
 	case EXT_JUMBO9:
 		size = MJUM9BYTES;
 		zone = zone_jumbo9;
 		break;
 	case EXT_JUMBO16:
 		size = MJUM16BYTES;
 		zone = zone_jumbo16;
 		break;
 	default:
 		panic("unknown cluster type");
 		break;
 	}
 
 	m->m_data = m->m_ext.ext_buf = cl;
 	m->m_ext.ext_free = m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL;
 	m->m_ext.ext_size = size;
 	m->m_ext.ext_type = type;
 	m->m_ext.ref_cnt = uma_find_refcnt(zone, cl);
 	m->m_flags |= M_EXT;
 
 }
 
 static __inline void
 m_chtype(struct mbuf *m, short new_type)
 {
 
 	m->m_type = new_type;
 }
 
 static __inline struct mbuf *
 m_last(struct mbuf *m)
 {
 
 	while (m->m_next)
 		m = m->m_next;
 	return (m);
 }
 
 /*
  * mbuf, cluster, and external object allocation macros (for compatibility
  * purposes).
  */
 #define	M_MOVE_PKTHDR(to, from)	m_move_pkthdr((to), (from))
 #define	MGET(m, how, type)	((m) = m_get((how), (type)))
 #define	MGETHDR(m, how, type)	((m) = m_gethdr((how), (type)))
 #define	MCLGET(m, how)		m_clget((m), (how))
 #define	MEXTADD(m, buf, size, free, arg1, arg2, flags, type)		\
     m_extadd((m), (caddr_t)(buf), (size), (free),(arg1),(arg2),(flags), (type))
 #define	m_getm(m, len, how, type)					\
     m_getm2((m), (len), (how), (type), M_PKTHDR)
 
 /*
  * Evaluate TRUE if it's safe to write to the mbuf m's data region (this can
  * be both the local data payload, or an external buffer area, depending on
  * whether M_EXT is set).
  */
 #define	M_WRITABLE(m)	(!((m)->m_flags & M_RDONLY) &&			\
 			 (!(((m)->m_flags & M_EXT)) ||			\
 			 (*((m)->m_ext.ref_cnt) == 1)) )		\
 
 /* Check if the supplied mbuf has a packet header, or else panic. */
 #define	M_ASSERTPKTHDR(m)						\
 	KASSERT((m) != NULL && (m)->m_flags & M_PKTHDR,			\
 	    ("%s: no mbuf packet header!", __func__))
 
 /*
  * Ensure that the supplied mbuf is a valid, non-free mbuf.
  *
  * XXX: Broken at the moment.  Need some UMA magic to make it work again.
  */
 #define	M_ASSERTVALID(m)						\
 	KASSERT((((struct mbuf *)m)->m_flags & 0) == 0,			\
 	    ("%s: attempted use of a free mbuf!", __func__))
 
 /*
  * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place an
  * object of the specified size at the end of the mbuf, longword aligned.
  */
 #define	M_ALIGN(m, len) do {						\
 	KASSERT(!((m)->m_flags & (M_PKTHDR|M_EXT)),			\
 		("%s: M_ALIGN not normal mbuf", __func__));		\
 	KASSERT((m)->m_data == (m)->m_dat,				\
 		("%s: M_ALIGN not a virgin mbuf", __func__));		\
 	(m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1);		\
 } while (0)
 
 /*
  * As above, for mbufs allocated with m_gethdr/MGETHDR or initialized by
  * M_DUP/MOVE_PKTHDR.
  */
 #define	MH_ALIGN(m, len) do {						\
 	KASSERT((m)->m_flags & M_PKTHDR && !((m)->m_flags & M_EXT),	\
 		("%s: MH_ALIGN not PKTHDR mbuf", __func__));		\
 	KASSERT((m)->m_data == (m)->m_pktdat,				\
 		("%s: MH_ALIGN not a virgin mbuf", __func__));		\
 	(m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1);		\
 } while (0)
 
 /*
  * Compute the amount of space available before the current start of data in
  * an mbuf.
  *
  * The M_WRITABLE() is a temporary, conservative safety measure: the burden
  * of checking writability of the mbuf data area rests solely with the caller.
  */
 #define	M_LEADINGSPACE(m)						\
 	((m)->m_flags & M_EXT ?						\
 	    (M_WRITABLE(m) ? (m)->m_data - (m)->m_ext.ext_buf : 0):	\
 	    (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat :	\
 	    (m)->m_data - (m)->m_dat)
 
 /*
  * Compute the amount of space available after the end of data in an mbuf.
  *
  * The M_WRITABLE() is a temporary, conservative safety measure: the burden
  * of checking writability of the mbuf data area rests solely with the caller.
  */
 #define	M_TRAILINGSPACE(m)						\
 	((m)->m_flags & M_EXT ?						\
 	    (M_WRITABLE(m) ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size	\
 		- ((m)->m_data + (m)->m_len) : 0) :			\
 	    &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))
 
 /*
  * Arrange to prepend space of size plen to mbuf m.  If a new mbuf must be
  * allocated, how specifies whether to wait.  If the allocation fails, the
  * original mbuf chain is freed and m is set to NULL.
  */
 #define	M_PREPEND(m, plen, how) do {					\
 	struct mbuf **_mmp = &(m);					\
 	struct mbuf *_mm = *_mmp;					\
 	int _mplen = (plen);						\
 	int __mhow = (how);						\
 									\
 	MBUF_CHECKSLEEP(how);						\
 	if (M_LEADINGSPACE(_mm) >= _mplen) {				\
 		_mm->m_data -= _mplen;					\
 		_mm->m_len += _mplen;					\
 	} else								\
 		_mm = m_prepend(_mm, _mplen, __mhow);			\
 	if (_mm != NULL && _mm->m_flags & M_PKTHDR)			\
 		_mm->m_pkthdr.len += _mplen;				\
 	*_mmp = _mm;							\
 } while (0)
 
 /*
  * Change mbuf to new type.  This is a relatively expensive operation and
  * should be avoided.
  */
 #define	MCHTYPE(m, t)	m_chtype((m), (t))
 
 /* Length to m_copy to copy all. */
 #define	M_COPYALL	1000000000
 
 /* Compatibility with 4.3. */
 #define	m_copy(m, o, l)	m_copym((m), (o), (l), M_DONTWAIT)
 
 extern int		max_datalen;	/* MHLEN - max_hdr */
 extern int		max_hdr;	/* Largest link + protocol header */
 extern int		max_linkhdr;	/* Largest link-level header */
 extern int		max_protohdr;	/* Largest protocol header */
 extern struct mbstat	mbstat;		/* General mbuf stats/infos */
 extern int		nmbclusters;	/* Maximum number of clusters */
 
 struct uio;
 
 void		 m_adj(struct mbuf *, int);
 void		 m_align(struct mbuf *, int);
 int		 m_apply(struct mbuf *, int, int,
 		    int (*)(void *, void *, u_int), void *);
 int		 m_append(struct mbuf *, int, c_caddr_t);
 void		 m_cat(struct mbuf *, struct mbuf *);
 void		 m_extadd(struct mbuf *, caddr_t, u_int,
 		    void (*)(void *, void *), void *, void *, int, int);
 struct mbuf	*m_collapse(struct mbuf *, int, int);
 void		 m_copyback(struct mbuf *, int, int, c_caddr_t);
 void		 m_copydata(const struct mbuf *, int, int, caddr_t);
 struct mbuf	*m_copym(struct mbuf *, int, int, int);
 struct mbuf	*m_copymdata(struct mbuf *, struct mbuf *,
 		    int, int, int, int);
 struct mbuf	*m_copypacket(struct mbuf *, int);
 void		 m_copy_pkthdr(struct mbuf *, struct mbuf *);
 struct mbuf	*m_copyup(struct mbuf *n, int len, int dstoff);
 struct mbuf	*m_defrag(struct mbuf *, int);
 void		 m_demote(struct mbuf *, int);
 struct mbuf	*m_devget(char *, int, int, struct ifnet *,
 		    void (*)(char *, caddr_t, u_int));
 struct mbuf	*m_dup(struct mbuf *, int);
 int		 m_dup_pkthdr(struct mbuf *, struct mbuf *, int);
 u_int		 m_fixhdr(struct mbuf *);
 struct mbuf	*m_fragment(struct mbuf *, int, int);
 void		 m_freem(struct mbuf *);
 struct mbuf	*m_getm2(struct mbuf *, int, int, short, int);
 struct mbuf	*m_getptr(struct mbuf *, int, int *);
 u_int		 m_length(struct mbuf *, struct mbuf **);
 void		 m_move_pkthdr(struct mbuf *, struct mbuf *);
 struct mbuf	*m_prepend(struct mbuf *, int, int);
 void		 m_print(const struct mbuf *, int);
 struct mbuf	*m_pulldown(struct mbuf *, int, int, int *);
 struct mbuf	*m_pullup(struct mbuf *, int);
 int		m_sanity(struct mbuf *, int);
 struct mbuf	*m_split(struct mbuf *, int, int);
 struct mbuf	*m_uiotombuf(struct uio *, int, int, int, int);
 struct mbuf	*m_unshare(struct mbuf *, int how);
 
 /*-
  * Network packets may have annotations attached by affixing a list of
  * "packet tags" to the pkthdr structure.  Packet tags are dynamically
  * allocated semi-opaque data structures that have a fixed header
  * (struct m_tag) that specifies the size of the memory block and a
  * <cookie,type> pair that identifies it.  The cookie is a 32-bit unique
  * unsigned value used to identify a module or ABI.  By convention this value
  * is chosen as the date+time that the module is created, expressed as the
  * number of seconds since the epoch (e.g., using date -u +'%s').  The type
  * value is an ABI/module-specific value that identifies a particular
  * annotation and is private to the module.  For compatibility with systems
  * like OpenBSD that define packet tags w/o an ABI/module cookie, the value
  * PACKET_ABI_COMPAT is used to implement m_tag_get and m_tag_find
  * compatibility shim functions and several tag types are defined below.
  * Users that do not require compatibility should use a private cookie value
  * so that packet tag-related definitions can be maintained privately.
  *
  * Note that the packet tag returned by m_tag_alloc has the default memory
  * alignment implemented by malloc.  To reference private data one can use a
  * construct like:
  *
  *	struct m_tag *mtag = m_tag_alloc(...);
  *	struct foo *p = (struct foo *)(mtag+1);
  *
  * if the alignment of struct m_tag is sufficient for referencing members of
  * struct foo.  Otherwise it is necessary to embed struct m_tag within the
  * private data structure to insure proper alignment; e.g.,
  *
  *	struct foo {
  *		struct m_tag	tag;
  *		...
  *	};
  *	struct foo *p = (struct foo *) m_tag_alloc(...);
  *	struct m_tag *mtag = &p->tag;
  */
 
 /*
  * Persistent tags stay with an mbuf until the mbuf is reclaimed.  Otherwise
  * tags are expected to ``vanish'' when they pass through a network
  * interface.  For most interfaces this happens normally as the tags are
  * reclaimed when the mbuf is free'd.  However in some special cases
  * reclaiming must be done manually.  An example is packets that pass through
  * the loopback interface.  Also, one must be careful to do this when
  * ``turning around'' packets (e.g., icmp_reflect).
  *
  * To mark a tag persistent bit-or this flag in when defining the tag id.
  * The tag will then be treated as described above.
  */
 #define	MTAG_PERSISTENT				0x800
 
 #define	PACKET_TAG_NONE				0  /* Nadda */
 
 /* Packet tags for use with PACKET_ABI_COMPAT. */
 #define	PACKET_TAG_IPSEC_IN_DONE		1  /* IPsec applied, in */
 #define	PACKET_TAG_IPSEC_OUT_DONE		2  /* IPsec applied, out */
 #define	PACKET_TAG_IPSEC_IN_CRYPTO_DONE		3  /* NIC IPsec crypto done */
 #define	PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED	4  /* NIC IPsec crypto req'ed */
 #define	PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO	5  /* NIC notifies IPsec */
 #define	PACKET_TAG_IPSEC_PENDING_TDB		6  /* Reminder to do IPsec */
 #define	PACKET_TAG_BRIDGE			7  /* Bridge processing done */
 #define	PACKET_TAG_GIF				8  /* GIF processing done */
 #define	PACKET_TAG_GRE				9  /* GRE processing done */
 #define	PACKET_TAG_IN_PACKET_CHECKSUM		10 /* NIC checksumming done */
 #define	PACKET_TAG_ENCAP			11 /* Encap.  processing */
 #define	PACKET_TAG_IPSEC_SOCKET			12 /* IPSEC socket ref */
 #define	PACKET_TAG_IPSEC_HISTORY		13 /* IPSEC history */
 #define	PACKET_TAG_IPV6_INPUT			14 /* IPV6 input processing */
 #define	PACKET_TAG_DUMMYNET			15 /* dummynet info */
 #define	PACKET_TAG_DIVERT			17 /* divert info */
 #define	PACKET_TAG_IPFORWARD			18 /* ipforward info */
 #define	PACKET_TAG_MACLABEL	(19 | MTAG_PERSISTENT) /* MAC label */
 #define	PACKET_TAG_PF				21 /* PF + ALTQ information */
 #define	PACKET_TAG_RTSOCKFAM			25 /* rtsock sa family */
 #define	PACKET_TAG_IPOPTIONS			27 /* Saved IP options */
 #define	PACKET_TAG_CARP                         28 /* CARP info */
 
 /* Specific cookies and tags. */
 
 /* Packet tag routines. */
 struct m_tag	*m_tag_alloc(u_int32_t, int, int, int);
 void		 m_tag_delete(struct mbuf *, struct m_tag *);
 void		 m_tag_delete_chain(struct mbuf *, struct m_tag *);
 void		 m_tag_free_default(struct m_tag *);
 struct m_tag	*m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *);
 struct m_tag	*m_tag_copy(struct m_tag *, int);
 int		 m_tag_copy_chain(struct mbuf *, struct mbuf *, int);
 void		 m_tag_delete_nonpersistent(struct mbuf *);
 
 /*
  * Initialize the list of tags associated with an mbuf.
  */
 static __inline void
 m_tag_init(struct mbuf *m)
 {
 
 	SLIST_INIT(&m->m_pkthdr.tags);
 }
 
 /*
  * Set up the contents of a tag.  Note that this does not fill in the free
  * method; the caller is expected to do that.
  *
  * XXX probably should be called m_tag_init, but that was already taken.
  */
 static __inline void
 m_tag_setup(struct m_tag *t, u_int32_t cookie, int type, int len)
 {
 
 	t->m_tag_id = type;
 	t->m_tag_len = len;
 	t->m_tag_cookie = cookie;
 }
 
 /*
  * Reclaim resources associated with a tag.
  */
 static __inline void
 m_tag_free(struct m_tag *t)
 {
 
 	(*t->m_tag_free)(t);
 }
 
 /*
  * Return the first tag associated with an mbuf.
  */
 static __inline struct m_tag *
 m_tag_first(struct mbuf *m)
 {
 
 	return (SLIST_FIRST(&m->m_pkthdr.tags));
 }
 
 /*
  * Return the next tag in the list of tags associated with an mbuf.
  */
 static __inline struct m_tag *
 m_tag_next(struct mbuf *m, struct m_tag *t)
 {
 
 	return (SLIST_NEXT(t, m_tag_link));
 }
 
 /*
  * Prepend a tag to the list of tags associated with an mbuf.
  */
 static __inline void
 m_tag_prepend(struct mbuf *m, struct m_tag *t)
 {
 
 	SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link);
 }
 
 /*
  * Unlink a tag from the list of tags associated with an mbuf.
  */
 static __inline void
 m_tag_unlink(struct mbuf *m, struct m_tag *t)
 {
 
 	SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link);
 }
 
 /* These are for OpenBSD compatibility. */
 #define	MTAG_ABI_COMPAT		0		/* compatibility ABI */
 
 static __inline struct m_tag *
 m_tag_get(int type, int length, int wait)
 {
 	return (m_tag_alloc(MTAG_ABI_COMPAT, type, length, wait));
 }
 
 static __inline struct m_tag *
 m_tag_find(struct mbuf *m, int type, struct m_tag *start)
 {
 	return (SLIST_EMPTY(&m->m_pkthdr.tags) ? (struct m_tag *)NULL :
 	    m_tag_locate(m, MTAG_ABI_COMPAT, type, start));
 }
+
+/* XXX: temporary FIB accessors; these will probably use tags eventually. */
+#define M_FIBSHIFT    28
+#define M_FIBMASK	0x0F
+
+/* get the fib from an mbuf and if it is not set, return the default */
+#define M_GETFIB(_m) \
+    ((((_m)->m_flags & M_FIB) >> M_FIBSHIFT) & M_FIBMASK)
+
+#define M_SETFIB(_m, _fib) do { /* shift unsigned; fib >= 8 overflows int */ \
+	(_m)->m_flags &= ~M_FIB;					\
+	(_m)->m_flags |= (((unsigned int)(_fib) << M_FIBSHIFT) & M_FIB);	\
+} while (0)
 
 #endif /* _KERNEL */
 
 #ifdef MBUF_PROFILING
  void m_profile(struct mbuf *m);
  #define M_PROFILE(m) m_profile(m)
 #else
  #define M_PROFILE(m)
 #endif
 
 
 #endif /* !_SYS_MBUF_H_ */
Index: head/sys/sys/proc.h
===================================================================
--- head/sys/sys/proc.h	(revision 178887)
+++ head/sys/sys/proc.h	(revision 178888)
@@ -1,859 +1,860 @@
 /*-
  * Copyright (c) 1986, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)proc.h	8.15 (Berkeley) 5/19/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_PROC_H_
 #define	_SYS_PROC_H_
 
 #include <sys/callout.h>		/* For struct callout. */
 #include <sys/event.h>			/* For struct klist. */
 #ifndef _KERNEL
 #include <sys/filedesc.h>
 #endif
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/lock_profile.h>
 #include <sys/_mutex.h>
 #include <sys/priority.h>
 #include <sys/rtprio.h>			/* XXX. */
 #include <sys/runq.h>
 #include <sys/resource.h>
 #include <sys/sigio.h>
 #include <sys/signal.h>
 #include <sys/signalvar.h>
 #ifndef _KERNEL
 #include <sys/time.h>			/* For structs itimerval, timeval. */
 #else
 #include <sys/pcpu.h>
 #endif
 #include <sys/ucontext.h>
 #include <sys/ucred.h>
 #include <machine/proc.h>		/* Machine-dependent proc substruct. */
 
 /*
  * One structure allocated per session.
  *
  * List of locks
  * (m)		locked by s_mtx mtx
  * (e)		locked by proctree_lock sx
  * (c)		const until freeing
  */
 struct session {
 	int		s_count;	/* (m) Ref cnt; pgrps in session. */
 	struct proc	*s_leader;	/* (m + e) Session leader. */
 	struct vnode	*s_ttyvp;	/* (m) Vnode of controlling tty. */
 	struct tty	*s_ttyp;	/* (m) Controlling tty. */
 	pid_t		s_sid;		/* (c) Session ID. */
 					/* (m) Setlogin() name: */
 	char		s_login[roundup(MAXLOGNAME, sizeof(long))];
 	struct mtx	s_mtx;		/* Mutex to protect members. */
 };
 
 /*
  * One structure allocated per process group.
  *
  * List of locks
  * (m)		locked by pg_mtx mtx
  * (e)		locked by proctree_lock sx
  * (c)		const until freeing
  */
 struct pgrp {
 	LIST_ENTRY(pgrp) pg_hash;	/* (e) Hash chain. */
 	LIST_HEAD(, proc) pg_members;	/* (m + e) Pointer to pgrp members. */
 	struct session	*pg_session;	/* (c) Pointer to session. */
 	struct sigiolst	pg_sigiolst;	/* (m) List of sigio sources. */
 	pid_t		pg_id;		/* (c) Process group id. */
 	int		pg_jobc;	/* (m) Job control process count. */
 	struct mtx	pg_mtx;		/* Mutex to protect members */
 };
 
 /*
  * pargs, used to hold a copy of the command line, if it had a sane length.
  */
 struct pargs {
 	u_int	ar_ref;		/* Reference count. */
 	u_int	ar_length;	/* Length. */
 	u_char	ar_args[1];	/* Arguments. */
 };
 
 /*-
  * Description of a process.
  *
  * This structure contains the information needed to manage a thread of
  * control, known in UN*X as a process; it has references to substructures
  * containing descriptions of things that the process uses, but may share
  * with related processes.  The process structure and the substructures
  * are always addressable except for those marked "(CPU)" below,
  * which might be addressable only on a processor on which the process
  * is running.
  *
  * Below is a key of locks used to protect each member of struct proc.  The
  * lock is indicated by a reference to a specific character in parens in the
  * associated comment.
  *      * - not yet protected
  *      a - only touched by curproc or parent during fork/wait
  *      b - created at fork, never changes
  *		(exception aiods switch vmspaces, but they are also
  *		marked 'P_SYSTEM' so hopefully it will be left alone)
  *      c - locked by proc mtx
  *      d - locked by allproc_lock lock
  *      e - locked by proctree_lock lock
  *      f - session mtx
  *      g - process group mtx
  *      h - callout_lock mtx
  *      i - by curproc or the master session mtx
  *      j - locked by proc slock
  *      k - only accessed by curthread
  *	k*- only accessed by curthread and from an interrupt
  *      l - the attaching proc or attaching proc parent
  *      m - Giant
  *      n - not locked, lazy
  *      o - ktrace lock
  *      q - td_contested lock
  *      r - p_peers lock
  *      t - thread lock
  *      x - created at fork, only changes during single threading in exec
  *      z - zombie threads lock
  *
  * If the locking key specifies two identifiers (for example, p_pptr) then
  * either lock is sufficient for read access, but both locks must be held
  * for write access.
  */
 struct kaudit_record;
 struct td_sched;
 struct nlminfo;
 struct kaioinfo;
 struct p_sched;
 struct proc;
 struct sleepqueue;
 struct thread;
 struct trapframe;
 struct turnstile;
 struct mqueue_notifier;
 struct cpuset;
 
 /*
  * Kernel runnable context (thread).
  * This is what is put to sleep and reactivated.
  * Thread context.  Processes may have multiple threads.
  */
 struct thread {
 	struct mtx	*volatile td_lock; /* replaces sched lock */
 	struct proc	*td_proc;	/* (*) Associated process. */
 	TAILQ_ENTRY(thread) td_plist;	/* (*) All threads in this proc. */
 	TAILQ_ENTRY(thread) td_runq;	/* (t) Run queue. */
 	TAILQ_ENTRY(thread) td_slpq;	/* (t) Sleep queue. */
 	TAILQ_ENTRY(thread) td_lockq;	/* (t) Lock queue. */
 	struct cpuset	*td_cpuset;	/* (t) CPU affinity mask. */
 	struct seltd	*td_sel;	/* Select queue/channel. */
 	struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */
 	struct turnstile *td_turnstile;	/* (k) Associated turnstile. */
 	struct umtx_q   *td_umtxq;	/* (c?) Link for when we're blocked. */
 	lwpid_t		td_tid;		/* (b) Thread ID. */
 	sigqueue_t	td_sigqueue;	/* (c) Sigs arrived, not delivered. */
 #define	td_siglist	td_sigqueue.sq_signals
 
 /* Cleared during fork1() */
 #define	td_startzero td_flags
 	int		td_flags;	/* (t) TDF_* flags. */
 	int		td_inhibitors;	/* (t) Why can not run. */
 	int		td_pflags;	/* (k) Private thread (TDP_*) flags. */
 	int		td_dupfd;	/* (k) Ret value from fdopen. XXX */
 	int		td_sqqueue;	/* (t) Sleepqueue queue blocked on. */
 	void		*td_wchan;	/* (t) Sleep address. */
 	const char	*td_wmesg;	/* (t) Reason for sleep. */
 	u_char		td_lastcpu;	/* (t) Last cpu we were on. */
 	u_char		td_oncpu;	/* (t) Which cpu we are on. */
 	volatile u_char td_owepreempt;  /* (k*) Preempt on last critical_exit */
 	u_char		td_tsqueue;	/* (t) Turnstile queue blocked on. */
 	short		td_locks;	/* (k) Count of non-spin locks. */
 	short		td_rw_rlocks;	/* (k) Count of rwlock read locks. */
 	short		td_lk_slocks;	/* (k) Count of lockmgr shared locks. */
 	struct turnstile *td_blocked;	/* (t) Lock thread is blocked on. */
 	const char	*td_lockname;	/* (t) Name of lock blocked on. */
 	LIST_HEAD(, turnstile) td_contested;	/* (q) Contested locks. */
 	struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */
 	int		td_intr_nesting_level; /* (k) Interrupt recursion. */
 	int		td_pinned;	/* (k) Temporary cpu pin count. */
 	struct ucred	*td_ucred;	/* (k) Reference to credentials. */
 	u_int		td_estcpu;	/* (t) estimated cpu utilization */
 	u_int		td_slptick;	/* (t) Time at sleep. */
 	struct rusage	td_ru;		/* (t) rusage information */
 	uint64_t	td_incruntime;	/* (t) Cpu ticks to transfer to proc. */
 	uint64_t	td_runtime;	/* (t) How many cpu ticks we've run. */
 	u_int 		td_pticks;	/* (t) Statclock hits for profiling */
 	u_int		td_sticks;	/* (t) Statclock hits in system mode. */
 	u_int		td_iticks;	/* (t) Statclock hits in intr mode. */
 	u_int		td_uticks;	/* (t) Statclock hits in user mode. */
 	u_int		td_uuticks;	/* (k) Statclock hits (usr), for UTS. */
 	u_int		td_usticks;	/* (k) Statclock hits (sys), for UTS. */
 	int		td_intrval;	/* (t) Return value for sleepq. */
 	sigset_t	td_oldsigmask;	/* (k) Saved mask from pre sigpause. */
 	sigset_t	td_sigmask;	/* (c) Current signal mask. */
 	volatile u_int	td_generation;	/* (k) For detection of preemption */
 	stack_t		td_sigstk;	/* (k) Stack ptr and on-stack flag. */
 	int		td_xsig;	/* (c) Signal for ptrace */
 	u_long		td_profil_addr;	/* (k) Temporary addr until AST. */
 	u_int		td_profil_ticks; /* (k) Temporary ticks until AST. */
 	char		td_name[MAXCOMLEN + 1];	/* (*) Thread name. */
 #define	td_endzero td_base_pri
 
 /* Copied during fork1() or thread_sched_upcall(). */
 #define	td_startcopy td_endzero
 	u_char		td_rqindex;	/* (t) Run queue index. */
 	u_char		td_base_pri;	/* (t) Thread base kernel priority. */
 	u_char		td_priority;	/* (t) Thread active priority. */
 	u_char		td_pri_class;	/* (t) Scheduling class. */
 	u_char		td_user_pri;	/* (t) User pri from estcpu and nice. */
 	u_char		td_base_user_pri; /* (t) Base user pri */
 #define	td_endcopy td_pcb
 
 /*
  * Fields that must be manually set in fork1() or thread_sched_upcall()
  * or already have been set in the allocator, constructor, etc.
  */
 	struct pcb	*td_pcb;	/* (k) Kernel VA of pcb and kstack. */
 	enum {
 		TDS_INACTIVE = 0x0,
 		TDS_INHIBITED,
 		TDS_CAN_RUN,
 		TDS_RUNQ,
 		TDS_RUNNING
 	} td_state;			/* (t) thread state */
 	register_t	td_retval[2];	/* (k) Syscall aux returns. */
 	struct callout	td_slpcallout;	/* (h) Callout for sleep. */
 	struct trapframe *td_frame;	/* (k) */
 	struct vm_object *td_kstack_obj;/* (a) Kstack object. */
 	vm_offset_t	td_kstack;	/* (a) Kernel VA of kstack. */
 	int		td_kstack_pages; /* (a) Size of the kstack. */
 	struct vm_object *td_altkstack_obj;/* (a) Alternate kstack object. */
 	vm_offset_t	td_altkstack;	/* (a) Kernel VA of alternate kstack. */
 	int		td_altkstack_pages; /* (a) Size of alternate kstack. */
 	volatile u_int	td_critnest;	/* (k*) Critical section nest level. */
 	struct mdthread td_md;		/* (k) Any machine-dependent fields. */
 	struct td_sched	*td_sched;	/* (*) Scheduler-specific data. */
 	struct kaudit_record	*td_ar;	/* (k) Active audit record, if any. */
 	int		td_syscalls;	/* per-thread syscall count (used by NFS :)) */
 	struct lpohead	td_lprof[2];	/* (a) lock profiling objects. */
 };
 
 struct mtx *thread_lock_block(struct thread *);
 void thread_lock_unblock(struct thread *, struct mtx *);
 void thread_lock_set(struct thread *, struct mtx *);
 #define	THREAD_LOCK_ASSERT(td, type)					\
 do {									\
 	struct mtx *__m = (td)->td_lock;				\
 	if (__m != &blocked_lock)					\
 		mtx_assert(__m, (type));				\
 } while (0)
 
 #ifdef INVARIANTS
 #define	THREAD_LOCKPTR_ASSERT(td, lock)					\
 do {									\
 	struct mtx *__m = (td)->td_lock;				\
 	KASSERT((__m == &blocked_lock || __m == (lock)),		\
 	    ("Thread %p lock %p does not match %p", td, __m, (lock)));	\
 } while (0)
 #else
 #define	THREAD_LOCKPTR_ASSERT(td, lock)
 #endif
 
 /*
  * Flags kept in td_flags:
  * To change these you MUST have the scheduler lock.
  */
 #define	TDF_BORROWING	0x00000001 /* Thread is borrowing pri from another. */
 #define	TDF_INPANIC	0x00000002 /* Caused a panic, let it drive crashdump. */
 #define	TDF_INMEM	0x00000004 /* Thread's stack is in memory. */
 #define	TDF_SINTR	0x00000008 /* Sleep is interruptible. */
 #define	TDF_TIMEOUT	0x00000010 /* Timing out during sleep. */
 #define	TDF_IDLETD	0x00000020 /* This is a per-CPU idle thread. */
 #define	TDF_CANSWAP	0x00000040 /* Thread can be swapped. */
 #define	TDF_SLEEPABORT	0x00000080 /* sleepq_abort was called. */
 #define	TDF_KTH_SUSP	0x00000100 /* kthread is suspended */
 #define	TDF_UBORROWING	0x00000200 /* Thread is borrowing user pri. */
 #define	TDF_BOUNDARY	0x00000400 /* Thread suspended at user boundary */
 #define	TDF_ASTPENDING	0x00000800 /* Thread has some asynchronous events. */
 #define	TDF_TIMOFAIL	0x00001000 /* Timeout from sleep after we were awake. */
 #define	TDF_UNUSED2000	0x00002000 /* --available-- */
 #define	TDF_UPIBLOCKED	0x00004000 /* Thread blocked on user PI mutex. */
 #define	TDF_NEEDSUSPCHK	0x00008000 /* Thread may need to suspend. */
 #define	TDF_NEEDRESCHED	0x00010000 /* Thread needs to yield. */
 #define	TDF_NEEDSIGCHK	0x00020000 /* Thread may need signal delivery. */
 #define	TDF_XSIG	0x00040000 /* Thread is exchanging signal under trace */
 #define	TDF_UNUSED19	0x00080000 /* Thread is sleeping on a umtx. */
 #define	TDF_THRWAKEUP	0x00100000 /* Libthr thread must not suspend itself. */
 #define	TDF_DBSUSPEND	0x00200000 /* Thread is suspended by debugger */
 #define	TDF_SWAPINREQ	0x00400000 /* Swapin request due to wakeup. */
 #define	TDF_UNUSED23	0x00800000 /* --available-- */
 #define	TDF_SCHED0	0x01000000 /* Reserved for scheduler private use */
 #define	TDF_SCHED1	0x02000000 /* Reserved for scheduler private use */
 #define	TDF_SCHED2	0x04000000 /* Reserved for scheduler private use */
 #define	TDF_SCHED3	0x08000000 /* Reserved for scheduler private use */
 #define	TDF_ALRMPEND	0x10000000 /* Pending SIGVTALRM needs to be posted. */
 #define	TDF_PROFPEND	0x20000000 /* Pending SIGPROF needs to be posted. */
 #define	TDF_MACPEND	0x40000000 /* AST-based MAC event pending. */
 
 /*
  * "Private" flags kept in td_pflags:
  * These are only written by curthread and thus need no locking.
  */
 #define	TDP_OLDMASK	0x00000001 /* Need to restore mask after suspend. */
 #define	TDP_INKTR	0x00000002 /* Thread is currently in KTR code. */
 #define	TDP_INKTRACE	0x00000004 /* Thread is currently in KTRACE code. */
 #define	TDP_UNUSED8	0x00000008 /* available */
 #define	TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */
 #define	TDP_ALTSTACK	0x00000020 /* Have alternate signal stack. */
 #define	TDP_DEADLKTREAT	0x00000040 /* Lock aquisition - deadlock treatment. */
 #define	TDP_UNUSED80	0x00000080 /* available. */
 #define	TDP_NOSLEEPING	0x00000100 /* Thread is not allowed to sleep on a sq. */
 #define	TDP_OWEUPC	0x00000200 /* Call addupc() at next AST. */
 #define	TDP_ITHREAD	0x00000400 /* Thread is an interrupt thread. */
 #define	TDP_UNUSED800	0x00000800 /* available. */
 #define	TDP_SCHED1	0x00001000 /* Reserved for scheduler private use */
 #define	TDP_SCHED2	0x00002000 /* Reserved for scheduler private use */
 #define	TDP_SCHED3	0x00004000 /* Reserved for scheduler private use */
 #define	TDP_SCHED4	0x00008000 /* Reserved for scheduler private use */
 #define	TDP_GEOM	0x00010000 /* Settle GEOM before finishing syscall */
 #define	TDP_SOFTDEP	0x00020000 /* Stuck processing softdep worklist */
 #define	TDP_NORUNNINGBUF 0x00040000 /* Ignore runningbufspace check */
 #define	TDP_WAKEUP	0x00080000 /* Don't sleep in umtx cond_wait */
 #define	TDP_INBDFLUSH	0x00100000 /* Already in BO_BDFLUSH, do not recurse */
 #define	TDP_KTHREAD	0x00200000 /* This is an official kernel thread */
 #define	TDP_CALLCHAIN	0x00400000 /* Capture thread's callchain */
 
 /*
  * Reasons that the current thread can not be run yet.
  * More than one may apply.
  */
 #define	TDI_SUSPENDED	0x0001	/* On suspension queue. */
 #define	TDI_SLEEPING	0x0002	/* Actually asleep! (tricky). */
 #define	TDI_SWAPPED	0x0004	/* Stack not in mem.  Bad juju if run. */
 #define	TDI_LOCK	0x0008	/* Stopped on a lock. */
 #define	TDI_IWAIT	0x0010	/* Awaiting interrupt. */
 
 #define	TD_IS_SLEEPING(td)	((td)->td_inhibitors & TDI_SLEEPING)
 #define	TD_ON_SLEEPQ(td)	((td)->td_wchan != NULL)
 #define	TD_IS_SUSPENDED(td)	((td)->td_inhibitors & TDI_SUSPENDED)
 #define	TD_IS_SWAPPED(td)	((td)->td_inhibitors & TDI_SWAPPED)
 #define	TD_ON_LOCK(td)		((td)->td_inhibitors & TDI_LOCK)
 #define	TD_AWAITING_INTR(td)	((td)->td_inhibitors & TDI_IWAIT)
 #define	TD_IS_RUNNING(td)	((td)->td_state == TDS_RUNNING)
 #define	TD_ON_RUNQ(td)		((td)->td_state == TDS_RUNQ)
 #define	TD_CAN_RUN(td)		((td)->td_state == TDS_CAN_RUN)
 #define	TD_IS_INHIBITED(td)	((td)->td_state == TDS_INHIBITED)
 #define	TD_ON_UPILOCK(td)	((td)->td_flags & TDF_UPIBLOCKED)
 #define TD_IS_IDLETHREAD(td)	((td)->td_flags & TDF_IDLETD)
 
 
 #define	TD_SET_INHIB(td, inhib) do {			\
 	(td)->td_state = TDS_INHIBITED;			\
 	(td)->td_inhibitors |= (inhib);			\
 } while (0)
 
 #define	TD_CLR_INHIB(td, inhib) do {			\
 	if (((td)->td_inhibitors & (inhib)) &&		\
 	    (((td)->td_inhibitors &= ~(inhib)) == 0))	\
 		(td)->td_state = TDS_CAN_RUN;		\
 } while (0)
 
 #define	TD_SET_SLEEPING(td)	TD_SET_INHIB((td), TDI_SLEEPING)
 #define	TD_SET_SWAPPED(td)	TD_SET_INHIB((td), TDI_SWAPPED)
 #define	TD_SET_LOCK(td)		TD_SET_INHIB((td), TDI_LOCK)
 #define	TD_SET_SUSPENDED(td)	TD_SET_INHIB((td), TDI_SUSPENDED)
 #define	TD_SET_IWAIT(td)	TD_SET_INHIB((td), TDI_IWAIT)
 #define	TD_SET_EXITING(td)	TD_SET_INHIB((td), TDI_EXITING)
 
 #define	TD_CLR_SLEEPING(td)	TD_CLR_INHIB((td), TDI_SLEEPING)
 #define	TD_CLR_SWAPPED(td)	TD_CLR_INHIB((td), TDI_SWAPPED)
 #define	TD_CLR_LOCK(td)		TD_CLR_INHIB((td), TDI_LOCK)
 #define	TD_CLR_SUSPENDED(td)	TD_CLR_INHIB((td), TDI_SUSPENDED)
 #define	TD_CLR_IWAIT(td)	TD_CLR_INHIB((td), TDI_IWAIT)
 
 #define	TD_SET_RUNNING(td)	(td)->td_state = TDS_RUNNING
 #define	TD_SET_RUNQ(td)		(td)->td_state = TDS_RUNQ
 #define	TD_SET_CAN_RUN(td)	(td)->td_state = TDS_CAN_RUN
 
 /*
  * XXX: Does this belong in resource.h or resourcevar.h instead?
  * Resource usage extension.  The times in rusage structs in the kernel are
  * never up to date.  The actual times are kept as runtimes and tick counts
  * (with control info in the "previous" times), and are converted when
  * userland asks for rusage info.  Backwards compatibility prevents putting
  * this directly in the user-visible rusage struct.
  *
  * Locking: (cj) means (j) for p_rux and (c) for p_crux.
  */
 struct rusage_ext {
 	u_int64_t	rux_runtime;    /* (cj) Real time. */
 	u_int64_t	rux_uticks;     /* (cj) Statclock hits in user mode. */
 	u_int64_t	rux_sticks;     /* (cj) Statclock hits in sys mode. */
 	u_int64_t	rux_iticks;     /* (cj) Statclock hits in intr mode. */
 	u_int64_t	rux_uu;         /* (c) Previous user time in usec. */
 	u_int64_t	rux_su;         /* (c) Previous sys time in usec. */
 	u_int64_t	rux_tu;         /* (c) Previous total time in usec. */
 };
 
 /*
  * Process structure.
  */
 struct proc {
 	LIST_ENTRY(proc) p_list;	/* (d) List of all processes. */
 	TAILQ_HEAD(, thread) p_threads;	/* (c) all threads. */
 	struct mtx	p_slock;	/* process spin lock */
 	struct ucred	*p_ucred;	/* (c) Process owner's identity. */
 	struct filedesc	*p_fd;		/* (b) Open files. */
 	struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */
 	struct pstats	*p_stats;	/* (b) Accounting/statistics (CPU). */
 	struct plimit	*p_limit;	/* (c) Process limits. */
 	struct callout	p_limco;	/* (c) Limit callout handle */
 	struct sigacts	*p_sigacts;	/* (x) Signal actions, state (CPU). */
 
 	/*
 	 * The following don't make too much sense.
 	 * See the td_ or ke_ versions of the same flags.
 	 */
 	int		p_flag;		/* (c) P_* flags. */
 	enum {
 		PRS_NEW = 0,		/* In creation */
 		PRS_NORMAL,		/* threads can be run. */
 		PRS_ZOMBIE
 	} p_state;			/* (j/c) S* process status. */
 	pid_t		p_pid;		/* (b) Process identifier. */
 	LIST_ENTRY(proc) p_hash;	/* (d) Hash chain. */
 	LIST_ENTRY(proc) p_pglist;	/* (g + e) List of processes in pgrp. */
 	struct proc	*p_pptr;	/* (c + e) Pointer to parent process. */
 	LIST_ENTRY(proc) p_sibling;	/* (e) List of sibling processes. */
 	LIST_HEAD(, proc) p_children;	/* (e) Pointer to list of children. */
 	struct mtx	p_mtx;		/* (n) Lock for this struct. */
 	struct ksiginfo *p_ksi;	/* Locked by parent proc lock */
 	sigqueue_t	p_sigqueue;	/* (c) Sigs not delivered to a td. */
 #define p_siglist	p_sigqueue.sq_signals
 
 /* The following fields are all zeroed upon creation in fork. */
 #define	p_startzero	p_oppid
 	pid_t		p_oppid;	/* (c + e) Save ppid in ptrace. XXX */
 	struct vmspace	*p_vmspace;	/* (b) Address space. */
 	u_int		p_swtick;	/* (c) Tick when swapped in or out. */
 	struct itimerval p_realtimer;	/* (c) Alarm timer. */
 	struct rusage	p_ru;		/* (a) Exit information. */
 	struct rusage_ext p_rux;	/* (cj) Internal resource usage. */
 	struct rusage_ext p_crux;	/* (c) Internal child resource usage. */
 	int		p_profthreads;	/* (c) Num threads in addupc_task. */
 	volatile int	p_exitthreads;	/* (j) Number of threads exiting */
 	int		p_traceflag;	/* (o) Kernel trace points. */
 	struct vnode	*p_tracevp;	/* (c + o) Trace to vnode. */
 	struct ucred	*p_tracecred;	/* (o) Credentials to trace with. */
 	struct vnode	*p_textvp;	/* (b) Vnode of executable. */
 	char		p_lock;		/* (c) Proclock (prevent swap) count. */
 	struct sigiolst	p_sigiolst;	/* (c) List of sigio sources. */
 	int		p_sigparent;	/* (c) Signal to parent on exit. */
 	int		p_sig;		/* (n) For core dump/debugger XXX. */
 	u_long		p_code;		/* (n) For core dump/debugger XXX. */
 	u_int		p_stops;	/* (c) Stop event bitmask. */
 	u_int		p_stype;	/* (c) Stop event type. */
 	char		p_step;		/* (c) Process is stopped. */
 	u_char		p_pfsflags;	/* (c) Procfs flags. */
 	struct nlminfo	*p_nlminfo;	/* (?) Only used by/for lockd. */
 	struct kaioinfo	*p_aioinfo;	/* (c) ASYNC I/O info. */
 	struct thread	*p_singlethread;/* (c + j) If single threading this is it */
 	int		p_suspcount;	/* (j) Num threads in suspended mode. */
 	struct thread	*p_xthread;	/* (c) Trap thread */
 	int		p_boundary_count;/* (c) Num threads at user boundary */
 	int		p_pendingcnt;	/* how many signals are pending */
 	struct itimers	*p_itimers;	/* (c) POSIX interval timers. */
 /* End area that is zeroed on creation. */
 #define	p_endzero	p_magic
 
 /* The following fields are all copied upon creation in fork. */
 #define	p_startcopy	p_endzero
 	u_int		p_magic;	/* (b) Magic number. */
 	int		p_osrel;	/* (x) osreldate for the
 					       binary (from ELF note, if any) */
 	char		p_comm[MAXCOMLEN + 1];	/* (b) Process name. */
 	struct pgrp	*p_pgrp;	/* (c + e) Pointer to process group. */
 	struct sysentvec *p_sysent;	/* (b) Syscall dispatch info. */
 	struct pargs	*p_args;	/* (c) Process arguments. */
 	rlim_t		p_cpulimit;	/* (c) Current CPU limit in seconds. */
 	signed char	p_nice;		/* (c) Process "nice" value. */
+	int		p_fibnum;	/* FIB number of proc's routing domain XXX MRT */
 /* End area that is copied on creation. */
 #define	p_endcopy	p_xstat
 
 	u_short		p_xstat;	/* (c) Exit status; also stop sig. */
 	struct knlist	p_klist;	/* (c) Knotes attached to this proc. */
 	int		p_numthreads;	/* (c) Number of threads. */
 	struct mdproc	p_md;		/* Any machine-dependent fields. */
 	struct callout	p_itcallout;	/* (h + c) Interval timer callout. */
 	u_short		p_acflag;	/* (c) Accounting flags. */
 	struct proc	*p_peers;	/* (r) */
 	struct proc	*p_leader;	/* (b) */
 	void		*p_emuldata;	/* (c) Emulator state data. */
 	struct label	*p_label;	/* (*) Proc (not subject) MAC label. */
 	struct p_sched	*p_sched;	/* (*) Scheduler-specific data. */
 	STAILQ_HEAD(, ktr_request)	p_ktr;	/* (o) KTR event queue. */
 	LIST_HEAD(, mqueue_notifier)	p_mqnotifier; /* (c) mqueue notifiers.*/
 };
 
 #define	p_session	p_pgrp->pg_session
 #define	p_pgid		p_pgrp->pg_id
 
 #define	NOCPU	0xff		/* For when we aren't on a CPU. */
 
 #define	PROC_SLOCK(p)	mtx_lock_spin(&(p)->p_slock)
 #define	PROC_SUNLOCK(p)	mtx_unlock_spin(&(p)->p_slock)
 #define	PROC_SLOCK_ASSERT(p, type)	mtx_assert(&(p)->p_slock, (type))
 
 /* These flags are kept in p_flag. */
 #define	P_ADVLOCK	0x00001	/* Process may hold a POSIX advisory lock. */
 #define	P_CONTROLT	0x00002	/* Has a controlling terminal. */
 #define	P_KTHREAD	0x00004	/* Kernel thread (*). */
 #define	P_NOLOAD	0x00008	/* Ignore during load avg calculations. */
 #define	P_PPWAIT	0x00010	/* Parent is waiting for child to exec/exit. */
 #define	P_PROFIL	0x00020	/* Has started profiling. */
 #define	P_STOPPROF	0x00040	/* Has thread requesting to stop profiling. */
 #define	P_HADTHREADS	0x00080	/* Has had threads (no cleanup shortcuts) */
 #define	P_SUGID		0x00100	/* Had set id privileges since last exec. */
 #define	P_SYSTEM	0x00200	/* System proc: no sigs, stats or swapping. */
 #define	P_SINGLE_EXIT	0x00400	/* Threads suspending should exit, not wait. */
 #define	P_TRACED	0x00800	/* Debugged process being traced. */
 #define	P_WAITED	0x01000	/* Someone is waiting for us. */
 #define	P_WEXIT		0x02000	/* Working on exiting. */
 #define	P_EXEC		0x04000	/* Process called exec. */
 #define	P_UNUSED8000	0x08000	/* available. */
 #define	P_CONTINUED	0x10000	/* Proc has continued from a stopped state. */
 #define	P_STOPPED_SIG	0x20000	/* Stopped due to SIGSTOP/SIGTSTP. */
 #define	P_STOPPED_TRACE	0x40000	/* Stopped because of tracing. */
 #define	P_STOPPED_SINGLE 0x80000 /* Only 1 thread can continue (not to user). */
 #define	P_PROTECTED	0x100000 /* Do not kill on memory overcommit. */
 #define	P_SIGEVENT	0x200000 /* Process pending signals changed. */
 #define	P_SINGLE_BOUNDARY 0x400000 /* Threads should suspend at user boundary. */
 #define	P_HWPMC		0x800000 /* Process is using HWPMCs */
 
 #define	P_JAILED	0x1000000 /* Process is in jail. */
 #define	P_INEXEC	0x4000000 /* Process is in execve(). */
 #define	P_STATCHILD	0x8000000 /* Child process stopped or exited. */
 #define	P_INMEM		0x10000000 /* Loaded into memory. */
 #define	P_SWAPPINGOUT	0x20000000 /* Process is being swapped out. */
 #define	P_SWAPPINGIN	0x40000000 /* Process is being swapped in. */
 
 #define	P_STOPPED	(P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE)
 #define	P_SHOULDSTOP(p)	((p)->p_flag & P_STOPPED)
 
 /*
  * These were process status values (p_stat), now they are only used in
  * legacy conversion code.
  */
 #define	SIDL	1		/* Process being created by fork. */
 #define	SRUN	2		/* Currently runnable. */
 #define	SSLEEP	3		/* Sleeping on an address. */
 #define	SSTOP	4		/* Process debugging or suspension. */
 #define	SZOMB	5		/* Awaiting collection by parent. */
 #define	SWAIT	6		/* Waiting for interrupt. */
 #define	SLOCK	7		/* Blocked on a lock. */
 
 #define	P_MAGIC		0xbeefface
 
 #ifdef _KERNEL
 
 /* Types and flags for mi_switch(). */
 #define	SW_TYPE_MASK		0xff	/* First 8 bits are switch type */
 #define	SWT_NONE		0	/* Unspecified switch. */
 #define	SWT_PREEMPT		1	/* Switching due to preemption. */
 #define	SWT_OWEPREEMPT		2	/* Switching due to opepreempt. */
 #define	SWT_TURNSTILE		3	/* Turnstile contention. */
 #define	SWT_SLEEPQ		4	/* Sleepq wait. */
 #define	SWT_SLEEPQTIMO		5	/* Sleepq timeout wait. */
 #define	SWT_RELINQUISH		6	/* yield call. */
 #define	SWT_NEEDRESCHED		7	/* NEEDRESCHED was set. */
 #define	SWT_IDLE		8	/* Switching from the idle thread. */
 #define	SWT_IWAIT		9	/* Waiting for interrupts. */
 #define	SWT_SUSPEND		10	/* Thread suspended. */
 #define	SWT_REMOTEPREEMPT	11	/* Remote processor preempted. */
 #define	SWT_REMOTEWAKEIDLE	12	/* Remote processor preempted idle. */
 #define	SWT_COUNT		13	/* Number of switch types. */
 /* Flags */
 #define	SW_VOL		0x0100		/* Voluntary switch. */
 #define	SW_INVOL	0x0200		/* Involuntary switch. */
 #define SW_PREEMPT	0x0400		/* The invol switch is a preemption */
 
 /* How values for thread_single(). */
 #define	SINGLE_NO_EXIT	0
 #define	SINGLE_EXIT	1
 #define	SINGLE_BOUNDARY	2
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_PARGS);
 MALLOC_DECLARE(M_PGRP);
 MALLOC_DECLARE(M_SESSION);
 MALLOC_DECLARE(M_SUBPROC);
 MALLOC_DECLARE(M_ZOMBIE);
 #endif
 
 #define	FOREACH_PROC_IN_SYSTEM(p)					\
 	LIST_FOREACH((p), &allproc, p_list)
 #define	FOREACH_THREAD_IN_PROC(p, td)					\
 	TAILQ_FOREACH((td), &(p)->p_threads, td_plist)
 
 #define	FIRST_THREAD_IN_PROC(p)	TAILQ_FIRST(&(p)->p_threads)
 
 /*
  * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t,
  * as it is used to represent "no process group".
  */
 #define	PID_MAX		99999
 #define	NO_PID		100000
 
 #define	SESS_LEADER(p)	((p)->p_session->s_leader == (p))
 #define	SESSHOLD(s)	((s)->s_count++)
 #define	SESSRELE(s)	sessrele(s)
 
 
 #define	STOPEVENT(p, e, v) do {						\
 	if ((p)->p_stops & (e))	{					\
 		PROC_LOCK(p);						\
 		stopevent((p), (e), (v));				\
 		PROC_UNLOCK(p);						\
 	}								\
 } while (0)
 #define	_STOPEVENT(p, e, v) do {					\
 	PROC_LOCK_ASSERT(p, MA_OWNED);					\
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &p->p_mtx.lock_object, \
  	    "checking stopevent %d", (e));				\
 	if ((p)->p_stops & (e))						\
 		stopevent((p), (e), (v));				\
 } while (0)
 
 /* Lock and unlock a process. */
 #define	PROC_LOCK(p)	mtx_lock(&(p)->p_mtx)
 #define	PROC_TRYLOCK(p)	mtx_trylock(&(p)->p_mtx)
 #define	PROC_UNLOCK(p)	mtx_unlock(&(p)->p_mtx)
 #define	PROC_LOCKED(p)	mtx_owned(&(p)->p_mtx)
 #define	PROC_LOCK_ASSERT(p, type)	mtx_assert(&(p)->p_mtx, (type))
 
 /* Lock and unlock a process group. */
 #define	PGRP_LOCK(pg)	mtx_lock(&(pg)->pg_mtx)
 #define	PGRP_UNLOCK(pg)	mtx_unlock(&(pg)->pg_mtx)
 #define	PGRP_LOCKED(pg)	mtx_owned(&(pg)->pg_mtx)
 #define	PGRP_LOCK_ASSERT(pg, type)	mtx_assert(&(pg)->pg_mtx, (type))
 
 #define	PGRP_LOCK_PGSIGNAL(pg) do {					\
 	if ((pg) != NULL)						\
 		PGRP_LOCK(pg);						\
 } while (0)
 #define	PGRP_UNLOCK_PGSIGNAL(pg) do {					\
 	if ((pg) != NULL)						\
 		PGRP_UNLOCK(pg);					\
 } while (0)
 
 /* Lock and unlock a session. */
 #define	SESS_LOCK(s)	mtx_lock(&(s)->s_mtx)
 #define	SESS_UNLOCK(s)	mtx_unlock(&(s)->s_mtx)
 #define	SESS_LOCKED(s)	mtx_owned(&(s)->s_mtx)
 #define	SESS_LOCK_ASSERT(s, type)	mtx_assert(&(s)->s_mtx, (type))
 
 /* Hold process U-area in memory, normally for ptrace/procfs work. */
 #define	PHOLD(p) do {							\
 	PROC_LOCK(p);							\
 	_PHOLD(p);							\
 	PROC_UNLOCK(p);							\
 } while (0)
 #define	_PHOLD(p) do {							\
 	PROC_LOCK_ASSERT((p), MA_OWNED);				\
 	KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc,		\
 	    ("PHOLD of exiting process"));				\
 	(p)->p_lock++;							\
 	if (((p)->p_flag & P_INMEM) == 0)				\
 		faultin((p));						\
 } while (0)
 #define PROC_ASSERT_HELD(p) do {					\
 	KASSERT((p)->p_lock > 0, ("process not held"));			\
 } while (0)
 
 #define	PRELE(p) do {							\
 	PROC_LOCK((p));							\
 	_PRELE((p));							\
 	PROC_UNLOCK((p));						\
 } while (0)
 #define	_PRELE(p) do {							\
 	PROC_LOCK_ASSERT((p), MA_OWNED);				\
 	(--(p)->p_lock);						\
 	if (((p)->p_flag & P_WEXIT) && (p)->p_lock == 0)		\
 		wakeup(&(p)->p_lock);					\
 } while (0)
 #define PROC_ASSERT_NOT_HELD(p) do {					\
 	KASSERT((p)->p_lock == 0, ("process held"));			\
 } while (0)
 
 /* Check whether a thread is safe to be swapped out. */
 #define	thread_safetoswapout(td)	((td)->td_flags & TDF_CANSWAP)
 
 /* Control whether or not it is safe for curthread to sleep. */
 #define	THREAD_NO_SLEEPING() do {					\
 	KASSERT(!(curthread->td_pflags & TDP_NOSLEEPING),		\
 	    ("nested no sleeping"));					\
 	curthread->td_pflags |= TDP_NOSLEEPING;				\
 } while (0)
 
 #define	THREAD_SLEEPING_OK() do {					\
 	KASSERT((curthread->td_pflags & TDP_NOSLEEPING),		\
 	    ("nested sleeping ok"));					\
 	curthread->td_pflags &= ~TDP_NOSLEEPING;			\
 } while (0)
 
 #define	PIDHASH(pid)	(&pidhashtbl[(pid) & pidhash])
 extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
 extern u_long pidhash;
 
 #define	PGRPHASH(pgid)	(&pgrphashtbl[(pgid) & pgrphash])
 extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl;
 extern u_long pgrphash;
 
 extern struct sx allproc_lock;
 extern struct sx proctree_lock;
 extern struct mtx ppeers_lock;
 extern struct proc proc0;		/* Process slot for swapper. */
 extern struct thread thread0;		/* Primary thread in proc0. */
 extern struct vmspace vmspace0;		/* VM space for proc0. */
 extern int hogticks;			/* Limit on kernel cpu hogs. */
 extern int lastpid;
 extern int nprocs, maxproc;		/* Current and max number of procs. */
 extern int maxprocperuid;		/* Max procs per uid. */
 extern u_long ps_arg_cache_limit;
 
 LIST_HEAD(proclist, proc);
 TAILQ_HEAD(procqueue, proc);
 TAILQ_HEAD(threadqueue, thread);
 extern struct proclist allproc;		/* List of all processes. */
 extern struct proclist zombproc;	/* List of zombie processes. */
 extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */
 
 extern struct uma_zone *proc_zone;
 
 struct	proc *pfind(pid_t);		/* Find process by id. */
 struct	pgrp *pgfind(pid_t);		/* Find process group by id. */
 struct	proc *zpfind(pid_t);		/* Find zombie process by id. */
 
 void	ast(struct trapframe *framep);
 struct	thread *choosethread(void);
 int	cr_cansignal(struct ucred *cred, struct proc *proc, int signum);
 int	enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp,
 	    struct session *sess);
 int	enterthispgrp(struct proc *p, struct pgrp *pgrp);
 void	faultin(struct proc *p);
 void	fixjobc(struct proc *p, struct pgrp *pgrp, int entering);
 int	fork1(struct thread *, int, int, struct proc **);
 void	fork_exit(void (*)(void *, struct trapframe *), void *,
 	    struct trapframe *);
 void	fork_return(struct thread *, struct trapframe *);
 int	inferior(struct proc *p);
 void 	kick_proc0(void);
 int	leavepgrp(struct proc *p);
 int	maybe_preempt(struct thread *td);
 void	mi_switch(int flags, struct thread *newtd);
 int	p_candebug(struct thread *td, struct proc *p);
 int	p_cansee(struct thread *td, struct proc *p);
 int	p_cansched(struct thread *td, struct proc *p);
 int	p_cansignal(struct thread *td, struct proc *p, int signum);
 int	p_canwait(struct thread *td, struct proc *p);
 struct	pargs *pargs_alloc(int len);
 void	pargs_drop(struct pargs *pa);
 void	pargs_free(struct pargs *pa);
 void	pargs_hold(struct pargs *pa);
 void	procinit(void);
 void	proc_linkup0(struct proc *p, struct thread *td);
 void	proc_linkup(struct proc *p, struct thread *td);
 void	proc_reparent(struct proc *child, struct proc *newparent);
 struct	pstats *pstats_alloc(void);
 void	pstats_fork(struct pstats *src, struct pstats *dst);
 void	pstats_free(struct pstats *ps);
 int	securelevel_ge(struct ucred *cr, int level);
 int	securelevel_gt(struct ucred *cr, int level);
 void	sessrele(struct session *);
 void	setrunnable(struct thread *);
 void	setsugid(struct proc *p);
 int	sigonstack(size_t sp);
 void	sleepinit(void);
 void	stopevent(struct proc *, u_int, u_int);
 void	threadinit(void);
 void	cpu_idle(int);
 int	cpu_idle_wakeup(int);
 extern	void (*cpu_idle_hook)(void);	/* Hook to machdep CPU idler. */
 void	cpu_switch(struct thread *, struct thread *, struct mtx *);
 void	cpu_throw(struct thread *, struct thread *) __dead2;
 void	unsleep(struct thread *);
 void	userret(struct thread *, struct trapframe *);
 
 void	cpu_exit(struct thread *);
 void	exit1(struct thread *, int) __dead2;
 void	cpu_fork(struct thread *, struct proc *, struct thread *, int);
 void	cpu_set_fork_handler(struct thread *, void (*)(void *), void *);
 
 void	cpu_set_upcall(struct thread *td, struct thread *td0);
 void	cpu_set_upcall_kse(struct thread *, void (*)(void *), void *,
 	    stack_t *);
 int	cpu_set_user_tls(struct thread *, void *tls_base);
 void	cpu_thread_alloc(struct thread *);
 void	cpu_thread_clean(struct thread *);
 void	cpu_thread_exit(struct thread *);
 void	cpu_thread_free(struct thread *);
 void	cpu_thread_swapin(struct thread *);
 void	cpu_thread_swapout(struct thread *);
 struct	thread *thread_alloc(void);
 void	thread_exit(void) __dead2;
 void	thread_free(struct thread *td);
 void	thread_link(struct thread *td, struct proc *p);
 void	thread_reap(void);
 int	thread_single(int how);
 void	thread_single_end(void);
 void	thread_stash(struct thread *td);
 void	thread_stopped(struct proc *p);
 void	childproc_stopped(struct proc *child, int reason);
 void	childproc_continued(struct proc *child);
 void	childproc_exited(struct proc *child);
 int	thread_suspend_check(int how);
 void	thread_suspend_switch(struct thread *);
 void	thread_suspend_one(struct thread *td);
 void	thread_unlink(struct thread *td);
 void	thread_unsuspend(struct proc *p);
 void	thread_unsuspend_one(struct thread *td);
 void	thread_unthread(struct thread *td);
 void	thread_wait(struct proc *p);
 struct thread	*thread_find(struct proc *p, lwpid_t tid);
 void	thr_exit1(void);
 
 #endif	/* _KERNEL */
 
 #endif	/* !_SYS_PROC_H_ */
Index: head/sys/sys/socket.h
===================================================================
--- head/sys/sys/socket.h	(revision 178887)
+++ head/sys/sys/socket.h	(revision 178888)
@@ -1,652 +1,653 @@
 /*-
  * Copyright (c) 1982, 1985, 1986, 1988, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)socket.h	8.4 (Berkeley) 2/21/94
  * $FreeBSD$
  */
 
 #ifndef _SYS_SOCKET_H_
 #define	_SYS_SOCKET_H_
 
 #include <sys/cdefs.h>
 #include <sys/_types.h>
 #include <sys/_iovec.h>
 #define _NO_NAMESPACE_POLLUTION
 #include <machine/param.h>
 #undef _NO_NAMESPACE_POLLUTION
 
 /*
  * Definitions related to sockets: types, address families, options.
  */
 
 /*
  * Data types.
  */
 #if __BSD_VISIBLE
 #ifndef _GID_T_DECLARED
 typedef	__gid_t		gid_t;
 #define	_GID_T_DECLARED
 #endif
 
 #ifndef _OFF_T_DECLARED
 typedef	__off_t		off_t;
 #define	_OFF_T_DECLARED
 #endif
 
 #ifndef _PID_T_DECLARED
 typedef	__pid_t		pid_t;
 #define	_PID_T_DECLARED
 #endif
 #endif
 
 #ifndef _SA_FAMILY_T_DECLARED
 typedef	__sa_family_t	sa_family_t;
 #define	_SA_FAMILY_T_DECLARED
 #endif
 
 #ifndef _SOCKLEN_T_DECLARED
 typedef	__socklen_t	socklen_t;
 #define	_SOCKLEN_T_DECLARED
 #endif
  
 #ifndef _SSIZE_T_DECLARED
 typedef	__ssize_t	ssize_t;
 #define	_SSIZE_T_DECLARED
 #endif
 
 #if __BSD_VISIBLE 
 #ifndef _UID_T_DECLARED
 typedef	__uid_t		uid_t;
 #define	_UID_T_DECLARED
 #endif
 #endif
 
 /*
  * Types
  */
 #define	SOCK_STREAM	1		/* stream socket */
 #define	SOCK_DGRAM	2		/* datagram socket */
 #define	SOCK_RAW	3		/* raw-protocol interface */
 #if __BSD_VISIBLE
 #define	SOCK_RDM	4		/* reliably-delivered message */
 #endif
 #define	SOCK_SEQPACKET	5		/* sequenced packet stream */
 
 /*
  * Option flags per-socket.
  */
 #define	SO_DEBUG	0x0001		/* turn on debugging info recording */
 #define	SO_ACCEPTCONN	0x0002		/* socket has had listen() */
 #define	SO_REUSEADDR	0x0004		/* allow local address reuse */
 #define	SO_KEEPALIVE	0x0008		/* keep connections alive */
 #define	SO_DONTROUTE	0x0010		/* just use interface addresses */
 #define	SO_BROADCAST	0x0020		/* permit sending of broadcast msgs */
 #if __BSD_VISIBLE
 #define	SO_USELOOPBACK	0x0040		/* bypass hardware when possible */
 #endif
 #define	SO_LINGER	0x0080		/* linger on close if data present */
 #define	SO_OOBINLINE	0x0100		/* leave received OOB data in line */
 #if __BSD_VISIBLE
 #define	SO_REUSEPORT	0x0200		/* allow local address & port reuse */
 #define	SO_TIMESTAMP	0x0400		/* timestamp received dgram traffic */
 #define	SO_NOSIGPIPE	0x0800		/* no SIGPIPE from EPIPE */
 #define	SO_ACCEPTFILTER	0x1000		/* there is an accept filter */
 #define	SO_BINTIME	0x2000		/* timestamp received dgram traffic */
 #endif
 #define	SO_NO_OFFLOAD	0x4000		/* socket cannot be offloaded */
 #define	SO_NO_DDP	0x8000		/* disable direct data placement */
 
 /*
  * Additional options, not kept in so_options.
  */
 #define	SO_SNDBUF	0x1001		/* send buffer size */
 #define	SO_RCVBUF	0x1002		/* receive buffer size */
 #define	SO_SNDLOWAT	0x1003		/* send low-water mark */
 #define	SO_RCVLOWAT	0x1004		/* receive low-water mark */
 #define	SO_SNDTIMEO	0x1005		/* send timeout */
 #define	SO_RCVTIMEO	0x1006		/* receive timeout */
 #define	SO_ERROR	0x1007		/* get error status and clear */
 #define	SO_TYPE		0x1008		/* get socket type */
 #if __BSD_VISIBLE
 #define	SO_LABEL	0x1009		/* socket's MAC label */
 #define	SO_PEERLABEL	0x1010		/* socket's peer's MAC label */
 #define	SO_LISTENQLIMIT	0x1011		/* socket's backlog limit */
 #define	SO_LISTENQLEN	0x1012		/* socket's complete queue length */
 #define	SO_LISTENINCQLEN	0x1013	/* socket's incomplete queue length */
+#define	SO_SETFIB	0x1014		/* use this FIB to route */
 #endif
 
 /*
  * Structure used for manipulating linger option.
  */
 struct linger {
 	int	l_onoff;		/* option on/off */
 	int	l_linger;		/* linger time */
 };
 
 #if __BSD_VISIBLE
 struct accept_filter_arg {
 	char	af_name[16];
 	char	af_arg[256-16];
 };
 #endif
 
 /*
  * Level number for (get/set)sockopt() to apply to socket itself.
  */
 #define	SOL_SOCKET	0xffff		/* options for socket level */
 
 /*
  * Address families.
  */
 #define	AF_UNSPEC	0		/* unspecified */
 #if __BSD_VISIBLE
 #define	AF_LOCAL	AF_UNIX		/* local to host (pipes, portals) */
 #endif
 #define	AF_UNIX		1		/* standardized name for AF_LOCAL */
 #define	AF_INET		2		/* internetwork: UDP, TCP, etc. */
 #if __BSD_VISIBLE
 #define	AF_IMPLINK	3		/* arpanet imp addresses */
 #define	AF_PUP		4		/* pup protocols: e.g. BSP */
 #define	AF_CHAOS	5		/* mit CHAOS protocols */
 #define	AF_NETBIOS	6		/* SMB protocols */
 #define	AF_ISO		7		/* ISO protocols */
 #define	AF_OSI		AF_ISO
 #define	AF_ECMA		8		/* European computer manufacturers */
 #define	AF_DATAKIT	9		/* datakit protocols */
 #define	AF_CCITT	10		/* CCITT protocols, X.25 etc */
 #define	AF_SNA		11		/* IBM SNA */
 #define AF_DECnet	12		/* DECnet */
 #define AF_DLI		13		/* DEC Direct data link interface */
 #define AF_LAT		14		/* LAT */
 #define	AF_HYLINK	15		/* NSC Hyperchannel */
 #define	AF_APPLETALK	16		/* Apple Talk */
 #define	AF_ROUTE	17		/* Internal Routing Protocol */
 #define	AF_LINK		18		/* Link layer interface */
 #define	pseudo_AF_XTP	19		/* eXpress Transfer Protocol (no AF) */
 #define	AF_COIP		20		/* connection-oriented IP, aka ST II */
 #define	AF_CNT		21		/* Computer Network Technology */
 #define pseudo_AF_RTIP	22		/* Help Identify RTIP packets */
 #define	AF_IPX		23		/* Novell Internet Protocol */
 #define	AF_SIP		24		/* Simple Internet Protocol */
 #define	pseudo_AF_PIP	25		/* Help Identify PIP packets */
 #define	AF_ISDN		26		/* Integrated Services Digital Network*/
 #define	AF_E164		AF_ISDN		/* CCITT E.164 recommendation */
 #define	pseudo_AF_KEY	27		/* Internal key-management function */
 #endif
 #define	AF_INET6	28		/* IPv6 */
 #if __BSD_VISIBLE
 #define	AF_NATM		29		/* native ATM access */
 #define	AF_ATM		30		/* ATM */
 #define pseudo_AF_HDRCMPLT 31		/* Used by BPF to not rewrite headers
 					 * in interface output routine
 					 */
 #define	AF_NETGRAPH	32		/* Netgraph sockets */
 #define	AF_SLOW		33		/* 802.3ad slow protocol */
 #define	AF_SCLUSTER	34		/* Sitara cluster protocol */
 #define	AF_ARP		35
 #define	AF_BLUETOOTH	36		/* Bluetooth sockets */
 #define	AF_IEEE80211	37		/* IEEE 802.11 protocol */
 #define	AF_MAX		38
 /*
  * When allocating a new AF_ constant, please only allocate
  * even numbered constants for FreeBSD until 134 as odd numbered AF_
  * constants 39-133 are now reserved for vendors.
  */
 #define AF_VENDOR00 39
 #define AF_VENDOR01 41
 #define AF_VENDOR02 43
 #define AF_VENDOR03 45
 #define AF_VENDOR04 47
 #define AF_VENDOR05 49
 #define AF_VENDOR06 51
 #define AF_VENDOR07 53
 #define AF_VENDOR08 55
 #define AF_VENDOR09 57
 #define AF_VENDOR10 59
 #define AF_VENDOR11 61
 #define AF_VENDOR12 63
 #define AF_VENDOR13 65
 #define AF_VENDOR14 67
 #define AF_VENDOR15 69
 #define AF_VENDOR16 71
 #define AF_VENDOR17 73
 #define AF_VENDOR18 75
 #define AF_VENDOR19 77
 #define AF_VENDOR20 79
 #define AF_VENDOR21 81
 #define AF_VENDOR22 83
 #define AF_VENDOR23 85
 #define AF_VENDOR24 87
 #define AF_VENDOR25 89
 #define AF_VENDOR26 91
 #define AF_VENDOR27 93
 #define AF_VENDOR28 95
 #define AF_VENDOR29 97
 #define AF_VENDOR30 99
 #define AF_VENDOR31 101
 #define AF_VENDOR32 103
 #define AF_VENDOR33 105
 #define AF_VENDOR34 107
 #define AF_VENDOR35 109
 #define AF_VENDOR36 111
 #define AF_VENDOR37 113
 #define AF_VENDOR38 115
 #define AF_VENDOR39 117
 #define AF_VENDOR40 119
 #define AF_VENDOR41 121
 #define AF_VENDOR42 123
 #define AF_VENDOR43 125
 #define AF_VENDOR44 127
 #define AF_VENDOR45 129
 #define AF_VENDOR46 131
 #define AF_VENDOR47 133
 #endif
 
 /*
  * Structure used by kernel to store most
  * addresses.
  */
 struct sockaddr {
 	unsigned char	sa_len;		/* total length */
 	sa_family_t	sa_family;	/* address family */
 	char		sa_data[14];	/* actually longer; address value */
 };
 #if __BSD_VISIBLE
 #define	SOCK_MAXADDRLEN	255		/* longest possible addresses */
 
 /*
  * Structure used by kernel to pass protocol
  * information in raw sockets.
  */
 struct sockproto {
 	unsigned short	sp_family;		/* address family */
 	unsigned short	sp_protocol;		/* protocol */
 };
 #endif
 
 #ifndef	_STRUCT_SOCKADDR_STORAGE_DECLARED
 /*
  * RFC 2553: protocol-independent placeholder for socket addresses
  */
 #define	_SS_MAXSIZE	128U
 #define	_SS_ALIGNSIZE	(sizeof(__int64_t))
 #define	_SS_PAD1SIZE	(_SS_ALIGNSIZE - sizeof(unsigned char) - \
 			    sizeof(sa_family_t))
 #define	_SS_PAD2SIZE	(_SS_MAXSIZE - sizeof(unsigned char) - \
 			    sizeof(sa_family_t) - _SS_PAD1SIZE - _SS_ALIGNSIZE)
 
 struct sockaddr_storage {
 	unsigned char	ss_len;		/* address length */
 	sa_family_t	ss_family;	/* address family */
 	char		__ss_pad1[_SS_PAD1SIZE];
 	__int64_t	__ss_align;	/* force desired struct alignment */
 	char		__ss_pad2[_SS_PAD2SIZE];
 };
 #define	_STRUCT_SOCKADDR_STORAGE_DECLARED
 #endif
 
 #if __BSD_VISIBLE
 /*
  * Protocol families, same as address families for now.
  */
 #define	PF_UNSPEC	AF_UNSPEC
 #define	PF_LOCAL	AF_LOCAL
 #define	PF_UNIX		PF_LOCAL	/* backward compatibility */
 #define	PF_INET		AF_INET
 #define	PF_IMPLINK	AF_IMPLINK
 #define	PF_PUP		AF_PUP
 #define	PF_CHAOS	AF_CHAOS
 #define	PF_NETBIOS	AF_NETBIOS
 #define	PF_ISO		AF_ISO
 #define	PF_OSI		AF_ISO
 #define	PF_ECMA		AF_ECMA
 #define	PF_DATAKIT	AF_DATAKIT
 #define	PF_CCITT	AF_CCITT
 #define	PF_SNA		AF_SNA
 #define PF_DECnet	AF_DECnet
 #define PF_DLI		AF_DLI
 #define PF_LAT		AF_LAT
 #define	PF_HYLINK	AF_HYLINK
 #define	PF_APPLETALK	AF_APPLETALK
 #define	PF_ROUTE	AF_ROUTE
 #define	PF_LINK		AF_LINK
 #define	PF_XTP		pseudo_AF_XTP	/* really just proto family, no AF */
 #define	PF_COIP		AF_COIP
 #define	PF_CNT		AF_CNT
 #define	PF_SIP		AF_SIP
 #define	PF_IPX		AF_IPX
 #define PF_RTIP		pseudo_AF_RTIP	/* same format as AF_INET */
 #define PF_PIP		pseudo_AF_PIP
 #define	PF_ISDN		AF_ISDN
 #define	PF_KEY		pseudo_AF_KEY
 #define	PF_INET6	AF_INET6
 #define	PF_NATM		AF_NATM
 #define	PF_ATM		AF_ATM
 #define	PF_NETGRAPH	AF_NETGRAPH
 #define	PF_SLOW		AF_SLOW
 #define PF_SCLUSTER	AF_SCLUSTER
 #define	PF_ARP		AF_ARP
 #define	PF_BLUETOOTH	AF_BLUETOOTH
 
 #define	PF_MAX		AF_MAX
 
 /*
  * Definitions for network related sysctl, CTL_NET.
  *
  * Second level is protocol family.
  * Third level is protocol number.
  *
  * Further levels are defined by the individual families below.
  */
 #define NET_MAXID	AF_MAX
 
 #define CTL_NET_NAMES { \
 	{ 0, 0 }, \
 	{ "unix", CTLTYPE_NODE }, \
 	{ "inet", CTLTYPE_NODE }, \
 	{ "implink", CTLTYPE_NODE }, \
 	{ "pup", CTLTYPE_NODE }, \
 	{ "chaos", CTLTYPE_NODE }, \
 	{ "xerox_ns", CTLTYPE_NODE }, \
 	{ "iso", CTLTYPE_NODE }, \
 	{ "emca", CTLTYPE_NODE }, \
 	{ "datakit", CTLTYPE_NODE }, \
 	{ "ccitt", CTLTYPE_NODE }, \
 	{ "ibm_sna", CTLTYPE_NODE }, \
 	{ "decnet", CTLTYPE_NODE }, \
 	{ "dec_dli", CTLTYPE_NODE }, \
 	{ "lat", CTLTYPE_NODE }, \
 	{ "hylink", CTLTYPE_NODE }, \
 	{ "appletalk", CTLTYPE_NODE }, \
 	{ "route", CTLTYPE_NODE }, \
 	{ "link_layer", CTLTYPE_NODE }, \
 	{ "xtp", CTLTYPE_NODE }, \
 	{ "coip", CTLTYPE_NODE }, \
 	{ "cnt", CTLTYPE_NODE }, \
 	{ "rtip", CTLTYPE_NODE }, \
 	{ "ipx", CTLTYPE_NODE }, \
 	{ "sip", CTLTYPE_NODE }, \
 	{ "pip", CTLTYPE_NODE }, \
 	{ "isdn", CTLTYPE_NODE }, \
 	{ "key", CTLTYPE_NODE }, \
 	{ "inet6", CTLTYPE_NODE }, \
 	{ "natm", CTLTYPE_NODE }, \
 	{ "atm", CTLTYPE_NODE }, \
 	{ "hdrcomplete", CTLTYPE_NODE }, \
 	{ "netgraph", CTLTYPE_NODE }, \
 	{ "snp", CTLTYPE_NODE }, \
 	{ "scp", CTLTYPE_NODE }, \
 }
 
 /*
  * PF_ROUTE - Routing table
  *
  * Three additional levels are defined:
  *	Fourth: address family, 0 is wildcard
  *	Fifth: type of info, defined below
  *	Sixth: flag(s) to mask with for NET_RT_FLAGS
  */
 #define NET_RT_DUMP	1		/* dump; may limit to a.f. */
 #define NET_RT_FLAGS	2		/* by flags, e.g. RESOLVING */
 #define NET_RT_IFLIST	3		/* survey interface list */
 #define	NET_RT_IFMALIST	4		/* return multicast address list */
 #define	NET_RT_MAXID	5
 
 #define CTL_NET_RT_NAMES { \
 	{ 0, 0 }, \
 	{ "dump", CTLTYPE_STRUCT }, \
 	{ "flags", CTLTYPE_STRUCT }, \
 	{ "iflist", CTLTYPE_STRUCT }, \
 	{ "ifmalist", CTLTYPE_STRUCT }, \
 }
 #endif /* __BSD_VISIBLE */
 
 /*
  * Maximum queue length specifiable by listen.
  */
 #define	SOMAXCONN	128
 
 /*
  * Message header for recvmsg and sendmsg calls.
  * Used value-result for recvmsg, value only for sendmsg.
  */
 struct msghdr {
 	void		*msg_name;		/* optional address */
 	socklen_t	 msg_namelen;		/* size of address */
 	struct iovec	*msg_iov;		/* scatter/gather array */
 	int		 msg_iovlen;		/* # elements in msg_iov */
 	void		*msg_control;		/* ancillary data, see below */
 	socklen_t	 msg_controllen;	/* ancillary data buffer len */
 	int		 msg_flags;		/* flags on received message */
 };
 
 #define	MSG_OOB		0x1		/* process out-of-band data */
 #define	MSG_PEEK	0x2		/* peek at incoming message */
 #define	MSG_DONTROUTE	0x4		/* send without using routing tables */
 #define	MSG_EOR		0x8		/* data completes record */
 #define	MSG_TRUNC	0x10		/* data discarded before delivery */
 #define	MSG_CTRUNC	0x20		/* control data lost before delivery */
 #define	MSG_WAITALL	0x40		/* wait for full request or error */
 #define MSG_NOTIFICATION 0x2000         /* SCTP notification */
 #if __BSD_VISIBLE
 #define	MSG_DONTWAIT	0x80		/* this message should be nonblocking */
 #define	MSG_EOF		0x100		/* data completes connection */
 #define	MSG_NBIO	0x4000		/* FIONBIO mode, used by fifofs */
 #define	MSG_COMPAT      0x8000		/* used in sendit() */
 #endif
 #ifdef _KERNEL
 #define	MSG_SOCALLBCK   0x10000		/* for use by socket callbacks - soreceive (TCP) */
 #endif
 #if __BSD_VISIBLE
 #define	MSG_NOSIGNAL	0x20000		/* do not generate SIGPIPE on EOF */
 #endif
 
 /*
  * Header for ancillary data objects in msg_control buffer.
  * Used for additional information with/about a datagram
  * not expressible by flags.  The format is a sequence
  * of message elements headed by cmsghdr structures.
  */
 struct cmsghdr {
 	socklen_t	cmsg_len;		/* data byte count, including hdr */
 	int		cmsg_level;		/* originating protocol */
 	int		cmsg_type;		/* protocol-specific type */
 /* followed by	u_char  cmsg_data[]; */
 };
 
 #if __BSD_VISIBLE
 /*
  * While we may have more groups than this, the cmsgcred struct must
  * be able to fit in an mbuf, and NGROUPS_MAX is too large to allow
  * this.
 */
 #define CMGROUP_MAX 16
 
 /*
  * Credentials structure, used to verify the identity of a peer
  * process that has sent us a message. This is allocated by the
  * peer process but filled in by the kernel. This prevents the
  * peer from lying about its identity. (Note that cmcred_groups[0]
  * is the effective GID.)
  */
 struct cmsgcred {
 	pid_t	cmcred_pid;		/* PID of sending process */
 	uid_t	cmcred_uid;		/* real UID of sending process */
 	uid_t	cmcred_euid;		/* effective UID of sending process */
 	gid_t	cmcred_gid;		/* real GID of sending process */
 	short	cmcred_ngroups;		/* number or groups */
 	gid_t	cmcred_groups[CMGROUP_MAX];	/* groups */
 };
 
 /*
  * Socket credentials.
  */
 struct sockcred {
 	uid_t	sc_uid;			/* real user id */
 	uid_t	sc_euid;		/* effective user id */
 	gid_t	sc_gid;			/* real group id */
 	gid_t	sc_egid;		/* effective group id */
 	int	sc_ngroups;		/* number of supplemental groups */
 	gid_t	sc_groups[1];		/* variable length */
 };
 
 /*
  * Compute size of a sockcred structure with groups.
  */
 #define	SOCKCREDSIZE(ngrps) \
 	(sizeof(struct sockcred) + (sizeof(gid_t) * ((ngrps) - 1)))
 
 #endif /* __BSD_VISIBLE */
 
 /* given pointer to struct cmsghdr, return pointer to data */
 #define	CMSG_DATA(cmsg)		((unsigned char *)(cmsg) + \
 				 _ALIGN(sizeof(struct cmsghdr)))
 
 /* given pointer to struct cmsghdr, return pointer to next cmsghdr */
 #define	CMSG_NXTHDR(mhdr, cmsg)	\
 	((char *)(cmsg) == NULL ? CMSG_FIRSTHDR(mhdr) : \
 	    ((char *)(cmsg) + _ALIGN(((struct cmsghdr *)(cmsg))->cmsg_len) + \
 	  _ALIGN(sizeof(struct cmsghdr)) > \
 	    (char *)(mhdr)->msg_control + (mhdr)->msg_controllen) ? \
 	    (struct cmsghdr *)0 : \
 	    (struct cmsghdr *)((char *)(cmsg) + \
 	    _ALIGN(((struct cmsghdr *)(cmsg))->cmsg_len)))
 
 /*
  * RFC 2292 requires to check msg_controllen, in case that the kernel returns
  * an empty list for some reasons.
  */
 #define	CMSG_FIRSTHDR(mhdr) \
 	((mhdr)->msg_controllen >= sizeof(struct cmsghdr) ? \
 	 (struct cmsghdr *)(mhdr)->msg_control : \
 	 (struct cmsghdr *)NULL)
 
 #if __BSD_VISIBLE
 /* RFC 2292 additions */
 #define	CMSG_SPACE(l)		(_ALIGN(sizeof(struct cmsghdr)) + _ALIGN(l))
 #define	CMSG_LEN(l)		(_ALIGN(sizeof(struct cmsghdr)) + (l))
 #endif
 
 #ifdef _KERNEL
 #define	CMSG_ALIGN(n)	_ALIGN(n)
 #endif
 
 /* "Socket"-level control message types: */
 #define	SCM_RIGHTS	0x01		/* access rights (array of int) */
 #if __BSD_VISIBLE
 #define	SCM_TIMESTAMP	0x02		/* timestamp (struct timeval) */
 #define	SCM_CREDS	0x03		/* process creds (struct cmsgcred) */
 #define	SCM_BINTIME	0x04		/* timestamp (struct bintime) */
 #endif
 
 #if __BSD_VISIBLE
 /*
  * 4.3 compat sockaddr, move to compat file later
  */
 struct osockaddr {
 	unsigned short sa_family;	/* address family */
 	char	sa_data[14];		/* up to 14 bytes of direct address */
 };
 
 /*
  * 4.3-compat message header (move to compat file later).
  */
 struct omsghdr {
 	char	*msg_name;		/* optional address */
 	int	msg_namelen;		/* size of address */
 	struct	iovec *msg_iov;		/* scatter/gather array */
 	int	msg_iovlen;		/* # elements in msg_iov */
 	char	*msg_accrights;		/* access rights sent/received */
 	int	msg_accrightslen;
 };
 #endif
 
 /*
  * howto arguments for shutdown(2), specified by Posix.1g.
  */
 #define	SHUT_RD		0		/* shut down the reading side */
 #define	SHUT_WR		1		/* shut down the writing side */
 #define	SHUT_RDWR	2		/* shut down both sides */
 
 /* we cheat and use the SHUT_XX defines for these */
 #define PRU_FLUSH_RD     SHUT_RD
 #define PRU_FLUSH_WR     SHUT_WR
 #define PRU_FLUSH_RDWR   SHUT_RDWR
 
 
 #if __BSD_VISIBLE
 /*
  * sendfile(2) header/trailer struct
  */
 struct sf_hdtr {
 	struct iovec *headers;	/* pointer to an array of header struct iovec's */
 	int hdr_cnt;		/* number of header iovec's */
 	struct iovec *trailers;	/* pointer to an array of trailer struct iovec's */
 	int trl_cnt;		/* number of trailer iovec's */
 };
 
 /*
  * Sendfile-specific flag(s)
  */
 #define	SF_NODISKIO     0x00000001
 #define	SF_MNOWAIT	0x00000002
 #define	SF_SYNC		0x00000004
 #endif
 
 #ifndef	_KERNEL
 
 #include <sys/cdefs.h>
 
 __BEGIN_DECLS
 int	accept(int, struct sockaddr * __restrict, socklen_t * __restrict);
 int	bind(int, const struct sockaddr *, socklen_t);
 int	connect(int, const struct sockaddr *, socklen_t);
 int	getpeername(int, struct sockaddr * __restrict, socklen_t * __restrict);
 int	getsockname(int, struct sockaddr * __restrict, socklen_t * __restrict);
 int	getsockopt(int, int, int, void * __restrict, socklen_t * __restrict);
 int	listen(int, int);
 ssize_t	recv(int, void *, size_t, int);
 ssize_t	recvfrom(int, void *, size_t, int, struct sockaddr * __restrict, socklen_t * __restrict);
 ssize_t	recvmsg(int, struct msghdr *, int);
 ssize_t	send(int, const void *, size_t, int);
 ssize_t	sendto(int, const void *,
 	    size_t, int, const struct sockaddr *, socklen_t);
 ssize_t	sendmsg(int, const struct msghdr *, int);
 #if __BSD_VISIBLE
 int	sendfile(int, int, off_t, size_t, struct sf_hdtr *, off_t *, int);
 #endif
 int	setsockopt(int, int, int, const void *, socklen_t);
 int	shutdown(int, int);
 int	sockatmark(int);
 int	socket(int, int, int);
 int	socketpair(int, int, int, int *);
 __END_DECLS
 
 #endif /* !_KERNEL */
 
 #endif /* !_SYS_SOCKET_H_ */
Index: head/sys/sys/socketvar.h
===================================================================
--- head/sys/sys/socketvar.h	(revision 178887)
+++ head/sys/sys/socketvar.h	(revision 178888)
@@ -1,587 +1,588 @@
 /*-
  * Copyright (c) 1982, 1986, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)socketvar.h	8.3 (Berkeley) 2/19/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_SOCKETVAR_H_
 #define _SYS_SOCKETVAR_H_
 
 #include <sys/queue.h>			/* for TAILQ macros */
 #include <sys/selinfo.h>		/* for struct selinfo */
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_sx.h>
 
 /*
  * Kernel structure per socket.
  * Contains send and receive buffer queues,
  * handle on protocol and pointer to protocol
  * private data and error information.
  */
 typedef	u_quad_t so_gen_t;
 
 /*-
  * Locking key to struct socket:
  * (a) constant after allocation, no locking required.
  * (b) locked by SOCK_LOCK(so).
  * (c) locked by SOCKBUF_LOCK(&so->so_rcv).
  * (d) locked by SOCKBUF_LOCK(&so->so_snd).
  * (e) locked by ACCEPT_LOCK().
  * (f) not locked since integer reads/writes are atomic.
  * (g) used only as a sleep/wakeup address, no value.
  * (h) locked by global mutex so_global_mtx.
  */
 struct socket {
 	int	so_count;		/* (b) reference count */
 	short	so_type;		/* (a) generic type, see socket.h */
 	short	so_options;		/* from socket call, see socket.h */
 	short	so_linger;		/* time to linger while closing */
 	short	so_state;		/* (b) internal state flags SS_* */
 	int	so_qstate;		/* (e) internal state flags SQ_* */
 	void	*so_pcb;		/* protocol control block */
 	struct	protosw *so_proto;	/* (a) protocol handle */
 /*
  * Variables for connection queuing.
  * Socket where accepts occur is so_head in all subsidiary sockets.
  * If so_head is 0, socket is not related to an accept.
  * For head socket so_incomp queues partially completed connections,
  * while so_comp is a queue of connections ready to be accepted.
  * If a connection is aborted and it has so_head set, then
  * it has to be pulled out of either so_incomp or so_comp.
  * We allow connections to queue up based on current queue lengths
  * and limit on number of queued connections for this socket.
  */
 	struct	socket *so_head;	/* (e) back pointer to listen socket */
 	TAILQ_HEAD(, socket) so_incomp;	/* (e) queue of partial unaccepted connections */
 	TAILQ_HEAD(, socket) so_comp;	/* (e) queue of complete unaccepted connections */
 	TAILQ_ENTRY(socket) so_list;	/* (e) list of unaccepted connections */
 	u_short	so_qlen;		/* (e) number of unaccepted connections */
 	u_short	so_incqlen;		/* (e) number of unaccepted incomplete
 					   connections */
 	u_short	so_qlimit;		/* (e) max number queued connections */
 	short	so_timeo;		/* (g) connection timeout */
 	u_short	so_error;		/* (f) error affecting connection */
 	struct	sigio *so_sigio;	/* [sg] information for async I/O or
 					   out of band data (SIGURG) */
 	u_long	so_oobmark;		/* (c) chars to oob mark */
 	TAILQ_HEAD(, aiocblist) so_aiojobq; /* AIO ops waiting on socket */
 /*
  * Variables for socket buffering.
  */
 	struct sockbuf {
 		struct	selinfo sb_sel;	/* process selecting read/write */
 		struct	mtx sb_mtx;	/* sockbuf lock */
 		struct	sx sb_sx;	/* prevent I/O interlacing */
 		short	sb_state;	/* (c/d) socket state on sockbuf */
 #define	sb_startzero	sb_mb
 		struct	mbuf *sb_mb;	/* (c/d) the mbuf chain */
 		struct	mbuf *sb_mbtail; /* (c/d) the last mbuf in the chain */
 		struct	mbuf *sb_lastrecord;	/* (c/d) first mbuf of last
 						 * record in socket buffer */
 		struct	mbuf *sb_sndptr; /* (c/d) pointer into mbuf chain */
 		u_int	sb_sndptroff;	/* (c/d) byte offset of ptr into chain */
 		u_int	sb_cc;		/* (c/d) actual chars in buffer */
 		u_int	sb_hiwat;	/* (c/d) max actual char count */
 		u_int	sb_mbcnt;	/* (c/d) chars of mbufs used */
 		u_int	sb_mbmax;	/* (c/d) max chars of mbufs to use */
 		u_int	sb_ctl;		/* (c/d) non-data chars in buffer */
 		int	sb_lowat;	/* (c/d) low water mark */
 		int	sb_timeo;	/* (c/d) timeout for read/write */
 		short	sb_flags;	/* (c/d) flags, see below */
 	} so_rcv, so_snd;
 /*
  * Default upper bound on the size of a socket buffer.
  */
 #define	SB_MAX		(256*1024)	/* default for max chars in sockbuf */
 /*
  * Constants for sb_flags field of struct sockbuf.
  */
 #define	SB_WAIT		0x04		/* someone is waiting for data/space */
 #define	SB_SEL		0x08		/* someone is selecting */
 #define	SB_ASYNC	0x10		/* ASYNC I/O, need signals */
 #define	SB_UPCALL	0x20		/* someone wants an upcall */
 #define	SB_NOINTR	0x40		/* operations not interruptible */
 #define	SB_AIO		0x80		/* AIO operations queued */
 #define	SB_KNOTE	0x100		/* kernel note attached */
 #define	SB_NOCOALESCE	0x200		/* don't coalesce new data into existing mbufs */
 #define	SB_AUTOSIZE	0x800		/* automatically size socket buffer */
 
 	void	(*so_upcall)(struct socket *, void *, int);
 	void	*so_upcallarg;
 	struct	ucred *so_cred;		/* (a) user credentials */
 	struct	label *so_label;	/* (b) MAC label for socket */
 	struct	label *so_peerlabel;	/* (b) cached MAC label for peer */
 	/* NB: generation count must not be first. */
 	so_gen_t so_gencnt;		/* (h) generation count */
 	void	*so_emuldata;		/* (b) private data for emulators */
  	struct so_accf {
 		struct	accept_filter *so_accept_filter;
 		void	*so_accept_filter_arg;	/* saved filter args */
 		char	*so_accept_filter_str;	/* saved user args */
 	} *so_accf;
+	int so_fibnum;		/* routing domain for this socket */
 };
 
 /*
  * When the sockbuf's mbuf chain is empty (sb_mb is NULL), also clear the
  * sb_mbtail and sb_lastrecord pointers.  A non-empty sockbuf is left
  * untouched.  The argument is evaluated more than once.
  */
 #define SB_EMPTY_FIXUP(sb) do {						\
 	if ((sb)->sb_mb == NULL)					\
 		(sb)->sb_mbtail = (sb)->sb_lastrecord = NULL;		\
 } while (/*CONSTCOND*/0)
 
 /*
  * Global accept mutex to serialize access to accept queues and
  * fields associated with multiple sockets.  This allows us to
  * avoid defining a lock order between listen and accept sockets
  * until such time as it proves to be a good idea.
  */
 extern struct mtx accept_mtx;
 #define	ACCEPT_LOCK_ASSERT()		mtx_assert(&accept_mtx, MA_OWNED)
 #define	ACCEPT_UNLOCK_ASSERT()		mtx_assert(&accept_mtx, MA_NOTOWNED)
 #define	ACCEPT_LOCK()			mtx_lock(&accept_mtx)
 #define	ACCEPT_UNLOCK()			mtx_unlock(&accept_mtx)
 
 /*
  * Per-socket buffer mutex used to protect most fields in the socket
  * buffer.
  */
 #define	SOCKBUF_MTX(_sb)		(&(_sb)->sb_mtx)
 #define	SOCKBUF_LOCK_INIT(_sb, _name) \
 	mtx_init(SOCKBUF_MTX(_sb), _name, NULL, MTX_DEF)
 #define	SOCKBUF_LOCK_DESTROY(_sb)	mtx_destroy(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_LOCK(_sb)		mtx_lock(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_OWNED(_sb)		mtx_owned(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_UNLOCK(_sb)		mtx_unlock(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_LOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
 #define	SOCKBUF_UNLOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)
 
 /*
  * Per-socket mutex: we reuse the receive socket buffer mutex for space
  * efficiency.  This decision should probably be revisited as we optimize
  * locking for the socket code.
  */
 #define	SOCK_MTX(_so)			SOCKBUF_MTX(&(_so)->so_rcv)
 #define	SOCK_LOCK(_so)			SOCKBUF_LOCK(&(_so)->so_rcv)
 #define	SOCK_OWNED(_so)			SOCKBUF_OWNED(&(_so)->so_rcv)
 #define	SOCK_UNLOCK(_so)		SOCKBUF_UNLOCK(&(_so)->so_rcv)
 #define	SOCK_LOCK_ASSERT(_so)		SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv)
 
 /*
  * Socket state bits.
  *
  * Historically, these bits were all kept in the so_state field.  For
  * locking reasons, they are now in multiple fields, as they are
  * locked differently.  so_state maintains basic socket state protected
  * by the socket lock.  so_qstate holds information about the socket
  * accept queues.  Each socket buffer also has a state field holding
  * information relevant to that socket buffer (can't send, rcv).  Many
  * fields will be read without locks to improve performance and avoid
  * lock order issues.  However, this approach must be used with caution.
  */
 #define	SS_NOFDREF		0x0001	/* no file table ref any more */
 #define	SS_ISCONNECTED		0x0002	/* socket connected to a peer */
 #define	SS_ISCONNECTING		0x0004	/* in process of connecting to peer */
 #define	SS_ISDISCONNECTING	0x0008	/* in process of disconnecting */
 #define	SS_NBIO			0x0100	/* non-blocking ops */
 #define	SS_ASYNC		0x0200	/* async i/o notify */
 #define	SS_ISCONFIRMING		0x0400	/* deciding to accept connection req */
 #define	SS_ISDISCONNECTED	0x2000	/* socket disconnected from peer */
 /*
  * Protocols can mark a socket as SS_PROTOREF to indicate that, following
  * pru_detach, they still want the socket to persist, and will free it
  * themselves when they are done.  Protocols should only ever call sofree()
  * following setting this flag in pru_detach(), and never otherwise, as
  * sofree() bypasses socket reference counting.
  */
 #define	SS_PROTOREF		0x4000	/* strong protocol reference */
 
 /*
  * Socket state bits now stored in the socket buffer state field.
  */
 #define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
 #define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
 #define	SBS_RCVATMARK		0x0040	/* at mark on input */
 
 /*
  * Socket state bits stored in so_qstate.
  */
 #define	SQ_INCOMP		0x0800	/* unaccepted, incomplete connection */
 #define	SQ_COMP			0x1000	/* unaccepted, complete connection */
 
 /*
  * Externalized form of struct socket used by the sysctl(3) interface.
  */
 struct xsocket {
 	size_t	xso_len;	/* length of this structure */
 	struct	socket *xso_so;	/* makes a convenient handle sometimes */
 	short	so_type;
 	short	so_options;
 	short	so_linger;
 	short	so_state;
 	caddr_t	so_pcb;		/* another convenient handle */
 	int	xso_protocol;
 	int	xso_family;
 	u_short	so_qlen;
 	u_short	so_incqlen;
 	u_short	so_qlimit;
 	short	so_timeo;
 	u_short	so_error;
 	pid_t	so_pgid;
 	u_long	so_oobmark;
 	struct xsockbuf {
 		u_int	sb_cc;
 		u_int	sb_hiwat;
 		u_int	sb_mbcnt;
 		u_int	sb_mbmax;
 		int	sb_lowat;
 		int	sb_timeo;
 		short	sb_flags;
 	} so_rcv, so_snd;
 	uid_t	so_uid;		/* XXX */
 };
 
 #ifdef _KERNEL
 
 /*
  * Macros for sockets and socket buffering.
  */
 
 /*
  * Flags to sblock().
  */
 #define	SBL_WAIT	0x00000001	/* Wait if not immediately available. */
 #define	SBL_NOINTR	0x00000002	/* Force non-interruptible sleep. */
 #define	SBL_VALID	(SBL_WAIT | SBL_NOINTR)
 
 /*
  * Do we need to notify the other side when I/O is possible?
  * Evaluates to 1 when at least one of SB_WAIT, SB_SEL, SB_ASYNC,
  * SB_UPCALL, SB_AIO or SB_KNOTE is set in sb->sb_flags, and to 0
  * otherwise.
  */
 #define	sb_notify(sb)	(((sb)->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | \
     SB_UPCALL | SB_AIO | SB_KNOTE)) != 0)
 
 /*
  * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
  * This is problematical if the fields are unsigned, as the space might
  * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
  * overflow and return 0.  Should use "lmin" but it doesn't exist now.
  *
  * The value is the smaller of the remaining character budget
  * (sb_hiwat - sb_cc) and the remaining mbuf budget (sb_mbmax - sb_mbcnt).
  * Each difference is converted to int before imin() compares them and the
  * result is widened to long; nothing here clamps a negative result, so
  * callers have to cope with one.
  */
 #define	sbspace(sb) \
     ((long) imin((int)((sb)->sb_hiwat - (sb)->sb_cc), \
 	 (int)((sb)->sb_mbmax - (sb)->sb_mbcnt)))
 
 /*
  * do we have to send all at once on a socket?
  * Yields the PR_ATOMIC bit of the socket's protocol flags: non-zero when
  * the bit is set (the value is not normalized to 1), zero otherwise.
  */
 #define	sosendallatonce(so) \
     ((so)->so_proto->pr_flags & PR_ATOMIC)
 
 /*
  * can we read something from so?
  * True when any of the following holds:
  *  - the receive buffer holds at least sb_lowat characters,
  *  - SBS_CANTRCVMORE is set on the receive buffer,
  *  - the so_comp queue of completed connections is not empty,
  *  - so_error is non-zero.
  */
 #define	soreadable(so) \
     ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \
 	((so)->so_rcv.sb_state & SBS_CANTRCVMORE) || \
 	!TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error)
 
 /*
  * can we write something to so?
  * True when any of the following holds:
  *  - the send buffer has at least sb_lowat of space (see sbspace()) and
  *    the socket is either connected (SS_ISCONNECTED) or its protocol does
  *    not require a connection (PR_CONNREQUIRED clear),
  *  - SBS_CANTSENDMORE is set on the send buffer,
  *  - so_error is non-zero.
  */
 #define	sowriteable(so) \
     ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \
 	(((so)->so_state&SS_ISCONNECTED) || \
 	  ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \
      ((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \
      (so)->so_error)
 
 /*
  * Adjust the counters in sb to reflect the allocation of mbuf m:
  *  - sb_cc grows by the mbuf's m_len;
  *  - sb_ctl grows by m_len too when the mbuf type is neither MT_DATA nor
  *    MT_OOBDATA;
  *  - sb_mbcnt grows by MSIZE, plus the external storage size when M_EXT
  *    is set in m_flags.
  *
  * The body is wrapped in do { } while (0) so that the macro expands to a
  * single statement and can be used in an unbraced if/else.  Both
  * arguments are evaluated several times and must have no side effects.
  */
 #define	sballoc(sb, m) do { \
 	(sb)->sb_cc += (m)->m_len; \
 	if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \
 		(sb)->sb_ctl += (m)->m_len; \
 	(sb)->sb_mbcnt += MSIZE; \
 	if ((m)->m_flags & M_EXT) \
 		(sb)->sb_mbcnt += (m)->m_ext.ext_size; \
 } while (0)
 
 /*
  * Adjust the counters in sb to reflect the freeing of mbuf m; the inverse
  * of sballoc():
  *  - sb_cc shrinks by the mbuf's m_len;
  *  - sb_ctl shrinks by m_len too when the mbuf type is neither MT_DATA
  *    nor MT_OOBDATA;
  *  - sb_mbcnt shrinks by MSIZE, plus the external storage size when
  *    M_EXT is set in m_flags.
  * The cached send pointer is kept consistent as well: if it refers to m
  * it is reset to NULL with a zero offset; after that, a non-zero
  * sb_sndptroff is reduced by m_len.
  *
  * The body is wrapped in do { } while (0) so that the macro expands to a
  * single statement and can be used in an unbraced if/else.  Both
  * arguments are evaluated several times and must have no side effects.
  */
 #define	sbfree(sb, m) do { \
 	(sb)->sb_cc -= (m)->m_len; \
 	if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \
 		(sb)->sb_ctl -= (m)->m_len; \
 	(sb)->sb_mbcnt -= MSIZE; \
 	if ((m)->m_flags & M_EXT) \
 		(sb)->sb_mbcnt -= (m)->m_ext.ext_size; \
 	if ((sb)->sb_sndptr == (m)) { \
 		(sb)->sb_sndptr = NULL; \
 		(sb)->sb_sndptroff = 0; \
 	} \
 	if ((sb)->sb_sndptroff != 0) \
 		(sb)->sb_sndptroff -= (m)->m_len; \
 } while (0)
 
 /*
  * soref()/sorele() reference-count the socket structure.  Holding a
  * reference does not replace an explicit close of the socket, but the
  * structure is freed when the last reference goes away.
  *
  * soref() takes one reference; the socket lock must be held (asserted).
  */
 #define	soref(so) do {							\
 	SOCK_LOCK_ASSERT(so);						\
 	(so)->so_count += 1;						\
 } while (0)
 
 /*
  * Drop one reference on so.  Both the accept lock and the socket lock
  * must be held (asserted).  Panics if the count is already <= 0.  When
  * the count reaches zero the socket is passed to sofree(), and this macro
  * does not unlock anything on that path (presumably sofree() deals with
  * the locks -- confirm there).  Otherwise the socket lock and then the
  * accept lock are released here.
  */
 #define	sorele(so) do {							\
 	ACCEPT_LOCK_ASSERT();						\
 	SOCK_LOCK_ASSERT(so);						\
 	if ((so)->so_count <= 0)					\
 		panic("sorele");					\
 	(so)->so_count -= 1;						\
 	if ((so)->so_count != 0) {					\
 		SOCK_UNLOCK(so);					\
 		ACCEPT_UNLOCK();					\
 	} else								\
 		sofree(so);						\
 } while (0)
 
 /*
  * Free so if nobody holds a reference to it.  Both the accept lock and
  * the socket lock must be held (asserted).  With a zero reference count
  * the socket is passed to sofree(), and this macro does not unlock
  * anything on that path (presumably sofree() deals with the locks --
  * confirm there).  Otherwise the socket lock and then the accept lock are
  * released here and the socket is left alone.
  */
 #define	sotryfree(so) do {						\
 	ACCEPT_LOCK_ASSERT();						\
 	SOCK_LOCK_ASSERT(so);						\
 	if ((so)->so_count != 0) {					\
 		SOCK_UNLOCK(so);					\
 		ACCEPT_UNLOCK();					\
 	} else								\
 		sofree(so);						\
 } while (0)
 
 /*
  * Wakeup on the receive side, entered with the receive sockbuf lock
  * already held (asserted) so that the notify test and the wakeup are not
  * split by a lock drop.  If sb_notify() says somebody is interested the
  * work is handed to sowakeup(), which is then responsible for releasing
  * the lock; if not, the lock is released here.  Code that calls the
  * underlying sowakeup() primitive directly must keep the same semantics.
  */
 #define	sorwakeup_locked(so) do {					\
 	SOCKBUF_LOCK_ASSERT(&(so)->so_rcv);				\
 	if (!sb_notify(&(so)->so_rcv))					\
 		SOCKBUF_UNLOCK(&(so)->so_rcv);				\
 	else								\
 		sowakeup((so), &(so)->so_rcv);				\
 } while (0)
 
 /*
  * Unlocked entry point for a receive-side wakeup: takes the receive
  * sockbuf lock and hands over to sorwakeup_locked().  This macro contains
  * no matching unlock of its own.
  */
 #define	sorwakeup(so) do {						\
 	SOCKBUF_LOCK(&(so)->so_rcv);					\
 	sorwakeup_locked(so);						\
 } while (0)
 
 /*
  * Wakeup on the send side, entered with the send sockbuf lock already
  * held (asserted).  If sb_notify() says somebody is interested the work
  * is handed to sowakeup(); if not, the send sockbuf lock is released
  * here.
  */
 #define	sowwakeup_locked(so) do {					\
 	SOCKBUF_LOCK_ASSERT(&(so)->so_snd);				\
 	if (!sb_notify(&(so)->so_snd))					\
 		SOCKBUF_UNLOCK(&(so)->so_snd);				\
 	else								\
 		sowakeup((so), &(so)->so_snd);				\
 } while (0)
 
 /*
  * Unlocked entry point for a send-side wakeup: takes the send sockbuf
  * lock and hands over to sowwakeup_locked().  This macro contains no
  * matching unlock of its own.
  */
 #define	sowwakeup(so) do {						\
 	SOCKBUF_LOCK(&(so)->so_snd);					\
 	sowwakeup_locked(so);						\
 } while (0)
 
 /*
  * Argument structure for sosetopt et seq.  This is in the KERNEL
  * section because it will never be visible to user code.
  */
 enum sopt_dir { SOPT_GET, SOPT_SET };
 struct sockopt {
 	enum	sopt_dir sopt_dir; /* is this a get or a set? */
 	int	sopt_level;	/* second arg of [gs]etsockopt */
 	int	sopt_name;	/* third arg of [gs]etsockopt */
 	void   *sopt_val;	/* fourth arg of [gs]etsockopt */
 	size_t	sopt_valsize;	/* (almost) fifth arg of [gs]etsockopt */
 	struct	thread *sopt_td; /* calling thread or null if kernel */
 };
 
 struct accept_filter {
 	char	accf_name[16];
 	void	(*accf_callback)
 		(struct socket *so, void *arg, int waitflag);
 	void *	(*accf_create)
 		(struct socket *so, char *arg);
 	void	(*accf_destroy)
 		(struct socket *so);
 	SLIST_ENTRY(accept_filter) accf_next;
 };
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_ACCF);
 MALLOC_DECLARE(M_PCB);
 MALLOC_DECLARE(M_SONAME);
 #endif
 
 extern int	maxsockets;
 extern u_long	sb_max;
 extern struct uma_zone *socket_zone;
 extern so_gen_t so_gencnt;
 
 struct mbuf;
 struct sockaddr;
 struct ucred;
 struct uio;
 
 /*
  * From uipc_socket and friends
  */
 int	do_getopt_accept_filter(struct socket *so, struct sockopt *sopt);
 int	do_setopt_accept_filter(struct socket *so, struct sockopt *sopt);
 int	so_setsockopt(struct socket *so, int level, int optname,
 	    void *optval, size_t optlen);
 int	sockargs(struct mbuf **mp, caddr_t buf, int buflen, int type);
 int	getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len);
 void	sbappend(struct sockbuf *sb, struct mbuf *m);
 void	sbappend_locked(struct sockbuf *sb, struct mbuf *m);
 void	sbappendstream(struct sockbuf *sb, struct mbuf *m);
 void	sbappendstream_locked(struct sockbuf *sb, struct mbuf *m);
 int	sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
 	    struct mbuf *m0, struct mbuf *control);
 int	sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
 	    struct mbuf *m0, struct mbuf *control);
 int	sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
 	    struct mbuf *control);
 int	sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
 	    struct mbuf *control);
 void	sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
 void	sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
 void	sbcheck(struct sockbuf *sb);
 void	sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
 struct mbuf *
 	sbcreatecontrol(caddr_t p, int size, int type, int level);
 void	sbdestroy(struct sockbuf *sb, struct socket *so);
 void	sbdrop(struct sockbuf *sb, int len);
 void	sbdrop_locked(struct sockbuf *sb, int len);
 void	sbdroprecord(struct sockbuf *sb);
 void	sbdroprecord_locked(struct sockbuf *sb);
 void	sbflush(struct sockbuf *sb);
 void	sbflush_locked(struct sockbuf *sb);
 void	sbrelease(struct sockbuf *sb, struct socket *so);
 void	sbrelease_internal(struct sockbuf *sb, struct socket *so);
 void	sbrelease_locked(struct sockbuf *sb, struct socket *so);
 int	sbreserve(struct sockbuf *sb, u_long cc, struct socket *so,
 	    struct thread *td);
 int	sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
 	    struct thread *td);
 struct mbuf *
 	sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff);
 void	sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb);
 int	sbwait(struct sockbuf *sb);
 int	sblock(struct sockbuf *sb, int flags);
 void	sbunlock(struct sockbuf *sb);
 void	soabort(struct socket *so);
 int	soaccept(struct socket *so, struct sockaddr **nam);
 int	socheckuid(struct socket *so, uid_t uid);
 int	sobind(struct socket *so, struct sockaddr *nam, struct thread *td);
 void	socantrcvmore(struct socket *so);
 void	socantrcvmore_locked(struct socket *so);
 void	socantsendmore(struct socket *so);
 void	socantsendmore_locked(struct socket *so);
 int	soclose(struct socket *so);
 int	soconnect(struct socket *so, struct sockaddr *nam, struct thread *td);
 int	soconnect2(struct socket *so1, struct socket *so2);
 int	socow_setup(struct mbuf *m0, struct uio *uio);
 int	socreate(int dom, struct socket **aso, int type, int proto,
 	    struct ucred *cred, struct thread *td);
 int	sodisconnect(struct socket *so);
 struct	sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags);
 void	sofree(struct socket *so);
 int	sogetopt(struct socket *so, struct sockopt *sopt);
 void	sohasoutofband(struct socket *so);
 void	soisconnected(struct socket *so);
 void	soisconnecting(struct socket *so);
 void	soisdisconnected(struct socket *so);
 void	soisdisconnecting(struct socket *so);
 int	solisten(struct socket *so, int backlog, struct thread *td);
 void	solisten_proto(struct socket *so, int backlog);
 int	solisten_proto_check(struct socket *so);
 struct socket *
 	sonewconn(struct socket *head, int connstatus);
 int	sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen);
 int	sooptcopyout(struct sockopt *sopt, const void *buf, size_t len);
 
 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
 int	soopt_getm(struct sockopt *sopt, struct mbuf **mp);
 int	soopt_mcopyin(struct sockopt *sopt, struct mbuf *m);
 int	soopt_mcopyout(struct sockopt *sopt, struct mbuf *m);
 
 int	sopoll(struct socket *so, int events, struct ucred *active_cred,
 	    struct thread *td);
 int	sopoll_generic(struct socket *so, int events,
 	    struct ucred *active_cred, struct thread *td);
 int	soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio,
 	    struct mbuf **mp0, struct mbuf **controlp, int *flagsp);
 int	soreceive_generic(struct socket *so, struct sockaddr **paddr,
 	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
 	    int *flagsp);
 int	soreserve(struct socket *so, u_long sndcc, u_long rcvcc);
 void	sorflush(struct socket *so);
 int	sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	    struct mbuf *top, struct mbuf *control, int flags,
 	    struct thread *td);
 int	sosend_dgram(struct socket *so, struct sockaddr *addr,
 	    struct uio *uio, struct mbuf *top, struct mbuf *control,
 	    int flags, struct thread *td);
 int	sosend_generic(struct socket *so, struct sockaddr *addr,
 	    struct uio *uio, struct mbuf *top, struct mbuf *control,
 	    int flags, struct thread *td);
 int	sosetopt(struct socket *so, struct sockopt *sopt);
 int	soshutdown(struct socket *so, int how);
 void	sotoxsocket(struct socket *so, struct xsocket *xso);
 void	sowakeup(struct socket *so, struct sockbuf *sb);
 int	selsocket(struct socket *so, int events, struct timeval *tv,
 	    struct thread *td);
 
 #ifdef SOCKBUF_DEBUG
 void	sblastrecordchk(struct sockbuf *, const char *, int);
 #define	SBLASTRECORDCHK(sb)	sblastrecordchk((sb), __FILE__, __LINE__)
 
 void	sblastmbufchk(struct sockbuf *, const char *, int);
 #define	SBLASTMBUFCHK(sb)	sblastmbufchk((sb), __FILE__, __LINE__)
 #else
 #define	SBLASTRECORDCHK(sb)      /* nothing */
 #define	SBLASTMBUFCHK(sb)        /* nothing */
 #endif /* SOCKBUF_DEBUG */
 
 /*
  * Accept filter functions (duh).
  */
 int	accept_filt_add(struct accept_filter *filt);
 int	accept_filt_del(char *name);
 struct	accept_filter *accept_filt_get(char *name);
 #ifdef ACCEPT_FILTER_MOD
 #ifdef SYSCTL_DECL
 SYSCTL_DECL(_net_inet_accf);
 #endif
 int	accept_filt_generic_mod_event(module_t mod, int event, void *data);
 #endif
 
 #endif /* _KERNEL */
 
 #endif /* !_SYS_SOCKETVAR_H_ */
Index: head/sys/sys/syscall.h
===================================================================
--- head/sys/sys/syscall.h	(revision 178887)
+++ head/sys/sys/syscall.h	(revision 178888)
@@ -1,421 +1,422 @@
 /*
  * System call numbers.
  *
  * DO NOT EDIT-- this file is automatically generated.
  * $FreeBSD$
  * created from FreeBSD: src/sys/kern/syscalls.master,v 1.242 2008/03/31 12:06:55 kib Exp 
  */
 
 #define	SYS_syscall	0
 #define	SYS_exit	1
 #define	SYS_fork	2
 #define	SYS_read	3
 #define	SYS_write	4
 #define	SYS_open	5
 #define	SYS_close	6
 #define	SYS_wait4	7
 				/* 8 is old creat */
 #define	SYS_link	9
 #define	SYS_unlink	10
 				/* 11 is obsolete execv */
 #define	SYS_chdir	12
 #define	SYS_fchdir	13
 #define	SYS_mknod	14
 #define	SYS_chmod	15
 #define	SYS_chown	16
 #define	SYS_break	17
 #define	SYS_freebsd4_getfsstat	18
 				/* 19 is old lseek */
 #define	SYS_getpid	20
 #define	SYS_mount	21
 #define	SYS_unmount	22
 #define	SYS_setuid	23
 #define	SYS_getuid	24
 #define	SYS_geteuid	25
 #define	SYS_ptrace	26
 #define	SYS_recvmsg	27
 #define	SYS_sendmsg	28
 #define	SYS_recvfrom	29
 #define	SYS_accept	30
 #define	SYS_getpeername	31
 #define	SYS_getsockname	32
 #define	SYS_access	33
 #define	SYS_chflags	34
 #define	SYS_fchflags	35
 #define	SYS_sync	36
 #define	SYS_kill	37
 				/* 38 is old stat */
 #define	SYS_getppid	39
 				/* 40 is old lstat */
 #define	SYS_dup	41
 #define	SYS_pipe	42
 #define	SYS_getegid	43
 #define	SYS_profil	44
 #define	SYS_ktrace	45
 				/* 46 is old sigaction */
 #define	SYS_getgid	47
 				/* 48 is old sigprocmask */
 #define	SYS_getlogin	49
 #define	SYS_setlogin	50
 #define	SYS_acct	51
 				/* 52 is old sigpending */
 #define	SYS_sigaltstack	53
 #define	SYS_ioctl	54
 #define	SYS_reboot	55
 #define	SYS_revoke	56
 #define	SYS_symlink	57
 #define	SYS_readlink	58
 #define	SYS_execve	59
 #define	SYS_umask	60
 #define	SYS_chroot	61
 				/* 62 is old fstat */
 				/* 63 is old getkerninfo */
 				/* 64 is old getpagesize */
 #define	SYS_msync	65
 #define	SYS_vfork	66
 				/* 67 is obsolete vread */
 				/* 68 is obsolete vwrite */
 #define	SYS_sbrk	69
 #define	SYS_sstk	70
 				/* 71 is old mmap */
 #define	SYS_vadvise	72
 #define	SYS_munmap	73
 #define	SYS_mprotect	74
 #define	SYS_madvise	75
 				/* 76 is obsolete vhangup */
 				/* 77 is obsolete vlimit */
 #define	SYS_mincore	78
 #define	SYS_getgroups	79
 #define	SYS_setgroups	80
 #define	SYS_getpgrp	81
 #define	SYS_setpgid	82
 #define	SYS_setitimer	83
 				/* 84 is old wait */
 #define	SYS_swapon	85
 #define	SYS_getitimer	86
 				/* 87 is old gethostname */
 				/* 88 is old sethostname */
 #define	SYS_getdtablesize	89
 #define	SYS_dup2	90
 #define	SYS_fcntl	92
 #define	SYS_select	93
 #define	SYS_fsync	95
 #define	SYS_setpriority	96
 #define	SYS_socket	97
 #define	SYS_connect	98
 				/* 99 is old accept */
 #define	SYS_getpriority	100
 				/* 101 is old send */
 				/* 102 is old recv */
 				/* 103 is old sigreturn */
 #define	SYS_bind	104
 #define	SYS_setsockopt	105
 #define	SYS_listen	106
 				/* 107 is obsolete vtimes */
 				/* 108 is old sigvec */
 				/* 109 is old sigblock */
 				/* 110 is old sigsetmask */
 				/* 111 is old sigsuspend */
 				/* 112 is old sigstack */
 				/* 113 is old recvmsg */
 				/* 114 is old sendmsg */
 				/* 115 is obsolete vtrace */
 #define	SYS_gettimeofday	116
 #define	SYS_getrusage	117
 #define	SYS_getsockopt	118
 #define	SYS_readv	120
 #define	SYS_writev	121
 #define	SYS_settimeofday	122
 #define	SYS_fchown	123
 #define	SYS_fchmod	124
 				/* 125 is old recvfrom */
 #define	SYS_setreuid	126
 #define	SYS_setregid	127
 #define	SYS_rename	128
 				/* 129 is old truncate */
 				/* 130 is old ftruncate */
 #define	SYS_flock	131
 #define	SYS_mkfifo	132
 #define	SYS_sendto	133
 #define	SYS_shutdown	134
 #define	SYS_socketpair	135
 #define	SYS_mkdir	136
 #define	SYS_rmdir	137
 #define	SYS_utimes	138
 				/* 139 is obsolete 4.2 sigreturn */
 #define	SYS_adjtime	140
 				/* 141 is old getpeername */
 				/* 142 is old gethostid */
 				/* 143 is old sethostid */
 				/* 144 is old getrlimit */
 				/* 145 is old setrlimit */
 				/* 146 is old killpg */
 #define	SYS_setsid	147
 #define	SYS_quotactl	148
 				/* 149 is old quota */
 				/* 150 is old getsockname */
 #define	SYS_nlm_syscall	154
 #define	SYS_nfssvc	155
 				/* 156 is old getdirentries */
 #define	SYS_freebsd4_statfs	157
 #define	SYS_freebsd4_fstatfs	158
 #define	SYS_lgetfh	160
 #define	SYS_getfh	161
 #define	SYS_getdomainname	162
 #define	SYS_setdomainname	163
 #define	SYS_uname	164
 #define	SYS_sysarch	165
 #define	SYS_rtprio	166
 #define	SYS_semsys	169
 #define	SYS_msgsys	170
 #define	SYS_shmsys	171
 #define	SYS_freebsd6_pread	173
 #define	SYS_freebsd6_pwrite	174
+#define	SYS_setfib	175
 #define	SYS_ntp_adjtime	176
 #define	SYS_setgid	181
 #define	SYS_setegid	182
 #define	SYS_seteuid	183
 #define	SYS_stat	188
 #define	SYS_fstat	189
 #define	SYS_lstat	190
 #define	SYS_pathconf	191
 #define	SYS_fpathconf	192
 #define	SYS_getrlimit	194
 #define	SYS_setrlimit	195
 #define	SYS_getdirentries	196
 #define	SYS_freebsd6_mmap	197
 #define	SYS___syscall	198
 #define	SYS_freebsd6_lseek	199
 #define	SYS_freebsd6_truncate	200
 #define	SYS_freebsd6_ftruncate	201
 #define	SYS___sysctl	202
 #define	SYS_mlock	203
 #define	SYS_munlock	204
 #define	SYS_undelete	205
 #define	SYS_futimes	206
 #define	SYS_getpgid	207
 #define	SYS_poll	209
 #define	SYS___semctl	220
 #define	SYS_semget	221
 #define	SYS_semop	222
 #define	SYS_msgctl	224
 #define	SYS_msgget	225
 #define	SYS_msgsnd	226
 #define	SYS_msgrcv	227
 #define	SYS_shmat	228
 #define	SYS_shmctl	229
 #define	SYS_shmdt	230
 #define	SYS_shmget	231
 #define	SYS_clock_gettime	232
 #define	SYS_clock_settime	233
 #define	SYS_clock_getres	234
 #define	SYS_ktimer_create	235
 #define	SYS_ktimer_delete	236
 #define	SYS_ktimer_settime	237
 #define	SYS_ktimer_gettime	238
 #define	SYS_ktimer_getoverrun	239
 #define	SYS_nanosleep	240
 #define	SYS_ntp_gettime	248
 #define	SYS_minherit	250
 #define	SYS_rfork	251
 #define	SYS_openbsd_poll	252
 #define	SYS_issetugid	253
 #define	SYS_lchown	254
 #define	SYS_aio_read	255
 #define	SYS_aio_write	256
 #define	SYS_lio_listio	257
 #define	SYS_getdents	272
 #define	SYS_lchmod	274
 #define	SYS_netbsd_lchown	275
 #define	SYS_lutimes	276
 #define	SYS_netbsd_msync	277
 #define	SYS_nstat	278
 #define	SYS_nfstat	279
 #define	SYS_nlstat	280
 #define	SYS_preadv	289
 #define	SYS_pwritev	290
 #define	SYS_freebsd4_fhstatfs	297
 #define	SYS_fhopen	298
 #define	SYS_fhstat	299
 #define	SYS_modnext	300
 #define	SYS_modstat	301
 #define	SYS_modfnext	302
 #define	SYS_modfind	303
 #define	SYS_kldload	304
 #define	SYS_kldunload	305
 #define	SYS_kldfind	306
 #define	SYS_kldnext	307
 #define	SYS_kldstat	308
 #define	SYS_kldfirstmod	309
 #define	SYS_getsid	310
 #define	SYS_setresuid	311
 #define	SYS_setresgid	312
 				/* 313 is obsolete signanosleep */
 #define	SYS_aio_return	314
 #define	SYS_aio_suspend	315
 #define	SYS_aio_cancel	316
 #define	SYS_aio_error	317
 #define	SYS_oaio_read	318
 #define	SYS_oaio_write	319
 #define	SYS_olio_listio	320
 #define	SYS_yield	321
 				/* 322 is obsolete thr_sleep */
 				/* 323 is obsolete thr_wakeup */
 #define	SYS_mlockall	324
 #define	SYS_munlockall	325
 #define	SYS___getcwd	326
 #define	SYS_sched_setparam	327
 #define	SYS_sched_getparam	328
 #define	SYS_sched_setscheduler	329
 #define	SYS_sched_getscheduler	330
 #define	SYS_sched_yield	331
 #define	SYS_sched_get_priority_max	332
 #define	SYS_sched_get_priority_min	333
 #define	SYS_sched_rr_get_interval	334
 #define	SYS_utrace	335
 #define	SYS_freebsd4_sendfile	336
 #define	SYS_kldsym	337
 #define	SYS_jail	338
 #define	SYS_sigprocmask	340
 #define	SYS_sigsuspend	341
 #define	SYS_freebsd4_sigaction	342
 #define	SYS_sigpending	343
 #define	SYS_freebsd4_sigreturn	344
 #define	SYS_sigtimedwait	345
 #define	SYS_sigwaitinfo	346
 #define	SYS___acl_get_file	347
 #define	SYS___acl_set_file	348
 #define	SYS___acl_get_fd	349
 #define	SYS___acl_set_fd	350
 #define	SYS___acl_delete_file	351
 #define	SYS___acl_delete_fd	352
 #define	SYS___acl_aclcheck_file	353
 #define	SYS___acl_aclcheck_fd	354
 #define	SYS_extattrctl	355
 #define	SYS_extattr_set_file	356
 #define	SYS_extattr_get_file	357
 #define	SYS_extattr_delete_file	358
 #define	SYS_aio_waitcomplete	359
 #define	SYS_getresuid	360
 #define	SYS_getresgid	361
 #define	SYS_kqueue	362
 #define	SYS_kevent	363
 #define	SYS_extattr_set_fd	371
 #define	SYS_extattr_get_fd	372
 #define	SYS_extattr_delete_fd	373
 #define	SYS___setugid	374
 #define	SYS_nfsclnt	375
 #define	SYS_eaccess	376
 #define	SYS_nmount	378
 #define	SYS___mac_get_proc	384
 #define	SYS___mac_set_proc	385
 #define	SYS___mac_get_fd	386
 #define	SYS___mac_get_file	387
 #define	SYS___mac_set_fd	388
 #define	SYS___mac_set_file	389
 #define	SYS_kenv	390
 #define	SYS_lchflags	391
 #define	SYS_uuidgen	392
 #define	SYS_sendfile	393
 #define	SYS_mac_syscall	394
 #define	SYS_getfsstat	395
 #define	SYS_statfs	396
 #define	SYS_fstatfs	397
 #define	SYS_fhstatfs	398
 #define	SYS_ksem_close	400
 #define	SYS_ksem_post	401
 #define	SYS_ksem_wait	402
 #define	SYS_ksem_trywait	403
 #define	SYS_ksem_init	404
 #define	SYS_ksem_open	405
 #define	SYS_ksem_unlink	406
 #define	SYS_ksem_getvalue	407
 #define	SYS_ksem_destroy	408
 #define	SYS___mac_get_pid	409
 #define	SYS___mac_get_link	410
 #define	SYS___mac_set_link	411
 #define	SYS_extattr_set_link	412
 #define	SYS_extattr_get_link	413
 #define	SYS_extattr_delete_link	414
 #define	SYS___mac_execve	415
 #define	SYS_sigaction	416
 #define	SYS_sigreturn	417
 #define	SYS_getcontext	421
 #define	SYS_setcontext	422
 #define	SYS_swapcontext	423
 #define	SYS_swapoff	424
 #define	SYS___acl_get_link	425
 #define	SYS___acl_set_link	426
 #define	SYS___acl_delete_link	427
 #define	SYS___acl_aclcheck_link	428
 #define	SYS_sigwait	429
 #define	SYS_thr_create	430
 #define	SYS_thr_exit	431
 #define	SYS_thr_self	432
 #define	SYS_thr_kill	433
 #define	SYS__umtx_lock	434
 #define	SYS__umtx_unlock	435
 #define	SYS_jail_attach	436
 #define	SYS_extattr_list_fd	437
 #define	SYS_extattr_list_file	438
 #define	SYS_extattr_list_link	439
 #define	SYS_ksem_timedwait	441
 #define	SYS_thr_suspend	442
 #define	SYS_thr_wake	443
 #define	SYS_kldunloadf	444
 #define	SYS_audit	445
 #define	SYS_auditon	446
 #define	SYS_getauid	447
 #define	SYS_setauid	448
 #define	SYS_getaudit	449
 #define	SYS_setaudit	450
 #define	SYS_getaudit_addr	451
 #define	SYS_setaudit_addr	452
 #define	SYS_auditctl	453
 #define	SYS__umtx_op	454
 #define	SYS_thr_new	455
 #define	SYS_sigqueue	456
 #define	SYS_kmq_open	457
 #define	SYS_kmq_setattr	458
 #define	SYS_kmq_timedreceive	459
 #define	SYS_kmq_timedsend	460
 #define	SYS_kmq_notify	461
 #define	SYS_kmq_unlink	462
 #define	SYS_abort2	463
 #define	SYS_thr_set_name	464
 #define	SYS_aio_fsync	465
 #define	SYS_rtprio_thread	466
 #define	SYS_sctp_peeloff	471
 #define	SYS_sctp_generic_sendmsg	472
 #define	SYS_sctp_generic_sendmsg_iov	473
 #define	SYS_sctp_generic_recvmsg	474
 #define	SYS_pread	475
 #define	SYS_pwrite	476
 #define	SYS_mmap	477
 #define	SYS_lseek	478
 #define	SYS_truncate	479
 #define	SYS_ftruncate	480
 #define	SYS_thr_kill2	481
 #define	SYS_shm_open	482
 #define	SYS_shm_unlink	483
 #define	SYS_cpuset	484
 #define	SYS_cpuset_setid	485
 #define	SYS_cpuset_getid	486
 #define	SYS_cpuset_getaffinity	487
 #define	SYS_cpuset_setaffinity	488
 #define	SYS_faccessat	489
 #define	SYS_fchmodat	490
 #define	SYS_fchownat	491
 #define	SYS_fexecve	492
 #define	SYS_fstatat	493
 #define	SYS_futimesat	494
 #define	SYS_linkat	495
 #define	SYS_mkdirat	496
 #define	SYS_mkfifoat	497
 #define	SYS_mknodat	498
 #define	SYS_openat	499
 #define	SYS_readlinkat	500
 #define	SYS_renameat	501
 #define	SYS_symlinkat	502
 #define	SYS_unlinkat	503
 #define	SYS_MAXSYSCALL	504
Index: head/sys/sys/syscall.mk
===================================================================
--- head/sys/sys/syscall.mk	(revision 178887)
+++ head/sys/sys/syscall.mk	(revision 178888)
@@ -1,369 +1,370 @@
 # FreeBSD system call names.
 # DO NOT EDIT-- this file is automatically generated.
 # $FreeBSD$
 # created from FreeBSD: src/sys/kern/syscalls.master,v 1.242 2008/03/31 12:06:55 kib Exp 
 MIASM =  \
 	syscall.o \
 	exit.o \
 	fork.o \
 	read.o \
 	write.o \
 	open.o \
 	close.o \
 	wait4.o \
 	link.o \
 	unlink.o \
 	chdir.o \
 	fchdir.o \
 	mknod.o \
 	chmod.o \
 	chown.o \
 	break.o \
 	freebsd4_getfsstat.o \
 	getpid.o \
 	mount.o \
 	unmount.o \
 	setuid.o \
 	getuid.o \
 	geteuid.o \
 	ptrace.o \
 	recvmsg.o \
 	sendmsg.o \
 	recvfrom.o \
 	accept.o \
 	getpeername.o \
 	getsockname.o \
 	access.o \
 	chflags.o \
 	fchflags.o \
 	sync.o \
 	kill.o \
 	getppid.o \
 	dup.o \
 	pipe.o \
 	getegid.o \
 	profil.o \
 	ktrace.o \
 	getgid.o \
 	getlogin.o \
 	setlogin.o \
 	acct.o \
 	sigaltstack.o \
 	ioctl.o \
 	reboot.o \
 	revoke.o \
 	symlink.o \
 	readlink.o \
 	execve.o \
 	umask.o \
 	chroot.o \
 	msync.o \
 	vfork.o \
 	sbrk.o \
 	sstk.o \
 	vadvise.o \
 	munmap.o \
 	mprotect.o \
 	madvise.o \
 	mincore.o \
 	getgroups.o \
 	setgroups.o \
 	getpgrp.o \
 	setpgid.o \
 	setitimer.o \
 	swapon.o \
 	getitimer.o \
 	getdtablesize.o \
 	dup2.o \
 	fcntl.o \
 	select.o \
 	fsync.o \
 	setpriority.o \
 	socket.o \
 	connect.o \
 	getpriority.o \
 	bind.o \
 	setsockopt.o \
 	listen.o \
 	gettimeofday.o \
 	getrusage.o \
 	getsockopt.o \
 	readv.o \
 	writev.o \
 	settimeofday.o \
 	fchown.o \
 	fchmod.o \
 	setreuid.o \
 	setregid.o \
 	rename.o \
 	flock.o \
 	mkfifo.o \
 	sendto.o \
 	shutdown.o \
 	socketpair.o \
 	mkdir.o \
 	rmdir.o \
 	utimes.o \
 	adjtime.o \
 	setsid.o \
 	quotactl.o \
 	nlm_syscall.o \
 	nfssvc.o \
 	freebsd4_statfs.o \
 	freebsd4_fstatfs.o \
 	lgetfh.o \
 	getfh.o \
 	getdomainname.o \
 	setdomainname.o \
 	uname.o \
 	sysarch.o \
 	rtprio.o \
 	semsys.o \
 	msgsys.o \
 	shmsys.o \
 	freebsd6_pread.o \
 	freebsd6_pwrite.o \
+	setfib.o \
 	ntp_adjtime.o \
 	setgid.o \
 	setegid.o \
 	seteuid.o \
 	stat.o \
 	fstat.o \
 	lstat.o \
 	pathconf.o \
 	fpathconf.o \
 	getrlimit.o \
 	setrlimit.o \
 	getdirentries.o \
 	freebsd6_mmap.o \
 	__syscall.o \
 	freebsd6_lseek.o \
 	freebsd6_truncate.o \
 	freebsd6_ftruncate.o \
 	__sysctl.o \
 	mlock.o \
 	munlock.o \
 	undelete.o \
 	futimes.o \
 	getpgid.o \
 	poll.o \
 	__semctl.o \
 	semget.o \
 	semop.o \
 	msgctl.o \
 	msgget.o \
 	msgsnd.o \
 	msgrcv.o \
 	shmat.o \
 	shmctl.o \
 	shmdt.o \
 	shmget.o \
 	clock_gettime.o \
 	clock_settime.o \
 	clock_getres.o \
 	ktimer_create.o \
 	ktimer_delete.o \
 	ktimer_settime.o \
 	ktimer_gettime.o \
 	ktimer_getoverrun.o \
 	nanosleep.o \
 	ntp_gettime.o \
 	minherit.o \
 	rfork.o \
 	openbsd_poll.o \
 	issetugid.o \
 	lchown.o \
 	aio_read.o \
 	aio_write.o \
 	lio_listio.o \
 	getdents.o \
 	lchmod.o \
 	netbsd_lchown.o \
 	lutimes.o \
 	netbsd_msync.o \
 	nstat.o \
 	nfstat.o \
 	nlstat.o \
 	preadv.o \
 	pwritev.o \
 	freebsd4_fhstatfs.o \
 	fhopen.o \
 	fhstat.o \
 	modnext.o \
 	modstat.o \
 	modfnext.o \
 	modfind.o \
 	kldload.o \
 	kldunload.o \
 	kldfind.o \
 	kldnext.o \
 	kldstat.o \
 	kldfirstmod.o \
 	getsid.o \
 	setresuid.o \
 	setresgid.o \
 	aio_return.o \
 	aio_suspend.o \
 	aio_cancel.o \
 	aio_error.o \
 	oaio_read.o \
 	oaio_write.o \
 	olio_listio.o \
 	yield.o \
 	mlockall.o \
 	munlockall.o \
 	__getcwd.o \
 	sched_setparam.o \
 	sched_getparam.o \
 	sched_setscheduler.o \
 	sched_getscheduler.o \
 	sched_yield.o \
 	sched_get_priority_max.o \
 	sched_get_priority_min.o \
 	sched_rr_get_interval.o \
 	utrace.o \
 	freebsd4_sendfile.o \
 	kldsym.o \
 	jail.o \
 	sigprocmask.o \
 	sigsuspend.o \
 	freebsd4_sigaction.o \
 	sigpending.o \
 	freebsd4_sigreturn.o \
 	sigtimedwait.o \
 	sigwaitinfo.o \
 	__acl_get_file.o \
 	__acl_set_file.o \
 	__acl_get_fd.o \
 	__acl_set_fd.o \
 	__acl_delete_file.o \
 	__acl_delete_fd.o \
 	__acl_aclcheck_file.o \
 	__acl_aclcheck_fd.o \
 	extattrctl.o \
 	extattr_set_file.o \
 	extattr_get_file.o \
 	extattr_delete_file.o \
 	aio_waitcomplete.o \
 	getresuid.o \
 	getresgid.o \
 	kqueue.o \
 	kevent.o \
 	extattr_set_fd.o \
 	extattr_get_fd.o \
 	extattr_delete_fd.o \
 	__setugid.o \
 	nfsclnt.o \
 	eaccess.o \
 	nmount.o \
 	__mac_get_proc.o \
 	__mac_set_proc.o \
 	__mac_get_fd.o \
 	__mac_get_file.o \
 	__mac_set_fd.o \
 	__mac_set_file.o \
 	kenv.o \
 	lchflags.o \
 	uuidgen.o \
 	sendfile.o \
 	mac_syscall.o \
 	getfsstat.o \
 	statfs.o \
 	fstatfs.o \
 	fhstatfs.o \
 	ksem_close.o \
 	ksem_post.o \
 	ksem_wait.o \
 	ksem_trywait.o \
 	ksem_init.o \
 	ksem_open.o \
 	ksem_unlink.o \
 	ksem_getvalue.o \
 	ksem_destroy.o \
 	__mac_get_pid.o \
 	__mac_get_link.o \
 	__mac_set_link.o \
 	extattr_set_link.o \
 	extattr_get_link.o \
 	extattr_delete_link.o \
 	__mac_execve.o \
 	sigaction.o \
 	sigreturn.o \
 	getcontext.o \
 	setcontext.o \
 	swapcontext.o \
 	swapoff.o \
 	__acl_get_link.o \
 	__acl_set_link.o \
 	__acl_delete_link.o \
 	__acl_aclcheck_link.o \
 	sigwait.o \
 	thr_create.o \
 	thr_exit.o \
 	thr_self.o \
 	thr_kill.o \
 	_umtx_lock.o \
 	_umtx_unlock.o \
 	jail_attach.o \
 	extattr_list_fd.o \
 	extattr_list_file.o \
 	extattr_list_link.o \
 	ksem_timedwait.o \
 	thr_suspend.o \
 	thr_wake.o \
 	kldunloadf.o \
 	audit.o \
 	auditon.o \
 	getauid.o \
 	setauid.o \
 	getaudit.o \
 	setaudit.o \
 	getaudit_addr.o \
 	setaudit_addr.o \
 	auditctl.o \
 	_umtx_op.o \
 	thr_new.o \
 	sigqueue.o \
 	kmq_open.o \
 	kmq_setattr.o \
 	kmq_timedreceive.o \
 	kmq_timedsend.o \
 	kmq_notify.o \
 	kmq_unlink.o \
 	abort2.o \
 	thr_set_name.o \
 	aio_fsync.o \
 	rtprio_thread.o \
 	sctp_peeloff.o \
 	sctp_generic_sendmsg.o \
 	sctp_generic_sendmsg_iov.o \
 	sctp_generic_recvmsg.o \
 	pread.o \
 	pwrite.o \
 	mmap.o \
 	lseek.o \
 	truncate.o \
 	ftruncate.o \
 	thr_kill2.o \
 	shm_open.o \
 	shm_unlink.o \
 	cpuset.o \
 	cpuset_setid.o \
 	cpuset_getid.o \
 	cpuset_getaffinity.o \
 	cpuset_setaffinity.o \
 	faccessat.o \
 	fchmodat.o \
 	fchownat.o \
 	fexecve.o \
 	fstatat.o \
 	futimesat.o \
 	linkat.o \
 	mkdirat.o \
 	mkfifoat.o \
 	mknodat.o \
 	openat.o \
 	readlinkat.o \
 	renameat.o \
 	symlinkat.o \
 	unlinkat.o
Index: head/sys/sys/sysproto.h
===================================================================
--- head/sys/sys/sysproto.h	(revision 178887)
+++ head/sys/sys/sysproto.h	(revision 178888)
@@ -1,2572 +1,2577 @@
 /*
  * System call prototypes.
  *
  * DO NOT EDIT-- this file is automatically generated.
  * $FreeBSD$
  * created from FreeBSD: src/sys/kern/syscalls.master,v 1.242 2008/03/31 12:06:55 kib Exp 
  */
 
 #ifndef _SYS_SYSPROTO_H_
 #define	_SYS_SYSPROTO_H_
 
 #include <sys/signal.h>
 #include <sys/acl.h>
 #include <sys/cpuset.h>
 #include <sys/_semaphore.h>
 #include <sys/ucontext.h>
 
 #include <bsm/audit_kevents.h>
 
 struct proc;
 
 struct thread;
 
 #define	PAD_(t)	(sizeof(register_t) <= sizeof(t) ? \
 		0 : sizeof(register_t) - sizeof(t))
 
 #if BYTE_ORDER == LITTLE_ENDIAN
 #define	PADL_(t)	0
 #define	PADR_(t)	PAD_(t)
 #else
 #define	PADL_(t)	PAD_(t)
 #define	PADR_(t)	0
 #endif
 
 struct nosys_args {
 	register_t dummy;
 };
 struct sys_exit_args {
 	char rval_l_[PADL_(int)]; int rval; char rval_r_[PADR_(int)];
 };
 struct fork_args {
 	register_t dummy;
 };
 struct read_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char buf_l_[PADL_(void *)]; void * buf; char buf_r_[PADR_(void *)];
 	char nbyte_l_[PADL_(size_t)]; size_t nbyte; char nbyte_r_[PADR_(size_t)];
 };
 struct write_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char buf_l_[PADL_(const void *)]; const void * buf; char buf_r_[PADR_(const void *)];
 	char nbyte_l_[PADL_(size_t)]; size_t nbyte; char nbyte_r_[PADR_(size_t)];
 };
 struct open_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 	char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
 };
 struct close_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 };
 struct wait_args {
 	char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)];
 	char status_l_[PADL_(int *)]; int * status; char status_r_[PADR_(int *)];
 	char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)];
 	char rusage_l_[PADL_(struct rusage *)]; struct rusage * rusage; char rusage_r_[PADR_(struct rusage *)];
 };
 struct link_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char link_l_[PADL_(char *)]; char * link; char link_r_[PADR_(char *)];
 };
 struct unlink_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 };
 struct chdir_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 };
 struct fchdir_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 };
 struct mknod_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
 	char dev_l_[PADL_(int)]; int dev; char dev_r_[PADR_(int)];
 };
 struct chmod_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
 };
 struct chown_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char uid_l_[PADL_(int)]; int uid; char uid_r_[PADR_(int)];
 	char gid_l_[PADL_(int)]; int gid; char gid_r_[PADR_(int)];
 };
 struct obreak_args {
 	char nsize_l_[PADL_(char *)]; char * nsize; char nsize_r_[PADR_(char *)];
 };
 struct getpid_args {
 	register_t dummy;
 };
 struct mount_args {
 	char type_l_[PADL_(char *)]; char * type; char type_r_[PADR_(char *)];
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 	char data_l_[PADL_(caddr_t)]; caddr_t data; char data_r_[PADR_(caddr_t)];
 };
 struct unmount_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct setuid_args {
 	char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)];
 };
 struct getuid_args {
 	register_t dummy;
 };
 struct geteuid_args {
 	register_t dummy;
 };
 struct ptrace_args {
 	char req_l_[PADL_(int)]; int req; char req_r_[PADR_(int)];
 	char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)];
 	char addr_l_[PADL_(caddr_t)]; caddr_t addr; char addr_r_[PADR_(caddr_t)];
 	char data_l_[PADL_(int)]; int data; char data_r_[PADR_(int)];
 };
 struct recvmsg_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char msg_l_[PADL_(struct msghdr *)]; struct msghdr * msg; char msg_r_[PADR_(struct msghdr *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct sendmsg_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char msg_l_[PADL_(struct msghdr *)]; struct msghdr * msg; char msg_r_[PADR_(struct msghdr *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct recvfrom_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char buf_l_[PADL_(caddr_t)]; caddr_t buf; char buf_r_[PADR_(caddr_t)];
 	char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 	char from_l_[PADL_(struct sockaddr *__restrict)]; struct sockaddr *__restrict from; char from_r_[PADR_(struct sockaddr *__restrict)];
 	char fromlenaddr_l_[PADL_(__socklen_t *__restrict)]; __socklen_t *__restrict fromlenaddr; char fromlenaddr_r_[PADR_(__socklen_t *__restrict)];
 };
 struct accept_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char name_l_[PADL_(struct sockaddr *__restrict)]; struct sockaddr *__restrict name; char name_r_[PADR_(struct sockaddr *__restrict)];
 	char anamelen_l_[PADL_(__socklen_t *__restrict)]; __socklen_t *__restrict anamelen; char anamelen_r_[PADR_(__socklen_t *__restrict)];
 };
 struct getpeername_args {
 	char fdes_l_[PADL_(int)]; int fdes; char fdes_r_[PADR_(int)];
 	char asa_l_[PADL_(struct sockaddr *__restrict)]; struct sockaddr *__restrict asa; char asa_r_[PADR_(struct sockaddr *__restrict)];
 	char alen_l_[PADL_(__socklen_t *__restrict)]; __socklen_t *__restrict alen; char alen_r_[PADR_(__socklen_t *__restrict)];
 };
 struct getsockname_args {
 	char fdes_l_[PADL_(int)]; int fdes; char fdes_r_[PADR_(int)];
 	char asa_l_[PADL_(struct sockaddr *__restrict)]; struct sockaddr *__restrict asa; char asa_r_[PADR_(struct sockaddr *__restrict)];
 	char alen_l_[PADL_(__socklen_t *__restrict)]; __socklen_t *__restrict alen; char alen_r_[PADR_(__socklen_t *__restrict)];
 };
 struct access_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct chflags_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct fchflags_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct sync_args {
 	register_t dummy;
 };
 struct kill_args {
 	char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)];
 	char signum_l_[PADL_(int)]; int signum; char signum_r_[PADR_(int)];
 };
 struct getppid_args {
 	register_t dummy;
 };
 struct dup_args {
 	char fd_l_[PADL_(u_int)]; u_int fd; char fd_r_[PADR_(u_int)];
 };
 struct pipe_args {
 	register_t dummy;
 };
 struct getegid_args {
 	register_t dummy;
 };
 struct profil_args {
 	char samples_l_[PADL_(caddr_t)]; caddr_t samples; char samples_r_[PADR_(caddr_t)];
 	char size_l_[PADL_(size_t)]; size_t size; char size_r_[PADR_(size_t)];
 	char offset_l_[PADL_(size_t)]; size_t offset; char offset_r_[PADR_(size_t)];
 	char scale_l_[PADL_(u_int)]; u_int scale; char scale_r_[PADR_(u_int)];
 };
 struct ktrace_args {
 	char fname_l_[PADL_(const char *)]; const char * fname; char fname_r_[PADR_(const char *)];
 	char ops_l_[PADL_(int)]; int ops; char ops_r_[PADR_(int)];
 	char facs_l_[PADL_(int)]; int facs; char facs_r_[PADR_(int)];
 	char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)];
 };
 struct getgid_args {
 	register_t dummy;
 };
 struct getlogin_args {
 	char namebuf_l_[PADL_(char *)]; char * namebuf; char namebuf_r_[PADR_(char *)];
 	char namelen_l_[PADL_(u_int)]; u_int namelen; char namelen_r_[PADR_(u_int)];
 };
 struct setlogin_args {
 	char namebuf_l_[PADL_(char *)]; char * namebuf; char namebuf_r_[PADR_(char *)];
 };
 struct acct_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 };
 struct osigpending_args {
 	register_t dummy;
 };
 struct sigaltstack_args {
 	char ss_l_[PADL_(stack_t *)]; stack_t * ss; char ss_r_[PADR_(stack_t *)];
 	char oss_l_[PADL_(stack_t *)]; stack_t * oss; char oss_r_[PADR_(stack_t *)];
 };
 struct ioctl_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char com_l_[PADL_(u_long)]; u_long com; char com_r_[PADR_(u_long)];
 	char data_l_[PADL_(caddr_t)]; caddr_t data; char data_r_[PADR_(caddr_t)];
 };
 struct reboot_args {
 	char opt_l_[PADL_(int)]; int opt; char opt_r_[PADR_(int)];
 };
 struct revoke_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 };
 struct symlink_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char link_l_[PADL_(char *)]; char * link; char link_r_[PADR_(char *)];
 };
 struct readlink_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char buf_l_[PADL_(char *)]; char * buf; char buf_r_[PADR_(char *)];
 	char count_l_[PADL_(size_t)]; size_t count; char count_r_[PADR_(size_t)];
 };
 struct execve_args {
 	char fname_l_[PADL_(char *)]; char * fname; char fname_r_[PADR_(char *)];
 	char argv_l_[PADL_(char **)]; char ** argv; char argv_r_[PADR_(char **)];
 	char envv_l_[PADL_(char **)]; char ** envv; char envv_r_[PADR_(char **)];
 };
 struct umask_args {
 	char newmask_l_[PADL_(int)]; int newmask; char newmask_r_[PADR_(int)];
 };
 struct chroot_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 };
 struct getpagesize_args {
 	register_t dummy;
 };
 struct msync_args {
 	char addr_l_[PADL_(void *)]; void * addr; char addr_r_[PADR_(void *)];
 	char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct vfork_args {
 	register_t dummy;
 };
 struct sbrk_args {
 	char incr_l_[PADL_(int)]; int incr; char incr_r_[PADR_(int)];
 };
 struct sstk_args {
 	char incr_l_[PADL_(int)]; int incr; char incr_r_[PADR_(int)];
 };
 struct ovadvise_args {
 	char anom_l_[PADL_(int)]; int anom; char anom_r_[PADR_(int)];
 };
 struct munmap_args {
 	char addr_l_[PADL_(void *)]; void * addr; char addr_r_[PADR_(void *)];
 	char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
 };
 struct mprotect_args {
 	char addr_l_[PADL_(const void *)]; const void * addr; char addr_r_[PADR_(const void *)];
 	char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
 	char prot_l_[PADL_(int)]; int prot; char prot_r_[PADR_(int)];
 };
 struct madvise_args {
 	char addr_l_[PADL_(void *)]; void * addr; char addr_r_[PADR_(void *)];
 	char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
 	char behav_l_[PADL_(int)]; int behav; char behav_r_[PADR_(int)];
 };
 struct mincore_args {
 	char addr_l_[PADL_(const void *)]; const void * addr; char addr_r_[PADR_(const void *)];
 	char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
 	char vec_l_[PADL_(char *)]; char * vec; char vec_r_[PADR_(char *)];
 };
 struct getgroups_args {
 	char gidsetsize_l_[PADL_(u_int)]; u_int gidsetsize; char gidsetsize_r_[PADR_(u_int)];
 	char gidset_l_[PADL_(gid_t *)]; gid_t * gidset; char gidset_r_[PADR_(gid_t *)];
 };
 struct setgroups_args {
 	char gidsetsize_l_[PADL_(u_int)]; u_int gidsetsize; char gidsetsize_r_[PADR_(u_int)];
 	char gidset_l_[PADL_(gid_t *)]; gid_t * gidset; char gidset_r_[PADR_(gid_t *)];
 };
 struct getpgrp_args {
 	register_t dummy;
 };
 struct setpgid_args {
 	char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)];
 	char pgid_l_[PADL_(int)]; int pgid; char pgid_r_[PADR_(int)];
 };
 struct setitimer_args {
 	char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)];
 	char itv_l_[PADL_(struct itimerval *)]; struct itimerval * itv; char itv_r_[PADR_(struct itimerval *)];
 	char oitv_l_[PADL_(struct itimerval *)]; struct itimerval * oitv; char oitv_r_[PADR_(struct itimerval *)];
 };
 struct owait_args {
 	register_t dummy;
 };
 struct swapon_args {
 	char name_l_[PADL_(char *)]; char * name; char name_r_[PADR_(char *)];
 };
 struct getitimer_args {
 	char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)];
 	char itv_l_[PADL_(struct itimerval *)]; struct itimerval * itv; char itv_r_[PADR_(struct itimerval *)];
 };
 struct getdtablesize_args {
 	register_t dummy;
 };
 struct dup2_args {
 	char from_l_[PADL_(u_int)]; u_int from; char from_r_[PADR_(u_int)];
 	char to_l_[PADL_(u_int)]; u_int to; char to_r_[PADR_(u_int)];
 };
 struct fcntl_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)];
 	char arg_l_[PADL_(long)]; long arg; char arg_r_[PADR_(long)];
 };
 struct select_args {
 	char nd_l_[PADL_(int)]; int nd; char nd_r_[PADR_(int)];
 	char in_l_[PADL_(fd_set *)]; fd_set * in; char in_r_[PADR_(fd_set *)];
 	char ou_l_[PADL_(fd_set *)]; fd_set * ou; char ou_r_[PADR_(fd_set *)];
 	char ex_l_[PADL_(fd_set *)]; fd_set * ex; char ex_r_[PADR_(fd_set *)];
 	char tv_l_[PADL_(struct timeval *)]; struct timeval * tv; char tv_r_[PADR_(struct timeval *)];
 };
 struct fsync_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 };
 struct setpriority_args {
 	char which_l_[PADL_(int)]; int which; char which_r_[PADR_(int)];
 	char who_l_[PADL_(int)]; int who; char who_r_[PADR_(int)];
 	char prio_l_[PADL_(int)]; int prio; char prio_r_[PADR_(int)];
 };
 struct socket_args {
 	char domain_l_[PADL_(int)]; int domain; char domain_r_[PADR_(int)];
 	char type_l_[PADL_(int)]; int type; char type_r_[PADR_(int)];
 	char protocol_l_[PADL_(int)]; int protocol; char protocol_r_[PADR_(int)];
 };
 struct connect_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char name_l_[PADL_(caddr_t)]; caddr_t name; char name_r_[PADR_(caddr_t)];
 	char namelen_l_[PADL_(int)]; int namelen; char namelen_r_[PADR_(int)];
 };
 struct getpriority_args {
 	char which_l_[PADL_(int)]; int which; char which_r_[PADR_(int)];
 	char who_l_[PADL_(int)]; int who; char who_r_[PADR_(int)];
 };
 struct bind_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char name_l_[PADL_(caddr_t)]; caddr_t name; char name_r_[PADR_(caddr_t)];
 	char namelen_l_[PADL_(int)]; int namelen; char namelen_r_[PADR_(int)];
 };
 struct setsockopt_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char level_l_[PADL_(int)]; int level; char level_r_[PADR_(int)];
 	char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)];
 	char val_l_[PADL_(caddr_t)]; caddr_t val; char val_r_[PADR_(caddr_t)];
 	char valsize_l_[PADL_(int)]; int valsize; char valsize_r_[PADR_(int)];
 };
 struct listen_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char backlog_l_[PADL_(int)]; int backlog; char backlog_r_[PADR_(int)];
 };
 struct gettimeofday_args {
 	char tp_l_[PADL_(struct timeval *)]; struct timeval * tp; char tp_r_[PADR_(struct timeval *)];
 	char tzp_l_[PADL_(struct timezone *)]; struct timezone * tzp; char tzp_r_[PADR_(struct timezone *)];
 };
 struct getrusage_args {
 	char who_l_[PADL_(int)]; int who; char who_r_[PADR_(int)];
 	char rusage_l_[PADL_(struct rusage *)]; struct rusage * rusage; char rusage_r_[PADR_(struct rusage *)];
 };
 struct getsockopt_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char level_l_[PADL_(int)]; int level; char level_r_[PADR_(int)];
 	char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)];
 	char val_l_[PADL_(caddr_t)]; caddr_t val; char val_r_[PADR_(caddr_t)];
 	char avalsize_l_[PADL_(int *)]; int * avalsize; char avalsize_r_[PADR_(int *)];
 };
 struct readv_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char iovp_l_[PADL_(struct iovec *)]; struct iovec * iovp; char iovp_r_[PADR_(struct iovec *)];
 	char iovcnt_l_[PADL_(u_int)]; u_int iovcnt; char iovcnt_r_[PADR_(u_int)];
 };
 struct writev_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char iovp_l_[PADL_(struct iovec *)]; struct iovec * iovp; char iovp_r_[PADR_(struct iovec *)];
 	char iovcnt_l_[PADL_(u_int)]; u_int iovcnt; char iovcnt_r_[PADR_(u_int)];
 };
 struct settimeofday_args {
 	char tv_l_[PADL_(struct timeval *)]; struct timeval * tv; char tv_r_[PADR_(struct timeval *)];
 	char tzp_l_[PADL_(struct timezone *)]; struct timezone * tzp; char tzp_r_[PADR_(struct timezone *)];
 };
 struct fchown_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char uid_l_[PADL_(int)]; int uid; char uid_r_[PADR_(int)];
 	char gid_l_[PADL_(int)]; int gid; char gid_r_[PADR_(int)];
 };
 struct fchmod_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
 };
 struct setreuid_args {
 	char ruid_l_[PADL_(int)]; int ruid; char ruid_r_[PADR_(int)];
 	char euid_l_[PADL_(int)]; int euid; char euid_r_[PADR_(int)];
 };
 struct setregid_args {
 	char rgid_l_[PADL_(int)]; int rgid; char rgid_r_[PADR_(int)];
 	char egid_l_[PADL_(int)]; int egid; char egid_r_[PADR_(int)];
 };
 struct rename_args {
 	char from_l_[PADL_(char *)]; char * from; char from_r_[PADR_(char *)];
 	char to_l_[PADL_(char *)]; char * to; char to_r_[PADR_(char *)];
 };
 struct flock_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)];
 };
 struct mkfifo_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
 };
 struct sendto_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char buf_l_[PADL_(caddr_t)]; caddr_t buf; char buf_r_[PADR_(caddr_t)];
 	char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 	char to_l_[PADL_(caddr_t)]; caddr_t to; char to_r_[PADR_(caddr_t)];
 	char tolen_l_[PADL_(int)]; int tolen; char tolen_r_[PADR_(int)];
 };
 struct shutdown_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)];
 };
 struct socketpair_args {
 	char domain_l_[PADL_(int)]; int domain; char domain_r_[PADR_(int)];
 	char type_l_[PADL_(int)]; int type; char type_r_[PADR_(int)];
 	char protocol_l_[PADL_(int)]; int protocol; char protocol_r_[PADR_(int)];
 	char rsv_l_[PADL_(int *)]; int * rsv; char rsv_r_[PADR_(int *)];
 };
 struct mkdir_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
 };
 struct rmdir_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 };
 struct utimes_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char tptr_l_[PADL_(struct timeval *)]; struct timeval * tptr; char tptr_r_[PADR_(struct timeval *)];
 };
 struct adjtime_args {
 	char delta_l_[PADL_(struct timeval *)]; struct timeval * delta; char delta_r_[PADR_(struct timeval *)];
 	char olddelta_l_[PADL_(struct timeval *)]; struct timeval * olddelta; char olddelta_r_[PADR_(struct timeval *)];
 };
 struct ogethostid_args {
 	register_t dummy;
 };
 struct setsid_args {
 	register_t dummy;
 };
 struct quotactl_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)];
 	char uid_l_[PADL_(int)]; int uid; char uid_r_[PADR_(int)];
 	char arg_l_[PADL_(caddr_t)]; caddr_t arg; char arg_r_[PADR_(caddr_t)];
 };
 struct oquota_args {
 	register_t dummy;
 };
 struct nlm_syscall_args {
 	char debug_level_l_[PADL_(int)]; int debug_level; char debug_level_r_[PADR_(int)];
 	char grace_period_l_[PADL_(int)]; int grace_period; char grace_period_r_[PADR_(int)];
 	char addr_count_l_[PADL_(int)]; int addr_count; char addr_count_r_[PADR_(int)];
 	char addrs_l_[PADL_(char **)]; char ** addrs; char addrs_r_[PADR_(char **)];
 };
 struct nfssvc_args {
 	char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
 	char argp_l_[PADL_(caddr_t)]; caddr_t argp; char argp_r_[PADR_(caddr_t)];
 };
 struct lgetfh_args {
 	char fname_l_[PADL_(char *)]; char * fname; char fname_r_[PADR_(char *)];
 	char fhp_l_[PADL_(struct fhandle *)]; struct fhandle * fhp; char fhp_r_[PADR_(struct fhandle *)];
 };
 struct getfh_args {
 	char fname_l_[PADL_(char *)]; char * fname; char fname_r_[PADR_(char *)];
 	char fhp_l_[PADL_(struct fhandle *)]; struct fhandle * fhp; char fhp_r_[PADR_(struct fhandle *)];
 };
 struct getdomainname_args {
 	char domainname_l_[PADL_(char *)]; char * domainname; char domainname_r_[PADR_(char *)];
 	char len_l_[PADL_(int)]; int len; char len_r_[PADR_(int)];
 };
 struct setdomainname_args {
 	char domainname_l_[PADL_(char *)]; char * domainname; char domainname_r_[PADR_(char *)];
 	char len_l_[PADL_(int)]; int len; char len_r_[PADR_(int)];
 };
 struct uname_args {
 	char name_l_[PADL_(struct utsname *)]; struct utsname * name; char name_r_[PADR_(struct utsname *)];
 };
 struct sysarch_args {
 	char op_l_[PADL_(int)]; int op; char op_r_[PADR_(int)];
 	char parms_l_[PADL_(char *)]; char * parms; char parms_r_[PADR_(char *)];
 };
 struct rtprio_args {
 	char function_l_[PADL_(int)]; int function; char function_r_[PADR_(int)];
 	char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)];
 	char rtp_l_[PADL_(struct rtprio *)]; struct rtprio * rtp; char rtp_r_[PADR_(struct rtprio *)];
 };
 struct semsys_args {
 	char which_l_[PADL_(int)]; int which; char which_r_[PADR_(int)];
 	char a2_l_[PADL_(int)]; int a2; char a2_r_[PADR_(int)];
 	char a3_l_[PADL_(int)]; int a3; char a3_r_[PADR_(int)];
 	char a4_l_[PADL_(int)]; int a4; char a4_r_[PADR_(int)];
 	char a5_l_[PADL_(int)]; int a5; char a5_r_[PADR_(int)];
 };
 struct msgsys_args {
 	char which_l_[PADL_(int)]; int which; char which_r_[PADR_(int)];
 	char a2_l_[PADL_(int)]; int a2; char a2_r_[PADR_(int)];
 	char a3_l_[PADL_(int)]; int a3; char a3_r_[PADR_(int)];
 	char a4_l_[PADL_(int)]; int a4; char a4_r_[PADR_(int)];
 	char a5_l_[PADL_(int)]; int a5; char a5_r_[PADR_(int)];
 	char a6_l_[PADL_(int)]; int a6; char a6_r_[PADR_(int)];
 };
 struct shmsys_args {
 	char which_l_[PADL_(int)]; int which; char which_r_[PADR_(int)];
 	char a2_l_[PADL_(int)]; int a2; char a2_r_[PADR_(int)];
 	char a3_l_[PADL_(int)]; int a3; char a3_r_[PADR_(int)];
 	char a4_l_[PADL_(int)]; int a4; char a4_r_[PADR_(int)];
 };
 struct freebsd6_pread_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char buf_l_[PADL_(void *)]; void * buf; char buf_r_[PADR_(void *)];
 	char nbyte_l_[PADL_(size_t)]; size_t nbyte; char nbyte_r_[PADR_(size_t)];
 	char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
 	char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
 };
 struct freebsd6_pwrite_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char buf_l_[PADL_(const void *)]; const void * buf; char buf_r_[PADR_(const void *)];
 	char nbyte_l_[PADL_(size_t)]; size_t nbyte; char nbyte_r_[PADR_(size_t)];
 	char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
 	char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
 };
+struct setfib_args {
+	char fibnum_l_[PADL_(int)]; int fibnum; char fibnum_r_[PADR_(int)];
+};
 struct ntp_adjtime_args {
 	char tp_l_[PADL_(struct timex *)]; struct timex * tp; char tp_r_[PADR_(struct timex *)];
 };
 struct setgid_args {
 	char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)];
 };
 struct setegid_args {
 	char egid_l_[PADL_(gid_t)]; gid_t egid; char egid_r_[PADR_(gid_t)];
 };
 struct seteuid_args {
 	char euid_l_[PADL_(uid_t)]; uid_t euid; char euid_r_[PADR_(uid_t)];
 };
 struct stat_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char ub_l_[PADL_(struct stat *)]; struct stat * ub; char ub_r_[PADR_(struct stat *)];
 };
 struct fstat_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char sb_l_[PADL_(struct stat *)]; struct stat * sb; char sb_r_[PADR_(struct stat *)];
 };
 struct lstat_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char ub_l_[PADL_(struct stat *)]; struct stat * ub; char ub_r_[PADR_(struct stat *)];
 };
 struct pathconf_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)];
 };
 struct fpathconf_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)];
 };
 struct __getrlimit_args {
 	char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)];
 	char rlp_l_[PADL_(struct rlimit *)]; struct rlimit * rlp; char rlp_r_[PADR_(struct rlimit *)];
 };
 struct __setrlimit_args {
 	char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)];
 	char rlp_l_[PADL_(struct rlimit *)]; struct rlimit * rlp; char rlp_r_[PADR_(struct rlimit *)];
 };
 struct getdirentries_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char buf_l_[PADL_(char *)]; char * buf; char buf_r_[PADR_(char *)];
 	char count_l_[PADL_(u_int)]; u_int count; char count_r_[PADR_(u_int)];
 	char basep_l_[PADL_(long *)]; long * basep; char basep_r_[PADR_(long *)];
 };
 /*
  * Presumably the argument block for a FreeBSD 6 compatibility mmap entry
  * (name-based; confirm). Note the explicit int pad member placed before
  * the off_t pos member.
  */
 struct freebsd6_mmap_args {
 	char addr_l_[PADL_(caddr_t)]; caddr_t addr; char addr_r_[PADR_(caddr_t)];
 	char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
 	char prot_l_[PADL_(int)]; int prot; char prot_r_[PADR_(int)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
 	char pos_l_[PADL_(off_t)]; off_t pos; char pos_r_[PADR_(off_t)];
 };
 /* Presumably a FreeBSD 6 compatibility lseek argument block (confirm): int fd, an explicit int pad, off_t offset, int whence. */
 struct freebsd6_lseek_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
 	char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
 	char whence_l_[PADL_(int)]; int whence; char whence_r_[PADR_(int)];
 };
 /* Presumably a FreeBSD 6 compatibility truncate argument block (confirm): char * path, an explicit int pad, off_t length. */
 struct freebsd6_truncate_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
 	char length_l_[PADL_(off_t)]; off_t length; char length_r_[PADR_(off_t)];
 };
 /* Presumably a FreeBSD 6 compatibility ftruncate argument block (confirm): int fd, an explicit int pad, off_t length. */
 struct freebsd6_ftruncate_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)];
 	char length_l_[PADL_(off_t)]; off_t length; char length_r_[PADR_(off_t)];
 };
 /*
  * Presumably the sysctl argument block (name-based; confirm). Members:
  * int * name with u_int namelen, void * old with size_t * oldlenp, and
  * void * new with size_t newlen; each sits between PADL_/PADR_ pads.
  */
 struct sysctl_args {
 	char name_l_[PADL_(int *)]; int * name; char name_r_[PADR_(int *)];
 	char namelen_l_[PADL_(u_int)]; u_int namelen; char namelen_r_[PADR_(u_int)];
 	char old_l_[PADL_(void *)]; void * old; char old_r_[PADR_(void *)];
 	char oldlenp_l_[PADL_(size_t *)]; size_t * oldlenp; char oldlenp_r_[PADR_(size_t *)];
 	char new_l_[PADL_(void *)]; void * new; char new_r_[PADR_(void *)];
 	char newlen_l_[PADL_(size_t)]; size_t newlen; char newlen_r_[PADR_(size_t)];
 };
 /* Presumably the mlock argument block (confirm): a const void * addr and a size_t len, each between PADL_/PADR_ pads. */
 struct mlock_args {
 	char addr_l_[PADL_(const void *)]; const void * addr; char addr_r_[PADR_(const void *)];
 	char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
 };
 /* Presumably the munlock argument block (confirm): same member layout as mlock_args (const void * addr, size_t len). */
 struct munlock_args {
 	char addr_l_[PADL_(const void *)]; const void * addr; char addr_r_[PADR_(const void *)];
 	char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
 };
 /* Presumably the undelete argument block (confirm): a single char * path between PADL_/PADR_ pads. */
 struct undelete_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 };
 /* Presumably the futimes argument block (confirm): an int fd and a struct timeval * tptr, each between PADL_/PADR_ pads. */
 struct futimes_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char tptr_l_[PADL_(struct timeval *)]; struct timeval * tptr; char tptr_r_[PADR_(struct timeval *)];
 };
 /* Presumably the getpgid argument block (confirm): a single pid_t pid between PADL_/PADR_ pads. */
 struct getpgid_args {
 	char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)];
 };
 /* Presumably the poll argument block (confirm): struct pollfd * fds, u_int nfds, int timeout, each between PADL_/PADR_ pads. */
 struct poll_args {
 	char fds_l_[PADL_(struct pollfd *)]; struct pollfd * fds; char fds_r_[PADR_(struct pollfd *)];
 	char nfds_l_[PADL_(u_int)]; u_int nfds; char nfds_r_[PADR_(u_int)];
 	char timeout_l_[PADL_(int)]; int timeout; char timeout_r_[PADR_(int)];
 };
 /* Presumably the semctl argument block (confirm): int semid, int semnum, int cmd, and a union semun * arg (passed by pointer). */
 struct __semctl_args {
 	char semid_l_[PADL_(int)]; int semid; char semid_r_[PADR_(int)];
 	char semnum_l_[PADL_(int)]; int semnum; char semnum_r_[PADR_(int)];
 	char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)];
 	char arg_l_[PADL_(union semun *)]; union semun * arg; char arg_r_[PADR_(union semun *)];
 };
 /* Presumably the semget argument block (confirm): key_t key, int nsems, int semflg, each between PADL_/PADR_ pads. */
 struct semget_args {
 	char key_l_[PADL_(key_t)]; key_t key; char key_r_[PADR_(key_t)];
 	char nsems_l_[PADL_(int)]; int nsems; char nsems_r_[PADR_(int)];
 	char semflg_l_[PADL_(int)]; int semflg; char semflg_r_[PADR_(int)];
 };
 /* Presumably the semop argument block (confirm): int semid, struct sembuf * sops, size_t nsops, each between PADL_/PADR_ pads. */
 struct semop_args {
 	char semid_l_[PADL_(int)]; int semid; char semid_r_[PADR_(int)];
 	char sops_l_[PADL_(struct sembuf *)]; struct sembuf * sops; char sops_r_[PADR_(struct sembuf *)];
 	char nsops_l_[PADL_(size_t)]; size_t nsops; char nsops_r_[PADR_(size_t)];
 };
 /* Presumably the msgctl argument block (confirm): int msqid, int cmd, struct msqid_ds * buf, each between PADL_/PADR_ pads. */
 struct msgctl_args {
 	char msqid_l_[PADL_(int)]; int msqid; char msqid_r_[PADR_(int)];
 	char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)];
 	char buf_l_[PADL_(struct msqid_ds *)]; struct msqid_ds * buf; char buf_r_[PADR_(struct msqid_ds *)];
 };
 /* Presumably the msgget argument block (confirm): a key_t key and an int msgflg, each between PADL_/PADR_ pads. */
 struct msgget_args {
 	char key_l_[PADL_(key_t)]; key_t key; char key_r_[PADR_(key_t)];
 	char msgflg_l_[PADL_(int)]; int msgflg; char msgflg_r_[PADR_(int)];
 };
 /* Presumably the msgsnd argument block (confirm): int msqid, const void * msgp, size_t msgsz, int msgflg. */
 struct msgsnd_args {
 	char msqid_l_[PADL_(int)]; int msqid; char msqid_r_[PADR_(int)];
 	char msgp_l_[PADL_(const void *)]; const void * msgp; char msgp_r_[PADR_(const void *)];
 	char msgsz_l_[PADL_(size_t)]; size_t msgsz; char msgsz_r_[PADR_(size_t)];
 	char msgflg_l_[PADL_(int)]; int msgflg; char msgflg_r_[PADR_(int)];
 };
 /* Presumably the msgrcv argument block (confirm): int msqid, non-const void * msgp, size_t msgsz, long msgtyp, int msgflg. */
 struct msgrcv_args {
 	char msqid_l_[PADL_(int)]; int msqid; char msqid_r_[PADR_(int)];
 	char msgp_l_[PADL_(void *)]; void * msgp; char msgp_r_[PADR_(void *)];
 	char msgsz_l_[PADL_(size_t)]; size_t msgsz; char msgsz_r_[PADR_(size_t)];
 	char msgtyp_l_[PADL_(long)]; long msgtyp; char msgtyp_r_[PADR_(long)];
 	char msgflg_l_[PADL_(int)]; int msgflg; char msgflg_r_[PADR_(int)];
 };
 /* Presumably the shmat argument block (confirm): int shmid, const void * shmaddr, int shmflg, each between PADL_/PADR_ pads. */
 struct shmat_args {
 	char shmid_l_[PADL_(int)]; int shmid; char shmid_r_[PADR_(int)];
 	char shmaddr_l_[PADL_(const void *)]; const void * shmaddr; char shmaddr_r_[PADR_(const void *)];
 	char shmflg_l_[PADL_(int)]; int shmflg; char shmflg_r_[PADR_(int)];
 };
 /* Presumably the shmctl argument block (confirm): int shmid, int cmd, struct shmid_ds * buf, each between PADL_/PADR_ pads. */
 struct shmctl_args {
 	char shmid_l_[PADL_(int)]; int shmid; char shmid_r_[PADR_(int)];
 	char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)];
 	char buf_l_[PADL_(struct shmid_ds *)]; struct shmid_ds * buf; char buf_r_[PADR_(struct shmid_ds *)];
 };
 /* Presumably the shmdt argument block (confirm): a single const void * shmaddr between PADL_/PADR_ pads. */
 struct shmdt_args {
 	char shmaddr_l_[PADL_(const void *)]; const void * shmaddr; char shmaddr_r_[PADR_(const void *)];
 };
 /* Presumably the shmget argument block (confirm): key_t key, size_t size, int shmflg, each between PADL_/PADR_ pads. */
 struct shmget_args {
 	char key_l_[PADL_(key_t)]; key_t key; char key_r_[PADR_(key_t)];
 	char size_l_[PADL_(size_t)]; size_t size; char size_r_[PADR_(size_t)];
 	char shmflg_l_[PADL_(int)]; int shmflg; char shmflg_r_[PADR_(int)];
 };
 /* Presumably the clock_gettime argument block (confirm): clockid_t clock_id and a non-const struct timespec * tp. */
 struct clock_gettime_args {
 	char clock_id_l_[PADL_(clockid_t)]; clockid_t clock_id; char clock_id_r_[PADR_(clockid_t)];
 	char tp_l_[PADL_(struct timespec *)]; struct timespec * tp; char tp_r_[PADR_(struct timespec *)];
 };
 /* Presumably the clock_settime argument block (confirm): clockid_t clock_id and a const struct timespec * tp. */
 struct clock_settime_args {
 	char clock_id_l_[PADL_(clockid_t)]; clockid_t clock_id; char clock_id_r_[PADR_(clockid_t)];
 	char tp_l_[PADL_(const struct timespec *)]; const struct timespec * tp; char tp_r_[PADR_(const struct timespec *)];
 };
 /* Presumably the clock_getres argument block (confirm): same member layout as clock_gettime_args. */
 struct clock_getres_args {
 	char clock_id_l_[PADL_(clockid_t)]; clockid_t clock_id; char clock_id_r_[PADR_(clockid_t)];
 	char tp_l_[PADL_(struct timespec *)]; struct timespec * tp; char tp_r_[PADR_(struct timespec *)];
 };
 /* Presumably the ktimer_create argument block (confirm): clockid_t clock_id, struct sigevent * evp, and int * timerid (a pointer here). */
 struct ktimer_create_args {
 	char clock_id_l_[PADL_(clockid_t)]; clockid_t clock_id; char clock_id_r_[PADR_(clockid_t)];
 	char evp_l_[PADL_(struct sigevent *)]; struct sigevent * evp; char evp_r_[PADR_(struct sigevent *)];
 	char timerid_l_[PADL_(int *)]; int * timerid; char timerid_r_[PADR_(int *)];
 };
 /* Presumably the ktimer_delete argument block (confirm): a single int timerid between PADL_/PADR_ pads. */
 struct ktimer_delete_args {
 	char timerid_l_[PADL_(int)]; int timerid; char timerid_r_[PADR_(int)];
 };
 /* Presumably the ktimer_settime argument block (confirm): int timerid, int flags, const struct itimerspec * value, struct itimerspec * ovalue. */
 struct ktimer_settime_args {
 	char timerid_l_[PADL_(int)]; int timerid; char timerid_r_[PADR_(int)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 	char value_l_[PADL_(const struct itimerspec *)]; const struct itimerspec * value; char value_r_[PADR_(const struct itimerspec *)];
 	char ovalue_l_[PADL_(struct itimerspec *)]; struct itimerspec * ovalue; char ovalue_r_[PADR_(struct itimerspec *)];
 };
 /* Presumably the ktimer_gettime argument block (confirm): int timerid and a non-const struct itimerspec * value. */
 struct ktimer_gettime_args {
 	char timerid_l_[PADL_(int)]; int timerid; char timerid_r_[PADR_(int)];
 	char value_l_[PADL_(struct itimerspec *)]; struct itimerspec * value; char value_r_[PADR_(struct itimerspec *)];
 };
 /* Presumably the ktimer_getoverrun argument block (confirm): a single int timerid between PADL_/PADR_ pads. */
 struct ktimer_getoverrun_args {
 	char timerid_l_[PADL_(int)]; int timerid; char timerid_r_[PADR_(int)];
 };
 /* Presumably the nanosleep argument block (confirm): const struct timespec * rqtp and non-const struct timespec * rmtp. */
 struct nanosleep_args {
 	char rqtp_l_[PADL_(const struct timespec *)]; const struct timespec * rqtp; char rqtp_r_[PADR_(const struct timespec *)];
 	char rmtp_l_[PADL_(struct timespec *)]; struct timespec * rmtp; char rmtp_r_[PADR_(struct timespec *)];
 };
 /* Presumably the ntp_gettime argument block (confirm): a single struct ntptimeval * ntvp between PADL_/PADR_ pads. */
 struct ntp_gettime_args {
 	char ntvp_l_[PADL_(struct ntptimeval *)]; struct ntptimeval * ntvp; char ntvp_r_[PADR_(struct ntptimeval *)];
 };
 /* Presumably the minherit argument block (confirm): void * addr, size_t len, int inherit, each between PADL_/PADR_ pads. */
 struct minherit_args {
 	char addr_l_[PADL_(void *)]; void * addr; char addr_r_[PADR_(void *)];
 	char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
 	char inherit_l_[PADL_(int)]; int inherit; char inherit_r_[PADR_(int)];
 };
 /* Presumably the rfork argument block (confirm): a single int flags between PADL_/PADR_ pads. */
 struct rfork_args {
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 /* Presumably an OpenBSD-compatible poll argument block (name-based; confirm): same member layout as poll_args. */
 struct openbsd_poll_args {
 	char fds_l_[PADL_(struct pollfd *)]; struct pollfd * fds; char fds_r_[PADR_(struct pollfd *)];
 	char nfds_l_[PADL_(u_int)]; u_int nfds; char nfds_r_[PADR_(u_int)];
 	char timeout_l_[PADL_(int)]; int timeout; char timeout_r_[PADR_(int)];
 };
 /* Holds only a register_t member named dummy and no padded argument members; presumably issetugid takes no arguments (confirm). */
 struct issetugid_args {
 	register_t dummy;
 };
 /* Presumably the lchown argument block (confirm): char * path, with uid and gid both declared as plain int. */
 struct lchown_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char uid_l_[PADL_(int)]; int uid; char uid_r_[PADR_(int)];
 	char gid_l_[PADL_(int)]; int gid; char gid_r_[PADR_(int)];
 };
 /* Presumably the aio_read argument block (confirm): a single struct aiocb * aiocbp between PADL_/PADR_ pads. */
 struct aio_read_args {
 	char aiocbp_l_[PADL_(struct aiocb *)]; struct aiocb * aiocbp; char aiocbp_r_[PADR_(struct aiocb *)];
 };
 /* Presumably the aio_write argument block (confirm): a single struct aiocb * aiocbp between PADL_/PADR_ pads. */
 struct aio_write_args {
 	char aiocbp_l_[PADL_(struct aiocb *)]; struct aiocb * aiocbp; char aiocbp_r_[PADR_(struct aiocb *)];
 };
 /* Presumably the lio_listio argument block (confirm): int mode, struct aiocb *const * acb_list, int nent, struct sigevent * sig. */
 struct lio_listio_args {
 	char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
 	char acb_list_l_[PADL_(struct aiocb *const *)]; struct aiocb *const * acb_list; char acb_list_r_[PADR_(struct aiocb *const *)];
 	char nent_l_[PADL_(int)]; int nent; char nent_r_[PADR_(int)];
 	char sig_l_[PADL_(struct sigevent *)]; struct sigevent * sig; char sig_r_[PADR_(struct sigevent *)];
 };
 /* Presumably the getdents argument block (confirm): int fd, char * buf, size_t count, each between PADL_/PADR_ pads. */
 struct getdents_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char buf_l_[PADL_(char *)]; char * buf; char buf_r_[PADR_(char *)];
 	char count_l_[PADL_(size_t)]; size_t count; char count_r_[PADR_(size_t)];
 };
 /* Presumably the lchmod argument block (confirm): a char * path and a mode_t mode, each between PADL_/PADR_ pads. */
 struct lchmod_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)];
 };
 /* Presumably the lutimes argument block (confirm): a char * path and a struct timeval * tptr, each between PADL_/PADR_ pads. */
 struct lutimes_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char tptr_l_[PADL_(struct timeval *)]; struct timeval * tptr; char tptr_r_[PADR_(struct timeval *)];
 };
 /* Presumably the nstat argument block (confirm): like stat_args, but ub points to struct nstat rather than struct stat. */
 struct nstat_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char ub_l_[PADL_(struct nstat *)]; struct nstat * ub; char ub_r_[PADR_(struct nstat *)];
 };
 /* Presumably the nfstat argument block (confirm): like fstat_args, but sb points to struct nstat rather than struct stat. */
 struct nfstat_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char sb_l_[PADL_(struct nstat *)]; struct nstat * sb; char sb_r_[PADR_(struct nstat *)];
 };
 /* Presumably the nlstat argument block (confirm): same member layout as nstat_args (char * path, struct nstat * ub). */
 struct nlstat_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char ub_l_[PADL_(struct nstat *)]; struct nstat * ub; char ub_r_[PADR_(struct nstat *)];
 };
 /* Presumably the preadv argument block (confirm): int fd, struct iovec * iovp, u_int iovcnt, off_t offset. */
 struct preadv_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char iovp_l_[PADL_(struct iovec *)]; struct iovec * iovp; char iovp_r_[PADR_(struct iovec *)];
 	char iovcnt_l_[PADL_(u_int)]; u_int iovcnt; char iovcnt_r_[PADR_(u_int)];
 	char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
 };
 /* Presumably the pwritev argument block (confirm): same member layout as preadv_args. */
 struct pwritev_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char iovp_l_[PADL_(struct iovec *)]; struct iovec * iovp; char iovp_r_[PADR_(struct iovec *)];
 	char iovcnt_l_[PADL_(u_int)]; u_int iovcnt; char iovcnt_r_[PADR_(u_int)];
 	char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
 };
 /* Presumably the fhopen argument block (confirm): a const struct fhandle * u_fhp and an int flags. */
 struct fhopen_args {
 	char u_fhp_l_[PADL_(const struct fhandle *)]; const struct fhandle * u_fhp; char u_fhp_r_[PADR_(const struct fhandle *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 /* Presumably the fhstat argument block (confirm): a const struct fhandle * u_fhp and a struct stat * sb. */
 struct fhstat_args {
 	char u_fhp_l_[PADL_(const struct fhandle *)]; const struct fhandle * u_fhp; char u_fhp_r_[PADR_(const struct fhandle *)];
 	char sb_l_[PADL_(struct stat *)]; struct stat * sb; char sb_r_[PADR_(struct stat *)];
 };
 /* Presumably the modnext argument block (confirm): a single int modid between PADL_/PADR_ pads. */
 struct modnext_args {
 	char modid_l_[PADL_(int)]; int modid; char modid_r_[PADR_(int)];
 };
 /* Presumably the modstat argument block (confirm): an int modid and a struct module_stat * stat. */
 struct modstat_args {
 	char modid_l_[PADL_(int)]; int modid; char modid_r_[PADR_(int)];
 	char stat_l_[PADL_(struct module_stat *)]; struct module_stat * stat; char stat_r_[PADR_(struct module_stat *)];
 };
 /* Presumably the modfnext argument block (confirm): a single int modid between PADL_/PADR_ pads. */
 struct modfnext_args {
 	char modid_l_[PADL_(int)]; int modid; char modid_r_[PADR_(int)];
 };
 /* Presumably the modfind argument block (confirm): a single const char * name between PADL_/PADR_ pads. */
 struct modfind_args {
 	char name_l_[PADL_(const char *)]; const char * name; char name_r_[PADR_(const char *)];
 };
 /* Presumably the kldload argument block (confirm): a single const char * file between PADL_/PADR_ pads. */
 struct kldload_args {
 	char file_l_[PADL_(const char *)]; const char * file; char file_r_[PADR_(const char *)];
 };
 /* Presumably the kldunload argument block (confirm): a single int fileid between PADL_/PADR_ pads. */
 struct kldunload_args {
 	char fileid_l_[PADL_(int)]; int fileid; char fileid_r_[PADR_(int)];
 };
 /* Presumably the kldfind argument block (confirm): a single const char * file between PADL_/PADR_ pads. */
 struct kldfind_args {
 	char file_l_[PADL_(const char *)]; const char * file; char file_r_[PADR_(const char *)];
 };
 /* Presumably the kldnext argument block (confirm): a single int fileid between PADL_/PADR_ pads. */
 struct kldnext_args {
 	char fileid_l_[PADL_(int)]; int fileid; char fileid_r_[PADR_(int)];
 };
 /* Presumably the kldstat argument block (confirm): an int fileid and a struct kld_file_stat * stat. */
 struct kldstat_args {
 	char fileid_l_[PADL_(int)]; int fileid; char fileid_r_[PADR_(int)];
 	char stat_l_[PADL_(struct kld_file_stat *)]; struct kld_file_stat * stat; char stat_r_[PADR_(struct kld_file_stat *)];
 };
 /* Presumably the kldfirstmod argument block (confirm): a single int fileid between PADL_/PADR_ pads. */
 struct kldfirstmod_args {
 	char fileid_l_[PADL_(int)]; int fileid; char fileid_r_[PADR_(int)];
 };
 /* Presumably the getsid argument block (confirm): a single pid_t pid between PADL_/PADR_ pads. */
 struct getsid_args {
 	char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)];
 };
 /* Presumably the setresuid argument block (confirm): three uid_t values, ruid, euid and suid, each between PADL_/PADR_ pads. */
 struct setresuid_args {
 	char ruid_l_[PADL_(uid_t)]; uid_t ruid; char ruid_r_[PADR_(uid_t)];
 	char euid_l_[PADL_(uid_t)]; uid_t euid; char euid_r_[PADR_(uid_t)];
 	char suid_l_[PADL_(uid_t)]; uid_t suid; char suid_r_[PADR_(uid_t)];
 };
 /* Presumably the setresgid argument block (confirm): three gid_t values, rgid, egid and sgid, each between PADL_/PADR_ pads. */
 struct setresgid_args {
 	char rgid_l_[PADL_(gid_t)]; gid_t rgid; char rgid_r_[PADR_(gid_t)];
 	char egid_l_[PADL_(gid_t)]; gid_t egid; char egid_r_[PADR_(gid_t)];
 	char sgid_l_[PADL_(gid_t)]; gid_t sgid; char sgid_r_[PADR_(gid_t)];
 };
 /* Presumably the aio_return argument block (confirm): a single struct aiocb * aiocbp between PADL_/PADR_ pads. */
 struct aio_return_args {
 	char aiocbp_l_[PADL_(struct aiocb *)]; struct aiocb * aiocbp; char aiocbp_r_[PADR_(struct aiocb *)];
 };
 /* Presumably the aio_suspend argument block (confirm): struct aiocb *const * aiocbp, int nent, const struct timespec * timeout. */
 struct aio_suspend_args {
 	char aiocbp_l_[PADL_(struct aiocb *const *)]; struct aiocb *const * aiocbp; char aiocbp_r_[PADR_(struct aiocb *const *)];
 	char nent_l_[PADL_(int)]; int nent; char nent_r_[PADR_(int)];
 	char timeout_l_[PADL_(const struct timespec *)]; const struct timespec * timeout; char timeout_r_[PADR_(const struct timespec *)];
 };
 /* Presumably the aio_cancel argument block (confirm): an int fd and a struct aiocb * aiocbp. */
 struct aio_cancel_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char aiocbp_l_[PADL_(struct aiocb *)]; struct aiocb * aiocbp; char aiocbp_r_[PADR_(struct aiocb *)];
 };
 /* Presumably the aio_error argument block (confirm): a single struct aiocb * aiocbp between PADL_/PADR_ pads. */
 struct aio_error_args {
 	char aiocbp_l_[PADL_(struct aiocb *)]; struct aiocb * aiocbp; char aiocbp_r_[PADR_(struct aiocb *)];
 };
 /* Presumably an older-ABI aio_read argument block (name-based; confirm): aiocbp points to struct oaiocb rather than struct aiocb. */
 struct oaio_read_args {
 	char aiocbp_l_[PADL_(struct oaiocb *)]; struct oaiocb * aiocbp; char aiocbp_r_[PADR_(struct oaiocb *)];
 };
 /* Presumably an older-ABI aio_write argument block (name-based; confirm): aiocbp points to struct oaiocb rather than struct aiocb. */
 struct oaio_write_args {
 	char aiocbp_l_[PADL_(struct oaiocb *)]; struct oaiocb * aiocbp; char aiocbp_r_[PADR_(struct oaiocb *)];
 };
 /* Presumably an older-ABI lio_listio argument block (confirm): like lio_listio_args, but using struct oaiocb and struct osigevent. */
 struct olio_listio_args {
 	char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
 	char acb_list_l_[PADL_(struct oaiocb *const *)]; struct oaiocb *const * acb_list; char acb_list_r_[PADR_(struct oaiocb *const *)];
 	char nent_l_[PADL_(int)]; int nent; char nent_r_[PADR_(int)];
 	char sig_l_[PADL_(struct osigevent *)]; struct osigevent * sig; char sig_r_[PADR_(struct osigevent *)];
 };
 /* Holds only a register_t member named dummy and no padded argument members; presumably yield takes no arguments (confirm). */
 struct yield_args {
 	register_t dummy;
 };
 /* Presumably the mlockall argument block (confirm): a single int how between PADL_/PADR_ pads. */
 struct mlockall_args {
 	char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)];
 };
 /* Holds only a register_t member named dummy and no padded argument members; presumably munlockall takes no arguments (confirm). */
 struct munlockall_args {
 	register_t dummy;
 };
 /* Presumably the __getcwd argument block (confirm): a u_char * buf and a u_int buflen, each between PADL_/PADR_ pads. */
 struct __getcwd_args {
 	char buf_l_[PADL_(u_char *)]; u_char * buf; char buf_r_[PADR_(u_char *)];
 	char buflen_l_[PADL_(u_int)]; u_int buflen; char buflen_r_[PADR_(u_int)];
 };
 /* Presumably the sched_setparam argument block (confirm): pid_t pid and a const struct sched_param * param. */
 struct sched_setparam_args {
 	char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)];
 	char param_l_[PADL_(const struct sched_param *)]; const struct sched_param * param; char param_r_[PADR_(const struct sched_param *)];
 };
 /* Presumably the sched_getparam argument block (confirm): pid_t pid and a non-const struct sched_param * param. */
 struct sched_getparam_args {
 	char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)];
 	char param_l_[PADL_(struct sched_param *)]; struct sched_param * param; char param_r_[PADR_(struct sched_param *)];
 };
 /* Presumably the sched_setscheduler argument block (confirm): pid_t pid, int policy, const struct sched_param * param. */
 struct sched_setscheduler_args {
 	char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)];
 	char policy_l_[PADL_(int)]; int policy; char policy_r_[PADR_(int)];
 	char param_l_[PADL_(const struct sched_param *)]; const struct sched_param * param; char param_r_[PADR_(const struct sched_param *)];
 };
 /* Presumably the sched_getscheduler argument block (confirm): a single pid_t pid between PADL_/PADR_ pads. */
 struct sched_getscheduler_args {
 	char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)];
 };
 /* Holds only a register_t member named dummy and no padded argument members; presumably sched_yield takes no arguments (confirm). */
 struct sched_yield_args {
 	register_t dummy;
 };
 /* Presumably the sched_get_priority_max argument block (confirm): a single int policy between PADL_/PADR_ pads. */
 struct sched_get_priority_max_args {
 	char policy_l_[PADL_(int)]; int policy; char policy_r_[PADR_(int)];
 };
 /* Presumably the sched_get_priority_min argument block (confirm): a single int policy between PADL_/PADR_ pads. */
 struct sched_get_priority_min_args {
 	char policy_l_[PADL_(int)]; int policy; char policy_r_[PADR_(int)];
 };
 /* Presumably the sched_rr_get_interval argument block (confirm): pid_t pid and a struct timespec * interval. */
 struct sched_rr_get_interval_args {
 	char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)];
 	char interval_l_[PADL_(struct timespec *)]; struct timespec * interval; char interval_r_[PADR_(struct timespec *)];
 };
 /* Presumably the utrace argument block (confirm): a const void * addr and a size_t len, each between PADL_/PADR_ pads. */
 struct utrace_args {
 	char addr_l_[PADL_(const void *)]; const void * addr; char addr_r_[PADR_(const void *)];
 	char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
 };
 /* Presumably the kldsym argument block (confirm): int fileid, int cmd, void * data, each between PADL_/PADR_ pads. */
 struct kldsym_args {
 	char fileid_l_[PADL_(int)]; int fileid; char fileid_r_[PADR_(int)];
 	char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)];
 	char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)];
 };
 /* Presumably the jail argument block (confirm): a single struct jail * member, itself named jail, between PADL_/PADR_ pads. */
 struct jail_args {
 	char jail_l_[PADL_(struct jail *)]; struct jail * jail; char jail_r_[PADR_(struct jail *)];
 };
 /* Presumably the sigprocmask argument block (confirm): int how, const sigset_t * set, non-const sigset_t * oset. */
 struct sigprocmask_args {
 	char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)];
 	char set_l_[PADL_(const sigset_t *)]; const sigset_t * set; char set_r_[PADR_(const sigset_t *)];
 	char oset_l_[PADL_(sigset_t *)]; sigset_t * oset; char oset_r_[PADR_(sigset_t *)];
 };
 /* Presumably the sigsuspend argument block (confirm): a single const sigset_t * sigmask between PADL_/PADR_ pads. */
 struct sigsuspend_args {
 	char sigmask_l_[PADL_(const sigset_t *)]; const sigset_t * sigmask; char sigmask_r_[PADR_(const sigset_t *)];
 };
 /* Presumably the sigpending argument block (confirm): a single non-const sigset_t * set between PADL_/PADR_ pads. */
 struct sigpending_args {
 	char set_l_[PADL_(sigset_t *)]; sigset_t * set; char set_r_[PADR_(sigset_t *)];
 };
 /* Presumably the sigtimedwait argument block (confirm): const sigset_t * set, siginfo_t * info, const struct timespec * timeout. */
 struct sigtimedwait_args {
 	char set_l_[PADL_(const sigset_t *)]; const sigset_t * set; char set_r_[PADR_(const sigset_t *)];
 	char info_l_[PADL_(siginfo_t *)]; siginfo_t * info; char info_r_[PADR_(siginfo_t *)];
 	char timeout_l_[PADL_(const struct timespec *)]; const struct timespec * timeout; char timeout_r_[PADR_(const struct timespec *)];
 };
 /* Presumably the sigwaitinfo argument block (confirm): const sigset_t * set and siginfo_t * info; no timeout member. */
 struct sigwaitinfo_args {
 	char set_l_[PADL_(const sigset_t *)]; const sigset_t * set; char set_r_[PADR_(const sigset_t *)];
 	char info_l_[PADL_(siginfo_t *)]; siginfo_t * info; char info_r_[PADR_(siginfo_t *)];
 };
 /* Presumably the __acl_get_file argument block (confirm): const char * path, acl_type_t type, struct acl * aclp. */
 struct __acl_get_file_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char type_l_[PADL_(acl_type_t)]; acl_type_t type; char type_r_[PADR_(acl_type_t)];
 	char aclp_l_[PADL_(struct acl *)]; struct acl * aclp; char aclp_r_[PADR_(struct acl *)];
 };
 /* Presumably the __acl_set_file argument block (confirm): same member layout as __acl_get_file_args. */
 struct __acl_set_file_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char type_l_[PADL_(acl_type_t)]; acl_type_t type; char type_r_[PADR_(acl_type_t)];
 	char aclp_l_[PADL_(struct acl *)]; struct acl * aclp; char aclp_r_[PADR_(struct acl *)];
 };
 /* Presumably the __acl_get_fd argument block (confirm): int filedes, acl_type_t type, struct acl * aclp. */
 struct __acl_get_fd_args {
 	char filedes_l_[PADL_(int)]; int filedes; char filedes_r_[PADR_(int)];
 	char type_l_[PADL_(acl_type_t)]; acl_type_t type; char type_r_[PADR_(acl_type_t)];
 	char aclp_l_[PADL_(struct acl *)]; struct acl * aclp; char aclp_r_[PADR_(struct acl *)];
 };
 /* Presumably the __acl_set_fd argument block (confirm): same member layout as __acl_get_fd_args. */
 struct __acl_set_fd_args {
 	char filedes_l_[PADL_(int)]; int filedes; char filedes_r_[PADR_(int)];
 	char type_l_[PADL_(acl_type_t)]; acl_type_t type; char type_r_[PADR_(acl_type_t)];
 	char aclp_l_[PADL_(struct acl *)]; struct acl * aclp; char aclp_r_[PADR_(struct acl *)];
 };
 /* Presumably the __acl_delete_file argument block (confirm): const char * path and acl_type_t type; no aclp member. */
 struct __acl_delete_file_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char type_l_[PADL_(acl_type_t)]; acl_type_t type; char type_r_[PADR_(acl_type_t)];
 };
 /* Presumably the __acl_delete_fd argument block (confirm): int filedes and acl_type_t type; no aclp member. */
 struct __acl_delete_fd_args {
 	char filedes_l_[PADL_(int)]; int filedes; char filedes_r_[PADR_(int)];
 	char type_l_[PADL_(acl_type_t)]; acl_type_t type; char type_r_[PADR_(acl_type_t)];
 };
 /* Presumably the __acl_aclcheck_file argument block (confirm): const char * path, acl_type_t type, struct acl * aclp. */
 struct __acl_aclcheck_file_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char type_l_[PADL_(acl_type_t)]; acl_type_t type; char type_r_[PADR_(acl_type_t)];
 	char aclp_l_[PADL_(struct acl *)]; struct acl * aclp; char aclp_r_[PADR_(struct acl *)];
 };
 /* Presumably the __acl_aclcheck_fd argument block (confirm): int filedes, acl_type_t type, struct acl * aclp. */
 struct __acl_aclcheck_fd_args {
 	char filedes_l_[PADL_(int)]; int filedes; char filedes_r_[PADR_(int)];
 	char type_l_[PADL_(acl_type_t)]; acl_type_t type; char type_r_[PADR_(acl_type_t)];
 	char aclp_l_[PADL_(struct acl *)]; struct acl * aclp; char aclp_r_[PADR_(struct acl *)];
 };
 /* Presumably the extattrctl argument block (confirm): const char * path, int cmd, const char * filename, int attrnamespace, const char * attrname. */
 struct extattrctl_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)];
 	char filename_l_[PADL_(const char *)]; const char * filename; char filename_r_[PADR_(const char *)];
 	char attrnamespace_l_[PADL_(int)]; int attrnamespace; char attrnamespace_r_[PADR_(int)];
 	char attrname_l_[PADL_(const char *)]; const char * attrname; char attrname_r_[PADR_(const char *)];
 };
 /* Presumably the extattr_set_file argument block (confirm): const char * path, int attrnamespace, const char * attrname, void * data, size_t nbytes. */
 struct extattr_set_file_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char attrnamespace_l_[PADL_(int)]; int attrnamespace; char attrnamespace_r_[PADR_(int)];
 	char attrname_l_[PADL_(const char *)]; const char * attrname; char attrname_r_[PADR_(const char *)];
 	char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)];
 	char nbytes_l_[PADL_(size_t)]; size_t nbytes; char nbytes_r_[PADR_(size_t)];
 };
 /* Presumably the extattr_get_file argument block (confirm): same member layout as extattr_set_file_args. */
 struct extattr_get_file_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char attrnamespace_l_[PADL_(int)]; int attrnamespace; char attrnamespace_r_[PADR_(int)];
 	char attrname_l_[PADL_(const char *)]; const char * attrname; char attrname_r_[PADR_(const char *)];
 	char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)];
 	char nbytes_l_[PADL_(size_t)]; size_t nbytes; char nbytes_r_[PADR_(size_t)];
 };
 /* Presumably the extattr_delete_file argument block (confirm): const char * path, int attrnamespace, const char * attrname; no data/nbytes. */
 struct extattr_delete_file_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char attrnamespace_l_[PADL_(int)]; int attrnamespace; char attrnamespace_r_[PADR_(int)];
 	char attrname_l_[PADL_(const char *)]; const char * attrname; char attrname_r_[PADR_(const char *)];
 };
 /* Presumably the aio_waitcomplete argument block (confirm): struct aiocb ** aiocbp (double pointer) and struct timespec * timeout. */
 struct aio_waitcomplete_args {
 	char aiocbp_l_[PADL_(struct aiocb **)]; struct aiocb ** aiocbp; char aiocbp_r_[PADR_(struct aiocb **)];
 	char timeout_l_[PADL_(struct timespec *)]; struct timespec * timeout; char timeout_r_[PADR_(struct timespec *)];
 };
 /* Presumably the getresuid argument block (confirm): three uid_t pointers, ruid, euid and suid (pointers, unlike setresuid_args). */
 struct getresuid_args {
 	char ruid_l_[PADL_(uid_t *)]; uid_t * ruid; char ruid_r_[PADR_(uid_t *)];
 	char euid_l_[PADL_(uid_t *)]; uid_t * euid; char euid_r_[PADR_(uid_t *)];
 	char suid_l_[PADL_(uid_t *)]; uid_t * suid; char suid_r_[PADR_(uid_t *)];
 };
 /* Presumably the getresgid argument block (confirm): three gid_t pointers, rgid, egid and sgid (pointers, unlike setresgid_args). */
 struct getresgid_args {
 	char rgid_l_[PADL_(gid_t *)]; gid_t * rgid; char rgid_r_[PADR_(gid_t *)];
 	char egid_l_[PADL_(gid_t *)]; gid_t * egid; char egid_r_[PADR_(gid_t *)];
 	char sgid_l_[PADL_(gid_t *)]; gid_t * sgid; char sgid_r_[PADR_(gid_t *)];
 };
 /* Holds only a register_t member named dummy and no padded argument members; presumably kqueue takes no arguments (confirm). */
 struct kqueue_args {
 	register_t dummy;
 };
 /*
  * Presumably the kevent argument block (name-based; confirm). Members:
  * int fd, struct kevent * changelist with int nchanges, struct kevent *
  * eventlist with int nevents, and const struct timespec * timeout.
  */
 struct kevent_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char changelist_l_[PADL_(struct kevent *)]; struct kevent * changelist; char changelist_r_[PADR_(struct kevent *)];
 	char nchanges_l_[PADL_(int)]; int nchanges; char nchanges_r_[PADR_(int)];
 	char eventlist_l_[PADL_(struct kevent *)]; struct kevent * eventlist; char eventlist_r_[PADR_(struct kevent *)];
 	char nevents_l_[PADL_(int)]; int nevents; char nevents_r_[PADR_(int)];
 	char timeout_l_[PADL_(const struct timespec *)]; const struct timespec * timeout; char timeout_r_[PADR_(const struct timespec *)];
 };
 /* Presumably the extattr_set_fd argument block (confirm): int fd, int attrnamespace, const char * attrname, void * data, size_t nbytes. */
 struct extattr_set_fd_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char attrnamespace_l_[PADL_(int)]; int attrnamespace; char attrnamespace_r_[PADR_(int)];
 	char attrname_l_[PADL_(const char *)]; const char * attrname; char attrname_r_[PADR_(const char *)];
 	char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)];
 	char nbytes_l_[PADL_(size_t)]; size_t nbytes; char nbytes_r_[PADR_(size_t)];
 };
 /* Presumably the extattr_get_fd argument block (confirm): same member layout as extattr_set_fd_args. */
 struct extattr_get_fd_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char attrnamespace_l_[PADL_(int)]; int attrnamespace; char attrnamespace_r_[PADR_(int)];
 	char attrname_l_[PADL_(const char *)]; const char * attrname; char attrname_r_[PADR_(const char *)];
 	char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)];
 	char nbytes_l_[PADL_(size_t)]; size_t nbytes; char nbytes_r_[PADR_(size_t)];
 };
 /* Presumably the extattr_delete_fd argument block (confirm): int fd, int attrnamespace, const char * attrname; no data/nbytes. */
 struct extattr_delete_fd_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char attrnamespace_l_[PADL_(int)]; int attrnamespace; char attrnamespace_r_[PADR_(int)];
 	char attrname_l_[PADL_(const char *)]; const char * attrname; char attrname_r_[PADR_(const char *)];
 };
 /* Presumably the __setugid argument block (confirm): a single int flag between PADL_/PADR_ pads. */
 struct __setugid_args {
 	char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
 };
 /* Presumably the nfsclnt argument block (confirm): an int flag and a caddr_t argp, each between PADL_/PADR_ pads. */
 struct nfsclnt_args {
 	char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
 	char argp_l_[PADL_(caddr_t)]; caddr_t argp; char argp_r_[PADR_(caddr_t)];
 };
 /* Presumably the eaccess argument block (confirm): a char * path and an int flags, each between PADL_/PADR_ pads. */
 struct eaccess_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 /* Presumably the nmount argument block (confirm): struct iovec * iovp, unsigned int iovcnt, int flags. */
 struct nmount_args {
 	char iovp_l_[PADL_(struct iovec *)]; struct iovec * iovp; char iovp_r_[PADR_(struct iovec *)];
 	char iovcnt_l_[PADL_(unsigned int)]; unsigned int iovcnt; char iovcnt_r_[PADR_(unsigned int)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 /* Presumably the __mac_get_proc argument block (confirm): a single struct mac * mac_p between PADL_/PADR_ pads. */
 struct __mac_get_proc_args {
 	char mac_p_l_[PADL_(struct mac *)]; struct mac * mac_p; char mac_p_r_[PADR_(struct mac *)];
 };
 /* Presumably the __mac_set_proc argument block (confirm): a single struct mac * mac_p between PADL_/PADR_ pads. */
 struct __mac_set_proc_args {
 	char mac_p_l_[PADL_(struct mac *)]; struct mac * mac_p; char mac_p_r_[PADR_(struct mac *)];
 };
 /* Presumably the __mac_get_fd argument block (confirm): an int fd and a struct mac * mac_p. */
 struct __mac_get_fd_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char mac_p_l_[PADL_(struct mac *)]; struct mac * mac_p; char mac_p_r_[PADR_(struct mac *)];
 };
 /* Presumably the __mac_get_file argument block (confirm): a const char * path_p and a struct mac * mac_p. */
 struct __mac_get_file_args {
 	char path_p_l_[PADL_(const char *)]; const char * path_p; char path_p_r_[PADR_(const char *)];
 	char mac_p_l_[PADL_(struct mac *)]; struct mac * mac_p; char mac_p_r_[PADR_(struct mac *)];
 };
 /* Presumably the __mac_set_fd argument block (confirm): same member layout as __mac_get_fd_args. */
 struct __mac_set_fd_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char mac_p_l_[PADL_(struct mac *)]; struct mac * mac_p; char mac_p_r_[PADR_(struct mac *)];
 };
 /* Presumably the __mac_set_file argument block (confirm): same member layout as __mac_get_file_args. */
 struct __mac_set_file_args {
 	char path_p_l_[PADL_(const char *)]; const char * path_p; char path_p_r_[PADR_(const char *)];
 	char mac_p_l_[PADL_(struct mac *)]; struct mac * mac_p; char mac_p_r_[PADR_(struct mac *)];
 };
 /* Presumably the kenv argument block (confirm): int what, const char * name, char * value, int len. */
 struct kenv_args {
 	char what_l_[PADL_(int)]; int what; char what_r_[PADR_(int)];
 	char name_l_[PADL_(const char *)]; const char * name; char name_r_[PADR_(const char *)];
 	char value_l_[PADL_(char *)]; char * value; char value_r_[PADR_(char *)];
 	char len_l_[PADL_(int)]; int len; char len_r_[PADR_(int)];
 };
 /* Presumably the lchflags argument block (confirm): a const char * path and an int flags. */
 struct lchflags_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 /* Presumably the uuidgen argument block (confirm): a struct uuid * store and an int count. */
 struct uuidgen_args {
 	char store_l_[PADL_(struct uuid *)]; struct uuid * store; char store_r_[PADR_(struct uuid *)];
 	char count_l_[PADL_(int)]; int count; char count_r_[PADR_(int)];
 };
 /*
  * Presumably the sendfile argument block (name-based; confirm). Members:
  * int fd, int s, off_t offset, size_t nbytes, struct sf_hdtr * hdtr,
  * off_t * sbytes and int flags; each sits between PADL_/PADR_ pads.
  */
 struct sendfile_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
 	char nbytes_l_[PADL_(size_t)]; size_t nbytes; char nbytes_r_[PADR_(size_t)];
 	char hdtr_l_[PADL_(struct sf_hdtr *)]; struct sf_hdtr * hdtr; char hdtr_r_[PADR_(struct sf_hdtr *)];
 	char sbytes_l_[PADL_(off_t *)]; off_t * sbytes; char sbytes_r_[PADR_(off_t *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 /* Presumably the mac_syscall argument block (confirm): const char * policy, int call, and an untyped void * arg. */
 struct mac_syscall_args {
 	char policy_l_[PADL_(const char *)]; const char * policy; char policy_r_[PADR_(const char *)];
 	char call_l_[PADL_(int)]; int call; char call_r_[PADR_(int)];
 	char arg_l_[PADL_(void *)]; void * arg; char arg_r_[PADR_(void *)];
 };
 /* Presumably the getfsstat argument block (confirm): struct statfs * buf, long bufsize, int flags. */
 struct getfsstat_args {
 	char buf_l_[PADL_(struct statfs *)]; struct statfs * buf; char buf_r_[PADR_(struct statfs *)];
 	char bufsize_l_[PADL_(long)]; long bufsize; char bufsize_r_[PADR_(long)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 /* Presumably the statfs argument block (confirm): a char * path and a struct statfs * buf. */
 struct statfs_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char buf_l_[PADL_(struct statfs *)]; struct statfs * buf; char buf_r_[PADR_(struct statfs *)];
 };
 /* Presumably the fstatfs argument block (confirm): an int fd and a struct statfs * buf. */
 struct fstatfs_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char buf_l_[PADL_(struct statfs *)]; struct statfs * buf; char buf_r_[PADR_(struct statfs *)];
 };
 /* Presumably the fhstatfs argument block (confirm): a const struct fhandle * u_fhp and a struct statfs * buf. */
 struct fhstatfs_args {
 	char u_fhp_l_[PADL_(const struct fhandle *)]; const struct fhandle * u_fhp; char u_fhp_r_[PADR_(const struct fhandle *)];
 	char buf_l_[PADL_(struct statfs *)]; struct statfs * buf; char buf_r_[PADR_(struct statfs *)];
 };
 /* Presumably the ksem_close argument block (confirm): a single semid_t id between PADL_/PADR_ pads. */
 struct ksem_close_args {
 	char id_l_[PADL_(semid_t)]; semid_t id; char id_r_[PADR_(semid_t)];
 };
 /* Presumably the ksem_post argument block (confirm): a single semid_t id between PADL_/PADR_ pads. */
 struct ksem_post_args {
 	char id_l_[PADL_(semid_t)]; semid_t id; char id_r_[PADR_(semid_t)];
 };
 /* Presumably the ksem_wait argument block (confirm): a single semid_t id between PADL_/PADR_ pads. */
 struct ksem_wait_args {
 	char id_l_[PADL_(semid_t)]; semid_t id; char id_r_[PADR_(semid_t)];
 };
 /* Presumably the ksem_trywait argument block (confirm): a single semid_t id between PADL_/PADR_ pads. */
 struct ksem_trywait_args {
 	char id_l_[PADL_(semid_t)]; semid_t id; char id_r_[PADR_(semid_t)];
 };
 /* Presumably the ksem_init argument block (confirm): a semid_t * idp (pointer) and an unsigned int value. */
 struct ksem_init_args {
 	char idp_l_[PADL_(semid_t *)]; semid_t * idp; char idp_r_[PADR_(semid_t *)];
 	char value_l_[PADL_(unsigned int)]; unsigned int value; char value_r_[PADR_(unsigned int)];
 };
 /* Presumably the ksem_open argument block (confirm): semid_t * idp, const char * name, int oflag, mode_t mode, unsigned int value. */
 struct ksem_open_args {
 	char idp_l_[PADL_(semid_t *)]; semid_t * idp; char idp_r_[PADR_(semid_t *)];
 	char name_l_[PADL_(const char *)]; const char * name; char name_r_[PADR_(const char *)];
 	char oflag_l_[PADL_(int)]; int oflag; char oflag_r_[PADR_(int)];
 	char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)];
 	char value_l_[PADL_(unsigned int)]; unsigned int value; char value_r_[PADR_(unsigned int)];
 };
 /* Presumably the ksem_unlink argument block (confirm): a single const char * name between PADL_/PADR_ pads. */
 struct ksem_unlink_args {
 	char name_l_[PADL_(const char *)]; const char * name; char name_r_[PADR_(const char *)];
 };
 /* Presumably the ksem_getvalue argument block (confirm): a semid_t id and an int * val. */
 struct ksem_getvalue_args {
 	char id_l_[PADL_(semid_t)]; semid_t id; char id_r_[PADR_(semid_t)];
 	char val_l_[PADL_(int *)]; int * val; char val_r_[PADR_(int *)];
 };
 /* Presumably the ksem_destroy argument block (confirm): a single semid_t id between PADL_/PADR_ pads. */
 struct ksem_destroy_args {
 	char id_l_[PADL_(semid_t)]; semid_t id; char id_r_[PADR_(semid_t)];
 };
 /* Presumably the __mac_get_pid argument block (confirm): a pid_t pid and a struct mac * mac_p. */
 struct __mac_get_pid_args {
 	char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)];
 	char mac_p_l_[PADL_(struct mac *)]; struct mac * mac_p; char mac_p_r_[PADR_(struct mac *)];
 };
 /* Presumably the __mac_get_link argument block (confirm): a const char * path_p and a struct mac * mac_p. */
 struct __mac_get_link_args {
 	char path_p_l_[PADL_(const char *)]; const char * path_p; char path_p_r_[PADR_(const char *)];
 	char mac_p_l_[PADL_(struct mac *)]; struct mac * mac_p; char mac_p_r_[PADR_(struct mac *)];
 };
 /* Presumably the __mac_set_link argument block (confirm): same member layout as __mac_get_link_args. */
 struct __mac_set_link_args {
 	char path_p_l_[PADL_(const char *)]; const char * path_p; char path_p_r_[PADR_(const char *)];
 	char mac_p_l_[PADL_(struct mac *)]; struct mac * mac_p; char mac_p_r_[PADR_(struct mac *)];
 };
 /* Presumably the extattr_set_link argument block (confirm): const char * path, int attrnamespace, const char * attrname, void * data, size_t nbytes. */
 struct extattr_set_link_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char attrnamespace_l_[PADL_(int)]; int attrnamespace; char attrnamespace_r_[PADR_(int)];
 	char attrname_l_[PADL_(const char *)]; const char * attrname; char attrname_r_[PADR_(const char *)];
 	char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)];
 	char nbytes_l_[PADL_(size_t)]; size_t nbytes; char nbytes_r_[PADR_(size_t)];
 };
 /* Presumably the extattr_get_link argument block (confirm): same member layout as extattr_set_link_args. */
 struct extattr_get_link_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char attrnamespace_l_[PADL_(int)]; int attrnamespace; char attrnamespace_r_[PADR_(int)];
 	char attrname_l_[PADL_(const char *)]; const char * attrname; char attrname_r_[PADR_(const char *)];
 	char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)];
 	char nbytes_l_[PADL_(size_t)]; size_t nbytes; char nbytes_r_[PADR_(size_t)];
 };
 /* Presumably the extattr_delete_link argument block (confirm): const char * path, int attrnamespace, const char * attrname; no data/nbytes. */
 struct extattr_delete_link_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char attrnamespace_l_[PADL_(int)]; int attrnamespace; char attrnamespace_r_[PADR_(int)];
 	char attrname_l_[PADL_(const char *)]; const char * attrname; char attrname_r_[PADR_(const char *)];
 };
 struct __mac_execve_args {
 	char fname_l_[PADL_(char *)]; char * fname; char fname_r_[PADR_(char *)];
 	char argv_l_[PADL_(char **)]; char ** argv; char argv_r_[PADR_(char **)];
 	char envv_l_[PADL_(char **)]; char ** envv; char envv_r_[PADR_(char **)];
 	char mac_p_l_[PADL_(struct mac *)]; struct mac * mac_p; char mac_p_r_[PADR_(struct mac *)];
 };
 struct sigaction_args {
 	char sig_l_[PADL_(int)]; int sig; char sig_r_[PADR_(int)];
 	char act_l_[PADL_(const struct sigaction *)]; const struct sigaction * act; char act_r_[PADR_(const struct sigaction *)];
 	char oact_l_[PADL_(struct sigaction *)]; struct sigaction * oact; char oact_r_[PADR_(struct sigaction *)];
 };
 struct sigreturn_args {
 	char sigcntxp_l_[PADL_(const struct __ucontext *)]; const struct __ucontext * sigcntxp; char sigcntxp_r_[PADR_(const struct __ucontext *)];
 };
 struct getcontext_args {
 	char ucp_l_[PADL_(struct __ucontext *)]; struct __ucontext * ucp; char ucp_r_[PADR_(struct __ucontext *)];
 };
 struct setcontext_args {
 	char ucp_l_[PADL_(const struct __ucontext *)]; const struct __ucontext * ucp; char ucp_r_[PADR_(const struct __ucontext *)];
 };
 struct swapcontext_args {
 	char oucp_l_[PADL_(struct __ucontext *)]; struct __ucontext * oucp; char oucp_r_[PADR_(struct __ucontext *)];
 	char ucp_l_[PADL_(const struct __ucontext *)]; const struct __ucontext * ucp; char ucp_r_[PADR_(const struct __ucontext *)];
 };
 struct swapoff_args {
 	char name_l_[PADL_(const char *)]; const char * name; char name_r_[PADR_(const char *)];
 };
 struct __acl_get_link_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char type_l_[PADL_(acl_type_t)]; acl_type_t type; char type_r_[PADR_(acl_type_t)];
 	char aclp_l_[PADL_(struct acl *)]; struct acl * aclp; char aclp_r_[PADR_(struct acl *)];
 };
 struct __acl_set_link_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char type_l_[PADL_(acl_type_t)]; acl_type_t type; char type_r_[PADR_(acl_type_t)];
 	char aclp_l_[PADL_(struct acl *)]; struct acl * aclp; char aclp_r_[PADR_(struct acl *)];
 };
 struct __acl_delete_link_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char type_l_[PADL_(acl_type_t)]; acl_type_t type; char type_r_[PADR_(acl_type_t)];
 };
 struct __acl_aclcheck_link_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char type_l_[PADL_(acl_type_t)]; acl_type_t type; char type_r_[PADR_(acl_type_t)];
 	char aclp_l_[PADL_(struct acl *)]; struct acl * aclp; char aclp_r_[PADR_(struct acl *)];
 };
 struct sigwait_args {
 	char set_l_[PADL_(const sigset_t *)]; const sigset_t * set; char set_r_[PADR_(const sigset_t *)];
 	char sig_l_[PADL_(int *)]; int * sig; char sig_r_[PADR_(int *)];
 };
 struct thr_create_args {
 	char ctx_l_[PADL_(ucontext_t *)]; ucontext_t * ctx; char ctx_r_[PADR_(ucontext_t *)];
 	char id_l_[PADL_(long *)]; long * id; char id_r_[PADR_(long *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct thr_exit_args {
 	char state_l_[PADL_(long *)]; long * state; char state_r_[PADR_(long *)];
 };
 struct thr_self_args {
 	char id_l_[PADL_(long *)]; long * id; char id_r_[PADR_(long *)];
 };
 struct thr_kill_args {
 	char id_l_[PADL_(long)]; long id; char id_r_[PADR_(long)];
 	char sig_l_[PADL_(int)]; int sig; char sig_r_[PADR_(int)];
 };
 struct _umtx_lock_args {
 	char umtx_l_[PADL_(struct umtx *)]; struct umtx * umtx; char umtx_r_[PADR_(struct umtx *)];
 };
 struct _umtx_unlock_args {
 	char umtx_l_[PADL_(struct umtx *)]; struct umtx * umtx; char umtx_r_[PADR_(struct umtx *)];
 };
 struct jail_attach_args {
 	char jid_l_[PADL_(int)]; int jid; char jid_r_[PADR_(int)];
 };
 struct extattr_list_fd_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char attrnamespace_l_[PADL_(int)]; int attrnamespace; char attrnamespace_r_[PADR_(int)];
 	char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)];
 	char nbytes_l_[PADL_(size_t)]; size_t nbytes; char nbytes_r_[PADR_(size_t)];
 };
 struct extattr_list_file_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char attrnamespace_l_[PADL_(int)]; int attrnamespace; char attrnamespace_r_[PADR_(int)];
 	char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)];
 	char nbytes_l_[PADL_(size_t)]; size_t nbytes; char nbytes_r_[PADR_(size_t)];
 };
 struct extattr_list_link_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char attrnamespace_l_[PADL_(int)]; int attrnamespace; char attrnamespace_r_[PADR_(int)];
 	char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)];
 	char nbytes_l_[PADL_(size_t)]; size_t nbytes; char nbytes_r_[PADR_(size_t)];
 };
 struct ksem_timedwait_args {
 	char id_l_[PADL_(semid_t)]; semid_t id; char id_r_[PADR_(semid_t)];
 	char abstime_l_[PADL_(const struct timespec *)]; const struct timespec * abstime; char abstime_r_[PADR_(const struct timespec *)];
 };
 struct thr_suspend_args {
 	char timeout_l_[PADL_(const struct timespec *)]; const struct timespec * timeout; char timeout_r_[PADR_(const struct timespec *)];
 };
 struct thr_wake_args {
 	char id_l_[PADL_(long)]; long id; char id_r_[PADR_(long)];
 };
 struct kldunloadf_args {
 	char fileid_l_[PADL_(int)]; int fileid; char fileid_r_[PADR_(int)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct audit_args {
 	char record_l_[PADL_(const void *)]; const void * record; char record_r_[PADR_(const void *)];
 	char length_l_[PADL_(u_int)]; u_int length; char length_r_[PADR_(u_int)];
 };
 struct auditon_args {
 	char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)];
 	char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)];
 	char length_l_[PADL_(u_int)]; u_int length; char length_r_[PADR_(u_int)];
 };
 struct getauid_args {
 	char auid_l_[PADL_(uid_t *)]; uid_t * auid; char auid_r_[PADR_(uid_t *)];
 };
 struct setauid_args {
 	char auid_l_[PADL_(uid_t *)]; uid_t * auid; char auid_r_[PADR_(uid_t *)];
 };
 struct getaudit_args {
 	char auditinfo_l_[PADL_(struct auditinfo *)]; struct auditinfo * auditinfo; char auditinfo_r_[PADR_(struct auditinfo *)];
 };
 struct setaudit_args {
 	char auditinfo_l_[PADL_(struct auditinfo *)]; struct auditinfo * auditinfo; char auditinfo_r_[PADR_(struct auditinfo *)];
 };
 struct getaudit_addr_args {
 	char auditinfo_addr_l_[PADL_(struct auditinfo_addr *)]; struct auditinfo_addr * auditinfo_addr; char auditinfo_addr_r_[PADR_(struct auditinfo_addr *)];
 	char length_l_[PADL_(u_int)]; u_int length; char length_r_[PADR_(u_int)];
 };
 struct setaudit_addr_args {
 	char auditinfo_addr_l_[PADL_(struct auditinfo_addr *)]; struct auditinfo_addr * auditinfo_addr; char auditinfo_addr_r_[PADR_(struct auditinfo_addr *)];
 	char length_l_[PADL_(u_int)]; u_int length; char length_r_[PADR_(u_int)];
 };
 struct auditctl_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 };
 struct _umtx_op_args {
 	char obj_l_[PADL_(void *)]; void * obj; char obj_r_[PADR_(void *)];
 	char op_l_[PADL_(int)]; int op; char op_r_[PADR_(int)];
 	char val_l_[PADL_(u_long)]; u_long val; char val_r_[PADR_(u_long)];
 	char uaddr1_l_[PADL_(void *)]; void * uaddr1; char uaddr1_r_[PADR_(void *)];
 	char uaddr2_l_[PADL_(void *)]; void * uaddr2; char uaddr2_r_[PADR_(void *)];
 };
 struct thr_new_args {
 	char param_l_[PADL_(struct thr_param *)]; struct thr_param * param; char param_r_[PADR_(struct thr_param *)];
 	char param_size_l_[PADL_(int)]; int param_size; char param_size_r_[PADR_(int)];
 };
 struct sigqueue_args {
 	char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)];
 	char signum_l_[PADL_(int)]; int signum; char signum_r_[PADR_(int)];
 	char value_l_[PADL_(void *)]; void * value; char value_r_[PADR_(void *)];
 };
 struct kmq_open_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 	char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)];
 	char attr_l_[PADL_(const struct mq_attr *)]; const struct mq_attr * attr; char attr_r_[PADR_(const struct mq_attr *)];
 };
 struct kmq_setattr_args {
 	char mqd_l_[PADL_(int)]; int mqd; char mqd_r_[PADR_(int)];
 	char attr_l_[PADL_(const struct mq_attr *)]; const struct mq_attr * attr; char attr_r_[PADR_(const struct mq_attr *)];
 	char oattr_l_[PADL_(struct mq_attr *)]; struct mq_attr * oattr; char oattr_r_[PADR_(struct mq_attr *)];
 };
 struct kmq_timedreceive_args {
 	char mqd_l_[PADL_(int)]; int mqd; char mqd_r_[PADR_(int)];
 	char msg_ptr_l_[PADL_(char *)]; char * msg_ptr; char msg_ptr_r_[PADR_(char *)];
 	char msg_len_l_[PADL_(size_t)]; size_t msg_len; char msg_len_r_[PADR_(size_t)];
 	char msg_prio_l_[PADL_(unsigned *)]; unsigned * msg_prio; char msg_prio_r_[PADR_(unsigned *)];
 	char abs_timeout_l_[PADL_(const struct timespec *)]; const struct timespec * abs_timeout; char abs_timeout_r_[PADR_(const struct timespec *)];
 };
 struct kmq_timedsend_args {
 	char mqd_l_[PADL_(int)]; int mqd; char mqd_r_[PADR_(int)];
 	char msg_ptr_l_[PADL_(const char *)]; const char * msg_ptr; char msg_ptr_r_[PADR_(const char *)];
 	char msg_len_l_[PADL_(size_t)]; size_t msg_len; char msg_len_r_[PADR_(size_t)];
 	char msg_prio_l_[PADL_(unsigned)]; unsigned msg_prio; char msg_prio_r_[PADR_(unsigned)];
 	char abs_timeout_l_[PADL_(const struct timespec *)]; const struct timespec * abs_timeout; char abs_timeout_r_[PADR_(const struct timespec *)];
 };
 struct kmq_notify_args {
 	char mqd_l_[PADL_(int)]; int mqd; char mqd_r_[PADR_(int)];
 	char sigev_l_[PADL_(const struct sigevent *)]; const struct sigevent * sigev; char sigev_r_[PADR_(const struct sigevent *)];
 };
 struct kmq_unlink_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 };
 struct abort2_args {
 	char why_l_[PADL_(const char *)]; const char * why; char why_r_[PADR_(const char *)];
 	char nargs_l_[PADL_(int)]; int nargs; char nargs_r_[PADR_(int)];
 	char args_l_[PADL_(void **)]; void ** args; char args_r_[PADR_(void **)];
 };
 struct thr_set_name_args {
 	char id_l_[PADL_(long)]; long id; char id_r_[PADR_(long)];
 	char name_l_[PADL_(const char *)]; const char * name; char name_r_[PADR_(const char *)];
 };
 struct aio_fsync_args {
 	char op_l_[PADL_(int)]; int op; char op_r_[PADR_(int)];
 	char aiocbp_l_[PADL_(struct aiocb *)]; struct aiocb * aiocbp; char aiocbp_r_[PADR_(struct aiocb *)];
 };
 struct rtprio_thread_args {
 	char function_l_[PADL_(int)]; int function; char function_r_[PADR_(int)];
 	char lwpid_l_[PADL_(lwpid_t)]; lwpid_t lwpid; char lwpid_r_[PADR_(lwpid_t)];
 	char rtp_l_[PADL_(struct rtprio *)]; struct rtprio * rtp; char rtp_r_[PADR_(struct rtprio *)];
 };
 struct sctp_peeloff_args {
 	char sd_l_[PADL_(int)]; int sd; char sd_r_[PADR_(int)];
 	char name_l_[PADL_(uint32_t)]; uint32_t name; char name_r_[PADR_(uint32_t)];
 };
 struct sctp_generic_sendmsg_args {
 	char sd_l_[PADL_(int)]; int sd; char sd_r_[PADR_(int)];
 	char msg_l_[PADL_(caddr_t)]; caddr_t msg; char msg_r_[PADR_(caddr_t)];
 	char mlen_l_[PADL_(int)]; int mlen; char mlen_r_[PADR_(int)];
 	char to_l_[PADL_(caddr_t)]; caddr_t to; char to_r_[PADR_(caddr_t)];
 	char tolen_l_[PADL_(__socklen_t)]; __socklen_t tolen; char tolen_r_[PADR_(__socklen_t)];
 	char sinfo_l_[PADL_(struct sctp_sndrcvinfo *)]; struct sctp_sndrcvinfo * sinfo; char sinfo_r_[PADR_(struct sctp_sndrcvinfo *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct sctp_generic_sendmsg_iov_args {
 	char sd_l_[PADL_(int)]; int sd; char sd_r_[PADR_(int)];
 	char iov_l_[PADL_(struct iovec *)]; struct iovec * iov; char iov_r_[PADR_(struct iovec *)];
 	char iovlen_l_[PADL_(int)]; int iovlen; char iovlen_r_[PADR_(int)];
 	char to_l_[PADL_(caddr_t)]; caddr_t to; char to_r_[PADR_(caddr_t)];
 	char tolen_l_[PADL_(__socklen_t)]; __socklen_t tolen; char tolen_r_[PADR_(__socklen_t)];
 	char sinfo_l_[PADL_(struct sctp_sndrcvinfo *)]; struct sctp_sndrcvinfo * sinfo; char sinfo_r_[PADR_(struct sctp_sndrcvinfo *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct sctp_generic_recvmsg_args {
 	char sd_l_[PADL_(int)]; int sd; char sd_r_[PADR_(int)];
 	char iov_l_[PADL_(struct iovec *)]; struct iovec * iov; char iov_r_[PADR_(struct iovec *)];
 	char iovlen_l_[PADL_(int)]; int iovlen; char iovlen_r_[PADR_(int)];
 	char from_l_[PADL_(struct sockaddr *)]; struct sockaddr * from; char from_r_[PADR_(struct sockaddr *)];
 	char fromlenaddr_l_[PADL_(__socklen_t *)]; __socklen_t * fromlenaddr; char fromlenaddr_r_[PADR_(__socklen_t *)];
 	char sinfo_l_[PADL_(struct sctp_sndrcvinfo *)]; struct sctp_sndrcvinfo * sinfo; char sinfo_r_[PADR_(struct sctp_sndrcvinfo *)];
 	char msg_flags_l_[PADL_(int *)]; int * msg_flags; char msg_flags_r_[PADR_(int *)];
 };
 struct pread_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char buf_l_[PADL_(void *)]; void * buf; char buf_r_[PADR_(void *)];
 	char nbyte_l_[PADL_(size_t)]; size_t nbyte; char nbyte_r_[PADR_(size_t)];
 	char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
 };
 struct pwrite_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char buf_l_[PADL_(const void *)]; const void * buf; char buf_r_[PADR_(const void *)];
 	char nbyte_l_[PADL_(size_t)]; size_t nbyte; char nbyte_r_[PADR_(size_t)];
 	char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
 };
 struct mmap_args {
 	char addr_l_[PADL_(caddr_t)]; caddr_t addr; char addr_r_[PADR_(caddr_t)];
 	char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)];
 	char prot_l_[PADL_(int)]; int prot; char prot_r_[PADR_(int)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char pos_l_[PADL_(off_t)]; off_t pos; char pos_r_[PADR_(off_t)];
 };
 struct lseek_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
 	char whence_l_[PADL_(int)]; int whence; char whence_r_[PADR_(int)];
 };
 struct truncate_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char length_l_[PADL_(off_t)]; off_t length; char length_r_[PADR_(off_t)];
 };
 struct ftruncate_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char length_l_[PADL_(off_t)]; off_t length; char length_r_[PADR_(off_t)];
 };
 struct thr_kill2_args {
 	char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)];
 	char id_l_[PADL_(long)]; long id; char id_r_[PADR_(long)];
 	char sig_l_[PADL_(int)]; int sig; char sig_r_[PADR_(int)];
 };
 struct shm_open_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 	char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)];
 };
 struct shm_unlink_args {
 	char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
 };
 struct cpuset_args {
 	char setid_l_[PADL_(cpusetid_t *)]; cpusetid_t * setid; char setid_r_[PADR_(cpusetid_t *)];
 };
 struct cpuset_setid_args {
 	char which_l_[PADL_(cpuwhich_t)]; cpuwhich_t which; char which_r_[PADR_(cpuwhich_t)];
 	char id_l_[PADL_(id_t)]; id_t id; char id_r_[PADR_(id_t)];
 	char setid_l_[PADL_(cpusetid_t)]; cpusetid_t setid; char setid_r_[PADR_(cpusetid_t)];
 };
 struct cpuset_getid_args {
 	char level_l_[PADL_(cpulevel_t)]; cpulevel_t level; char level_r_[PADR_(cpulevel_t)];
 	char which_l_[PADL_(cpuwhich_t)]; cpuwhich_t which; char which_r_[PADR_(cpuwhich_t)];
 	char id_l_[PADL_(id_t)]; id_t id; char id_r_[PADR_(id_t)];
 	char setid_l_[PADL_(cpusetid_t *)]; cpusetid_t * setid; char setid_r_[PADR_(cpusetid_t *)];
 };
 struct cpuset_getaffinity_args {
 	char level_l_[PADL_(cpulevel_t)]; cpulevel_t level; char level_r_[PADR_(cpulevel_t)];
 	char which_l_[PADL_(cpuwhich_t)]; cpuwhich_t which; char which_r_[PADR_(cpuwhich_t)];
 	char id_l_[PADL_(id_t)]; id_t id; char id_r_[PADR_(id_t)];
 	char cpusetsize_l_[PADL_(size_t)]; size_t cpusetsize; char cpusetsize_r_[PADR_(size_t)];
 	char mask_l_[PADL_(cpuset_t *)]; cpuset_t * mask; char mask_r_[PADR_(cpuset_t *)];
 };
 struct cpuset_setaffinity_args {
 	char level_l_[PADL_(cpulevel_t)]; cpulevel_t level; char level_r_[PADR_(cpulevel_t)];
 	char which_l_[PADL_(cpuwhich_t)]; cpuwhich_t which; char which_r_[PADR_(cpuwhich_t)];
 	char id_l_[PADL_(id_t)]; id_t id; char id_r_[PADR_(id_t)];
 	char cpusetsize_l_[PADL_(size_t)]; size_t cpusetsize; char cpusetsize_r_[PADR_(size_t)];
 	char mask_l_[PADL_(const cpuset_t *)]; const cpuset_t * mask; char mask_r_[PADR_(const cpuset_t *)];
 };
 struct faccessat_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
 	char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
 };
 struct fchmodat_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)];
 	char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
 };
 struct fchownat_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)];
 	char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)];
 	char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
 };
 struct fexecve_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char argv_l_[PADL_(char **)]; char ** argv; char argv_r_[PADR_(char **)];
 	char envv_l_[PADL_(char **)]; char ** envv; char envv_r_[PADR_(char **)];
 };
 struct fstatat_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char buf_l_[PADL_(struct stat *)]; struct stat * buf; char buf_r_[PADR_(struct stat *)];
 	char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
 };
 struct futimesat_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char times_l_[PADL_(struct timeval *)]; struct timeval * times; char times_r_[PADR_(struct timeval *)];
 };
 struct linkat_args {
 	char fd1_l_[PADL_(int)]; int fd1; char fd1_r_[PADR_(int)];
 	char path1_l_[PADL_(char *)]; char * path1; char path1_r_[PADR_(char *)];
 	char fd2_l_[PADL_(int)]; int fd2; char fd2_r_[PADR_(int)];
 	char path2_l_[PADL_(char *)]; char * path2; char path2_r_[PADR_(char *)];
 	char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
 };
 struct mkdirat_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)];
 };
 struct mkfifoat_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)];
 };
 struct mknodat_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)];
 	char dev_l_[PADL_(dev_t)]; dev_t dev; char dev_r_[PADR_(dev_t)];
 };
 struct openat_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
 	char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)];
 };
 struct readlinkat_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char buf_l_[PADL_(char *)]; char * buf; char buf_r_[PADR_(char *)];
 	char bufsize_l_[PADL_(size_t)]; size_t bufsize; char bufsize_r_[PADR_(size_t)];
 };
 struct renameat_args {
 	char oldfd_l_[PADL_(int)]; int oldfd; char oldfd_r_[PADR_(int)];
 	char old_l_[PADL_(char *)]; char * old; char old_r_[PADR_(char *)];
 	char newfd_l_[PADL_(int)]; int newfd; char newfd_r_[PADR_(int)];
 	char new_l_[PADL_(char *)]; char * new; char new_r_[PADR_(char *)];
 };
 struct symlinkat_args {
 	char path1_l_[PADL_(char *)]; char * path1; char path1_r_[PADR_(char *)];
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char path2_l_[PADL_(char *)]; char * path2; char path2_r_[PADR_(char *)];
 };
 struct unlinkat_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
 };
 int	nosys(struct thread *, struct nosys_args *);
 void	sys_exit(struct thread *, struct sys_exit_args *);
 int	fork(struct thread *, struct fork_args *);
 int	read(struct thread *, struct read_args *);
 int	write(struct thread *, struct write_args *);
 int	open(struct thread *, struct open_args *);
 int	close(struct thread *, struct close_args *);
 int	wait4(struct thread *, struct wait_args *);
 int	link(struct thread *, struct link_args *);
 int	unlink(struct thread *, struct unlink_args *);
 int	chdir(struct thread *, struct chdir_args *);
 int	fchdir(struct thread *, struct fchdir_args *);
 int	mknod(struct thread *, struct mknod_args *);
 int	chmod(struct thread *, struct chmod_args *);
 int	chown(struct thread *, struct chown_args *);
 int	obreak(struct thread *, struct obreak_args *);
 int	getpid(struct thread *, struct getpid_args *);
 int	mount(struct thread *, struct mount_args *);
 int	unmount(struct thread *, struct unmount_args *);
 int	setuid(struct thread *, struct setuid_args *);
 int	getuid(struct thread *, struct getuid_args *);
 int	geteuid(struct thread *, struct geteuid_args *);
 int	ptrace(struct thread *, struct ptrace_args *);
 int	recvmsg(struct thread *, struct recvmsg_args *);
 int	sendmsg(struct thread *, struct sendmsg_args *);
 int	recvfrom(struct thread *, struct recvfrom_args *);
 int	accept(struct thread *, struct accept_args *);
 int	getpeername(struct thread *, struct getpeername_args *);
 int	getsockname(struct thread *, struct getsockname_args *);
 int	access(struct thread *, struct access_args *);
 int	chflags(struct thread *, struct chflags_args *);
 int	fchflags(struct thread *, struct fchflags_args *);
 int	sync(struct thread *, struct sync_args *);
 int	kill(struct thread *, struct kill_args *);
 int	getppid(struct thread *, struct getppid_args *);
 int	dup(struct thread *, struct dup_args *);
 int	pipe(struct thread *, struct pipe_args *);
 int	getegid(struct thread *, struct getegid_args *);
 int	profil(struct thread *, struct profil_args *);
 int	ktrace(struct thread *, struct ktrace_args *);
 int	getgid(struct thread *, struct getgid_args *);
 int	getlogin(struct thread *, struct getlogin_args *);
 int	setlogin(struct thread *, struct setlogin_args *);
 int	acct(struct thread *, struct acct_args *);
 int	sigaltstack(struct thread *, struct sigaltstack_args *);
 int	ioctl(struct thread *, struct ioctl_args *);
 int	reboot(struct thread *, struct reboot_args *);
 int	revoke(struct thread *, struct revoke_args *);
 int	symlink(struct thread *, struct symlink_args *);
 int	readlink(struct thread *, struct readlink_args *);
 int	execve(struct thread *, struct execve_args *);
 int	umask(struct thread *, struct umask_args *);
 int	chroot(struct thread *, struct chroot_args *);
 int	msync(struct thread *, struct msync_args *);
 int	vfork(struct thread *, struct vfork_args *);
 int	sbrk(struct thread *, struct sbrk_args *);
 int	sstk(struct thread *, struct sstk_args *);
 int	ovadvise(struct thread *, struct ovadvise_args *);
 int	munmap(struct thread *, struct munmap_args *);
 int	mprotect(struct thread *, struct mprotect_args *);
 int	madvise(struct thread *, struct madvise_args *);
 int	mincore(struct thread *, struct mincore_args *);
 int	getgroups(struct thread *, struct getgroups_args *);
 int	setgroups(struct thread *, struct setgroups_args *);
 int	getpgrp(struct thread *, struct getpgrp_args *);
 int	setpgid(struct thread *, struct setpgid_args *);
 int	setitimer(struct thread *, struct setitimer_args *);
 int	swapon(struct thread *, struct swapon_args *);
 int	getitimer(struct thread *, struct getitimer_args *);
 int	getdtablesize(struct thread *, struct getdtablesize_args *);
 int	dup2(struct thread *, struct dup2_args *);
 int	fcntl(struct thread *, struct fcntl_args *);
 int	select(struct thread *, struct select_args *);
 int	fsync(struct thread *, struct fsync_args *);
 int	setpriority(struct thread *, struct setpriority_args *);
 int	socket(struct thread *, struct socket_args *);
 int	connect(struct thread *, struct connect_args *);
 int	getpriority(struct thread *, struct getpriority_args *);
 int	bind(struct thread *, struct bind_args *);
 int	setsockopt(struct thread *, struct setsockopt_args *);
 int	listen(struct thread *, struct listen_args *);
 int	gettimeofday(struct thread *, struct gettimeofday_args *);
 int	getrusage(struct thread *, struct getrusage_args *);
 int	getsockopt(struct thread *, struct getsockopt_args *);
 int	readv(struct thread *, struct readv_args *);
 int	writev(struct thread *, struct writev_args *);
 int	settimeofday(struct thread *, struct settimeofday_args *);
 int	fchown(struct thread *, struct fchown_args *);
 int	fchmod(struct thread *, struct fchmod_args *);
 int	setreuid(struct thread *, struct setreuid_args *);
 int	setregid(struct thread *, struct setregid_args *);
 int	rename(struct thread *, struct rename_args *);
 int	flock(struct thread *, struct flock_args *);
 int	mkfifo(struct thread *, struct mkfifo_args *);
 int	sendto(struct thread *, struct sendto_args *);
 int	shutdown(struct thread *, struct shutdown_args *);
 int	socketpair(struct thread *, struct socketpair_args *);
 int	mkdir(struct thread *, struct mkdir_args *);
 int	rmdir(struct thread *, struct rmdir_args *);
 int	utimes(struct thread *, struct utimes_args *);
 int	adjtime(struct thread *, struct adjtime_args *);
 int	setsid(struct thread *, struct setsid_args *);
 int	quotactl(struct thread *, struct quotactl_args *);
 int	nlm_syscall(struct thread *, struct nlm_syscall_args *);
 int	nfssvc(struct thread *, struct nfssvc_args *);
 int	lgetfh(struct thread *, struct lgetfh_args *);
 int	getfh(struct thread *, struct getfh_args *);
 int	getdomainname(struct thread *, struct getdomainname_args *);
 int	setdomainname(struct thread *, struct setdomainname_args *);
 int	uname(struct thread *, struct uname_args *);
 int	sysarch(struct thread *, struct sysarch_args *);
 int	rtprio(struct thread *, struct rtprio_args *);
 int	semsys(struct thread *, struct semsys_args *);
 int	msgsys(struct thread *, struct msgsys_args *);
 int	shmsys(struct thread *, struct shmsys_args *);
 int	freebsd6_pread(struct thread *, struct freebsd6_pread_args *);
 int	freebsd6_pwrite(struct thread *, struct freebsd6_pwrite_args *);
+int	setfib(struct thread *, struct setfib_args *);
 int	ntp_adjtime(struct thread *, struct ntp_adjtime_args *);
 int	setgid(struct thread *, struct setgid_args *);
 int	setegid(struct thread *, struct setegid_args *);
 int	seteuid(struct thread *, struct seteuid_args *);
 int	stat(struct thread *, struct stat_args *);
 int	fstat(struct thread *, struct fstat_args *);
 int	lstat(struct thread *, struct lstat_args *);
 int	pathconf(struct thread *, struct pathconf_args *);
 int	fpathconf(struct thread *, struct fpathconf_args *);
 int	getrlimit(struct thread *, struct __getrlimit_args *);
 int	setrlimit(struct thread *, struct __setrlimit_args *);
 int	getdirentries(struct thread *, struct getdirentries_args *);
 int	freebsd6_mmap(struct thread *, struct freebsd6_mmap_args *);
 int	freebsd6_lseek(struct thread *, struct freebsd6_lseek_args *);
 int	freebsd6_truncate(struct thread *, struct freebsd6_truncate_args *);
 int	freebsd6_ftruncate(struct thread *, struct freebsd6_ftruncate_args *);
 int	__sysctl(struct thread *, struct sysctl_args *);
 int	mlock(struct thread *, struct mlock_args *);
 int	munlock(struct thread *, struct munlock_args *);
 int	undelete(struct thread *, struct undelete_args *);
 int	futimes(struct thread *, struct futimes_args *);
 int	getpgid(struct thread *, struct getpgid_args *);
 int	poll(struct thread *, struct poll_args *);
 int	lkmnosys(struct thread *, struct nosys_args *);
 int	__semctl(struct thread *, struct __semctl_args *);
 int	semget(struct thread *, struct semget_args *);
 int	semop(struct thread *, struct semop_args *);
 int	msgctl(struct thread *, struct msgctl_args *);
 int	msgget(struct thread *, struct msgget_args *);
 int	msgsnd(struct thread *, struct msgsnd_args *);
 int	msgrcv(struct thread *, struct msgrcv_args *);
 int	shmat(struct thread *, struct shmat_args *);
 int	shmctl(struct thread *, struct shmctl_args *);
 int	shmdt(struct thread *, struct shmdt_args *);
 int	shmget(struct thread *, struct shmget_args *);
 int	clock_gettime(struct thread *, struct clock_gettime_args *);
 int	clock_settime(struct thread *, struct clock_settime_args *);
 int	clock_getres(struct thread *, struct clock_getres_args *);
 int	ktimer_create(struct thread *, struct ktimer_create_args *);
 int	ktimer_delete(struct thread *, struct ktimer_delete_args *);
 int	ktimer_settime(struct thread *, struct ktimer_settime_args *);
 int	ktimer_gettime(struct thread *, struct ktimer_gettime_args *);
 int	ktimer_getoverrun(struct thread *, struct ktimer_getoverrun_args *);
 int	nanosleep(struct thread *, struct nanosleep_args *);
 int	ntp_gettime(struct thread *, struct ntp_gettime_args *);
 int	minherit(struct thread *, struct minherit_args *);
 int	rfork(struct thread *, struct rfork_args *);
 int	openbsd_poll(struct thread *, struct openbsd_poll_args *);
 int	issetugid(struct thread *, struct issetugid_args *);
 int	lchown(struct thread *, struct lchown_args *);
 int	aio_read(struct thread *, struct aio_read_args *);
 int	aio_write(struct thread *, struct aio_write_args *);
 int	lio_listio(struct thread *, struct lio_listio_args *);
 int	getdents(struct thread *, struct getdents_args *);
 int	lchmod(struct thread *, struct lchmod_args *);
 int	lutimes(struct thread *, struct lutimes_args *);
 int	nstat(struct thread *, struct nstat_args *);
 int	nfstat(struct thread *, struct nfstat_args *);
 int	nlstat(struct thread *, struct nlstat_args *);
 int	preadv(struct thread *, struct preadv_args *);
 int	pwritev(struct thread *, struct pwritev_args *);
 int	fhopen(struct thread *, struct fhopen_args *);
 int	fhstat(struct thread *, struct fhstat_args *);
 int	modnext(struct thread *, struct modnext_args *);
 int	modstat(struct thread *, struct modstat_args *);
 int	modfnext(struct thread *, struct modfnext_args *);
 int	modfind(struct thread *, struct modfind_args *);
 int	kldload(struct thread *, struct kldload_args *);
 int	kldunload(struct thread *, struct kldunload_args *);
 int	kldfind(struct thread *, struct kldfind_args *);
 int	kldnext(struct thread *, struct kldnext_args *);
 int	kldstat(struct thread *, struct kldstat_args *);
 int	kldfirstmod(struct thread *, struct kldfirstmod_args *);
 int	getsid(struct thread *, struct getsid_args *);
 int	setresuid(struct thread *, struct setresuid_args *);
 int	setresgid(struct thread *, struct setresgid_args *);
 int	aio_return(struct thread *, struct aio_return_args *);
 int	aio_suspend(struct thread *, struct aio_suspend_args *);
 int	aio_cancel(struct thread *, struct aio_cancel_args *);
 int	aio_error(struct thread *, struct aio_error_args *);
 int	oaio_read(struct thread *, struct oaio_read_args *);
 int	oaio_write(struct thread *, struct oaio_write_args *);
 int	olio_listio(struct thread *, struct olio_listio_args *);
 int	yield(struct thread *, struct yield_args *);
 int	mlockall(struct thread *, struct mlockall_args *);
 int	munlockall(struct thread *, struct munlockall_args *);
 int	__getcwd(struct thread *, struct __getcwd_args *);
 int	sched_setparam(struct thread *, struct sched_setparam_args *);
 int	sched_getparam(struct thread *, struct sched_getparam_args *);
 int	sched_setscheduler(struct thread *, struct sched_setscheduler_args *);
 int	sched_getscheduler(struct thread *, struct sched_getscheduler_args *);
 int	sched_yield(struct thread *, struct sched_yield_args *);
 int	sched_get_priority_max(struct thread *, struct sched_get_priority_max_args *);
 int	sched_get_priority_min(struct thread *, struct sched_get_priority_min_args *);
 int	sched_rr_get_interval(struct thread *, struct sched_rr_get_interval_args *);
 int	utrace(struct thread *, struct utrace_args *);
 int	kldsym(struct thread *, struct kldsym_args *);
 int	jail(struct thread *, struct jail_args *);
 int	sigprocmask(struct thread *, struct sigprocmask_args *);
 int	sigsuspend(struct thread *, struct sigsuspend_args *);
 int	sigpending(struct thread *, struct sigpending_args *);
 int	sigtimedwait(struct thread *, struct sigtimedwait_args *);
 int	sigwaitinfo(struct thread *, struct sigwaitinfo_args *);
 int	__acl_get_file(struct thread *, struct __acl_get_file_args *);
 int	__acl_set_file(struct thread *, struct __acl_set_file_args *);
 int	__acl_get_fd(struct thread *, struct __acl_get_fd_args *);
 int	__acl_set_fd(struct thread *, struct __acl_set_fd_args *);
 int	__acl_delete_file(struct thread *, struct __acl_delete_file_args *);
 int	__acl_delete_fd(struct thread *, struct __acl_delete_fd_args *);
 int	__acl_aclcheck_file(struct thread *, struct __acl_aclcheck_file_args *);
 int	__acl_aclcheck_fd(struct thread *, struct __acl_aclcheck_fd_args *);
 int	extattrctl(struct thread *, struct extattrctl_args *);
 int	extattr_set_file(struct thread *, struct extattr_set_file_args *);
 int	extattr_get_file(struct thread *, struct extattr_get_file_args *);
 int	extattr_delete_file(struct thread *, struct extattr_delete_file_args *);
 int	aio_waitcomplete(struct thread *, struct aio_waitcomplete_args *);
 int	getresuid(struct thread *, struct getresuid_args *);
 int	getresgid(struct thread *, struct getresgid_args *);
 int	kqueue(struct thread *, struct kqueue_args *);
 int	kevent(struct thread *, struct kevent_args *);
 int	lkmressys(struct thread *, struct nosys_args *);
 int	extattr_set_fd(struct thread *, struct extattr_set_fd_args *);
 int	extattr_get_fd(struct thread *, struct extattr_get_fd_args *);
 int	extattr_delete_fd(struct thread *, struct extattr_delete_fd_args *);
 int	__setugid(struct thread *, struct __setugid_args *);
 int	nfsclnt(struct thread *, struct nfsclnt_args *);
 int	eaccess(struct thread *, struct eaccess_args *);
 int	nmount(struct thread *, struct nmount_args *);
 int	__mac_get_proc(struct thread *, struct __mac_get_proc_args *);
 int	__mac_set_proc(struct thread *, struct __mac_set_proc_args *);
 int	__mac_get_fd(struct thread *, struct __mac_get_fd_args *);
 int	__mac_get_file(struct thread *, struct __mac_get_file_args *);
 int	__mac_set_fd(struct thread *, struct __mac_set_fd_args *);
 int	__mac_set_file(struct thread *, struct __mac_set_file_args *);
 int	kenv(struct thread *, struct kenv_args *);
 int	lchflags(struct thread *, struct lchflags_args *);
 int	uuidgen(struct thread *, struct uuidgen_args *);
 int	sendfile(struct thread *, struct sendfile_args *);
 int	mac_syscall(struct thread *, struct mac_syscall_args *);
 int	getfsstat(struct thread *, struct getfsstat_args *);
 int	statfs(struct thread *, struct statfs_args *);
 int	fstatfs(struct thread *, struct fstatfs_args *);
 int	fhstatfs(struct thread *, struct fhstatfs_args *);
 int	ksem_close(struct thread *, struct ksem_close_args *);
 int	ksem_post(struct thread *, struct ksem_post_args *);
 int	ksem_wait(struct thread *, struct ksem_wait_args *);
 int	ksem_trywait(struct thread *, struct ksem_trywait_args *);
 int	ksem_init(struct thread *, struct ksem_init_args *);
 int	ksem_open(struct thread *, struct ksem_open_args *);
 int	ksem_unlink(struct thread *, struct ksem_unlink_args *);
 int	ksem_getvalue(struct thread *, struct ksem_getvalue_args *);
 int	ksem_destroy(struct thread *, struct ksem_destroy_args *);
 int	__mac_get_pid(struct thread *, struct __mac_get_pid_args *);
 int	__mac_get_link(struct thread *, struct __mac_get_link_args *);
 int	__mac_set_link(struct thread *, struct __mac_set_link_args *);
 int	extattr_set_link(struct thread *, struct extattr_set_link_args *);
 int	extattr_get_link(struct thread *, struct extattr_get_link_args *);
 int	extattr_delete_link(struct thread *, struct extattr_delete_link_args *);
 int	__mac_execve(struct thread *, struct __mac_execve_args *);
 int	sigaction(struct thread *, struct sigaction_args *);
 int	sigreturn(struct thread *, struct sigreturn_args *);
 int	getcontext(struct thread *, struct getcontext_args *);
 int	setcontext(struct thread *, struct setcontext_args *);
 int	swapcontext(struct thread *, struct swapcontext_args *);
 int	swapoff(struct thread *, struct swapoff_args *);
 int	__acl_get_link(struct thread *, struct __acl_get_link_args *);
 int	__acl_set_link(struct thread *, struct __acl_set_link_args *);
 int	__acl_delete_link(struct thread *, struct __acl_delete_link_args *);
 int	__acl_aclcheck_link(struct thread *, struct __acl_aclcheck_link_args *);
 int	sigwait(struct thread *, struct sigwait_args *);
 int	thr_create(struct thread *, struct thr_create_args *);
 int	thr_exit(struct thread *, struct thr_exit_args *);
 int	thr_self(struct thread *, struct thr_self_args *);
 int	thr_kill(struct thread *, struct thr_kill_args *);
 int	_umtx_lock(struct thread *, struct _umtx_lock_args *);
 int	_umtx_unlock(struct thread *, struct _umtx_unlock_args *);
 int	jail_attach(struct thread *, struct jail_attach_args *);
 int	extattr_list_fd(struct thread *, struct extattr_list_fd_args *);
 int	extattr_list_file(struct thread *, struct extattr_list_file_args *);
 int	extattr_list_link(struct thread *, struct extattr_list_link_args *);
 int	ksem_timedwait(struct thread *, struct ksem_timedwait_args *);
 int	thr_suspend(struct thread *, struct thr_suspend_args *);
 int	thr_wake(struct thread *, struct thr_wake_args *);
 int	kldunloadf(struct thread *, struct kldunloadf_args *);
 int	audit(struct thread *, struct audit_args *);
 int	auditon(struct thread *, struct auditon_args *);
 int	getauid(struct thread *, struct getauid_args *);
 int	setauid(struct thread *, struct setauid_args *);
 int	getaudit(struct thread *, struct getaudit_args *);
 int	setaudit(struct thread *, struct setaudit_args *);
 int	getaudit_addr(struct thread *, struct getaudit_addr_args *);
 int	setaudit_addr(struct thread *, struct setaudit_addr_args *);
 int	auditctl(struct thread *, struct auditctl_args *);
 int	_umtx_op(struct thread *, struct _umtx_op_args *);
 int	thr_new(struct thread *, struct thr_new_args *);
 int	sigqueue(struct thread *, struct sigqueue_args *);
 int	kmq_open(struct thread *, struct kmq_open_args *);
 int	kmq_setattr(struct thread *, struct kmq_setattr_args *);
 int	kmq_timedreceive(struct thread *, struct kmq_timedreceive_args *);
 int	kmq_timedsend(struct thread *, struct kmq_timedsend_args *);
 int	kmq_notify(struct thread *, struct kmq_notify_args *);
 int	kmq_unlink(struct thread *, struct kmq_unlink_args *);
 int	abort2(struct thread *, struct abort2_args *);
 int	thr_set_name(struct thread *, struct thr_set_name_args *);
 int	aio_fsync(struct thread *, struct aio_fsync_args *);
 int	rtprio_thread(struct thread *, struct rtprio_thread_args *);
 int	sctp_peeloff(struct thread *, struct sctp_peeloff_args *);
 int	sctp_generic_sendmsg(struct thread *, struct sctp_generic_sendmsg_args *);
 int	sctp_generic_sendmsg_iov(struct thread *, struct sctp_generic_sendmsg_iov_args *);
 int	sctp_generic_recvmsg(struct thread *, struct sctp_generic_recvmsg_args *);
 int	pread(struct thread *, struct pread_args *);
 int	pwrite(struct thread *, struct pwrite_args *);
 int	mmap(struct thread *, struct mmap_args *);
 int	lseek(struct thread *, struct lseek_args *);
 int	truncate(struct thread *, struct truncate_args *);
 int	ftruncate(struct thread *, struct ftruncate_args *);
 int	thr_kill2(struct thread *, struct thr_kill2_args *);
 int	shm_open(struct thread *, struct shm_open_args *);
 int	shm_unlink(struct thread *, struct shm_unlink_args *);
 int	cpuset(struct thread *, struct cpuset_args *);
 int	cpuset_setid(struct thread *, struct cpuset_setid_args *);
 int	cpuset_getid(struct thread *, struct cpuset_getid_args *);
 int	cpuset_getaffinity(struct thread *, struct cpuset_getaffinity_args *);
 int	cpuset_setaffinity(struct thread *, struct cpuset_setaffinity_args *);
 int	faccessat(struct thread *, struct faccessat_args *);
 int	fchmodat(struct thread *, struct fchmodat_args *);
 int	fchownat(struct thread *, struct fchownat_args *);
 int	fexecve(struct thread *, struct fexecve_args *);
 int	fstatat(struct thread *, struct fstatat_args *);
 int	futimesat(struct thread *, struct futimesat_args *);
 int	linkat(struct thread *, struct linkat_args *);
 int	mkdirat(struct thread *, struct mkdirat_args *);
 int	mkfifoat(struct thread *, struct mkfifoat_args *);
 int	mknodat(struct thread *, struct mknodat_args *);
 int	openat(struct thread *, struct openat_args *);
 int	readlinkat(struct thread *, struct readlinkat_args *);
 int	renameat(struct thread *, struct renameat_args *);
 int	symlinkat(struct thread *, struct symlinkat_args *);
 int	unlinkat(struct thread *, struct unlinkat_args *);
 
 #ifdef COMPAT_43
 
 struct ocreat_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
 };
 struct olseek_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char offset_l_[PADL_(long)]; long offset; char offset_r_[PADR_(long)];
 	char whence_l_[PADL_(int)]; int whence; char whence_r_[PADR_(int)];
 };
 struct ostat_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char ub_l_[PADL_(struct ostat *)]; struct ostat * ub; char ub_r_[PADR_(struct ostat *)];
 };
 struct olstat_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char ub_l_[PADL_(struct ostat *)]; struct ostat * ub; char ub_r_[PADR_(struct ostat *)];
 };
 struct osigaction_args {
 	char signum_l_[PADL_(int)]; int signum; char signum_r_[PADR_(int)];
 	char nsa_l_[PADL_(struct osigaction *)]; struct osigaction * nsa; char nsa_r_[PADR_(struct osigaction *)];
 	char osa_l_[PADL_(struct osigaction *)]; struct osigaction * osa; char osa_r_[PADR_(struct osigaction *)];
 };
 struct osigprocmask_args {
 	char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)];
 	char mask_l_[PADL_(osigset_t)]; osigset_t mask; char mask_r_[PADR_(osigset_t)];
 };
 struct ofstat_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char sb_l_[PADL_(struct ostat *)]; struct ostat * sb; char sb_r_[PADR_(struct ostat *)];
 };
 struct getkerninfo_args {
 	char op_l_[PADL_(int)]; int op; char op_r_[PADR_(int)];
 	char where_l_[PADL_(char *)]; char * where; char where_r_[PADR_(char *)];
 	char size_l_[PADL_(size_t *)]; size_t * size; char size_r_[PADR_(size_t *)];
 	char arg_l_[PADL_(int)]; int arg; char arg_r_[PADR_(int)];
 };
 struct ommap_args {
 	char addr_l_[PADL_(void *)]; void * addr; char addr_r_[PADR_(void *)];
 	char len_l_[PADL_(int)]; int len; char len_r_[PADR_(int)];
 	char prot_l_[PADL_(int)]; int prot; char prot_r_[PADR_(int)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char pos_l_[PADL_(long)]; long pos; char pos_r_[PADR_(long)];
 };
 struct gethostname_args {
 	char hostname_l_[PADL_(char *)]; char * hostname; char hostname_r_[PADR_(char *)];
 	char len_l_[PADL_(u_int)]; u_int len; char len_r_[PADR_(u_int)];
 };
 struct sethostname_args {
 	char hostname_l_[PADL_(char *)]; char * hostname; char hostname_r_[PADR_(char *)];
 	char len_l_[PADL_(u_int)]; u_int len; char len_r_[PADR_(u_int)];
 };
 struct osend_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char buf_l_[PADL_(caddr_t)]; caddr_t buf; char buf_r_[PADR_(caddr_t)];
 	char len_l_[PADL_(int)]; int len; char len_r_[PADR_(int)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct orecv_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char buf_l_[PADL_(caddr_t)]; caddr_t buf; char buf_r_[PADR_(caddr_t)];
 	char len_l_[PADL_(int)]; int len; char len_r_[PADR_(int)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct osigreturn_args {
 	char sigcntxp_l_[PADL_(struct osigcontext *)]; struct osigcontext * sigcntxp; char sigcntxp_r_[PADR_(struct osigcontext *)];
 };
 struct osigvec_args {
 	char signum_l_[PADL_(int)]; int signum; char signum_r_[PADR_(int)];
 	char nsv_l_[PADL_(struct sigvec *)]; struct sigvec * nsv; char nsv_r_[PADR_(struct sigvec *)];
 	char osv_l_[PADL_(struct sigvec *)]; struct sigvec * osv; char osv_r_[PADR_(struct sigvec *)];
 };
 struct osigblock_args {
 	char mask_l_[PADL_(int)]; int mask; char mask_r_[PADR_(int)];
 };
 struct osigsetmask_args {
 	char mask_l_[PADL_(int)]; int mask; char mask_r_[PADR_(int)];
 };
 struct osigsuspend_args {
 	char mask_l_[PADL_(osigset_t)]; osigset_t mask; char mask_r_[PADR_(osigset_t)];
 };
 struct osigstack_args {
 	char nss_l_[PADL_(struct sigstack *)]; struct sigstack * nss; char nss_r_[PADR_(struct sigstack *)];
 	char oss_l_[PADL_(struct sigstack *)]; struct sigstack * oss; char oss_r_[PADR_(struct sigstack *)];
 };
 struct orecvmsg_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char msg_l_[PADL_(struct omsghdr *)]; struct omsghdr * msg; char msg_r_[PADR_(struct omsghdr *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct osendmsg_args {
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char msg_l_[PADL_(caddr_t)]; caddr_t msg; char msg_r_[PADR_(caddr_t)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct otruncate_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char length_l_[PADL_(long)]; long length; char length_r_[PADR_(long)];
 };
 struct oftruncate_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char length_l_[PADL_(long)]; long length; char length_r_[PADR_(long)];
 };
 struct ogetpeername_args {
 	char fdes_l_[PADL_(int)]; int fdes; char fdes_r_[PADR_(int)];
 	char asa_l_[PADL_(caddr_t)]; caddr_t asa; char asa_r_[PADR_(caddr_t)];
 	char alen_l_[PADL_(int *)]; int * alen; char alen_r_[PADR_(int *)];
 };
 struct osethostid_args {
 	char hostid_l_[PADL_(long)]; long hostid; char hostid_r_[PADR_(long)];
 };
 struct ogetrlimit_args {
 	char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)];
 	char rlp_l_[PADL_(struct orlimit *)]; struct orlimit * rlp; char rlp_r_[PADR_(struct orlimit *)];
 };
 struct osetrlimit_args {
 	char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)];
 	char rlp_l_[PADL_(struct orlimit *)]; struct orlimit * rlp; char rlp_r_[PADR_(struct orlimit *)];
 };
 struct okillpg_args {
 	char pgid_l_[PADL_(int)]; int pgid; char pgid_r_[PADR_(int)];
 	char signum_l_[PADL_(int)]; int signum; char signum_r_[PADR_(int)];
 };
 struct ogetdirentries_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char buf_l_[PADL_(char *)]; char * buf; char buf_r_[PADR_(char *)];
 	char count_l_[PADL_(u_int)]; u_int count; char count_r_[PADR_(u_int)];
 	char basep_l_[PADL_(long *)]; long * basep; char basep_r_[PADR_(long *)];
 };
 int	ocreat(struct thread *, struct ocreat_args *);
 int	olseek(struct thread *, struct olseek_args *);
 int	ostat(struct thread *, struct ostat_args *);
 int	olstat(struct thread *, struct olstat_args *);
 int	osigaction(struct thread *, struct osigaction_args *);
 int	osigprocmask(struct thread *, struct osigprocmask_args *);
 int	osigpending(struct thread *, struct osigpending_args *);
 int	ofstat(struct thread *, struct ofstat_args *);
 int	ogetkerninfo(struct thread *, struct getkerninfo_args *);
 int	ogetpagesize(struct thread *, struct getpagesize_args *);
 int	ommap(struct thread *, struct ommap_args *);
 int	owait(struct thread *, struct owait_args *);
 int	ogethostname(struct thread *, struct gethostname_args *);
 int	osethostname(struct thread *, struct sethostname_args *);
 int	oaccept(struct thread *, struct accept_args *);
 int	osend(struct thread *, struct osend_args *);
 int	orecv(struct thread *, struct orecv_args *);
 int	osigreturn(struct thread *, struct osigreturn_args *);
 int	osigvec(struct thread *, struct osigvec_args *);
 int	osigblock(struct thread *, struct osigblock_args *);
 int	osigsetmask(struct thread *, struct osigsetmask_args *);
 int	osigsuspend(struct thread *, struct osigsuspend_args *);
 int	osigstack(struct thread *, struct osigstack_args *);
 int	orecvmsg(struct thread *, struct orecvmsg_args *);
 int	osendmsg(struct thread *, struct osendmsg_args *);
 int	orecvfrom(struct thread *, struct recvfrom_args *);
 int	otruncate(struct thread *, struct otruncate_args *);
 int	oftruncate(struct thread *, struct oftruncate_args *);
 int	ogetpeername(struct thread *, struct ogetpeername_args *);
 int	ogethostid(struct thread *, struct ogethostid_args *);
 int	osethostid(struct thread *, struct osethostid_args *);
 int	ogetrlimit(struct thread *, struct ogetrlimit_args *);
 int	osetrlimit(struct thread *, struct osetrlimit_args *);
 int	okillpg(struct thread *, struct okillpg_args *);
 int	oquota(struct thread *, struct oquota_args *);
 int	ogetsockname(struct thread *, struct getsockname_args *);
 int	ogetdirentries(struct thread *, struct ogetdirentries_args *);
 
 #endif /* COMPAT_43 */
 
 
 #ifdef COMPAT_FREEBSD4
 
 struct freebsd4_getfsstat_args {
 	char buf_l_[PADL_(struct ostatfs *)]; struct ostatfs * buf; char buf_r_[PADR_(struct ostatfs *)];
 	char bufsize_l_[PADL_(long)]; long bufsize; char bufsize_r_[PADR_(long)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct freebsd4_statfs_args {
 	char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
 	char buf_l_[PADL_(struct ostatfs *)]; struct ostatfs * buf; char buf_r_[PADR_(struct ostatfs *)];
 };
 struct freebsd4_fstatfs_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char buf_l_[PADL_(struct ostatfs *)]; struct ostatfs * buf; char buf_r_[PADR_(struct ostatfs *)];
 };
 struct freebsd4_fhstatfs_args {
 	char u_fhp_l_[PADL_(const struct fhandle *)]; const struct fhandle * u_fhp; char u_fhp_r_[PADR_(const struct fhandle *)];
 	char buf_l_[PADL_(struct ostatfs *)]; struct ostatfs * buf; char buf_r_[PADR_(struct ostatfs *)];
 };
 struct freebsd4_sendfile_args {
 	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
 	char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
 	char nbytes_l_[PADL_(size_t)]; size_t nbytes; char nbytes_r_[PADR_(size_t)];
 	char hdtr_l_[PADL_(struct sf_hdtr *)]; struct sf_hdtr * hdtr; char hdtr_r_[PADR_(struct sf_hdtr *)];
 	char sbytes_l_[PADL_(off_t *)]; off_t * sbytes; char sbytes_r_[PADR_(off_t *)];
 	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
 };
 struct freebsd4_sigaction_args {
 	char sig_l_[PADL_(int)]; int sig; char sig_r_[PADR_(int)];
 	char act_l_[PADL_(const struct sigaction *)]; const struct sigaction * act; char act_r_[PADR_(const struct sigaction *)];
 	char oact_l_[PADL_(struct sigaction *)]; struct sigaction * oact; char oact_r_[PADR_(struct sigaction *)];
 };
 struct freebsd4_sigreturn_args {
 	char sigcntxp_l_[PADL_(const struct ucontext4 *)]; const struct ucontext4 * sigcntxp; char sigcntxp_r_[PADR_(const struct ucontext4 *)];
 };
 int	freebsd4_getfsstat(struct thread *, struct freebsd4_getfsstat_args *);
 int	freebsd4_statfs(struct thread *, struct freebsd4_statfs_args *);
 int	freebsd4_fstatfs(struct thread *, struct freebsd4_fstatfs_args *);
 int	freebsd4_fhstatfs(struct thread *, struct freebsd4_fhstatfs_args *);
 int	freebsd4_sendfile(struct thread *, struct freebsd4_sendfile_args *);
 int	freebsd4_sigaction(struct thread *, struct freebsd4_sigaction_args *);
 int	freebsd4_sigreturn(struct thread *, struct freebsd4_sigreturn_args *);
 
 #endif /* COMPAT_FREEBSD4 */
 
 
 #ifdef COMPAT_FREEBSD6
 
 
 #endif /* COMPAT_FREEBSD6 */
 
 #define	SYS_AUE_syscall	AUE_NULL
 #define	SYS_AUE_exit	AUE_EXIT
 #define	SYS_AUE_fork	AUE_FORK
 #define	SYS_AUE_read	AUE_NULL
 #define	SYS_AUE_write	AUE_NULL
 #define	SYS_AUE_open	AUE_OPEN_RWTC
 #define	SYS_AUE_close	AUE_CLOSE
 #define	SYS_AUE_wait4	AUE_WAIT4
 #define	SYS_AUE_link	AUE_LINK
 #define	SYS_AUE_unlink	AUE_UNLINK
 #define	SYS_AUE_chdir	AUE_CHDIR
 #define	SYS_AUE_fchdir	AUE_FCHDIR
 #define	SYS_AUE_mknod	AUE_MKNOD
 #define	SYS_AUE_chmod	AUE_CHMOD
 #define	SYS_AUE_chown	AUE_CHOWN
 #define	SYS_AUE_break	AUE_NULL
 #define	SYS_AUE_getpid	AUE_GETPID
 #define	SYS_AUE_mount	AUE_MOUNT
 #define	SYS_AUE_unmount	AUE_UMOUNT
 #define	SYS_AUE_setuid	AUE_SETUID
 #define	SYS_AUE_getuid	AUE_GETUID
 #define	SYS_AUE_geteuid	AUE_GETEUID
 #define	SYS_AUE_ptrace	AUE_PTRACE
 #define	SYS_AUE_recvmsg	AUE_RECVMSG
 #define	SYS_AUE_sendmsg	AUE_SENDMSG
 #define	SYS_AUE_recvfrom	AUE_RECVFROM
 #define	SYS_AUE_accept	AUE_ACCEPT
 #define	SYS_AUE_getpeername	AUE_GETPEERNAME
 #define	SYS_AUE_getsockname	AUE_GETSOCKNAME
 #define	SYS_AUE_access	AUE_ACCESS
 #define	SYS_AUE_chflags	AUE_CHFLAGS
 #define	SYS_AUE_fchflags	AUE_FCHFLAGS
 #define	SYS_AUE_sync	AUE_SYNC
 #define	SYS_AUE_kill	AUE_KILL
 #define	SYS_AUE_getppid	AUE_GETPPID
 #define	SYS_AUE_dup	AUE_DUP
 #define	SYS_AUE_pipe	AUE_PIPE
 #define	SYS_AUE_getegid	AUE_GETEGID
 #define	SYS_AUE_profil	AUE_PROFILE
 #define	SYS_AUE_ktrace	AUE_KTRACE
 #define	SYS_AUE_getgid	AUE_GETGID
 #define	SYS_AUE_getlogin	AUE_GETLOGIN
 #define	SYS_AUE_setlogin	AUE_SETLOGIN
 #define	SYS_AUE_acct	AUE_ACCT
 #define	SYS_AUE_sigaltstack	AUE_SIGALTSTACK
 #define	SYS_AUE_ioctl	AUE_IOCTL
 #define	SYS_AUE_reboot	AUE_REBOOT
 #define	SYS_AUE_revoke	AUE_REVOKE
 #define	SYS_AUE_symlink	AUE_SYMLINK
 #define	SYS_AUE_readlink	AUE_READLINK
 #define	SYS_AUE_execve	AUE_EXECVE
 #define	SYS_AUE_umask	AUE_UMASK
 #define	SYS_AUE_chroot	AUE_CHROOT
 #define	SYS_AUE_msync	AUE_MSYNC
 #define	SYS_AUE_vfork	AUE_VFORK
 #define	SYS_AUE_sbrk	AUE_SBRK
 #define	SYS_AUE_sstk	AUE_SSTK
 #define	SYS_AUE_vadvise	AUE_O_VADVISE
 #define	SYS_AUE_munmap	AUE_MUNMAP
 #define	SYS_AUE_mprotect	AUE_MPROTECT
 #define	SYS_AUE_madvise	AUE_MADVISE
 #define	SYS_AUE_mincore	AUE_MINCORE
 #define	SYS_AUE_getgroups	AUE_GETGROUPS
 #define	SYS_AUE_setgroups	AUE_SETGROUPS
 #define	SYS_AUE_getpgrp	AUE_GETPGRP
 #define	SYS_AUE_setpgid	AUE_SETPGRP
 #define	SYS_AUE_setitimer	AUE_SETITIMER
 #define	SYS_AUE_swapon	AUE_SWAPON
 #define	SYS_AUE_getitimer	AUE_GETITIMER
 #define	SYS_AUE_getdtablesize	AUE_GETDTABLESIZE
 #define	SYS_AUE_dup2	AUE_DUP2
 #define	SYS_AUE_fcntl	AUE_FCNTL
 #define	SYS_AUE_select	AUE_SELECT
 #define	SYS_AUE_fsync	AUE_FSYNC
 #define	SYS_AUE_setpriority	AUE_SETPRIORITY
 #define	SYS_AUE_socket	AUE_SOCKET
 #define	SYS_AUE_connect	AUE_CONNECT
 #define	SYS_AUE_getpriority	AUE_GETPRIORITY
 #define	SYS_AUE_bind	AUE_BIND
 #define	SYS_AUE_setsockopt	AUE_SETSOCKOPT
 #define	SYS_AUE_listen	AUE_LISTEN
 #define	SYS_AUE_gettimeofday	AUE_GETTIMEOFDAY
 #define	SYS_AUE_getrusage	AUE_GETRUSAGE
 #define	SYS_AUE_getsockopt	AUE_GETSOCKOPT
 #define	SYS_AUE_readv	AUE_READV
 #define	SYS_AUE_writev	AUE_WRITEV
 #define	SYS_AUE_settimeofday	AUE_SETTIMEOFDAY
 #define	SYS_AUE_fchown	AUE_FCHOWN
 #define	SYS_AUE_fchmod	AUE_FCHMOD
 #define	SYS_AUE_setreuid	AUE_SETREUID
 #define	SYS_AUE_setregid	AUE_SETREGID
 #define	SYS_AUE_rename	AUE_RENAME
 #define	SYS_AUE_flock	AUE_FLOCK
 #define	SYS_AUE_mkfifo	AUE_MKFIFO
 #define	SYS_AUE_sendto	AUE_SENDTO
 #define	SYS_AUE_shutdown	AUE_SHUTDOWN
 #define	SYS_AUE_socketpair	AUE_SOCKETPAIR
 #define	SYS_AUE_mkdir	AUE_MKDIR
 #define	SYS_AUE_rmdir	AUE_RMDIR
 #define	SYS_AUE_utimes	AUE_UTIMES
 #define	SYS_AUE_adjtime	AUE_ADJTIME
 #define	SYS_AUE_setsid	AUE_SETSID
 #define	SYS_AUE_quotactl	AUE_QUOTACTL
 #define	SYS_AUE_nlm_syscall	AUE_NULL
 #define	SYS_AUE_nfssvc	AUE_NFS_SVC
 #define	SYS_AUE_lgetfh	AUE_LGETFH
 #define	SYS_AUE_getfh	AUE_NFS_GETFH
 #define	SYS_AUE_getdomainname	AUE_SYSCTL
 #define	SYS_AUE_setdomainname	AUE_SYSCTL
 #define	SYS_AUE_uname	AUE_NULL
 #define	SYS_AUE_sysarch	AUE_SYSARCH
 #define	SYS_AUE_rtprio	AUE_RTPRIO
 #define	SYS_AUE_semsys	AUE_SEMSYS
 #define	SYS_AUE_msgsys	AUE_MSGSYS
 #define	SYS_AUE_shmsys	AUE_SHMSYS
 #define	SYS_AUE_freebsd6_pread	AUE_PREAD
 #define	SYS_AUE_freebsd6_pwrite	AUE_PWRITE
+#define	SYS_AUE_setfib	AUE_NULL
 #define	SYS_AUE_ntp_adjtime	AUE_NTP_ADJTIME
 #define	SYS_AUE_setgid	AUE_SETGID
 #define	SYS_AUE_setegid	AUE_SETEGID
 #define	SYS_AUE_seteuid	AUE_SETEUID
 #define	SYS_AUE_stat	AUE_STAT
 #define	SYS_AUE_fstat	AUE_FSTAT
 #define	SYS_AUE_lstat	AUE_LSTAT
 #define	SYS_AUE_pathconf	AUE_PATHCONF
 #define	SYS_AUE_fpathconf	AUE_FPATHCONF
 #define	SYS_AUE_getrlimit	AUE_GETRLIMIT
 #define	SYS_AUE_setrlimit	AUE_SETRLIMIT
 #define	SYS_AUE_getdirentries	AUE_GETDIRENTRIES
 #define	SYS_AUE_freebsd6_mmap	AUE_MMAP
 #define	SYS_AUE_freebsd6_lseek	AUE_LSEEK
 #define	SYS_AUE_freebsd6_truncate	AUE_TRUNCATE
 #define	SYS_AUE_freebsd6_ftruncate	AUE_FTRUNCATE
 #define	SYS_AUE___sysctl	AUE_SYSCTL
 #define	SYS_AUE_mlock	AUE_MLOCK
 #define	SYS_AUE_munlock	AUE_MUNLOCK
 #define	SYS_AUE_undelete	AUE_UNDELETE
 #define	SYS_AUE_futimes	AUE_FUTIMES
 #define	SYS_AUE_getpgid	AUE_GETPGID
 #define	SYS_AUE_poll	AUE_POLL
 #define	SYS_AUE_lkmnosys	AUE_NULL
 #define	SYS_AUE___semctl	AUE_SEMCTL
 #define	SYS_AUE_semget	AUE_SEMGET
 #define	SYS_AUE_semop	AUE_SEMOP
 #define	SYS_AUE_msgctl	AUE_MSGCTL
 #define	SYS_AUE_msgget	AUE_MSGGET
 #define	SYS_AUE_msgsnd	AUE_MSGSND
 #define	SYS_AUE_msgrcv	AUE_MSGRCV
 #define	SYS_AUE_shmat	AUE_SHMAT
 #define	SYS_AUE_shmctl	AUE_SHMCTL
 #define	SYS_AUE_shmdt	AUE_SHMDT
 #define	SYS_AUE_shmget	AUE_SHMGET
 #define	SYS_AUE_clock_gettime	AUE_NULL
 #define	SYS_AUE_clock_settime	AUE_CLOCK_SETTIME
 #define	SYS_AUE_clock_getres	AUE_NULL
 #define	SYS_AUE_ktimer_create	AUE_NULL
 #define	SYS_AUE_ktimer_delete	AUE_NULL
 #define	SYS_AUE_ktimer_settime	AUE_NULL
 #define	SYS_AUE_ktimer_gettime	AUE_NULL
 #define	SYS_AUE_ktimer_getoverrun	AUE_NULL
 #define	SYS_AUE_nanosleep	AUE_NULL
 #define	SYS_AUE_ntp_gettime	AUE_NULL
 #define	SYS_AUE_minherit	AUE_MINHERIT
 #define	SYS_AUE_rfork	AUE_RFORK
 #define	SYS_AUE_openbsd_poll	AUE_POLL
 #define	SYS_AUE_issetugid	AUE_ISSETUGID
 #define	SYS_AUE_lchown	AUE_LCHOWN
 #define	SYS_AUE_aio_read	AUE_NULL
 #define	SYS_AUE_aio_write	AUE_NULL
 #define	SYS_AUE_lio_listio	AUE_NULL
 #define	SYS_AUE_getdents	AUE_O_GETDENTS
 #define	SYS_AUE_lchmod	AUE_LCHMOD
 #define	SYS_AUE_lutimes	AUE_LUTIMES
 #define	SYS_AUE_nstat	AUE_STAT
 #define	SYS_AUE_nfstat	AUE_FSTAT
 #define	SYS_AUE_nlstat	AUE_LSTAT
 #define	SYS_AUE_preadv	AUE_PREADV
 #define	SYS_AUE_pwritev	AUE_PWRITEV
 #define	SYS_AUE_fhopen	AUE_FHOPEN
 #define	SYS_AUE_fhstat	AUE_FHSTAT
 #define	SYS_AUE_modnext	AUE_NULL
 #define	SYS_AUE_modstat	AUE_NULL
 #define	SYS_AUE_modfnext	AUE_NULL
 #define	SYS_AUE_modfind	AUE_NULL
 #define	SYS_AUE_kldload	AUE_MODLOAD
 #define	SYS_AUE_kldunload	AUE_MODUNLOAD
 #define	SYS_AUE_kldfind	AUE_NULL
 #define	SYS_AUE_kldnext	AUE_NULL
 #define	SYS_AUE_kldstat	AUE_NULL
 #define	SYS_AUE_kldfirstmod	AUE_NULL
 #define	SYS_AUE_getsid	AUE_GETSID
 #define	SYS_AUE_setresuid	AUE_SETRESUID
 #define	SYS_AUE_setresgid	AUE_SETRESGID
 #define	SYS_AUE_aio_return	AUE_NULL
 #define	SYS_AUE_aio_suspend	AUE_NULL
 #define	SYS_AUE_aio_cancel	AUE_NULL
 #define	SYS_AUE_aio_error	AUE_NULL
 #define	SYS_AUE_oaio_read	AUE_NULL
 #define	SYS_AUE_oaio_write	AUE_NULL
 #define	SYS_AUE_olio_listio	AUE_NULL
 #define	SYS_AUE_yield	AUE_NULL
 #define	SYS_AUE_mlockall	AUE_MLOCKALL
 #define	SYS_AUE_munlockall	AUE_MUNLOCKALL
 #define	SYS_AUE___getcwd	AUE_GETCWD
 #define	SYS_AUE_sched_setparam	AUE_NULL
 #define	SYS_AUE_sched_getparam	AUE_NULL
 #define	SYS_AUE_sched_setscheduler	AUE_NULL
 #define	SYS_AUE_sched_getscheduler	AUE_NULL
 #define	SYS_AUE_sched_yield	AUE_NULL
 #define	SYS_AUE_sched_get_priority_max	AUE_NULL
 #define	SYS_AUE_sched_get_priority_min	AUE_NULL
 #define	SYS_AUE_sched_rr_get_interval	AUE_NULL
 #define	SYS_AUE_utrace	AUE_NULL
 #define	SYS_AUE_kldsym	AUE_NULL
 #define	SYS_AUE_jail	AUE_JAIL
 #define	SYS_AUE_sigprocmask	AUE_SIGPROCMASK
 #define	SYS_AUE_sigsuspend	AUE_SIGSUSPEND
 #define	SYS_AUE_sigpending	AUE_SIGPENDING
 #define	SYS_AUE_sigtimedwait	AUE_SIGWAIT
 #define	SYS_AUE_sigwaitinfo	AUE_NULL
 #define	SYS_AUE___acl_get_file	AUE_NULL
 #define	SYS_AUE___acl_set_file	AUE_NULL
 #define	SYS_AUE___acl_get_fd	AUE_NULL
 #define	SYS_AUE___acl_set_fd	AUE_NULL
 #define	SYS_AUE___acl_delete_file	AUE_NULL
 #define	SYS_AUE___acl_delete_fd	AUE_NULL
 #define	SYS_AUE___acl_aclcheck_file	AUE_NULL
 #define	SYS_AUE___acl_aclcheck_fd	AUE_NULL
 #define	SYS_AUE_extattrctl	AUE_EXTATTRCTL
 #define	SYS_AUE_extattr_set_file	AUE_EXTATTR_SET_FILE
 #define	SYS_AUE_extattr_get_file	AUE_EXTATTR_GET_FILE
 #define	SYS_AUE_extattr_delete_file	AUE_EXTATTR_DELETE_FILE
 #define	SYS_AUE_aio_waitcomplete	AUE_NULL
 #define	SYS_AUE_getresuid	AUE_GETRESUID
 #define	SYS_AUE_getresgid	AUE_GETRESGID
 #define	SYS_AUE_kqueue	AUE_KQUEUE
 #define	SYS_AUE_kevent	AUE_NULL
 #define	SYS_AUE_lkmressys	AUE_NULL
 #define	SYS_AUE_extattr_set_fd	AUE_EXTATTR_SET_FD
 #define	SYS_AUE_extattr_get_fd	AUE_EXTATTR_GET_FD
 #define	SYS_AUE_extattr_delete_fd	AUE_EXTATTR_DELETE_FD
 #define	SYS_AUE___setugid	AUE_NULL
 #define	SYS_AUE_nfsclnt	AUE_NULL
 #define	SYS_AUE_eaccess	AUE_EACCESS
 #define	SYS_AUE_nmount	AUE_NMOUNT
 #define	SYS_AUE___mac_get_proc	AUE_NULL
 #define	SYS_AUE___mac_set_proc	AUE_NULL
 #define	SYS_AUE___mac_get_fd	AUE_NULL
 #define	SYS_AUE___mac_get_file	AUE_NULL
 #define	SYS_AUE___mac_set_fd	AUE_NULL
 #define	SYS_AUE___mac_set_file	AUE_NULL
 #define	SYS_AUE_kenv	AUE_NULL
 #define	SYS_AUE_lchflags	AUE_LCHFLAGS
 #define	SYS_AUE_uuidgen	AUE_NULL
 #define	SYS_AUE_sendfile	AUE_SENDFILE
 #define	SYS_AUE_mac_syscall	AUE_NULL
 #define	SYS_AUE_getfsstat	AUE_GETFSSTAT
 #define	SYS_AUE_statfs	AUE_STATFS
 #define	SYS_AUE_fstatfs	AUE_FSTATFS
 #define	SYS_AUE_fhstatfs	AUE_FHSTATFS
 #define	SYS_AUE_ksem_close	AUE_NULL
 #define	SYS_AUE_ksem_post	AUE_NULL
 #define	SYS_AUE_ksem_wait	AUE_NULL
 #define	SYS_AUE_ksem_trywait	AUE_NULL
 #define	SYS_AUE_ksem_init	AUE_NULL
 #define	SYS_AUE_ksem_open	AUE_NULL
 #define	SYS_AUE_ksem_unlink	AUE_NULL
 #define	SYS_AUE_ksem_getvalue	AUE_NULL
 #define	SYS_AUE_ksem_destroy	AUE_NULL
 #define	SYS_AUE___mac_get_pid	AUE_NULL
 #define	SYS_AUE___mac_get_link	AUE_NULL
 #define	SYS_AUE___mac_set_link	AUE_NULL
 #define	SYS_AUE_extattr_set_link	AUE_EXTATTR_SET_LINK
 #define	SYS_AUE_extattr_get_link	AUE_EXTATTR_GET_LINK
 #define	SYS_AUE_extattr_delete_link	AUE_EXTATTR_DELETE_LINK
 #define	SYS_AUE___mac_execve	AUE_NULL
 #define	SYS_AUE_sigaction	AUE_SIGACTION
 #define	SYS_AUE_sigreturn	AUE_SIGRETURN
 #define	SYS_AUE_getcontext	AUE_NULL
 #define	SYS_AUE_setcontext	AUE_NULL
 #define	SYS_AUE_swapcontext	AUE_NULL
 #define	SYS_AUE_swapoff	AUE_SWAPOFF
 #define	SYS_AUE___acl_get_link	AUE_NULL
 #define	SYS_AUE___acl_set_link	AUE_NULL
 #define	SYS_AUE___acl_delete_link	AUE_NULL
 #define	SYS_AUE___acl_aclcheck_link	AUE_NULL
 #define	SYS_AUE_sigwait	AUE_SIGWAIT
 #define	SYS_AUE_thr_create	AUE_NULL
 #define	SYS_AUE_thr_exit	AUE_NULL
 #define	SYS_AUE_thr_self	AUE_NULL
 #define	SYS_AUE_thr_kill	AUE_NULL
 #define	SYS_AUE__umtx_lock	AUE_NULL
 #define	SYS_AUE__umtx_unlock	AUE_NULL
 #define	SYS_AUE_jail_attach	AUE_NULL
 #define	SYS_AUE_extattr_list_fd	AUE_EXTATTR_LIST_FD
 #define	SYS_AUE_extattr_list_file	AUE_EXTATTR_LIST_FILE
 #define	SYS_AUE_extattr_list_link	AUE_EXTATTR_LIST_LINK
 #define	SYS_AUE_ksem_timedwait	AUE_NULL
 #define	SYS_AUE_thr_suspend	AUE_NULL
 #define	SYS_AUE_thr_wake	AUE_NULL
 #define	SYS_AUE_kldunloadf	AUE_MODUNLOAD
 #define	SYS_AUE_audit	AUE_AUDIT
 #define	SYS_AUE_auditon	AUE_AUDITON
 #define	SYS_AUE_getauid	AUE_GETAUID
 #define	SYS_AUE_setauid	AUE_SETAUID
 #define	SYS_AUE_getaudit	AUE_GETAUDIT
 #define	SYS_AUE_setaudit	AUE_SETAUDIT
 #define	SYS_AUE_getaudit_addr	AUE_GETAUDIT_ADDR
 #define	SYS_AUE_setaudit_addr	AUE_SETAUDIT_ADDR
 #define	SYS_AUE_auditctl	AUE_AUDITCTL
 #define	SYS_AUE__umtx_op	AUE_NULL
 #define	SYS_AUE_thr_new	AUE_NULL
 #define	SYS_AUE_sigqueue	AUE_NULL
 #define	SYS_AUE_kmq_open	AUE_NULL
 #define	SYS_AUE_kmq_setattr	AUE_NULL
 #define	SYS_AUE_kmq_timedreceive	AUE_NULL
 #define	SYS_AUE_kmq_timedsend	AUE_NULL
 #define	SYS_AUE_kmq_notify	AUE_NULL
 #define	SYS_AUE_kmq_unlink	AUE_NULL
 #define	SYS_AUE_abort2	AUE_NULL
 #define	SYS_AUE_thr_set_name	AUE_NULL
 #define	SYS_AUE_aio_fsync	AUE_NULL
 #define	SYS_AUE_rtprio_thread	AUE_RTPRIO
 #define	SYS_AUE_sctp_peeloff	AUE_NULL
 #define	SYS_AUE_sctp_generic_sendmsg	AUE_NULL
 #define	SYS_AUE_sctp_generic_sendmsg_iov	AUE_NULL
 #define	SYS_AUE_sctp_generic_recvmsg	AUE_NULL
 #define	SYS_AUE_pread	AUE_PREAD
 #define	SYS_AUE_pwrite	AUE_PWRITE
 #define	SYS_AUE_mmap	AUE_MMAP
 #define	SYS_AUE_lseek	AUE_LSEEK
 #define	SYS_AUE_truncate	AUE_TRUNCATE
 #define	SYS_AUE_ftruncate	AUE_FTRUNCATE
 #define	SYS_AUE_thr_kill2	AUE_KILL
 #define	SYS_AUE_shm_open	AUE_SHMOPEN
 #define	SYS_AUE_shm_unlink	AUE_SHMUNLINK
 #define	SYS_AUE_cpuset	AUE_NULL
 #define	SYS_AUE_cpuset_setid	AUE_NULL
 #define	SYS_AUE_cpuset_getid	AUE_NULL
 #define	SYS_AUE_cpuset_getaffinity	AUE_NULL
 #define	SYS_AUE_cpuset_setaffinity	AUE_NULL
 #define	SYS_AUE_faccessat	AUE_FACCESSAT
 #define	SYS_AUE_fchmodat	AUE_FCHMODAT
 #define	SYS_AUE_fchownat	AUE_FCHOWNAT
 #define	SYS_AUE_fexecve	AUE_FEXECVE
 #define	SYS_AUE_fstatat	AUE_FSTATAT
 #define	SYS_AUE_futimesat	AUE_FUTIMESAT
 #define	SYS_AUE_linkat	AUE_LINKAT
 #define	SYS_AUE_mkdirat	AUE_MKDIRAT
 #define	SYS_AUE_mkfifoat	AUE_MKFIFOAT
 #define	SYS_AUE_mknodat	AUE_MKNODAT
 #define	SYS_AUE_openat	AUE_OPENAT_RWTC
 #define	SYS_AUE_readlinkat	AUE_READLINKAT
 #define	SYS_AUE_renameat	AUE_RENAMEAT
 #define	SYS_AUE_symlinkat	AUE_SYMLINKAT
 #define	SYS_AUE_unlinkat	AUE_UNLINKAT
 
 #undef PAD_
 #undef PADL_
 #undef PADR_
 
 #endif /* !_SYS_SYSPROTO_H_ */
Index: head/usr.sbin/setfib/Makefile
===================================================================
--- head/usr.sbin/setfib/Makefile	(nonexistent)
+++ head/usr.sbin/setfib/Makefile	(revision 178888)
@@ -0,0 +1,6 @@
+#	@(#)Makefile	8.1 (Berkeley) 6/6/93
+# $FreeBSD$
+
+PROG=	setfib
+
+.include <bsd.prog.mk>

Property changes on: head/usr.sbin/setfib/Makefile
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: head/usr.sbin/setfib/setfib.1
===================================================================
--- head/usr.sbin/setfib/setfib.1	(nonexistent)
+++ head/usr.sbin/setfib/setfib.1	(revision 178888)
@@ -0,0 +1,92 @@
+.\" Copyright (c) 2008 Cisco systems
+.\"	Author Julian Elischer.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd April 9, 2008
+.Dt SETFIB 1
+.Os
+.Sh NAME
+.Nm setfib
+.Nd execute a utility with an altered default network view
+.Sh SYNOPSIS
+.Nm
+.Op Fl F
+.Ar fib
+.Ar utility
+.Op Ar argument ...
+.Sh DESCRIPTION
+The
+.Nm
+utility runs
+.Ar utility
+with a different routing table. The table number
+.Dq fib
+will be used by default for all sockets opened by this
+process or its descendants.
+.Sh ENVIRONMENT
+The
+.Ev PATH
+environment variable is used to locate the requested
+.Ar utility
+if the name contains no
+.Ql /
+characters.
+.Sh EXIT STATUS
+If
+.Ar utility
+is invoked, the exit status of
+.Nm
+is the exit status of
+.Ar utility .
+.Pp
+An exit status of 126 indicates
+.Ar utility
+was found, but could not be executed.
+An exit status of 127 indicates
+.Ar utility
+could not be found.
+.Sh EXAMPLES
+Execute utility
+.Sq netstat
+to view the second routing table.
+.Pp
+.Dl "setfib -F 1 netstat -rn"
+or
+.Dl "setfib 1 netstat -rn"
+or
+.Dl "setfib -1 netstat -rn"
+.Sh SEE ALSO
+.Xr setfib 2 ,
+.Xr setsockopt 2
+.Sh STANDARDS
+The
+.Nm
+utility is a FreeBSD-specific extension; however, many UNIX-like systems
+have an equivalent function.
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 8.0 .

Property changes on: head/usr.sbin/setfib/setfib.1
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: head/usr.sbin/setfib/setfib.c
===================================================================
--- head/usr.sbin/setfib/setfib.c	(nonexistent)
+++ head/usr.sbin/setfib/setfib.c	(revision 178888)
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 1989, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ * Copyright (c) 2008 Cisco Systems, All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * setfib file skeleton taken from nice.c
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static void usage(void);
+
+/*
+ * Run a command with a different default routing table (FIB).
+ *
+ * Accepts the FIB number as "-F N", "-FN", "-N" or a bare "N" in argv[1],
+ * validates it against the net.fibs sysctl, issues the setfib system call
+ * and then execs the remaining arguments.  Does not return: exits 1 on a
+ * usage or setup error, 127 if the command cannot be found (ENOENT) and
+ * 126 if it cannot be executed for any other reason.
+ */
+int
+main(int argc, char *argv[])
+{
+	long fib = 0;
+	int ch;
+	char *ep;
+	int numfibs;
+	/* sysctlbyname() wants the length as a size_t, not an int. */
+	size_t intsize = sizeof(numfibs);
+
+	if (sysctlbyname("net.fibs", &numfibs, &intsize, NULL, 0) == -1)
+		errx(1, "Multiple FIBS not supported");
+	if (argc < 2)
+		usage();
+	ep = argv[1];
+	/*
+	 * Convert -N or N to -FN (N is a number) so that getopt() below
+	 * only has to understand the -F form.
+	 */
+	if (ep[0] == '-' && isdigit((unsigned char)ep[1]))
+		ep++;
+	if (isdigit((unsigned char)*ep)) {
+		if (asprintf(&argv[1], "-F%s", ep) < 0)
+			err(1, "asprintf");
+	}
+
+	while ((ch = getopt(argc, argv, "F:")) != -1) {
+		switch (ch) {
+		case 'F':
+			/* Reject non-numeric or out-of-range values. */
+			errno = 0;
+			fib = strtol(optarg, &ep, 10);
+			if (ep == optarg || *ep != '\0' || errno ||
+			    fib < 0 || fib >= numfibs)
+				errx(1, "%s: invalid FIB (max %d)",
+				    optarg, numfibs - 1);
+			break;
+		default:
+			usage();
+		}
+	}
+	argc -= optind;
+	argv += optind;
+
+	if (argc == 0)
+		usage();
+
+	/*
+	 * 175 is the raw system call number used for setfib here; a failure
+	 * is only warned about and the command is still executed.
+	 */
+	errno = 0;
+	if (syscall(175, (int)fib))
+		warn("setfib");
+	execvp(*argv, argv);
+	/* Only reached if execvp() failed. */
+	err(errno == ENOENT ? 127 : 126, "%s", *argv);
+}
+
+/*
+ * Print a usage summary to stderr and exit with status 1.
+ */
+static void
+usage(void)
+{
+
+	(void)fprintf(stderr,
+	    "usage: setfib [-[F]]value command\n");
+	exit(1);
+}

Property changes on: head/usr.sbin/setfib/setfib.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property