Index: head/sbin/ipfw/ipfw.8
===================================================================
--- head/sbin/ipfw/ipfw.8	(revision 123095)
+++ head/sbin/ipfw/ipfw.8	(revision 123096)
@@ -1,2201 +1,2208 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd August 13, 2002
+.Dd December 1, 2003
 .Dt IPFW 8
 .Os
 .Sh NAME
 .Nm ipfw
 .Nd IP firewall and traffic shaper control program
 .Sh SYNOPSIS
 .Nm
 .Op Fl cq
 .Cm add
 .Ar rule
 .Nm
 .Op Fl acdefnNStT
 .Brq Cm list | show
 .Op Ar rule | first-last ...
 .Nm
 .Op Fl f | q
 .Cm flush
 .Nm
 .Op Fl q
 .Brq Cm delete | zero | resetlog
 .Op Cm set
 .Op Ar number ...
 .Nm
 .Cm enable
 .Brq Cm firewall | one_pass | debug | verbose | dyn_keepalive
 .Nm
 .Cm disable
 .Brq Cm firewall | one_pass | debug | verbose | dyn_keepalive
 .Pp
 .Nm
 .Cm set Oo Cm disable Ar number ... Oc Op Cm enable Ar number ...
 .Nm
 .Cm set move
 .Op Cm rule
 .Ar number Cm to Ar number
 .Nm
 .Cm set swap Ar number number
 .Nm
 .Cm set show
 .Pp
 .Nm
 .Brq Cm pipe | queue
 .Ar number
 .Cm config
 .Ar config-options
 .Nm
 .Op Fl s Op Ar field
 .Brq Cm pipe | queue
 .Brq Cm delete | list | show
 .Op Ar number ...
 .Pp
 .Nm
 .Op Fl cnNqS
 .Oo
 .Fl p Ar preproc
 .Oo
 .Ar preproc-flags
 .Oc
 .Oc
 .Ar pathname
 .Sh DESCRIPTION
 The
 .Nm
 utility is the user interface for controlling the
 .Xr ipfw 4
 firewall and the
 .Xr dummynet 4
 traffic shaper in
 .Fx .
 .Pp
 .Bd -ragged -offset XXXX
 .Em NOTE:
 this manual page documents the newer version of
 .Nm
 introduced in
 .Fx
 CURRENT in July 2002, also known as
 .Nm ipfw2 .
 .Nm ipfw2
 is a superset of the old firewall,
 .Nm ipfw1 .
 The differences between the two are listed in Section
 .Sx IPFW2 ENHANCEMENTS ,
 which you are encouraged to read to revise older rulesets and possibly
 write them more efficiently.
 See Section
 .Sx USING IPFW2 IN FreeBSD-STABLE
 for instructions on how to run
 .Nm ipfw2
 on
 .Fx
 STABLE.
 .Ed
 .Pp
 An
 .Nm
 configuration, or
 .Em ruleset ,
 is made of a list of
 .Em rules
 numbered from 1 to 65535.
 Packets are passed to
 .Nm
 from a number of different places in the protocol stack
 (depending on the source and destination of the packet,
 it is possible that
 .Nm
 is invoked multiple times on the same packet).
 The packet passed to the firewall is compared
 against each of the rules in the firewall
 .Em ruleset .
 When a match is found, the action corresponding to the
 matching rule is performed.
 .Pp
 Depending on the action and certain system settings, packets
 can be reinjected into the firewall at some rule after the
 matching one for further processing.
 .Pp
 An
 .Nm
 ruleset always includes a
 .Em default
 rule (numbered 65535) which cannot be modified or deleted,
 and matches all packets.
 The action associated with the
 .Em default
 rule can be either
 .Cm deny
 or
 .Cm allow
 depending on how the kernel is configured.
 .Pp
 If the ruleset includes one or more rules with the
 .Cm keep-state
 or
 .Cm limit
 option, then
 .Nm
 assumes a
 .Em stateful
 behaviour, i.e. upon a match it will create dynamic rules matching
 the exact parameters (addresses and ports) of the matching packet.
 .Pp
 These dynamic rules, which have a limited lifetime, are checked
 at the first occurrence of a
 .Cm check-state ,
 .Cm keep-state
 or
 .Cm limit
 rule, and are typically used to open the firewall on-demand to
 legitimate traffic only.
 See the
 .Sx STATEFUL FIREWALL
 and
 .Sx EXAMPLES
 Sections below for more information on the stateful behaviour of
 .Nm .
 .Pp
 All rules (including dynamic ones) have a few associated counters:
 a packet count, a byte count, a log count and a timestamp
 indicating the time of the last match.
 Counters can be displayed or reset with
 .Nm
 commands.
 .Pp
 Rules can be added with the
 .Cm add
 command; deleted individually or in groups with the
 .Cm delete
 command, and globally (except those in set 31) with the
 .Cm flush
 command; displayed, optionally with the content of the
 counters, using the
 .Cm show
 and
 .Cm list
 commands.
 Finally, counters can be reset with the
 .Cm zero
 and
 .Cm resetlog
 commands.
 .Pp
 Also, each rule belongs to one of 32 different
 .Em sets
 , and there are
 .Nm
 commands to atomically manipulate sets, such as enable,
 disable, swap sets, move all rules in a set to another
 one, delete all rules in a set. These can be useful to
 install temporary configurations, or to test them.
 See Section
 .Sx SETS OF RULES
 for more information on
 .Em sets .
 .Pp
 The following options are available:
 .Bl -tag -width indent
 .It Fl a
 While listing, show counter values.
 The
 .Cm show
 command just implies this option.
 .It Fl c
 When entering or showing rules, print them in compact form,
 i.e. without the optional "ip from any to any" string
 when this does not carry any additional information.
 .It Fl d
 While listing, show dynamic rules in addition to static ones.
 .It Fl e
 While listing, if the
 .Fl d
 option was specified, also show expired dynamic rules.
 .It Fl f
 Don't ask for confirmation for commands that can cause problems
 if misused,
 .No i.e. Cm flush .
 If there is no tty associated with the process, this is implied.
 .It Fl n
 Only check syntax of the command strings, without actually passing
 them to the kernel.
 .It Fl N
 Try to resolve addresses and service names in output.
 .It Fl q
 While
 .Cm add Ns ing ,
 .Cm zero Ns ing ,
 .Cm resetlog Ns ging
 or
 .Cm flush Ns ing ,
 be quiet about actions
 (implies
 .Fl f ) .
 This is useful for adjusting rules by executing multiple
 .Nm
 commands in a script
 (e.g.,
 .Ql sh\ /etc/rc.firewall ) ,
 or by processing a file of many
 .Nm
 rules across a remote login session.
 If a
 .Cm flush
 is performed in normal (verbose) mode (with the default kernel
 configuration), it prints a message.
 Because all rules are flushed, the message might not be delivered
 to the login session, causing the remote login session to be closed
 and the remainder of the ruleset to not be processed.
 Access to the console would then be required to recover.
 .It Fl S
 While listing rules, show the
 .Em set
 each rule belongs to.
 If this flag is not specified, disabled rules will not be
 listed.
 .It Fl s Op Ar field
 While listing pipes, sort according to one of the four
 counters (total or current packets or bytes).
 .It Fl t
 While listing, show last match timestamp (converted with ctime()).
 .It Fl T
 While listing, show last match timestamp (as seconds from the epoch).
 This form can be more convenient for postprocessing by scripts.
 .El
 .Pp
 To ease configuration, rules can be put into a file which is
 processed using
 .Nm
 as shown in the last synopsis line.
 An absolute
 .Ar pathname
 must be used.
 The file will be read line by line and applied as arguments to the
 .Nm
 utility.
 .Pp
 Optionally, a preprocessor can be specified using
 .Fl p Ar preproc
 where
 .Ar pathname
 is to be piped through.
 Useful preprocessors include
 .Xr cpp 1
 and
 .Xr m4 1 .
 If
 .Ar preproc
 doesn't start with a slash
 .Pq Ql /
 as its first character, the usual
 .Ev PATH
 name search is performed.
 Care should be taken with this in environments where not all
 file systems are mounted (yet) by the time
 .Nm
 is being run (e.g. when they are mounted over NFS).
 Once
 .Fl p
 has been specified, any additional arguments as passed on to the preprocessor
 for interpretation.
 This allows for flexible configuration files (like conditionalizing
 them on the local hostname) and the use of macros to centralize
 frequently required arguments like IP addresses.
 .Pp
 The
 .Nm
 .Cm pipe
 and
 .Cm queue
 commands are used to configure the traffic shaper, as shown in the
 .Sx TRAFFIC SHAPER (DUMMYNET) CONFIGURATION
 Section below.
 .Pp
 If the world and the kernel get out of sync the
 .Nm
 ABI may break, preventing you from being able to add any rules.  This can
 adversely effect the booting process.  You can use
 .Nm
 .Cm disable
 .Cm firewall
 to temporarily disable the firewall to regain access to the network,
 allowing you to fix the problem.
 .Sh PACKET FLOW
 A packet is checked against the active ruleset in multiple places
 in the protocol stack, under control of several sysctl variables.
 These places and variables are shown below, and it is important to
 have this picture in mind in order to design a correct ruleset.
 .Bd -literal -offset indent
       ^	    to upper layers   V
       |                       |
       +----------->-----------+
       ^                       V
  [ip_input]              [ip_output]   net.inet.ip.fw.enable=1
       |                       |
       ^                       V
 [ether_demux]    [ether_output_frame]  net.link.ether.ipfw=1
       |                       |
       +-->--[bdg_forward]-->--+        net.link.ether.bridge_ipfw=1
       ^                       V
       |      to devices       |
 .Ed
 .Pp
 As can be noted from the above picture, the number of
 times the same packet goes through the firewall can
 vary between 0 and 4 depending on packet source and
 destination, and system configuration.
 .Pp
 Note that as packets flow through the stack, headers can be
 stripped or added to it, and so they may or may not be available
 for inspection.
 E.g., incoming packets will include the MAC header when
 .Nm
 is invoked from
 .Cm ether_demux() ,
 but the same packets will have the MAC header stripped off when
 .Nm
 is invoked from
 .Cm ip_input() .
 .Pp
 Also note that each packet is always checked against the complete ruleset,
 irrespective of the place where the check occurs, or the source of the packet.
 If a rule contains some match patterns or actions which are not valid
 for the place of invocation (e.g. trying to match a MAC header within
 .Cm ip_input()
 ), the match pattern will not match, but a
 .Cm not
 operator in front of such patterns
 .Em will
 cause the pattern to
 .Em always
 match on those packets.
 It is thus the responsibility of
 the programmer, if necessary, to write a suitable ruleset to
 differentiate among the possible places.
 .Cm skipto
 rules can be useful here, as an example:
 .Bd -literal -offset indent
 # packets from ether_demux or bdg_forward
 ipfw add 10 skipto 1000 all from any to any layer2 in
 # packets from ip_input
 ipfw add 10 skipto 2000 all from any to any not layer2 in
 # packets from ip_output
 ipfw add 10 skipto 3000 all from any to any not layer2 out
 # packets from ether_output_frame
 ipfw add 10 skipto 4000 all from any to any layer2 out
 .Ed
 .Pp
 (yes, at the moment there is no way to differentiate between
 ether_demux and bdg_forward).
 .Sh SYNTAX
 In general, each keyword or argument must be provided as
 a separate command line argument, with no leading or trailing
 spaces. Keywords are case-sensitive, whereas arguments may
 or may not be case-sensitive depending on their nature
 (e.g. uid's are, hostnames are not).
 .Pp
 In
 .Nm ipfw2
 you can introduce spaces after commas ',' to make
 the line more readable. You can also put the entire
 command (including flags) into a single argument.
 E.g. the following forms are equivalent:
 .Bd -literal -offset indent
 ipfw -q add deny src-ip 10.0.0.0/24,127.0.0.1/8
 ipfw -q add deny src-ip 10.0.0.0/24, 127.0.0.1/8
 ipfw "-q add deny src-ip 10.0.0.0/24, 127.0.0.1/8"
 .Ed
 .Sh RULE FORMAT
 The format of
 .Nm
 rules is the following:
 .Bd -ragged -offset indent
 .Op Ar rule_number
 .Op Cm set Ar set_number
 .Op Cm prob Ar match_probability
 .br
 .Ar "   " action
 .Op Cm log Op Cm logamount Ar number
 .Ar body
 .Ed
 .Pp
 where the body of the rule specifies which information is used
 for filtering packets, among the following:
 .Pp
 .Bl -tag -width "Source and dest. addresses and ports" -offset XXX -compact
 .It Layer-2 header fields
 When available
 .It IPv4 Protocol
 TCP, UDP, ICMP, etc.
 .It Source and dest. addresses and ports
 .It Direction
 See Section
 .Sx PACKET FLOW
 .It Transmit and receive interface
 By name or address
 .It Misc. IP header fields
 Version, type of service, datagram length, identification,
 fragment flag (non-zero IP offset),
 Time To Live
 .It IP options
 .It Misc. TCP header fields
 TCP flags (SYN, FIN, ACK, RST, etc.),
 sequence number, acknowledgment number,
 window
 .It TCP options
 .It ICMP types
 for ICMP packets
 .It User/group ID
 When the packet can be associated with a local socket.
 .El
 .Pp
 Note that some of the above information, e.g. source MAC or IP addresses and
 TCP/UDP ports, could easily be spoofed, so filtering on those fields
 alone might not guarantee the desired results.
 .Bl -tag -width indent
 .It Ar rule_number
 Each rule is associated with a
 .Ar rule_number
 in the range 1..65535, with the latter reserved for the
 .Em default
 rule.
 Rules are checked sequentially by rule number.
 Multiple rules can have the same number, in which case they are
 checked (and listed) according to the order in which they have
 been added.
 If a rule is entered without specifying a number, the kernel will
 assign one in such a way that the rule becomes the last one
 before the
 .Em default
 rule.
 Automatic rule numbers are assigned by incrementing the last
 non-default rule number by the value of the sysctl variable
 .Ar net.inet.ip.fw.autoinc_step
 which defaults to 100.
 If this is not possible (e.g. because we would go beyond the
 maximum allowed rule number), the number of the last
 non-default value is used instead.
 .It Cm set Ar set_number
 Each rule is associated with a
 .Ar set_number
 in the range 0..31.
 Sets can be individually disabled and enabled, so this parameter
 is of fundamental importance for atomic ruleset manipulation.
 It can be also used to simplify deletion of groups of rules.
 If a rule is entered without specifying a set number,
 set 0 will be used.
 .br
 Set 31 is special in that it cannot be disabled,
 and rules in set 31 are not deleted by the
 .Nm ipfw flush
 command (but you can delete them with the
 .Nm ipfw delete set 31
 command).
 Set 31 is also used for the
 .Em default
 rule.
 .It Cm prob Ar match_probability
 A match is only declared with the specified probability
 (floating point number between 0 and 1).
 This can be useful for a number of applications such as
 random packet drop or
 (in conjunction with
 .Xr dummynet 4 )
 to simulate the effect of multiple paths leading to out-of-order
 packet delivery.
 .Pp
 Note: this condition is checked before any other condition, including
 ones such as keep-state or check-state which might have side effects.
 .It Cm log Op Cm logamount Ar number
 When a packet matches a rule with the
 .Cm log
 keyword, a message will be
 logged to
 .Xr syslogd 8
 with a
 .Dv LOG_SECURITY
 facility.
 The logging only occurs if the sysctl variable
 .Em net.inet.ip.fw.verbose
 is set to 1
 (which is the default when the kernel is compiled with
 .Dv IPFIREWALL_VERBOSE
 ) and the number of packets logged so far for that
 particular rule does not exceed the
 .Cm logamount
 parameter.
 If no
 .Cm logamount
 is specified, the limit is taken from the sysctl variable
 .Em net.inet.ip.fw.verbose_limit .
 In both cases, a value of 0 removes the logging limit.
 .Pp
 Once the limit is reached, logging can be re-enabled by
 clearing the logging counter or the packet counter for that entry, see the
 .Cm resetlog
 command.
 .Pp
 Note: logging is done after all other packet matching conditions
 have been successfully verified, and before performing the final
 action (accept, deny, etc.) on the packet.
 .El
 .Ss RULE ACTIONS
 A rule can be associated with one of the following actions, which
 will be executed when the packet matches the body of the rule.
 .Bl -tag -width indent
 .It Cm allow | accept | pass | permit
 Allow packets that match rule.
 The search terminates.
 .It Cm check-state
 Checks the packet against the dynamic ruleset.
 If a match is found, execute the action associated with
 the rule which generated this dynamic rule, otherwise
 move to the next rule.
 .br
 .Cm Check-state
 rules do not have a body.
 If no
 .Cm check-state
 rule is found, the dynamic ruleset is checked at the first
 .Cm keep-state
 or
 .Cm limit
 rule.
 .It Cm count
 Update counters for all packets that match rule.
 The search continues with the next rule.
 .It Cm deny | drop
 Discard packets that match this rule.
 The search terminates.
 .It Cm divert Ar port
 Divert packets that match this rule to the
 .Xr divert 4
 socket bound to port
 .Ar port .
 The search terminates.
 .It Cm fwd | forward Ar ipaddr Ns Op , Ns Ar port
 Change the next-hop on matching packets to
 .Ar ipaddr ,
 which can be an IP address in dotted quad format or a host name.
 The search terminates if this rule matches.
 .Pp
 If
 .Ar ipaddr
 is a local address, then matching packets will be forwarded to
 .Ar port
 (or the port number in the packet if one is not specified in the rule)
 on the local machine.
 .br
 If
 .Ar ipaddr
 is not a local address, then the port number
 (if specified) is ignored, and the packet will be
 forwarded to the remote address, using the route as found in
 the local routing table for that IP.
 .br
 A
 .Ar fwd
 rule will not match layer-2 packets (those received
 on ether_input, ether_output, or bridged).
 .br
 The
 .Cm fwd
 action does not change the contents of the packet at all.
 In particular, the destination address remains unmodified, so
 packets forwarded to another system will usually be rejected by that system
 unless there is a matching rule on that system to capture them.
 For packets forwarded locally,
 the local address of the socket will be
 set to the original destination address of the packet.
 This makes the
 .Xr netstat 1
 entry look rather weird but is intended for
 use with transparent proxy servers.
 .It Cm pipe Ar pipe_nr
 Pass packet to a
 .Xr dummynet 4
 .Dq pipe
 (for bandwidth limitation, delay, etc.).
 See the
 .Sx TRAFFIC SHAPER (DUMMYNET) CONFIGURATION
 Section for further information.
 The search terminates; however, on exit from the pipe and if
 the
 .Xr sysctl 8
 variable
 .Em net.inet.ip.fw.one_pass
 is not set, the packet is passed again to the firewall code
 starting from the next rule.
 .It Cm queue Ar queue_nr
 Pass packet to a
 .Xr dummynet 4
 .Dq queue
 (for bandwidth limitation using WF2Q+).
 .It Cm reject
 (Deprecated).
 Synonym for
 .Cm unreach host .
 .It Cm reset
 Discard packets that match this rule, and if the
 packet is a TCP packet, try to send a TCP reset (RST) notice.
 The search terminates.
 .It Cm skipto Ar number
 Skip all subsequent rules numbered less than
 .Ar number .
 The search continues with the first rule numbered
 .Ar number
 or higher.
 .It Cm tee Ar port
 Send a copy of packets matching this rule to the
 .Xr divert 4
 socket bound to port
 .Ar port .
 The search terminates and the original packet is accepted
 (but see Section
 .Sx BUGS
 below).
 .It Cm unreach Ar code
 Discard packets that match this rule, and try to send an ICMP
 unreachable notice with code
 .Ar code ,
 where
 .Ar code
 is a number from 0 to 255, or one of these aliases:
 .Cm net , host , protocol , port ,
 .Cm needfrag , srcfail , net-unknown , host-unknown ,
 .Cm isolated , net-prohib , host-prohib , tosnet ,
 .Cm toshost , filter-prohib , host-precedence
 or
 .Cm precedence-cutoff .
 The search terminates.
 .El
 .Ss RULE BODY
 The body of a rule contains zero or more patterns (such as
 specific source and destination addresses or ports,
 protocol options, incoming or outgoing interfaces, etc.)
 that the packet must match in order to be recognised.
 In general, the patterns are connected by (implicit)
 .Cm and
 operators -- i.e. all must match in order for the
 rule to match.
 Individual patterns can be prefixed by the
 .Cm not
 operator to reverse the result of the match, as in
 .Pp
 .Dl "ipfw add 100 allow ip from not 1.2.3.4 to any"
 .Pp
 Additionally, sets of alternative match patterns (
 .Em or-blocks
 ) can be constructed by putting the patterns in
 lists enclosed between parentheses ( ) or braces { }, and
 using the
 .Cm or
 operator as follows:
 .Pp
 .Dl "ipfw add 100 allow ip from { x or not y or z } to any"
 .Pp
 Only one level of parentheses is allowed.
 Beware that most shells have special meanings for parentheses
 or braces, so it is advisable to put a backslash \\ in front of them
 to prevent such interpretations.
 .Pp
 The body of a rule must in general include a source and destination
 address specifier.
 The keyword
 .Ar any
 can be used in various places to specify that the content of
 a required field is irrelevant.
 .Pp
 The rule body has the following format:
 .Bd -ragged -offset indent
 .Op Ar proto Cm from Ar src Cm to Ar dst
 .Op Ar options
 .Ed
 .Pp
 The first part (proto from src to dst) is for backward
 compatibility with
 .Nm ipfw1 .
 In
 .Nm ipfw2
 any match pattern (including MAC headers, IPv4 protocols,
 addresses and ports) can be specified in the
 .Ar options
 section.
 .Pp
 Rule fields have the following meaning:
 .Bl -tag -width indent
 .It Ar proto : protocol | Cm { Ar protocol Cm or ... }
 .It Ar protocol : Oo Cm not Oc Ar protocol-name | protocol-number
 An IPv4 protocol specified by number or name
 (for a complete list see
 .Pa /etc/protocols ) .
 The
 .Cm ip
 or
 .Cm all
 keywords mean any protocol will match.
 .Pp
 The
 .Cm { Ar protocol Cm or ... }
 format (an
 .Em or-block )
 is provided for convenience only but its use is deprecated.
 .It Ar src No and Ar dst : Bro Cm addr | Cm { Ar addr Cm or ... } Brc Op Oo Cm not Oc Ar ports
 An address (or a list, see below)
 optionally followed by
 .Ar ports
 specifiers.
 .Pp
 The second format (
 .Em or-block
 with multiple addresses) is provided for convenience only and
 its use is discouraged.
 .It Ar addr : Oo Cm not Oc Brq Cm any | me | Ar addr-list | Ar addr-set
 .It Cm any
 matches any IP address.
 .It Cm me
 matches any IP address configured on an interface in the system.
 The address list is evaluated at the time the packet is
 analysed.
 .It Ar addr-list : ip-addr Ns Op Ns , Ns Ar addr-list
 .It Ar ip-addr :
 A host or subnet address specified in one of the following ways:
 .Bl -tag -width indent
 .It Ar numeric-ip | hostname
 Matches a single IPv4 address, specified as dotted-quad or a hostname.
 Hostnames are resolved at the time the rule is added to the firewall list.
 .It Ar addr Ns / Ns Ar masklen
 Matches all addresses with base
 .Ar addr
 (specified as a dotted quad or a hostname)
 and mask width of
 .Cm masklen
 bits.
 As an example, 1.2.3.4/25 will match
 all IP numbers from 1.2.3.0 to 1.2.3.127 .
 .It Ar addr Ns : Ns Ar mask
 Matches all addresses with base
 .Ar addr
 (specified as a dotted quad or a hostname)
 and the mask of
 .Ar mask ,
 specified as a dotted quad.
 As an example, 1.2.3.4/255.0.255.0 will match
 1.*.3.*.
 We suggest to use this form only for non-contiguous
 masks, and resort to the
 .Ar addr Ns / Ns Ar masklen
 format for contiguous masks, which is more compact and less
 error-prone.
 .El
 .It Ar addr-set : addr Ns Oo Ns / Ns Ar masklen Oc Ns Cm { Ns Ar list Ns Cm }
 .It Ar list : Bro Ar num | num-num Brc Ns Op Ns , Ns Ar list
 Matches all addresses with base address
 .Ar addr
 (specified as a dotted quad or a hostname)
 and whose last byte is in the list between braces { } .
 Note that there must be no spaces between braces and
 numbers (spaces after commas are allowed).
 Elements of the list can be specified as single entries
 or ranges.
 The
 .Ar masklen
 field is used to limit the size of the set of addresses,
 and can have any value between 24 and 32. If not specified,
 it will be assumed as 24.
 .br
 This format is particularly useful to handle sparse address sets
 within a single rule. Because the matching occurs using a
 bitmask, it takes constant time and dramatically reduces
 the complexity of rulesets.
 .br
 As an example, an address specified as 1.2.3.4/24{128,35-55,89}
 will match the following IP addresses:
 .br
 1.2.3.128, 1.2.3.35 to 1.2.3.55, 1.2.3.89 .
 .It Ar ports : Bro Ar port | port Ns \&- Ns Ar port Ns Brc Ns Op , Ns Ar ports
 For protocols which support port numbers (such as TCP and UDP), optional
 .Cm ports
 may be specified as one or more ports or port ranges, separated
 by commas but no spaces, and an optional
 .Cm not
 operator.
 The
 .Ql \&-
 notation specifies a range of ports (including boundaries).
 .Pp
 Service names (from
 .Pa /etc/services )
 may be used instead of numeric port values.
 The length of the port list is limited to 30 ports or ranges,
 though one can specify larger ranges by using an
 .Em or-block
 in the
 .Cm options
 section of the rule.
 .Pp
 A backslash
 .Pq Ql \e
 can be used to escape the dash
 .Pq Ql -
 character in a service name (from a shell, the backslash must be
 typed twice to avoid the shell itself interpreting it as an escape
 character).
 .Pp
 .Dl "ipfw add count tcp from any ftp\e\e-data-ftp to any"
 .Pp
 Fragmented packets which have a non-zero offset (i.e. not the first
 fragment) will never match a rule which has one or more port
 specifications.
 See the
 .Cm frag
 option for details on matching fragmented packets.
 .El
 .Ss RULE OPTIONS (MATCH PATTERNS)
 Additional match patterns can be used within
 rules. Zero or more of these so-called
 .Em options
 can be present in a rule, optionally prefixed by the
 .Cm not
 operand, and possibly grouped into
 .Em or-blocks .
 .Pp
 The following match patterns can be used (listed in alphabetical order):
 .Bl -tag -width indent
 .It Cm // this is a comment.
 Inserts the specified text as a comment in the rule.
 Everything following // is considered as a comment and stored in the rule.
 You can have comment-only rules, which are listed as having a
 .Cm count
 action followed by the comment.
 .It Cm bridged
 Matches only bridged packets.
 .It Cm dst-ip Ar ip-address
 Matches IP packets whose destination IP is one of the address(es)
 specified as argument.
 .It Cm dst-port Ar ports
 Matches IP packets whose destination port is one of the port(s)
 specified as argument.
 .It Cm established
 Matches TCP packets that have the RST or ACK bits set.
 .It Cm frag
 Matches packets that are fragments and not the first
 fragment of an IP datagram. Note that these packets will not have
 the next protocol header (e.g. TCP, UDP) so options that look into
 these headers cannot match.
 .It Cm gid Ar group
 Matches all TCP or UDP packets sent by or received for a
 .Ar group .
 A
 .Ar group
 may be specified by name or number.
 .It Cm icmptypes Ar types
 Matches ICMP packets whose ICMP type is in the list
 .Ar types .
 The list may be specified as any combination of
 individual types (numeric) separated by commas.
 .Em Ranges are not allowed.
 The supported ICMP types are:
 .Pp
 echo reply
 .Pq Cm 0 ,
 destination unreachable
 .Pq Cm 3 ,
 source quench
 .Pq Cm 4 ,
 redirect
 .Pq Cm 5 ,
 echo request
 .Pq Cm 8 ,
 router advertisement
 .Pq Cm 9 ,
 router solicitation
 .Pq Cm 10 ,
 time-to-live exceeded
 .Pq Cm 11 ,
 IP header bad
 .Pq Cm 12 ,
 timestamp request
 .Pq Cm 13 ,
 timestamp reply
 .Pq Cm 14 ,
 information request
 .Pq Cm 15 ,
 information reply
 .Pq Cm 16 ,
 address mask request
 .Pq Cm 17
 and address mask reply
 .Pq Cm 18 .
 .It Cm in | out
 Matches incoming or outgoing packets, respectively.
 .Cm in
 and
 .Cm out
 are mutually exclusive (in fact,
 .Cm out
 is implemented as
 .Cm not in Ns No ).
 .It Cm ipid Ar id-list
 Matches IP packets whose
 .Cm ip_id
 field has value included in
 .Ar id-list ,
 which is either a single value or a list of values or ranges
 specified in the same way as
 .Ar ports .
 .It Cm iplen Ar len-list
 Matches IP packets whose total length, including header and data, is
 in the set
 .Ar len-list ,
 which is either a single value or a list of values or ranges 
 specified in the same way as
 .Ar ports .
 .It Cm ipoptions Ar spec
 Matches packets whose IP header contains the comma separated list of
 options specified in
 .Ar spec .
 The supported IP options are:
 .Pp
 .Cm ssrr
 (strict source route),
 .Cm lsrr
 (loose source route),
 .Cm rr
 (record packet route) and
 .Cm ts
 (timestamp).
 The absence of a particular option may be denoted
 with a
 .Ql \&! .
 .It Cm ipprecedence Ar precedence
 Matches IP packets whose precedence field is equal to
 .Ar precedence .
 .It Cm ipsec
 Matches packets that have IPSEC history associated with them
 (i.e. the packet comes encapsulated in IPSEC, the kernel
 has IPSEC support and IPSEC_FILTERGIF option, and can correctly
 decapsulate it).
 .Pp
 Note that specifying
 .Cm ipsec
 is different from specifying
 .Cm proto Ar ipsec
 as the latter will only look at the specific IP protocol field,
 irrespective of IPSEC kernel support and the validity of the IPSEC data.
+.Pp
+Further note that this flag is silently ignored in kernels without
+IPSEC support.
+It does not affect rule processing when given and the
+rules are handled as if with no
+.Cm ipsec
+flag.
 .It Cm iptos Ar spec
 Matches IP packets whose
 .Cm tos
 field contains the comma separated list of
 service types specified in
 .Ar spec .
 The supported IP types of service are:
 .Pp
 .Cm lowdelay
 .Pq Dv IPTOS_LOWDELAY ,
 .Cm throughput
 .Pq Dv IPTOS_THROUGHPUT ,
 .Cm reliability
 .Pq Dv IPTOS_RELIABILITY ,
 .Cm mincost
 .Pq Dv IPTOS_MINCOST ,
 .Cm congestion
 .Pq Dv IPTOS_CE .
 The absence of a particular type may be denoted
 with a
 .Ql \&! .
 .It Cm ipttl Ar ttl-list
 Matches IP packets whose time to live is included in
 .Ar ttl-list ,
 which is either a single value or a list of values or ranges
 specified in the same way as
 .Ar ports .
 .It Cm ipversion Ar ver
 Matches IP packets whose IP version field is
 .Ar ver .
 .It Cm keep-state
 Upon a match, the firewall will create a dynamic rule, whose
 default behaviour is to match bidirectional traffic between
 source and destination IP/port using the same protocol.
 The rule has a limited lifetime (controlled by a set of
 .Xr sysctl 8
 variables), and the lifetime is refreshed every time a matching
 packet is found.
 .It Cm layer2
 Matches only layer2 packets, i.e. those passed to
 .Nm
 from ether_demux() and ether_output_frame().
 .It Cm limit Bro Cm src-addr | src-port | dst-addr | dst-port Brc Ar N
 The firewall will only allow
 .Ar N
 connections with the same
 set of parameters as specified in the rule.
 One or more
 of source and destination addresses and ports can be
 specified.
 .It Cm { MAC | mac } Ar dst-mac src-mac
 Match packets with a given
 .Ar dst-mac
 and
 .Ar src-mac
 addresses, specified as the
 .Cm any
 keyword (matching any MAC address), or six groups of hex digits
 separated by colons,
 and optionally followed by a mask indicating the significant bits.
 The mask may be specified using either of the following methods:
 .Bl -enum -width indent
 .It
 A slash
 .Pq /
 followed by the number of significant bits.
 For example, an address with 33 significant bits could be specified as:
 .Pp
 .Dl "MAC 10:20:30:40:50:60/33 any"
 .Pp
 .It
 An ampersand
 .Pq &
 followed by a bitmask specified as six groups of hex digits separated
 by colons.
 For example, an address in which the last 16 bits are significant could
 be specified as:
 .Pp
 .Dl "MAC 10:20:30:40:50:60&00:00:00:00:ff:ff any"
 .Pp
 Note that the ampersand character has a special meaning in many shells
 and should generally be escaped.
 .Pp
 .El
 Note that the order of MAC addresses (destination first,
 source second) is
 the same as on the wire, but the opposite of the one used for
 IP addresses.
 .It Cm mac-type Ar mac-type
 Matches packets whose Ethernet Type field
 corresponds to one of those specified as argument.
 .Ar mac-type
 is specified in the same way as
 .Cm port numbers
 (i.e. one or more comma-separated single values or ranges).
 You can use symbolic names for known values such as
 .Em vlan , ipv4, ipv6 .
 Values can be entered as decimal or hexadecimal (if prefixed by 0x),
 and they are always printed as hexadecimal (unless the
 .Cm -N
 option is used, in which case symbolic resolution will be attempted).
 .It Cm proto Ar protocol
 Matches packets with the corresponding IPv4 protocol.
 .It Cm recv | xmit | via Brq Ar ifX | Ar if Ns Cm * | Ar ipno | Ar any
 Matches packets received, transmitted or going through,
 respectively, the interface specified by exact name
 .Ns No ( Ar ifX Ns No ),
 by device name
 .Ns No ( Ar if Ns Ar * Ns No ),
 by IP address, or through some interface.
 .Pp
 The
 .Cm via
 keyword causes the interface to always be checked.
 If
 .Cm recv
 or
 .Cm xmit
 is used instead of
 .Cm via ,
 then only the receive or transmit interface (respectively)
 is checked.
 By specifying both, it is possible to match packets based on
 both receive and transmit interface, e.g.:
 .Pp
 .Dl "ipfw add deny ip from any to any out recv ed0 xmit ed1"
 .Pp
 The
 .Cm recv
 interface can be tested on either incoming or outgoing packets,
 while the
 .Cm xmit
 interface can only be tested on outgoing packets.
 So
 .Cm out
 is required (and
 .Cm in
 is invalid) whenever
 .Cm xmit
 is used.
 .Pp
 A packet may not have a receive or transmit interface: packets
 originating from the local host have no receive interface,
 while packets destined for the local host have no transmit
 interface.
 .It Cm setup
 Matches TCP packets that have the SYN bit set but no ACK bit.
 This is the short form of
 .Dq Li tcpflags\ syn,!ack .
 .It Cm src-ip Ar ip-address
 Matches IP packets whose source IP is one of the address(es)
 specified as argument.
 .It Cm src-port Ar ports
 Matches IP packets whose source port is one of the port(s)
 specified as argument.
 .It Cm tcpack Ar ack
 TCP packets only.
 Match if the TCP header acknowledgment number field is set to
 .Ar ack .
 .It Cm tcpflags Ar spec
 TCP packets only.
 Match if the TCP header contains the comma separated list of
 flags specified in
 .Ar spec .
 The supported TCP flags are:
 .Pp
 .Cm fin ,
 .Cm syn ,
 .Cm rst ,
 .Cm psh ,
 .Cm ack
 and
 .Cm urg .
 The absence of a particular flag may be denoted
 with a
 .Ql \&! .
 A rule which contains a
 .Cm tcpflags
 specification can never match a fragmented packet which has
 a non-zero offset.
 See the
 .Cm frag
 option for details on matching fragmented packets.
 .It Cm tcpseq Ar seq
 TCP packets only.
 Match if the TCP header sequence number field is set to
 .Ar seq .
 .It Cm tcpwin Ar win
 TCP packets only.
 Match if the TCP header window field is set to
 .Ar win .
 .It Cm tcpoptions Ar spec
 TCP packets only.
 Match if the TCP header contains the comma separated list of
 options specified in
 .Ar spec .
 The supported TCP options are:
 .Pp
 .Cm mss
 (maximum segment size),
 .Cm window
 (tcp window advertisement),
 .Cm sack
 (selective ack),
 .Cm ts
 (rfc1323 timestamp) and
 .Cm cc
 (rfc1644 t/tcp connection count).
 The absence of a particular option may be denoted
 with a
 .Ql \&! .
 .It Cm uid Ar user
 Match all TCP or UDP packets sent by or received for a
 .Ar user .
 A
 .Ar user
 may be matched by name or identification number.
 .It Cm verrevpath
 For incoming packets,
 a routing table lookup is done on the packet's source address.
 If the interface on which the packet entered the system matches the
 outgoing interface for the route,
 the packet matches.
 If the interfaces do not match up,
 the packet does not match.
 All outgoing packets or packets with no incoming interface match.
 .Pp
 The name and functionality of the option is intentionally similar to
 the Cisco IOS command:
 .Pp
 .Dl ip verify unicast reverse-path
 .Pp
 This option can be used to make anti-spoofing rules.
 .El
 .Sh SETS OF RULES
 Each rule belongs to one of 32 different
 .Em sets
 , numbered 0 to 31.
 Set 31 is reserved for the default rule.
 .Pp
 By default, rules are put in set 0, unless you use the
 .Cm set N
 attribute when entering a new rule.
 Sets can be individually and atomically enabled or disabled,
 so this mechanism permits an easy way to store multiple configurations
 of the firewall and quickly (and atomically) switch between them.
 The command to enable/disable sets is
 .Bd -ragged -offset indent
 .Nm
 .Cm set Oo Cm disable Ar number ... Oc Op Cm enable Ar number ...
 .Ed
 .Pp
 where multiple
 .Cm enable
 or
 .Cm disable
 sections can be specified.
 Command execution is atomic on all the sets specified in the command.
 By default, all sets are enabled.
 .Pp
 When you disable a set, its rules behave as if they do not exist
 in the firewall configuration, with only one exception:
 .Bd -ragged -offset indent
 dynamic rules created from a rule before it had been disabled
 will still be active until they expire. In order to delete
 dynamic rules you have to explicitly delete the parent rule
 which generated them.
 .Ed
 .Pp
 The set number of rules can be changed with the command
 .Bd -ragged -offset indent
 .Nm
 .Cm set move
 .Brq Cm rule Ar rule-number | old-set
 .Cm to Ar new-set
 .Ed
 .Pp
 Also, you can atomically swap two rulesets with the command
 .Bd -ragged -offset indent
 .Nm
 .Cm set swap Ar first-set second-set
 .Ed
 .Pp
 See the
 .Sx EXAMPLES
 Section on some possible uses of sets of rules.
 .Sh STATEFUL FIREWALL
 Stateful operation is a way for the firewall to dynamically
 create rules for specific flows when packets that
 match a given pattern are detected. Support for stateful
 operation comes through the
 .Cm check-state , keep-state
 and
 .Cm limit
 options of
 .Nm rules.
 .Pp
 Dynamic rules are created when a packet matches a
 .Cm keep-state
 or
 .Cm limit
 rule, causing the creation of a
 .Em dynamic
 rule which will match all and only packets with
 a given
 .Em protocol
 between a
 .Em src-ip/src-port dst-ip/dst-port
 pair of addresses (
 .Em src
 and
 .Em dst
 are used here only to denote the initial match addresses, but they
 are completely equivalent afterwards).
 Dynamic rules will be checked at the first
 .Cm check-state, keep-state
 or
 .Cm limit
 occurrence, and the action performed upon a match will be the same
 as in the parent rule.
 .Pp
 Note that no additional attributes other than protocol and IP addresses
 and ports are checked on dynamic rules.
 .Pp
 The typical use of dynamic rules is to keep a closed firewall configuration,
 but let the first TCP SYN packet from the inside network install a
 dynamic rule for the flow so that packets belonging to that session
 will be allowed through the firewall:
 .Pp
 .Dl "ipfw add check-state"
 .Dl "ipfw add allow tcp from my-subnet to any setup keep-state"
 .Dl "ipfw add deny tcp from any to any"
 .Pp
 A similar approach can be used for UDP, where an UDP packet coming
 from the inside will install a dynamic rule to let the response through
 the firewall:
 .Pp
 .Dl "ipfw add check-state"
 .Dl "ipfw add allow udp from my-subnet to any keep-state"
 .Dl "ipfw add deny udp from any to any"
 .Pp
 Dynamic rules expire after some time, which depends on the status
 of the flow and the setting of some
 .Cm sysctl
 variables.
 See Section
 .Sx SYSCTL VARIABLES
 for more details.
 For TCP sessions, dynamic rules can be instructed to periodically
 send keepalive packets to refresh the state of the rule when it is
 about to expire.
 .Pp
 See Section
 .Sx EXAMPLES
 for more examples on how to use dynamic rules.
 .Sh TRAFFIC SHAPER (DUMMYNET) CONFIGURATION
 .Nm
 is also the user interface for the
 .Xr dummynet 4
 traffic shaper.
 .Pp
 .Nm dummynet
 operates by first using the firewall to classify packets and divide them into
 .Em flows ,
 using any match pattern that can be used in
 .Nm
 rules.
 Depending on local policies, a flow can contain packets for a single
 TCP connection, or from/to a given host, or entire subnet, or a
 protocol type, etc.
 .Pp
 Packets belonging to the same flow are then passed to either of two
 different objects, which implement the traffic regulation:
 .Bl -hang -offset XXXX
 .It Em pipe
 A pipe emulates a link with given bandwidth, propagation delay,
 queue size and packet loss rate.
 Packets are queued in front of the pipe as they come out from the classifier,
 and then transferred to the pipe according to the pipe's parameters.
 .Pp
 .It Em queue
 A queue
 is an abstraction used to implement the WF2Q+
 (Worst-case Fair Weighted Fair Queueing) policy, which is
 an efficient variant of the WFQ policy.
 .br
 The queue associates a
 .Em weight
 and a reference pipe to each flow, and then all backlogged (i.e.,
 with packets queued) flows linked to the same pipe share the pipe's
 bandwidth proportionally to their weights.
 Note that weights are not priorities; a flow with a lower weight
 is still guaranteed to get its fraction of the bandwidth even if a
 flow with a higher weight is permanently backlogged.
 .Pp
 .El
 In practice,
 .Em pipes
 can be used to set hard limits to the bandwidth that a flow can use, whereas
 .Em queues
 can be used to determine how different flow share the available bandwidth.
 .Pp
 The
 .Em pipe
 and
 .Em queue
 configuration commands are the following:
 .Bd -ragged -offset indent
 .Cm pipe Ar number Cm config Ar pipe-configuration
 .Pp
 .Cm queue Ar number Cm config Ar queue-configuration
 .Ed
 .Pp
 The following parameters can be configured for a pipe:
 .Pp
 .Bl -tag -width indent -compact
 .It Cm bw Ar bandwidth | device
 Bandwidth, measured in
 .Sm off
 .Op Cm K | M
 .Brq Cm bit/s | Byte/s .
 .Sm on
 .Pp
 A value of 0 (default) means unlimited bandwidth.
 The unit must immediately follow the number, as in
 .Pp
 .Dl "ipfw pipe 1 config bw 300Kbit/s"
 .Pp
 If a device name is specified instead of a numeric value, as in
 .Pp
 .Dl "ipfw pipe 1 config bw tun0"
 .Pp
 then the transmit clock is supplied by the specified device.
 At the moment only the
 .Xr tun 4
 device supports this
 functionality, for use in conjunction with
 .Xr ppp 8 .
 .Pp
 .It Cm delay Ar ms-delay
 Propagation delay, measured in milliseconds.
 The value is rounded to the next multiple of the clock tick
 (typically 10ms, but it is a good practice to run kernels
 with
 .Dq "options HZ=1000"
 to reduce
 the granularity to 1ms or less).
 Default value is 0, meaning no delay.
 .El
 .Pp
 The following parameters can be configured for a queue:
 .Pp
 .Bl -tag -width indent -compact
 .It Cm pipe Ar pipe_nr
 Connects a queue to the specified pipe.
 Multiple queues (with the same or different weights) can be connected to
 the same pipe, which specifies the aggregate rate for the set of queues.
 .Pp
 .It Cm weight Ar weight
 Specifies the weight to be used for flows matching this queue.
 The weight must be in the range 1..100, and defaults to 1.
 .El
 .Pp
 Finally, the following parameters can be configured for both
 pipes and queues:
 .Pp
 .Bl -tag -width XXXX -compact
 .Pp
 .It Cm buckets Ar hash-table-size
 Specifies the size of the hash table used for storing the
 various queues.
 Default value is 64 controlled by the
 .Xr sysctl 8
 variable
 .Em net.inet.ip.dummynet.hash_size ,
 allowed range is 16 to 65536.
 .Pp
 .It Cm mask Ar mask-specifier
 Packets sent to a given pipe or queue by an
 .Nm
 rule can be further classified into multiple flows, each of which is then
 sent to a different
 .Em dynamic
 pipe or queue.
 A flow identifier is constructed by masking the IP addresses,
 ports and protocol types as specified with the
 .Cm mask
 options in the configuration of the pipe or queue.
 For each different flow identifier, a new pipe or queue is created
 with the same parameters as the original object, and matching packets
 are sent to it.
 .Pp
 Thus, when
 .Em dynamic pipes
 are used, each flow will get the same bandwidth as defined by the pipe,
 whereas when
 .Em dynamic queues
 are used, each flow will share the parent's pipe bandwidth evenly
 with other flows generated by the same queue (note that other queues
 with different weights might be connected to the same pipe).
 .br
 Available mask specifiers are a combination of one or more of the following:
 .Pp
 .Cm dst-ip Ar mask ,
 .Cm src-ip Ar mask ,
 .Cm dst-port Ar mask ,
 .Cm src-port Ar mask ,
 .Cm proto Ar mask
 or
 .Cm all ,
 .Pp
 where the latter means all bits in all fields are significant.
 .Pp
 .It Cm noerror
 When a packet is dropped by a dummynet queue or pipe, the error
 is normally reported to the caller routine in the kernel, in the
 same way as it happens when a device queue fills up. Setting this
 option reports the packet as successfully delivered, which can be
 needed for some experimental setups where you want to simulate
 loss or congestion at a remote router.
 .Pp
 .It Cm plr Ar packet-loss-rate
 Packet loss rate.
 Argument
 .Ar packet-loss-rate
 is a floating-point number between 0 and 1, with 0 meaning no
 loss, 1 meaning 100% loss.
 The loss rate is internally represented on 31 bits.
 .Pp
 .It Cm queue Brq Ar slots | size Ns Cm Kbytes
 Queue size, in
 .Ar slots
 or
 .Cm KBytes .
 Default value is 50 slots, which
 is the typical queue size for Ethernet devices.
 Note that for slow speed links you should keep the queue
 size short or your traffic might be affected by a significant
 queueing delay.
 E.g., 50 max-sized ethernet packets (1500 bytes) mean 600Kbit
 or 20s of queue on a 30Kbit/s pipe.
 Even worse effect can result if you get packets from an
 interface with a much larger MTU, e.g. the loopback interface
 with its 16KB packets.
 .Pp
 .It Cm red | gred Ar w_q Ns / Ns Ar min_th Ns / Ns Ar max_th Ns / Ns Ar max_p
 Make use of the RED (Random Early Detection) queue management algorithm.
 .Ar w_q
 and
 .Ar max_p
 are floating
 point numbers between 0 and 1 (0 not included), while
 .Ar min_th
 and
 .Ar max_th
 are integer numbers specifying thresholds for queue management
 (thresholds are computed in bytes if the queue has been defined
 in bytes, in slots otherwise).
 The
 .Xr dummynet 4
 also supports the gentle RED variant (gred).
 Three
 .Xr sysctl 8
 variables can be used to control the RED behaviour:
 .Bl -tag -width indent
 .It Em net.inet.ip.dummynet.red_lookup_depth
 specifies the accuracy in computing the average queue
 when the link is idle (defaults to 256, must be greater than zero)
 .It Em net.inet.ip.dummynet.red_avg_pkt_size
 specifies the expected average packet size (defaults to 512, must be
 greater than zero)
 .It Em net.inet.ip.dummynet.red_max_pkt_size
 specifies the expected maximum packet size, only used when queue
 thresholds are in bytes (defaults to 1500, must be greater than zero).
 .El
 .El
 .Sh CHECKLIST
 Here are some important points to consider when designing your
 rules:
 .Bl -bullet
 .It
 Remember that you filter both packets going
 .Cm in
 and
 .Cm out .
 Most connections need packets going in both directions.
 .It
 Remember to test very carefully.
 It is a good idea to be near the console when doing this.
 If you cannot be near the console,
 use an auto-recovery script such as the one in
 .Pa /usr/share/examples/ipfw/change_rules.sh .
 .It
 Don't forget the loopback interface.
 .El
 .Sh FINE POINTS
 .Bl -bullet
 .It
 There are circumstances where fragmented datagrams are unconditionally
 dropped.
 TCP packets are dropped if they do not contain at least 20 bytes of
 TCP header, UDP packets are dropped if they do not contain a full 8
 byte UDP header, and ICMP packets are dropped if they do not contain
 4 bytes of ICMP header, enough to specify the ICMP type, code, and
 checksum.
 These packets are simply logged as
 .Dq pullup failed
 since there may not be enough good data in the packet to produce a
 meaningful log entry.
 .It
 Another type of packet is unconditionally dropped, a TCP packet with a
 fragment offset of one.
 This is a valid packet, but it only has one use, to try
 to circumvent firewalls.
 When logging is enabled, these packets are
 reported as being dropped by rule -1.
 .It
 If you are logged in over a network, loading the
 .Xr kld 4
 version of
 .Nm
 is probably not as straightforward as you would think.
 I recommend the following command line:
 .Bd -literal -offset indent
 kldload ipfw && \e
 ipfw add 32000 allow ip from any to any
 .Ed
 .Pp
 Along the same lines, doing an
 .Bd -literal -offset indent
 ipfw flush
 .Ed
 .Pp
 in similar surroundings is also a bad idea.
 .It
 The
 .Nm
 filter list may not be modified if the system security level
 is set to 3 or higher
 (see
 .Xr init 8
 for information on system security levels).
 .El
 .Sh PACKET DIVERSION
 A
 .Xr divert 4
 socket bound to the specified port will receive all packets
 diverted to that port.
 If no socket is bound to the destination port, or if the kernel
 wasn't compiled with divert socket support, the packets are
 dropped.
 .Sh SYSCTL VARIABLES
 A set of
 .Xr sysctl 8
 variables controls the behaviour of the firewall and
 associated modules (
 .Nm dummynet, bridge
 ).
 These are shown below together with their default value
 (but always check with the
 .Xr sysctl 8
 command what value is actually in use) and meaning:
 .Bl -tag -width indent
 .It Em net.inet.ip.dummynet.expire : No 1
 Lazily delete dynamic pipes/queue once they have no pending traffic.
 You can disable this by setting the variable to 0, in which case
 the pipes/queues will only be deleted when the threshold is reached.
 .It Em net.inet.ip.dummynet.hash_size : No 64
 Default size of the hash table used for dynamic pipes/queues.
 This value is used when no
 .Cm buckets
 option is specified when configuring a pipe/queue.
 .It Em net.inet.ip.dummynet.max_chain_len : No 16
 Target value for the maximum number of pipes/queues in a hash bucket.
 The product
 .Cm max_chain_len*hash_size
 is used to determine the threshold over which empty pipes/queues
 will be expired even when
 .Cm net.inet.ip.dummynet.expire=0 .
 .It Em net.inet.ip.dummynet.red_lookup_depth : No 256
 .It Em net.inet.ip.dummynet.red_avg_pkt_size : No 512
 .It Em net.inet.ip.dummynet.red_max_pkt_size : No 1500
 Parameters used in the computations of the drop probability
 for the RED algorithm.
 .It Em net.inet.ip.fw.autoinc_step : No 100
 Delta between rule numbers when auto-generating them.
 The value must be in the range 1..1000.
 This variable is only present in
 .Nm ipfw2 ,
 the delta is hardwired to 100 in
 .Nm ipfw1 .
 .It Em net.inet.ip.fw.curr_dyn_buckets : Em net.inet.ip.fw.dyn_buckets
 The current number of buckets in the hash table for dynamic rules
 (readonly).
 .It Em net.inet.ip.fw.debug : No 1
 Controls debugging messages produced by
 .Nm .
 .It Em net.inet.ip.fw.dyn_buckets : No 256
 The number of buckets in the hash table for dynamic rules.
 Must be a power of 2, up to 65536.
 It only takes effect when all dynamic rules have expired, so you
 are advised to use a
 .Cm flush
 command to make sure that the hash table is resized.
 .It Em net.inet.ip.fw.dyn_count : No 3
 Current number of dynamic rules
 (read-only).
 .It Em net.inet.ip.fw.dyn_keepalive : No 1
 Enables generation of keepalive packets for
 .Cm keep-state
 rules on TCP sessions. A keepalive is generated to both
 sides of the connection every 5 seconds for the last 20
 seconds of the lifetime of the rule.
 .It Em net.inet.ip.fw.dyn_max : No 8192
 Maximum number of dynamic rules.
 When you hit this limit, no more dynamic rules can be
 installed until old ones expire.
 .It Em net.inet.ip.fw.dyn_ack_lifetime : No 300
 .It Em net.inet.ip.fw.dyn_syn_lifetime : No 20
 .It Em net.inet.ip.fw.dyn_fin_lifetime : No 1
 .It Em net.inet.ip.fw.dyn_rst_lifetime : No 1
 .It Em net.inet.ip.fw.dyn_udp_lifetime : No 5
 .It Em net.inet.ip.fw.dyn_short_lifetime : No 30
 These variables control the lifetime, in seconds, of dynamic
 rules.
 Upon the initial SYN exchange the lifetime is kept short,
 then increased after both SYN have been seen, then decreased
 again during the final FIN exchange or when a RST is received.
 Both
 .Em dyn_fin_lifetime
 and
 .Em dyn_rst_lifetime
 must be strictly lower than 5 seconds, the period of
 repetition of keepalives. The firewall enforces that.
 .It Em net.inet.ip.fw.enable : No 1
 Enables the firewall.
 Setting this variable to 0 lets you run your machine without
 firewall even if compiled in.
 .It Em net.inet.ip.fw.one_pass : No 1
 When set, the packet exiting from the
 .Xr dummynet 4
 pipe is not passed though the firewall again.
 Otherwise, after a pipe action, the packet is
 reinjected into the firewall at the next rule.
 .It Em net.inet.ip.fw.verbose : No 1
 Enables verbose messages.
 .It Em net.inet.ip.fw.verbose_limit : No 0
 Limits the number of messages produced by a verbose firewall.
 .It Em net.link.ether.ipfw : No 0
 Controls whether layer-2 packets are passed to
 .Nm .
 Default is no.
 .It Em net.link.ether.bridge_ipfw : No 0
 Controls whether bridged packets are passed to
 .Nm .
 Default is no.
 .El
 .Sh USING IPFW2 IN FreeBSD-STABLE
 .Nm ipfw2
 is standard in
 .Fx
 CURRENT, whereas
 .Fx
 STABLE still uses
 .Nm ipfw1
 unless the kernel is compiled with
 .Cm options IPFW2 ,
 and
 .Nm /sbin/ipfw
 and
 .Nm /usr/lib/libalias
 are recompiled with
 .Cm -DIPFW2
 and reinstalled (the same effect can be achieved by adding
 .Cm IPFW2=TRUE
 to
 .Nm /etc/make.conf
 before a buildworld).
 .Pp
 .Sh IPFW2 ENHANCEMENTS
 This Section lists the features that have been introduced in
 .Nm ipfw2
 which were not present in
 .Nm ipfw1 .
 We list them in order of the potential impact that they can
 have in writing your rulesets.
 You might want to consider using these features in order to
 write your rulesets in a more efficient way.
 .Bl -tag -width indent
 .It Syntax and flags
 .Nm ipfw1
 does not support the -n flag (only test syntax),
 nor it allows spaces after commas or supports all
 rule fields in a single argument.
 .It Handling of non-IPv4 packets
 .Nm ipfw1
 will silently accept all non-IPv4 packets (which
 .Nm ipfw1
 will only see when
 .Em net.link.ether.bridge_ipfw=1 Ns
 ).
 .Nm ipfw2
 will filter all packets (including non-IPv4 ones) according to the ruleset.
 To achieve the same behaviour as
 .Nm ipfw1
 you can use the following as the very first rule in your ruleset:
 .Pp
 .Dl "ipfw add 1 allow layer2 not mac-type ip"
 .Pp
 The
 .Cm layer2
 option might seem redundant, but it is necessary -- packets
 passed to the firewall from layer3 will not have a MAC header,
 so the
 .Cm mac-type ip
 pattern will always fail on them, and the
 .Cm not
 operator will make this rule into a pass-all.
 .It Addresses
 .Nm ipfw1
 does not supports address sets or lists of addresses.
 .Pp
 .It Port specifications
 .Nm ipfw1
 only allows one port range when specifying TCP and UDP ports, and
 is limited to 10 entries instead of the 15 allowed by
 .Nm ipfw2 .
 Also, in
 .Nm ipfw1
 you can only specify ports when the rule is requesting
 .Cm tcp
 or
 .Cm udp
 packets. With
 .Nm ipfw2
 you can put port specifications in rules matching all packets,
 and the match will be attempted only on those packets carrying
 protocols which include port identifiers.
 .Pp
 Finally,
 .Nm ipfw1
 allowed the first port entry to be specified as
 .Ar port:mask
 where
 .Ar mask
 can be an arbitrary 16-bit mask.
 This syntax is of questionable usefulness and it is not
 supported anymore in
 .Nm ipfw2 .
 .It Or-blocks
 .Nm ipfw1
 does not support Or-blocks.
 .It keepalives
 .Nm ipfw1
 does not generate keepalives for stateful sessions.
 As a consequence, it might cause idle sessions to drop because
 the lifetime of the dynamic rules expires.
 .It Sets of rules
 .Nm ipfw1
 does not implement sets of rules.
 .It MAC header filtering and Layer-2 firewalling.
 .Nm ipfw1
 does not implement filtering on MAC header fields, nor is it
 invoked on packets from
 .Cm ether_demux()
 and
 .Cm ether_output_frame().
 The sysctl variable
 .Em net.link.ether.ipfw
 has no effect there.
 .It Options
 In
 .Nm ipfw1 ,
 the following options only accept a single value as an argument:
 .Pp
 .Cm ipid, iplen, ipttl
 .Pp
 The following options are not implemented by
 .Nm ipfw1 :
 .Pp
 .Cm dst-ip, dst-port, layer2, mac, mac-type, src-ip, src-port.
 .Pp
 Additionally, the RELENG_4 version of
 .Nm ipfw1
 does not implement the following options:
 .Pp
 .Cm ipid, iplen, ipprecedence, iptos, ipttl,
 .Cm ipversion, tcpack, tcpseq, tcpwin .
 .It Dummynet options
 The following option for
 .Nm dummynet
 pipes/queues is not supported:
 .Cm noerror .
 .El
 .Sh EXAMPLES
 There are far too many possible uses of
 .Nm
 so this Section will only give a small set of examples.
 .Pp
 .Ss BASIC PACKET FILTERING
 This command adds an entry which denies all tcp packets from
 .Em cracker.evil.org
 to the telnet port of
 .Em wolf.tambov.su
 from being forwarded by the host:
 .Pp
 .Dl "ipfw add deny tcp from cracker.evil.org to wolf.tambov.su telnet"
 .Pp
 This one disallows any connection from the entire cracker's
 network to my host:
 .Pp
 .Dl "ipfw add deny ip from 123.45.67.0/24 to my.host.org"
 .Pp
 A first and efficient way to limit access (not using dynamic rules)
 is the use of the following rules:
 .Pp
 .Dl "ipfw add allow tcp from any to any established"
 .Dl "ipfw add allow tcp from net1 portlist1 to net2 portlist2 setup"
 .Dl "ipfw add allow tcp from net3 portlist3 to net3 portlist3 setup"
 .Dl "..."
 .Dl "ipfw add deny tcp from any to any"
 .Pp
 The first rule will be a quick match for normal TCP packets,
 but it will not match the initial SYN packet, which will be
 matched by the
 .Cm setup
 rules only for selected source/destination pairs.
 All other SYN packets will be rejected by the final
 .Cm deny
 rule.
 .Pp
 If you administer one or more subnets, you can take advantage of the
 .Nm ipfw2
 syntax to specify address sets and or-blocks and write extremely
 compact rulesets which selectively enable services to blocks
 of clients, as below:
 .Pp
 .Dl "goodguys=\*q{ 10.1.2.0/24{20,35,66,18} or 10.2.3.0/28{6,3,11} }\*q"
 .Dl "badguys=\*q10.1.2.0/24{8,38,60}\*q"
 .Dl ""
 .Dl "ipfw add allow ip from ${goodguys} to any"
 .Dl "ipfw add deny ip from ${badguys} to any"
 .Dl "... normal policies ..."
 .Pp
 The
 .Nm ipfw1
 syntax would require a separate rule for each IP in the above
 example.
 .Pp
 The
 .Cm verrevpath
 option could be used to do automated anti-spoofing by adding the
 following to the top of a ruleset:
 .Pp
 .Dl "ipfw add deny ip from any to any not verrevpath in"
 .Pp
 This rule drops all incoming packets that appear to be coming to the
 system on the wrong interface. For example, a packet with a source
 address belonging to a host on a protected internal network would be
 dropped if it tried to enter the system from an external interface.
 .Ss DYNAMIC RULES
 In order to protect a site from flood attacks involving fake
 TCP packets, it is safer to use dynamic rules:
 .Pp
 .Dl "ipfw add check-state"
 .Dl "ipfw add deny tcp from any to any established"
 .Dl "ipfw add allow tcp from my-net to any setup keep-state"
 .Pp
 This will let the firewall install dynamic rules only for
 those connection which start with a regular SYN packet coming
 from the inside of our network.
 Dynamic rules are checked when encountering the first
 .Cm check-state
 or
 .Cm keep-state
 rule.
 A
 .Cm check-state
 rule should usually be placed near the beginning of the
 ruleset to minimize the amount of work scanning the ruleset.
 Your mileage may vary.
 .Pp
 To limit the number of connections a user can open
 you can use the following type of rules:
 .Pp
 .Dl "ipfw add allow tcp from my-net/24 to any setup limit src-addr 10"
 .Dl "ipfw add allow tcp from any to me setup limit src-addr 4"
 .Pp
 The former (assuming it runs on a gateway) will allow each host
 on a /24 network to open at most 10 TCP connections.
 The latter can be placed on a server to make sure that a single
 client does not use more than 4 simultaneous connections.
 .Pp
 .Em BEWARE :
 stateful rules can be subject to denial-of-service attacks
 by a SYN-flood which opens a huge number of dynamic rules.
 The effects of such attacks can be partially limited by
 acting on a set of
 .Xr sysctl 8
 variables which control the operation of the firewall.
 .Pp
 Here is a good usage of the
 .Cm list
 command to see accounting records and timestamp information:
 .Pp
 .Dl ipfw -at list
 .Pp
 or in short form without timestamps:
 .Pp
 .Dl ipfw -a list
 .Pp
 which is equivalent to:
 .Pp
 .Dl ipfw show
 .Pp
 Next rule diverts all incoming packets from 192.168.2.0/24
 to divert port 5000:
 .Pp
 .Dl ipfw divert 5000 ip from 192.168.2.0/24 to any in
 .Pp
 .Ss TRAFFIC SHAPING
 The following rules show some of the applications of
 .Nm
 and
 .Xr dummynet 4
 for simulations and the like.
 .Pp
 This rule drops random incoming packets with a probability
 of 5%:
 .Pp
 .Dl "ipfw add prob 0.05 deny ip from any to any in"
 .Pp
 A similar effect can be achieved making use of dummynet pipes:
 .Pp
 .Dl "ipfw add pipe 10 ip from any to any"
 .Dl "ipfw pipe 10 config plr 0.05"
 .Pp
 We can use pipes to artificially limit bandwidth, e.g. on a
 machine acting as a router, if we want to limit traffic from
 local clients on 192.168.2.0/24 we do:
 .Pp
 .Dl "ipfw add pipe 1 ip from 192.168.2.0/24 to any out"
 .Dl "ipfw pipe 1 config bw 300Kbit/s queue 50KBytes"
 .Pp
 note that we use the
 .Cm out
 modifier so that the rule is not used twice.
 Remember in fact that
 .Nm
 rules are checked both on incoming and outgoing packets.
 .Pp
 Should we want to simulate a bidirectional link with bandwidth
 limitations, the correct way is the following:
 .Pp
 .Dl "ipfw add pipe 1 ip from any to any out"
 .Dl "ipfw add pipe 2 ip from any to any in"
 .Dl "ipfw pipe 1 config bw 64Kbit/s queue 10Kbytes"
 .Dl "ipfw pipe 2 config bw 64Kbit/s queue 10Kbytes"
 .Pp
 The above can be very useful, e.g. if you want to see how
 your fancy Web page will look for a residential user who
 is connected only through a slow link.
 You should not use only one pipe for both directions, unless
 you want to simulate a half-duplex medium (e.g. AppleTalk,
 Ethernet, IRDA).
 It is not necessary that both pipes have the same configuration,
 so we can also simulate asymmetric links.
 .Pp
 Should we want to verify network performance with the RED queue
 management algorithm:
 .Pp
 .Dl "ipfw add pipe 1 ip from any to any"
 .Dl "ipfw pipe 1 config bw 500Kbit/s queue 100 red 0.002/30/80/0.1"
 .Pp
 Another typical application of the traffic shaper is to
 introduce some delay in the communication.
 This can significantly affect applications which do a lot of Remote
 Procedure Calls, and where the round-trip-time of the
 connection often becomes a limiting factor much more than
 bandwidth:
 .Pp
 .Dl "ipfw add pipe 1 ip from any to any out"
 .Dl "ipfw add pipe 2 ip from any to any in"
 .Dl "ipfw pipe 1 config delay 250ms bw 1Mbit/s"
 .Dl "ipfw pipe 2 config delay 250ms bw 1Mbit/s"
 .Pp
 Per-flow queueing can be useful for a variety of purposes.
 A very simple one is counting traffic:
 .Pp
 .Dl "ipfw add pipe 1 tcp from any to any"
 .Dl "ipfw add pipe 1 udp from any to any"
 .Dl "ipfw add pipe 1 ip from any to any"
 .Dl "ipfw pipe 1 config mask all"
 .Pp
 The above set of rules will create queues (and collect
 statistics) for all traffic.
 Because the pipes have no limitations, the only effect is
 collecting statistics.
 Note that we need 3 rules, not just the last one, because
 when
 .Nm
 tries to match IP packets it will not consider ports, so we
 would not see connections on separate ports as different
 ones.
 .Pp
 A more sophisticated example is limiting the outbound traffic
 on a net with per-host limits, rather than per-network limits:
 .Pp
 .Dl "ipfw add pipe 1 ip from 192.168.2.0/24 to any out"
 .Dl "ipfw add pipe 2 ip from any to 192.168.2.0/24 in"
 .Dl "ipfw pipe 1 config mask src-ip 0x000000ff bw 200Kbit/s queue 20Kbytes"
 .Dl "ipfw pipe 2 config mask dst-ip 0x000000ff bw 200Kbit/s queue 20Kbytes"
 .Ss SETS OF RULES
 To add a set of rules atomically, e.g. set 18:
 .Pp
 .Dl "ipfw set disable 18"
 .Dl "ipfw add NN set 18 ...         # repeat as needed"
 .Dl "ipfw set enable 18"
 .Pp
 To delete a set of rules atomically the command is simply:
 .Pp
 .Dl "ipfw delete set 18"
 .Pp
 To test a ruleset and disable it and regain control if something goes wrong:
 .Pp
 .Dl "ipfw set disable 18"
 .Dl "ipfw add NN set 18 ...         # repeat as needed"
 .Dl "ipfw set enable 18; echo done; sleep 30 && ipfw set disable 18"
 .Pp
 Here if everything goes well, you press control-C before the "sleep"
 terminates, and your ruleset will be left active. Otherwise, e.g. if
 you cannot access your box, the ruleset will be disabled after
 the sleep terminates thus restoring the previous situation.
 .Sh SEE ALSO
 .Xr cpp 1 ,
 .Xr m4 1 ,
 .Xr bridge 4 ,
 .Xr divert 4 ,
 .Xr dummynet 4 ,
 .Xr ip 4 ,
 .Xr ipfirewall 4 ,
 .Xr protocols 5 ,
 .Xr services 5 ,
 .Xr init 8 ,
 .Xr kldload 8 ,
 .Xr reboot 8 ,
 .Xr sysctl 8 ,
 .Xr syslogd 8
 .Sh BUGS
 The syntax has grown over the years and sometimes it might be confusing.
 Unfortunately, backward compatibility prevents cleaning up mistakes
 made in the definition of the syntax.
 .Pp
 .Em !!! WARNING !!!
 .Pp
 Misconfiguring the firewall can put your computer in an unusable state,
 possibly shutting down network services and requiring console access to
 regain control of it.
 .Pp
 Incoming packet fragments diverted by
 .Cm divert
 or
 .Cm tee
 are reassembled before delivery to the socket.
 The action used on those packet is the one from the
 rule which matches the first fragment of the packet.
 .Pp
 Packets that match a
 .Cm tee
 rule should not be immediately accepted, but should continue
 going through the rule list.
 This may be fixed in a later version.
 .Pp
 Packets diverted to userland, and then reinserted by a userland process
 may lose various packet attributes.
 The packet source interface name
 will be preserved if it is shorter than 8 bytes and the userland process
 saves and reuses the sockaddr_in
 (as does
 .Xr natd 8 ) ;
 otherwise, it may be lost.
 If a packet is reinserted in this manner, later rules may be incorrectly
 applied, making the order of
 .Cm divert
 rules in the rule sequence very important.
 .Sh AUTHORS
 .An Ugen J. S. Antsilevich ,
 .An Poul-Henning Kamp ,
 .An Alex Nash ,
 .An Archie Cobbs ,
 .An Luigi Rizzo .
 .Pp
 .An -nosplit
 API based upon code written by
 .An Daniel Boulet
 for BSDI.
 .Pp
 Work on
 .Xr dummynet 4
 traffic shaper supported by Akamba Corp.
 .Sh HISTORY
 The
 .Nm
 utility first appeared in
 .Fx 2.0 .
 .Xr dummynet 4
 was introduced in
 .Fx 2.2.8 .
 Stateful extensions were introduced in
 .Fx 4.0 .
 .Nm ipfw2
 was introduced in Summer 2002.
Index: head/sys/netinet/ip_fw2.c
===================================================================
--- head/sys/netinet/ip_fw2.c	(revision 123095)
+++ head/sys/netinet/ip_fw2.c	(revision 123096)
@@ -1,3034 +1,3035 @@
 /*
  * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #define        DEB(x)
 #define        DDB(x) x
 
 /*
  * Implement IP packet firewall (new version)
  */
 
 #if !defined(KLD_MODULE)
 #include "opt_ipfw.h"
 #include "opt_ipdn.h"
 #include "opt_ipdivert.h"
 #include "opt_inet.h"
+#include "opt_ipsec.h"
 #ifndef INET
 #error IPFIREWALL requires INET.
 #endif /* INET */
 #endif
 
 #define IPFW2	1
 #if IPFW2
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/ucred.h>
 #include <net/if.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_dummynet.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcpip.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
 #ifdef IPSEC
 #include <netinet6/ipsec.h>
 #endif
 
 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */
 
 #include <machine/in_cksum.h>	/* XXX for in_cksum */
 
 /*
  * This is used to avoid that a firewall-generated packet
  * loops forever through the firewall.  Note that it must
  * be a flag that is unused by other protocols that might
  * be called from ip_output (e.g. IPsec) and it must be
  * listed in M_COPYFLAGS in mbuf.h so that if the mbuf chain
  * is altered on the way through ip_output it is not lost.
  * It might be better to add an m_tag since the this happens
  * infrequently.
  */
 #define M_SKIP_FIREWALL         M_PROTO6
 
 /*
  * set_disable contains one bit per set value (0..31).
  * If the bit is set, all rules with the corresponding set
  * are disabled. Set RESVD_SET(31) is reserved for the default rule
  * and rules that are not deleted by the flush command,
  * and CANNOT be disabled.
  * Rules in set RESVD_SET can only be deleted explicitly.
  */
 static u_int32_t set_disable;
 
 static int fw_verbose;
 static int verbose_limit;
 
 static struct callout ipfw_timeout;
 #define	IPFW_DEFAULT_RULE	65535
 
 struct ip_fw_chain {
 	struct ip_fw	*rules;		/* list of rules */
 	struct ip_fw	*reap;		/* list of rules to reap */
 	struct mtx	mtx;		/* lock guarding rule list */
 };
 #define	IPFW_LOCK_INIT(_chain) \
 	mtx_init(&(_chain)->mtx, "IPFW static rules", NULL, \
 		MTX_DEF | MTX_RECURSE)
 #define	IPFW_LOCK_DESTROY(_chain)	mtx_destroy(&(_chain)->mtx)
 #define	IPFW_LOCK(_chain)	mtx_lock(&(_chain)->mtx)
 #define	IPFW_UNLOCK(_chain)	mtx_unlock(&(_chain)->mtx)
 #define	IPFW_LOCK_ASSERT(_chain)	mtx_assert(&(_chain)->mtx, MA_OWNED)
 
 /*
  * list of rules for layer 3
  */
 static struct ip_fw_chain layer3_chain;
 
 MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
 
 static int fw_debug = 1;
 static int autoinc_step = 100; /* bounded to 1..1000 in add_rule() */
 
 #ifdef SYSCTL_NODE
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, enable,
     CTLFLAG_RW | CTLFLAG_SECURE3,
     &fw_enable, 0, "Enable ipfw");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW,
     &autoinc_step, 0, "Rule number autincrement step");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
     CTLFLAG_RW | CTLFLAG_SECURE3,
     &fw_one_pass, 0,
     "Only do a single pass through ipfw when using dummynet(4)");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW,
     &fw_debug, 0, "Enable printing of debug ip_fw statements");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose,
     CTLFLAG_RW | CTLFLAG_SECURE3,
     &fw_verbose, 0, "Log matches to ipfw rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW,
     &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged");
 
 /*
  * Description of dynamic rules.
  *
  * Dynamic rules are stored in lists accessed through a hash table
  * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can
  * be modified through the sysctl variable dyn_buckets which is
  * updated when the table becomes empty.
  *
  * XXX currently there is only one list, ipfw_dyn.
  *
  * When a packet is received, its address fields are first masked
  * with the mask defined for the rule, then hashed, then matched
  * against the entries in the corresponding list.
  * Dynamic rules can be used for different purposes:
  *  + stateful rules;
  *  + enforcing limits on the number of sessions;
  *  + in-kernel NAT (not implemented yet)
  *
  * The lifetime of dynamic rules is regulated by dyn_*_lifetime,
  * measured in seconds and depending on the flags.
  *
  * The total number of dynamic rules is stored in dyn_count.
  * The max number of dynamic rules is dyn_max. When we reach
  * the maximum number of rules we do not create anymore. This is
  * done to avoid consuming too much memory, but also too much
  * time when searching on each packet (ideally, we should try instead
  * to put a limit on the length of the list on each bucket...).
  *
  * Each dynamic rule holds a pointer to the parent ipfw rule so
  * we know what action to perform. Dynamic rules are removed when
  * the parent rule is deleted. XXX we should make them survive.
  *
  * There are some limitations with dynamic rules -- we do not
  * obey the 'randomized match', and we do not do multiple
  * passes through the firewall. XXX check the latter!!!
  */
 static ipfw_dyn_rule **ipfw_dyn_v = NULL;
 static u_int32_t dyn_buckets = 256; /* must be power of 2 */
 static u_int32_t curr_dyn_buckets = 256; /* must be power of 2 */
 
 static struct mtx ipfw_dyn_mtx;		/* mutex guarding dynamic rules */
 #define	IPFW_DYN_LOCK_INIT() \
 	mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF)
 #define	IPFW_DYN_LOCK_DESTROY()	mtx_destroy(&ipfw_dyn_mtx)
 #define	IPFW_DYN_LOCK()		mtx_lock(&ipfw_dyn_mtx)
 #define	IPFW_DYN_UNLOCK()	mtx_unlock(&ipfw_dyn_mtx)
 #define	IPFW_DYN_LOCK_ASSERT()	mtx_assert(&ipfw_dyn_mtx, MA_OWNED)
 
 /*
  * Timeouts for various events in handing dynamic rules.
  */
 static u_int32_t dyn_ack_lifetime = 300;
 static u_int32_t dyn_syn_lifetime = 20;
 static u_int32_t dyn_fin_lifetime = 1;
 static u_int32_t dyn_rst_lifetime = 1;
 static u_int32_t dyn_udp_lifetime = 10;
 static u_int32_t dyn_short_lifetime = 5;
 
 /*
  * Keepalives are sent if dyn_keepalive is set. They are sent every
  * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
  * seconds of lifetime of a rule.
  * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower
  * than dyn_keepalive_period.
  */
 
 static u_int32_t dyn_keepalive_interval = 20;
 static u_int32_t dyn_keepalive_period = 5;
 static u_int32_t dyn_keepalive = 1;	/* do send keepalives */
 
 static u_int32_t static_count;	/* # of static rules */
 static u_int32_t static_len;	/* size in bytes of static rules */
 static u_int32_t dyn_count;		/* # of dynamic rules */
 static u_int32_t dyn_max = 4096;	/* max # of dynamic rules */
 
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_RW,
     &dyn_buckets, 0, "Number of dyn. buckets");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD,
     &curr_dyn_buckets, 0, "Current Number of dyn. buckets");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD,
     &dyn_count, 0, "Number of dyn. rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW,
     &dyn_max, 0, "Max number of dyn. rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD,
     &static_count, 0, "Number of static rules");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW,
     &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW,
     &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW,
     &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW,
     &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW,
     &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW,
     &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW,
     &dyn_keepalive, 0, "Enable keepalives for dyn. rules");
 
 #endif /* SYSCTL_NODE */
 
 
 static ip_fw_chk_t	ipfw_chk;
 
 ip_dn_ruledel_t *ip_dn_ruledel_ptr = NULL;	/* hook into dummynet */
 
 /*
  * This macro maps an ip pointer into a layer3 header pointer of type T
  */
 #define	L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
 
 static __inline int
 icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd)
 {
 	int type = L3HDR(struct icmp,ip)->icmp_type;
 
 	return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) );
 }
 
 #define TT	( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \
     (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) )
 
 static int
 is_icmp_query(struct ip *ip)
 {
 	int type = L3HDR(struct icmp, ip)->icmp_type;
 	return (type <= ICMP_MAXTYPE && (TT & (1<<type)) );
 }
 #undef TT
 
 /*
  * The following checks use two arrays of 8 or 16 bits to store the
  * bits that we want set or clear, respectively. They are in the
  * low and high half of cmd->arg1 or cmd->d[0].
  *
  * We scan options and store the bits we find set. We succeed if
  *
  *	(want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
  *
  * The code is sometimes optimized not to store additional variables.
  */
 
 static int
 flags_match(ipfw_insn *cmd, u_int8_t bits)
 {
 	u_char want_clear;
 	bits = ~bits;
 
 	if ( ((cmd->arg1 & 0xff) & bits) != 0)
 		return 0; /* some bits we want set were clear */
 	want_clear = (cmd->arg1 >> 8) & 0xff;
 	if ( (want_clear & bits) != want_clear)
 		return 0; /* some bits we want clear were set */
 	return 1;
 }
 
 static int
 ipopts_match(struct ip *ip, ipfw_insn *cmd)
 {
 	int optlen, bits = 0;
 	u_char *cp = (u_char *)(ip + 1);
 	int x = (ip->ip_hl << 2) - sizeof (struct ip);
 
 	for (; x > 0; x -= optlen, cp += optlen) {
 		int opt = cp[IPOPT_OPTVAL];
 
 		if (opt == IPOPT_EOL)
 			break;
 		if (opt == IPOPT_NOP)
 			optlen = 1;
 		else {
 			optlen = cp[IPOPT_OLEN];
 			if (optlen <= 0 || optlen > x)
 				return 0; /* invalid or truncated */
 		}
 		switch (opt) {
 
 		default:
 			break;
 
 		case IPOPT_LSRR:
 			bits |= IP_FW_IPOPT_LSRR;
 			break;
 
 		case IPOPT_SSRR:
 			bits |= IP_FW_IPOPT_SSRR;
 			break;
 
 		case IPOPT_RR:
 			bits |= IP_FW_IPOPT_RR;
 			break;
 
 		case IPOPT_TS:
 			bits |= IP_FW_IPOPT_TS;
 			break;
 		}
 	}
 	return (flags_match(cmd, bits));
 }
 
 static int
 tcpopts_match(struct ip *ip, ipfw_insn *cmd)
 {
 	int optlen, bits = 0;
 	struct tcphdr *tcp = L3HDR(struct tcphdr,ip);
 	u_char *cp = (u_char *)(tcp + 1);
 	int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
 
 	for (; x > 0; x -= optlen, cp += optlen) {
 		int opt = cp[0];
 		if (opt == TCPOPT_EOL)
 			break;
 		if (opt == TCPOPT_NOP)
 			optlen = 1;
 		else {
 			optlen = cp[1];
 			if (optlen <= 0)
 				break;
 		}
 
 		switch (opt) {
 
 		default:
 			break;
 
 		case TCPOPT_MAXSEG:
 			bits |= IP_FW_TCPOPT_MSS;
 			break;
 
 		case TCPOPT_WINDOW:
 			bits |= IP_FW_TCPOPT_WINDOW;
 			break;
 
 		case TCPOPT_SACK_PERMITTED:
 		case TCPOPT_SACK:
 			bits |= IP_FW_TCPOPT_SACK;
 			break;
 
 		case TCPOPT_TIMESTAMP:
 			bits |= IP_FW_TCPOPT_TS;
 			break;
 
 		case TCPOPT_CC:
 		case TCPOPT_CCNEW:
 		case TCPOPT_CCECHO:
 			bits |= IP_FW_TCPOPT_CC;
 			break;
 		}
 	}
 	return (flags_match(cmd, bits));
 }
 
 static int
 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
 {
 	if (ifp == NULL)	/* no iface with this packet, match fails */
 		return 0;
 	/* Check by name or by IP address */
 	if (cmd->name[0] != '\0') { /* match by name */
 		/* Check name */
 		if (cmd->p.glob) {
 			if (fnmatch(cmd->name, ifp->if_xname, 0) == 0)
 				return(1);
 		} else {
 			if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
 				return(1);
 		}
 	} else {
 		struct ifaddr *ia;
 
 		/* XXX lock? */
 		TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
 			if (ia->ifa_addr == NULL)
 				continue;
 			if (ia->ifa_addr->sa_family != AF_INET)
 				continue;
 			if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
 			    (ia->ifa_addr))->sin_addr.s_addr)
 				return(1);	/* match */
 		}
 	}
 	return(0);	/* no match, fail ... */
 }
 
 /*
  * The 'verrevpath' option checks that the interface that an IP packet
  * arrives on is the same interface that traffic destined for the
  * packet's source address would be routed out of. This is a measure
  * to block forged packets. This is also commonly known as "anti-spoofing"
  * or Unicast Reverse Path Forwarding (Unicast RFP) in Cisco-ese. The
  * name of the knob is purposely reminisent of the Cisco IOS command,
  *
  *   ip verify unicast reverse-path
  *
  * which implements the same functionality. But note that syntax is
  * misleading. The check may be performed on all IP packets whether unicast,
  * multicast, or broadcast.
  */
 static int
 verify_rev_path(struct in_addr src, struct ifnet *ifp)
 {
 	struct route ro;
 	struct sockaddr_in *dst;
 
 	bzero(&ro, sizeof(ro));
 
 	dst = (struct sockaddr_in *)&(ro.ro_dst);
 	dst->sin_family = AF_INET;
 	dst->sin_len = sizeof(*dst);
 	dst->sin_addr = src;
 	rtalloc_ign(&ro, RTF_CLONING);
 
 	if (ro.ro_rt == NULL)
 		return 0;
 	if ((ifp == NULL) || (ro.ro_rt->rt_ifp->if_index != ifp->if_index)) {
 		RTFREE(ro.ro_rt);
 		return 0;
 	}
 	RTFREE(ro.ro_rt);
 	return 1;
 }
 
 
 static u_int64_t norule_counter;	/* counter for ipfw_log(NULL...) */
 
 #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0
 #define SNP(buf) buf, sizeof(buf)
 
 /*
  * We enter here when we have a rule with O_LOG.
  * XXX this function alone takes about 2Kbytes of code!
  */
 static void
 ipfw_log(struct ip_fw *f, u_int hlen, struct ether_header *eh,
 	struct mbuf *m, struct ifnet *oif)
 {
 	char *action;
 	int limit_reached = 0;
 	char action2[40], proto[48], fragment[28];
 
 	fragment[0] = '\0';
 	proto[0] = '\0';
 
 	if (f == NULL) {	/* bogus pkt */
 		if (verbose_limit != 0 && norule_counter >= verbose_limit)
 			return;
 		norule_counter++;
 		if (norule_counter == verbose_limit)
 			limit_reached = verbose_limit;
 		action = "Refuse";
 	} else {	/* O_LOG is the first action, find the real one */
 		ipfw_insn *cmd = ACTION_PTR(f);
 		ipfw_insn_log *l = (ipfw_insn_log *)cmd;
 
 		if (l->max_log != 0 && l->log_left == 0)
 			return;
 		l->log_left--;
 		if (l->log_left == 0)
 			limit_reached = l->max_log;
 		cmd += F_LEN(cmd);	/* point to first action */
 		if (cmd->opcode == O_PROB)
 			cmd += F_LEN(cmd);
 
 		action = action2;
 		switch (cmd->opcode) {
 		case O_DENY:
 			action = "Deny";
 			break;
 
 		case O_REJECT:
 			if (cmd->arg1==ICMP_REJECT_RST)
 				action = "Reset";
 			else if (cmd->arg1==ICMP_UNREACH_HOST)
 				action = "Reject";
 			else
 				snprintf(SNPARGS(action2, 0), "Unreach %d",
 					cmd->arg1);
 			break;
 
 		case O_ACCEPT:
 			action = "Accept";
 			break;
 		case O_COUNT:
 			action = "Count";
 			break;
 		case O_DIVERT:
 			snprintf(SNPARGS(action2, 0), "Divert %d",
 				cmd->arg1);
 			break;
 		case O_TEE:
 			snprintf(SNPARGS(action2, 0), "Tee %d",
 				cmd->arg1);
 			break;
 		case O_SKIPTO:
 			snprintf(SNPARGS(action2, 0), "SkipTo %d",
 				cmd->arg1);
 			break;
 		case O_PIPE:
 			snprintf(SNPARGS(action2, 0), "Pipe %d",
 				cmd->arg1);
 			break;
 		case O_QUEUE:
 			snprintf(SNPARGS(action2, 0), "Queue %d",
 				cmd->arg1);
 			break;
 		case O_FORWARD_IP: {
 			ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
 			int len;
 
 			len = snprintf(SNPARGS(action2, 0), "Forward to %s",
 				inet_ntoa(sa->sa.sin_addr));
 			if (sa->sa.sin_port)
 				snprintf(SNPARGS(action2, len), ":%d",
 				    sa->sa.sin_port);
 			}
 			break;
 		default:
 			action = "UNKNOWN";
 			break;
 		}
 	}
 
 	if (hlen == 0) {	/* non-ip */
 		snprintf(SNPARGS(proto, 0), "MAC");
 	} else {
 		struct ip *ip = mtod(m, struct ip *);
 		/* these three are all aliases to the same thing */
 		struct icmp *const icmp = L3HDR(struct icmp, ip);
 		struct tcphdr *const tcp = (struct tcphdr *)icmp;
 		struct udphdr *const udp = (struct udphdr *)icmp;
 
 		int ip_off, offset, ip_len;
 
 		int len;
 
 		if (eh != NULL) { /* layer 2 packets are as on the wire */
 			ip_off = ntohs(ip->ip_off);
 			ip_len = ntohs(ip->ip_len);
 		} else {
 			ip_off = ip->ip_off;
 			ip_len = ip->ip_len;
 		}
 		offset = ip_off & IP_OFFMASK;
 		switch (ip->ip_p) {
 		case IPPROTO_TCP:
 			len = snprintf(SNPARGS(proto, 0), "TCP %s",
 			    inet_ntoa(ip->ip_src));
 			if (offset == 0)
 				snprintf(SNPARGS(proto, len), ":%d %s:%d",
 				    ntohs(tcp->th_sport),
 				    inet_ntoa(ip->ip_dst),
 				    ntohs(tcp->th_dport));
 			else
 				snprintf(SNPARGS(proto, len), " %s",
 				    inet_ntoa(ip->ip_dst));
 			break;
 
 		case IPPROTO_UDP:
 			len = snprintf(SNPARGS(proto, 0), "UDP %s",
 				inet_ntoa(ip->ip_src));
 			if (offset == 0)
 				snprintf(SNPARGS(proto, len), ":%d %s:%d",
 				    ntohs(udp->uh_sport),
 				    inet_ntoa(ip->ip_dst),
 				    ntohs(udp->uh_dport));
 			else
 				snprintf(SNPARGS(proto, len), " %s",
 				    inet_ntoa(ip->ip_dst));
 			break;
 
 		case IPPROTO_ICMP:
 			if (offset == 0)
 				len = snprintf(SNPARGS(proto, 0),
 				    "ICMP:%u.%u ",
 				    icmp->icmp_type, icmp->icmp_code);
 			else
 				len = snprintf(SNPARGS(proto, 0), "ICMP ");
 			len += snprintf(SNPARGS(proto, len), "%s",
 			    inet_ntoa(ip->ip_src));
 			snprintf(SNPARGS(proto, len), " %s",
 			    inet_ntoa(ip->ip_dst));
 			break;
 
 		default:
 			len = snprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p,
 			    inet_ntoa(ip->ip_src));
 			snprintf(SNPARGS(proto, len), " %s",
 			    inet_ntoa(ip->ip_dst));
 			break;
 		}
 
 		if (ip_off & (IP_MF | IP_OFFMASK))
 			snprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)",
 			     ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2),
 			     offset << 3,
 			     (ip_off & IP_MF) ? "+" : "");
 	}
 	if (oif || m->m_pkthdr.rcvif)
 		log(LOG_SECURITY | LOG_INFO,
 		    "ipfw: %d %s %s %s via %s%s\n",
 		    f ? f->rulenum : -1,
 		    action, proto, oif ? "out" : "in",
 		    oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname,
 		    fragment);
 	else
 		log(LOG_SECURITY | LOG_INFO,
 		    "ipfw: %d %s %s [no if info]%s\n",
 		    f ? f->rulenum : -1,
 		    action, proto, fragment);
 	if (limit_reached)
 		log(LOG_SECURITY | LOG_NOTICE,
 		    "ipfw: limit %d reached on entry %d\n",
 		    limit_reached, f ? f->rulenum : -1);
 }
 
 /*
  * IMPORTANT: the hash function for dynamic rules must be commutative
  * in source and destination (ip,port), because rules are bidirectional
  * and we want to find both in the same bucket.
  */
 static __inline int
 hash_packet(struct ipfw_flow_id *id)
 {
 	u_int32_t i;
 
 	i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port);
 	i &= (curr_dyn_buckets - 1);
 	return i;
 }
 
 /**
  * unlink a dynamic rule from a chain. prev is a pointer to
  * the previous one, q is a pointer to the rule to delete,
  * head is a pointer to the head of the queue.
  * Modifies q and potentially also head.
  */
 #define UNLINK_DYN_RULE(prev, head, q) {				\
 	ipfw_dyn_rule *old_q = q;					\
 									\
 	/* remove a refcount to the parent */				\
 	if (q->dyn_type == O_LIMIT)					\
 		q->parent->count--;					\
 	DEB(printf("ipfw: unlink entry 0x%08x %d -> 0x%08x %d, %d left\n",\
 		(q->id.src_ip), (q->id.src_port),			\
 		(q->id.dst_ip), (q->id.dst_port), dyn_count-1 ); )	\
 	if (prev != NULL)						\
 		prev->next = q = q->next;				\
 	else								\
 		head = q = q->next;					\
 	dyn_count--;							\
 	free(old_q, M_IPFW); }
 
 #define TIME_LEQ(a,b)       ((int)((a)-(b)) <= 0)
 
 /**
  * Remove dynamic rules pointing to "rule", or all of them if rule == NULL.
  *
  * If keep_me == NULL, rules are deleted even if not expired,
  * otherwise only expired rules are removed.
  *
  * The value of the second parameter is also used to point to identify
  * a rule we absolutely do not want to remove (e.g. because we are
  * holding a reference to it -- this is the case with O_LIMIT_PARENT
  * rules). The pointer is only used for comparison, so any non-null
  * value will do.
  */
 static void
 remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me)
 {
 	static u_int32_t last_remove = 0;
 
 #define FORCE (keep_me == NULL)
 
 	ipfw_dyn_rule *prev, *q;
 	int i, pass = 0, max_pass = 0;
 
 	IPFW_DYN_LOCK_ASSERT();
 
 	if (ipfw_dyn_v == NULL || dyn_count == 0)
 		return;
 	/* do not expire more than once per second, it is useless */
 	if (!FORCE && last_remove == time_second)
 		return;
 	last_remove = time_second;
 
 	/*
 	 * because O_LIMIT refer to parent rules, during the first pass only
 	 * remove child and mark any pending LIMIT_PARENT, and remove
 	 * them in a second pass.
 	 */
 next_pass:
 	for (i = 0 ; i < curr_dyn_buckets ; i++) {
 		for (prev=NULL, q = ipfw_dyn_v[i] ; q ; ) {
 			/*
 			 * Logic can become complex here, so we split tests.
 			 */
 			if (q == keep_me)
 				goto next;
 			if (rule != NULL && rule != q->rule)
 				goto next; /* not the one we are looking for */
 			if (q->dyn_type == O_LIMIT_PARENT) {
 				/*
 				 * handle parent in the second pass,
 				 * record we need one.
 				 */
 				max_pass = 1;
 				if (pass == 0)
 					goto next;
 				if (FORCE && q->count != 0 ) {
 					/* XXX should not happen! */
 					printf("ipfw: OUCH! cannot remove rule,"
 					     " count %d\n", q->count);
 				}
 			} else {
 				if (!FORCE &&
 				    !TIME_LEQ( q->expire, time_second ))
 					goto next;
 			}
              if (q->dyn_type != O_LIMIT_PARENT || !q->count) {
                      UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q);
                      continue;
              }
 next:
 			prev=q;
 			q=q->next;
 		}
 	}
 	if (pass++ < max_pass)
 		goto next_pass;
 }
 
 
 /**
  * lookup a dynamic rule.
  */
 static ipfw_dyn_rule *
 lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction,
 	struct tcphdr *tcp)
 {
 	/*
 	 * stateful ipfw extensions.
 	 * Lookup into dynamic session queue
 	 */
 #define MATCH_REVERSE	0
 #define MATCH_FORWARD	1
 #define MATCH_NONE	2
 #define MATCH_UNKNOWN	3
 	int i, dir = MATCH_NONE;
 	ipfw_dyn_rule *prev, *q=NULL;
 
 	IPFW_DYN_LOCK_ASSERT();
 
 	if (ipfw_dyn_v == NULL)
 		goto done;	/* not found */
 	i = hash_packet( pkt );
 	for (prev=NULL, q = ipfw_dyn_v[i] ; q != NULL ; ) {
 		if (q->dyn_type == O_LIMIT_PARENT && q->count)
 			goto next;
 		if (TIME_LEQ( q->expire, time_second)) { /* expire entry */
 			UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q);
 			continue;
 		}
 		if (pkt->proto == q->id.proto &&
 		    q->dyn_type != O_LIMIT_PARENT) {
 			if (pkt->src_ip == q->id.src_ip &&
 			    pkt->dst_ip == q->id.dst_ip &&
 			    pkt->src_port == q->id.src_port &&
 			    pkt->dst_port == q->id.dst_port ) {
 				dir = MATCH_FORWARD;
 				break;
 			}
 			if (pkt->src_ip == q->id.dst_ip &&
 			    pkt->dst_ip == q->id.src_ip &&
 			    pkt->src_port == q->id.dst_port &&
 			    pkt->dst_port == q->id.src_port ) {
 				dir = MATCH_REVERSE;
 				break;
 			}
 		}
 next:
 		prev = q;
 		q = q->next;
 	}
 	if (q == NULL)
 		goto done; /* q = NULL, not found */
 
 	if ( prev != NULL) { /* found and not in front */
 		prev->next = q->next;
 		q->next = ipfw_dyn_v[i];
 		ipfw_dyn_v[i] = q;
 	}
 	if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */
 		u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST);
 
 #define BOTH_SYN	(TH_SYN | (TH_SYN << 8))
 #define BOTH_FIN	(TH_FIN | (TH_FIN << 8))
 		q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8);
 		switch (q->state) {
 		case TH_SYN:				/* opening */
 			q->expire = time_second + dyn_syn_lifetime;
 			break;
 
 		case BOTH_SYN:			/* move to established */
 		case BOTH_SYN | TH_FIN :	/* one side tries to close */
 		case BOTH_SYN | (TH_FIN << 8) :
  			if (tcp) {
 #define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0)
 			    u_int32_t ack = ntohl(tcp->th_ack);
 			    if (dir == MATCH_FORWARD) {
 				if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd))
 				    q->ack_fwd = ack;
 				else { /* ignore out-of-sequence */
 				    break;
 				}
 			    } else {
 				if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev))
 				    q->ack_rev = ack;
 				else { /* ignore out-of-sequence */
 				    break;
 				}
 			    }
 			}
 			q->expire = time_second + dyn_ack_lifetime;
 			break;
 
 		case BOTH_SYN | BOTH_FIN:	/* both sides closed */
 			if (dyn_fin_lifetime >= dyn_keepalive_period)
 				dyn_fin_lifetime = dyn_keepalive_period - 1;
 			q->expire = time_second + dyn_fin_lifetime;
 			break;
 
 		default:
 #if 0
 			/*
 			 * reset or some invalid combination, but can also
 			 * occur if we use keep-state the wrong way.
 			 */
 			if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0)
 				printf("invalid state: 0x%x\n", q->state);
 #endif
 			if (dyn_rst_lifetime >= dyn_keepalive_period)
 				dyn_rst_lifetime = dyn_keepalive_period - 1;
 			q->expire = time_second + dyn_rst_lifetime;
 			break;
 		}
 	} else if (pkt->proto == IPPROTO_UDP) {
 		q->expire = time_second + dyn_udp_lifetime;
 	} else {
 		/* other protocols */
 		q->expire = time_second + dyn_short_lifetime;
 	}
 done:
 	if (match_direction)
 		*match_direction = dir;
 	return q;
 }
 
 static ipfw_dyn_rule *
 lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction,
 	struct tcphdr *tcp)
 {
 	ipfw_dyn_rule *q;
 
 	IPFW_DYN_LOCK();
 	q = lookup_dyn_rule_locked(pkt, match_direction, tcp);
 	if (q == NULL)
 		IPFW_DYN_UNLOCK();
 	/* NB: return table locked when q is not NULL */
 	return q;
 }
 
 static void
 realloc_dynamic_table(void)
 {
 	IPFW_DYN_LOCK_ASSERT();
 
 	/*
 	 * Try reallocation, make sure we have a power of 2 and do
 	 * not allow more than 64k entries. In case of overflow,
 	 * default to 1024.
 	 */
 
 	if (dyn_buckets > 65536)
 		dyn_buckets = 1024;
 	if ((dyn_buckets & (dyn_buckets-1)) != 0) { /* not a power of 2 */
 		dyn_buckets = curr_dyn_buckets; /* reset */
 		return;
 	}
 	curr_dyn_buckets = dyn_buckets;
 	if (ipfw_dyn_v != NULL)
 		free(ipfw_dyn_v, M_IPFW);
 	for (;;) {
 		ipfw_dyn_v = malloc(curr_dyn_buckets * sizeof(ipfw_dyn_rule *),
 		       M_IPFW, M_NOWAIT | M_ZERO);
 		if (ipfw_dyn_v != NULL || curr_dyn_buckets <= 2)
 			break;
 		curr_dyn_buckets /= 2;
 	}
 }
 
 /**
  * Install state of type 'type' for a dynamic session.
  * The hash table contains two type of rules:
  * - regular rules (O_KEEP_STATE)
  * - rules for sessions with limited number of sess per user
  *   (O_LIMIT). When they are created, the parent is
  *   increased by 1, and decreased on delete. In this case,
  *   the third parameter is the parent rule and not the chain.
  * - "parent" rules for the above (O_LIMIT_PARENT).
  */
 static ipfw_dyn_rule *
 add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule)
 {
 	ipfw_dyn_rule *r;
 	int i;
 
 	IPFW_DYN_LOCK_ASSERT();
 
 	if (ipfw_dyn_v == NULL ||
 	    (dyn_count == 0 && dyn_buckets != curr_dyn_buckets)) {
 		realloc_dynamic_table();
 		if (ipfw_dyn_v == NULL)
 			return NULL; /* failed ! */
 	}
 	i = hash_packet(id);
 
 	r = malloc(sizeof *r, M_IPFW, M_NOWAIT | M_ZERO);
 	if (r == NULL) {
 		printf ("ipfw: sorry cannot allocate state\n");
 		return NULL;
 	}
 
 	/* increase refcount on parent, and set pointer */
 	if (dyn_type == O_LIMIT) {
 		ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule;
 		if ( parent->dyn_type != O_LIMIT_PARENT)
 			panic("invalid parent");
 		parent->count++;
 		r->parent = parent;
 		rule = parent->rule;
 	}
 
 	r->id = *id;
 	r->expire = time_second + dyn_syn_lifetime;
 	r->rule = rule;
 	r->dyn_type = dyn_type;
 	r->pcnt = r->bcnt = 0;
 	r->count = 0;
 
 	r->bucket = i;
 	r->next = ipfw_dyn_v[i];
 	ipfw_dyn_v[i] = r;
 	dyn_count++;
 	DEB(printf("ipfw: add dyn entry ty %d 0x%08x %d -> 0x%08x %d, total %d\n",
 	   dyn_type,
 	   (r->id.src_ip), (r->id.src_port),
 	   (r->id.dst_ip), (r->id.dst_port),
 	   dyn_count ); )
 	return r;
 }
 
 /**
  * lookup dynamic parent rule using pkt and rule as search keys.
  * If the lookup fails, then install one.
  */
 static ipfw_dyn_rule *
 lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule)
 {
 	ipfw_dyn_rule *q;
 	int i;
 
 	IPFW_DYN_LOCK_ASSERT();
 
 	if (ipfw_dyn_v) {
 		i = hash_packet( pkt );
 		for (q = ipfw_dyn_v[i] ; q != NULL ; q=q->next)
 			if (q->dyn_type == O_LIMIT_PARENT &&
 			    rule== q->rule &&
 			    pkt->proto == q->id.proto &&
 			    pkt->src_ip == q->id.src_ip &&
 			    pkt->dst_ip == q->id.dst_ip &&
 			    pkt->src_port == q->id.src_port &&
 			    pkt->dst_port == q->id.dst_port) {
 				q->expire = time_second + dyn_short_lifetime;
 				DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);)
 				return q;
 			}
 	}
 	return add_dyn_rule(pkt, O_LIMIT_PARENT, rule);
 }
 
 /**
  * Install dynamic state for rule type cmd->o.opcode
  *
  * Returns 1 (failure) if state is not installed because of errors or because
  * session limitations are enforced.
  */
 static int
 install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
 	struct ip_fw_args *args)
 {
 	static int last_log;
 
 	ipfw_dyn_rule *q;
 
 	DEB(printf("ipfw: install state type %d 0x%08x %u -> 0x%08x %u\n",
 	    cmd->o.opcode,
 	    (args->f_id.src_ip), (args->f_id.src_port),
 	    (args->f_id.dst_ip), (args->f_id.dst_port) );)
 
 	IPFW_DYN_LOCK();
 
 	q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL);
 
 	if (q != NULL) { /* should never occur */
 		if (last_log != time_second) {
 			last_log = time_second;
 			printf("ipfw: install_state: entry already present, done\n");
 		}
 		IPFW_DYN_UNLOCK();
 		return 0;
 	}
 
 	if (dyn_count >= dyn_max)
 		/*
 		 * Run out of slots, try to remove any expired rule.
 		 */
 		remove_dyn_rule(NULL, (ipfw_dyn_rule *)1);
 
 	if (dyn_count >= dyn_max) {
 		if (last_log != time_second) {
 			last_log = time_second;
 			printf("ipfw: install_state: Too many dynamic rules\n");
 		}
 		IPFW_DYN_UNLOCK();
 		return 1; /* cannot install, notify caller */
 	}
 
 	switch (cmd->o.opcode) {
 	case O_KEEP_STATE: /* bidir rule */
 		add_dyn_rule(&args->f_id, O_KEEP_STATE, rule);
 		break;
 
 	case O_LIMIT: /* limit number of sessions */
 	    {
 		u_int16_t limit_mask = cmd->limit_mask;
 		struct ipfw_flow_id id;
 		ipfw_dyn_rule *parent;
 
 		DEB(printf("ipfw: installing dyn-limit rule %d\n",
 		    cmd->conn_limit);)
 
 		id.dst_ip = id.src_ip = 0;
 		id.dst_port = id.src_port = 0;
 		id.proto = args->f_id.proto;
 
 		if (limit_mask & DYN_SRC_ADDR)
 			id.src_ip = args->f_id.src_ip;
 		if (limit_mask & DYN_DST_ADDR)
 			id.dst_ip = args->f_id.dst_ip;
 		if (limit_mask & DYN_SRC_PORT)
 			id.src_port = args->f_id.src_port;
 		if (limit_mask & DYN_DST_PORT)
 			id.dst_port = args->f_id.dst_port;
 		parent = lookup_dyn_parent(&id, rule);
 		if (parent == NULL) {
 			printf("ipfw: add parent failed\n");
 			return 1;
 		}
 		if (parent->count >= cmd->conn_limit) {
 			/*
 			 * See if we can remove some expired rule.
 			 */
 			remove_dyn_rule(rule, parent);
 			if (parent->count >= cmd->conn_limit) {
 				if (fw_verbose && last_log != time_second) {
 					last_log = time_second;
 					log(LOG_SECURITY | LOG_DEBUG,
 					    "drop session, too many entries\n");
 				}
 				IPFW_DYN_UNLOCK();
 				return 1;
 			}
 		}
 		add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent);
 	    }
 		break;
 	default:
 		printf("ipfw: unknown dynamic rule type %u\n", cmd->o.opcode);
 		IPFW_DYN_UNLOCK();
 		return 1;
 	}
 	lookup_dyn_rule_locked(&args->f_id, NULL, NULL); /* XXX just set lifetime */
 	IPFW_DYN_UNLOCK();
 	return 0;
 }
 
 /*
  * Transmit a TCP packet, containing either a RST or a keepalive.
  * When flags & TH_RST, we are sending a RST packet, because of a
  * "reset" action matched the packet.
  * Otherwise we are sending a keepalive, and flags & TH_
  */
 static void
 send_pkt(struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags)
 {
 	struct mbuf *m;
 	struct ip *ip;
 	struct tcphdr *tcp;
 
 	MGETHDR(m, M_DONTWAIT, MT_HEADER);
 	if (m == 0)
 		return;
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 	m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr);
 	m->m_data += max_linkhdr;
 
 	ip = mtod(m, struct ip *);
 	bzero(ip, m->m_len);
 	tcp = (struct tcphdr *)(ip + 1); /* no IP options */
 	ip->ip_p = IPPROTO_TCP;
 	tcp->th_off = 5;
 	/*
 	 * Assume we are sending a RST (or a keepalive in the reverse
 	 * direction), swap src and destination addresses and ports.
 	 */
 	ip->ip_src.s_addr = htonl(id->dst_ip);
 	ip->ip_dst.s_addr = htonl(id->src_ip);
 	tcp->th_sport = htons(id->dst_port);
 	tcp->th_dport = htons(id->src_port);
 	if (flags & TH_RST) {	/* we are sending a RST */
 		if (flags & TH_ACK) {
 			tcp->th_seq = htonl(ack);
 			tcp->th_ack = htonl(0);
 			tcp->th_flags = TH_RST;
 		} else {
 			if (flags & TH_SYN)
 				seq++;
 			tcp->th_seq = htonl(0);
 			tcp->th_ack = htonl(seq);
 			tcp->th_flags = TH_RST | TH_ACK;
 		}
 	} else {
 		/*
 		 * We are sending a keepalive. flags & TH_SYN determines
 		 * the direction, forward if set, reverse if clear.
 		 * NOTE: seq and ack are always assumed to be correct
 		 * as set by the caller. This may be confusing...
 		 */
 		if (flags & TH_SYN) {
 			/*
 			 * we have to rewrite the correct addresses!
 			 */
 			ip->ip_dst.s_addr = htonl(id->dst_ip);
 			ip->ip_src.s_addr = htonl(id->src_ip);
 			tcp->th_dport = htons(id->dst_port);
 			tcp->th_sport = htons(id->src_port);
 		}
 		tcp->th_seq = htonl(seq);
 		tcp->th_ack = htonl(ack);
 		tcp->th_flags = TH_ACK;
 	}
 	/*
 	 * set ip_len to the payload size so we can compute
 	 * the tcp checksum on the pseudoheader
 	 * XXX check this, could save a couple of words ?
 	 */
 	ip->ip_len = htons(sizeof(struct tcphdr));
 	tcp->th_sum = in_cksum(m, m->m_pkthdr.len);
 	/*
 	 * now fill fields left out earlier
 	 */
 	ip->ip_ttl = ip_defttl;
 	ip->ip_len = m->m_pkthdr.len;
 	m->m_flags |= M_SKIP_FIREWALL;
 	ip_output(m, NULL, NULL, 0, NULL, NULL);
 }
 
 /*
  * sends a reject message, consuming the mbuf passed as an argument.
  */
 static void
 send_reject(struct ip_fw_args *args, int code, int offset, int ip_len)
 {
 
 	if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
 		/* We need the IP header in host order for icmp_error(). */
 		if (args->eh != NULL) {
 			struct ip *ip = mtod(args->m, struct ip *);
 			ip->ip_len = ntohs(ip->ip_len);
 			ip->ip_off = ntohs(ip->ip_off);
 		}
 		icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
 	} else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) {
 		struct tcphdr *const tcp =
 		    L3HDR(struct tcphdr, mtod(args->m, struct ip *));
 		if ( (tcp->th_flags & TH_RST) == 0)
 			send_pkt(&(args->f_id), ntohl(tcp->th_seq),
 				ntohl(tcp->th_ack),
 				tcp->th_flags | TH_RST);
 		m_freem(args->m);
 	} else
 		m_freem(args->m);
 	args->m = NULL;
 }
 
 /**
  *
  * Given an ip_fw *, lookup_next_rule will return a pointer
  * to the next rule, which can be either the jump
  * target (for skipto instructions) or the next one in the list (in
  * all other cases including a missing jump target).
  * The result is also written in the "next_rule" field of the rule.
  * Backward jumps are not allowed, so start looking from the next
  * rule...
  *
  * This never returns NULL -- in case we do not have an exact match,
  * the next rule is returned. When the ruleset is changed,
  * pointers are flushed so we are always correct.
  */
 
 static struct ip_fw *
 lookup_next_rule(struct ip_fw *me)
 {
 	struct ip_fw *rule = NULL;
 	ipfw_insn *cmd;
 
 	/* look for action, in case it is a skipto */
 	cmd = ACTION_PTR(me);
 	if (cmd->opcode == O_LOG)
 		cmd += F_LEN(cmd);
 	if ( cmd->opcode == O_SKIPTO )
 		for (rule = me->next; rule ; rule = rule->next)
 			if (rule->rulenum >= cmd->arg1)
 				break;
 	if (rule == NULL)			/* failure or not a skipto */
 		rule = me->next;
 	me->next_rule = rule;
 	return rule;
 }
 
 static int
 check_uidgid(ipfw_insn_u32 *insn,
 	int proto, struct ifnet *oif,
 	struct in_addr dst_ip, u_int16_t dst_port,
 	struct in_addr src_ip, u_int16_t src_port)
 {
 	struct inpcbinfo *pi;
 	int wildcard;
 	struct inpcb *pcb;
 	int match;
 
 	if (proto == IPPROTO_TCP) {
 		wildcard = 0;
 		pi = &tcbinfo;
 	} else if (proto == IPPROTO_UDP) {
 		wildcard = 1;
 		pi = &udbinfo;
 	} else
 		return 0;
 
 	match = 0;
 
 	INP_INFO_RLOCK(pi);	/* XXX LOR with IPFW */
 	pcb =  (oif) ?
 		in_pcblookup_hash(pi,
 		    dst_ip, htons(dst_port),
 		    src_ip, htons(src_port),
 		    wildcard, oif) :
 		in_pcblookup_hash(pi,
 		    src_ip, htons(src_port),
 		    dst_ip, htons(dst_port),
 		    wildcard, NULL);
 	if (pcb != NULL) {
 		INP_LOCK(pcb);
 		if (pcb->inp_socket != NULL) {
 #if __FreeBSD_version < 500034
 #define socheckuid(a,b)	((a)->so_cred->cr_uid != (b))
 #endif
 			if (insn->o.opcode == O_UID) {
 				match = !socheckuid(pcb->inp_socket,
 				   (uid_t)insn->d[0]);
 			} else  {
 				match = groupmember((uid_t)insn->d[0],
 				    pcb->inp_socket->so_cred);
 			}
 		}
 		INP_UNLOCK(pcb);
 	}
 	INP_INFO_RUNLOCK(pi);
 
 	return match;
 }
 
 /*
  * The main check routine for the firewall.
  *
  * All arguments are in args so we can modify them and return them
  * back to the caller.
  *
  * Parameters:
  *
  *	args->m	(in/out) The packet; we set to NULL when/if we nuke it.
  *		Starts with the IP header.
  *	args->eh (in)	Mac header if present, or NULL for layer3 packet.
  *	args->oif	Outgoing interface, or NULL if packet is incoming.
  *		The incoming interface is in the mbuf. (in)
  *	args->divert_rule (in/out)
  *		Skip up to the first rule past this rule number;
  *		upon return, non-zero port number for divert or tee.
  *
  *	args->rule	Pointer to the last matching rule (in/out)
  *	args->next_hop	Socket we are forwarding to (out).
  *	args->f_id	Addresses grabbed from the packet (out)
  *
  * Return value:
  *
  *	IP_FW_PORT_DENY_FLAG	the packet must be dropped.
  *	0	The packet is to be accepted and routed normally OR
  *      	the packet was denied/rejected and has been dropped;
  *		in the latter case, *m is equal to NULL upon return.
  *	port	Divert the packet to port, with these caveats:
  *
  *		- If IP_FW_PORT_TEE_FLAG is set, tee the packet instead
  *		  of diverting it (ie, 'ipfw tee').
  *
  *		- If IP_FW_PORT_DYNT_FLAG is set, interpret the lower
  *		  16 bits as a dummynet pipe number instead of diverting
  */
 
 static int
 ipfw_chk(struct ip_fw_args *args)
 {
 	/*
 	 * Local variables hold state during the processing of a packet.
 	 *
 	 * IMPORTANT NOTE: to speed up the processing of rules, there
 	 * are some assumption on the values of the variables, which
 	 * are documented here. Should you change them, please check
 	 * the implementation of the various instructions to make sure
 	 * that they still work.
 	 *
 	 * args->eh	The MAC header. It is non-null for a layer2
 	 *	packet, it is NULL for a layer-3 packet.
 	 *
 	 * m | args->m	Pointer to the mbuf, as received from the caller.
 	 *	It may change if ipfw_chk() does an m_pullup, or if it
 	 *	consumes the packet because it calls send_reject().
 	 *	XXX This has to change, so that ipfw_chk() never modifies
 	 *	or consumes the buffer.
 	 * ip	is simply an alias of the value of m, and it is kept
 	 *	in sync with it (the packet is	supposed to start with
 	 *	the ip header).
 	 */
 	struct mbuf *m = args->m;
 	struct ip *ip = mtod(m, struct ip *);
 
 	/*
 	 * oif | args->oif	If NULL, ipfw_chk has been called on the
 	 *	inbound path (ether_input, bdg_forward, ip_input).
 	 *	If non-NULL, ipfw_chk has been called on the outbound path
 	 *	(ether_output, ip_output).
 	 */
 	struct ifnet *oif = args->oif;
 
 	struct ip_fw *f = NULL;		/* matching rule */
 	int retval = 0;
 
 	/*
 	 * hlen	The length of the IPv4 header.
 	 *	hlen >0 means we have an IPv4 packet.
 	 */
 	u_int hlen = 0;		/* hlen >0 means we have an IP pkt */
 
 	/*
 	 * offset	The offset of a fragment. offset != 0 means that
 	 *	we have a fragment at this offset of an IPv4 packet.
 	 *	offset == 0 means that (if this is an IPv4 packet)
 	 *	this is the first or only fragment.
 	 */
 	u_short offset = 0;
 
 	/*
 	 * Local copies of addresses. They are only valid if we have
 	 * an IP packet.
 	 *
 	 * proto	The protocol. Set to 0 for non-ip packets,
 	 *	or to the protocol read from the packet otherwise.
 	 *	proto != 0 means that we have an IPv4 packet.
 	 *
 	 * src_port, dst_port	port numbers, in HOST format. Only
 	 *	valid for TCP and UDP packets.
 	 *
 	 * src_ip, dst_ip	ip addresses, in NETWORK format.
 	 *	Only valid for IPv4 packets.
 	 */
 	u_int8_t proto;
 	u_int16_t src_port = 0, dst_port = 0;	/* NOTE: host format	*/
 	struct in_addr src_ip, dst_ip;		/* NOTE: network format	*/
 	u_int16_t ip_len=0;
 	int pktlen;
 	int dyn_dir = MATCH_UNKNOWN;
 	ipfw_dyn_rule *q = NULL;
 	struct ip_fw_chain *chain = &layer3_chain;
 
 	if (m->m_flags & M_SKIP_FIREWALL)
 		return 0;	/* accept */
 	/*
 	 * dyn_dir = MATCH_UNKNOWN when rules unchecked,
 	 * 	MATCH_NONE when checked and not matched (q = NULL),
 	 *	MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL)
 	 */
 
 	pktlen = m->m_pkthdr.len;
 	if (args->eh == NULL ||		/* layer 3 packet */
 		( m->m_pkthdr.len >= sizeof(struct ip) &&
 		    ntohs(args->eh->ether_type) == ETHERTYPE_IP))
 			hlen = ip->ip_hl << 2;
 
 	/*
 	 * Collect parameters into local variables for faster matching.
 	 */
 	if (hlen == 0) {	/* do not grab addresses for non-ip pkts */
 		proto = args->f_id.proto = 0;	/* mark f_id invalid */
 		goto after_ip_checks;
 	}
 
 	proto = args->f_id.proto = ip->ip_p;
 	src_ip = ip->ip_src;
 	dst_ip = ip->ip_dst;
 	if (args->eh != NULL) { /* layer 2 packets are as on the wire */
 		offset = ntohs(ip->ip_off) & IP_OFFMASK;
 		ip_len = ntohs(ip->ip_len);
 	} else {
 		offset = ip->ip_off & IP_OFFMASK;
 		ip_len = ip->ip_len;
 	}
 	pktlen = ip_len < pktlen ? ip_len : pktlen;
 
 #define PULLUP_TO(len)						\
 		do {						\
 			if ((m)->m_len < (len)) {		\
 			    args->m = m = m_pullup(m, (len));	\
 			    if (m == 0)				\
 				goto pullup_failed;		\
 			    ip = mtod(m, struct ip *);		\
 			}					\
 		} while (0)
 
 	if (offset == 0) {
 		switch (proto) {
 		case IPPROTO_TCP:
 		    {
 			struct tcphdr *tcp;
 
 			PULLUP_TO(hlen + sizeof(struct tcphdr));
 			tcp = L3HDR(struct tcphdr, ip);
 			dst_port = tcp->th_dport;
 			src_port = tcp->th_sport;
 			args->f_id.flags = tcp->th_flags;
 			}
 			break;
 
 		case IPPROTO_UDP:
 		    {
 			struct udphdr *udp;
 
 			PULLUP_TO(hlen + sizeof(struct udphdr));
 			udp = L3HDR(struct udphdr, ip);
 			dst_port = udp->uh_dport;
 			src_port = udp->uh_sport;
 			}
 			break;
 
 		case IPPROTO_ICMP:
 			PULLUP_TO(hlen + 4);	/* type, code and checksum. */
 			args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type;
 			break;
 
 		default:
 			break;
 		}
 #undef PULLUP_TO
 	}
 
 	args->f_id.src_ip = ntohl(src_ip.s_addr);
 	args->f_id.dst_ip = ntohl(dst_ip.s_addr);
 	args->f_id.src_port = src_port = ntohs(src_port);
 	args->f_id.dst_port = dst_port = ntohs(dst_port);
 
 after_ip_checks:
 	IPFW_LOCK(chain);		/* XXX expensive? can we run lock free? */
 	if (args->rule) {
 		/*
 		 * Packet has already been tagged. Look for the next rule
 		 * to restart processing.
 		 *
 		 * If fw_one_pass != 0 then just accept it.
 		 * XXX should not happen here, but optimized out in
 		 * the caller.
 		 */
 		if (fw_one_pass) {
 			IPFW_UNLOCK(chain);	/* XXX optimize */
 			return 0;
 		}
 
 		f = args->rule->next_rule;
 		if (f == NULL)
 			f = lookup_next_rule(args->rule);
 	} else {
 		/*
 		 * Find the starting rule. It can be either the first
 		 * one, or the one after divert_rule if asked so.
 		 */
 		int skipto = args->divert_rule;
 
 		f = chain->rules;
 		if (args->eh == NULL && skipto != 0) {
 			if (skipto >= IPFW_DEFAULT_RULE) {
 				IPFW_UNLOCK(chain);
 				return(IP_FW_PORT_DENY_FLAG); /* invalid */
 			}
 			while (f && f->rulenum <= skipto)
 				f = f->next;
 			if (f == NULL) {	/* drop packet */
 				IPFW_UNLOCK(chain);
 				return(IP_FW_PORT_DENY_FLAG);
 			}
 		}
 	}
 	args->divert_rule = 0;	/* reset to avoid confusion later */
 
 	/*
 	 * Now scan the rules, and parse microinstructions for each rule.
 	 */
 	for (; f; f = f->next) {
 		int l, cmdlen;
 		ipfw_insn *cmd;
 		int skip_or; /* skip rest of OR block */
 
 again:
 		if (set_disable & (1 << f->set) )
 			continue;
 
 		skip_or = 0;
 		for (l = f->cmd_len, cmd = f->cmd ; l > 0 ;
 		    l -= cmdlen, cmd += cmdlen) {
 			int match;
 
 			/*
 			 * check_body is a jump target used when we find a
 			 * CHECK_STATE, and need to jump to the body of
 			 * the target rule.
 			 */
 
 check_body:
 			cmdlen = F_LEN(cmd);
 			/*
 			 * An OR block (insn_1 || .. || insn_n) has the
 			 * F_OR bit set in all but the last instruction.
 			 * The first match will set "skip_or", and cause
 			 * the following instructions to be skipped until
 			 * past the one with the F_OR bit clear.
 			 */
 			if (skip_or) {		/* skip this instruction */
 				if ((cmd->len & F_OR) == 0)
 					skip_or = 0;	/* next one is good */
 				continue;
 			}
 			match = 0; /* set to 1 if we succeed */
 
 			switch (cmd->opcode) {
 			/*
 			 * The first set of opcodes compares the packet's
 			 * fields with some pattern, setting 'match' if a
 			 * match is found. At the end of the loop there is
 			 * logic to deal with F_NOT and F_OR flags associated
 			 * with the opcode.
 			 */
 			case O_NOP:
 				match = 1;
 				break;
 
 			case O_FORWARD_MAC:
 				printf("ipfw: opcode %d unimplemented\n",
 				    cmd->opcode);
 				break;
 
 			case O_GID:
 			case O_UID:
 				/*
 				 * We only check offset == 0 && proto != 0,
 				 * as this ensures that we have an IPv4
 				 * packet with the ports info.
 				 */
 				if (offset!=0)
 					break;
 				if (proto == IPPROTO_TCP ||
 				    proto == IPPROTO_UDP)
 					match = check_uidgid(
 						    (ipfw_insn_u32 *)cmd,
 						    proto, oif,
 						    dst_ip, dst_port,
 						    src_ip, src_port);
 				break;
 
 			case O_RECV:
 				match = iface_match(m->m_pkthdr.rcvif,
 				    (ipfw_insn_if *)cmd);
 				break;
 
 			case O_XMIT:
 				match = iface_match(oif, (ipfw_insn_if *)cmd);
 				break;
 
 			case O_VIA:
 				match = iface_match(oif ? oif :
 				    m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd);
 				break;
 
 			case O_MACADDR2:
 				if (args->eh != NULL) {	/* have MAC header */
 					u_int32_t *want = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->addr;
 					u_int32_t *mask = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->mask;
 					u_int32_t *hdr = (u_int32_t *)args->eh;
 
 					match =
 					    ( want[0] == (hdr[0] & mask[0]) &&
 					      want[1] == (hdr[1] & mask[1]) &&
 					      want[2] == (hdr[2] & mask[2]) );
 				}
 				break;
 
 			case O_MAC_TYPE:
 				if (args->eh != NULL) {
 					u_int16_t t =
 					    ntohs(args->eh->ether_type);
 					u_int16_t *p =
 					    ((ipfw_insn_u16 *)cmd)->ports;
 					int i;
 
 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
 						match = (t>=p[0] && t<=p[1]);
 				}
 				break;
 
 			case O_FRAG:
 				match = (hlen > 0 && offset != 0);
 				break;
 
 			case O_IN:	/* "out" is "not in" */
 				match = (oif == NULL);
 				break;
 
 			case O_LAYER2:
 				match = (args->eh != NULL);
 				break;
 
 			case O_PROTO:
 				/*
 				 * We do not allow an arg of 0 so the
 				 * check of "proto" only suffices.
 				 */
 				match = (proto == cmd->arg1);
 				break;
 
 			case O_IP_SRC:
 				match = (hlen > 0 &&
 				    ((ipfw_insn_ip *)cmd)->addr.s_addr ==
 				    src_ip.s_addr);
 				break;
 
 			case O_IP_SRC_MASK:
 			case O_IP_DST_MASK:
 				if (hlen > 0) {
 				    uint32_t a =
 					(cmd->opcode == O_IP_DST_MASK) ?
 					    dst_ip.s_addr : src_ip.s_addr;
 				    uint32_t *p = ((ipfw_insn_u32 *)cmd)->d;
 				    int i = cmdlen-1;
 
 				    for (; !match && i>0; i-= 2, p+= 2)
 					match = (p[0] == (a & p[1]));
 				}
 				break;
 
 			case O_IP_SRC_ME:
 				if (hlen > 0) {
 					struct ifnet *tif;
 
 					INADDR_TO_IFP(src_ip, tif);
 					match = (tif != NULL);
 				}
 				break;
 
 			case O_IP_DST_SET:
 			case O_IP_SRC_SET:
 				if (hlen > 0) {
 					u_int32_t *d = (u_int32_t *)(cmd+1);
 					u_int32_t addr =
 					    cmd->opcode == O_IP_DST_SET ?
 						args->f_id.dst_ip :
 						args->f_id.src_ip;
 
 					    if (addr < d[0])
 						    break;
 					    addr -= d[0]; /* subtract base */
 					    match = (addr < cmd->arg1) &&
 						( d[ 1 + (addr>>5)] &
 						  (1<<(addr & 0x1f)) );
 				}
 				break;
 
 			case O_IP_DST:
 				match = (hlen > 0 &&
 				    ((ipfw_insn_ip *)cmd)->addr.s_addr ==
 				    dst_ip.s_addr);
 				break;
 
 			case O_IP_DST_ME:
 				if (hlen > 0) {
 					struct ifnet *tif;
 
 					INADDR_TO_IFP(dst_ip, tif);
 					match = (tif != NULL);
 				}
 				break;
 
 			case O_IP_SRCPORT:
 			case O_IP_DSTPORT:
 				/*
 				 * offset == 0 && proto != 0 is enough
 				 * to guarantee that we have an IPv4
 				 * packet with port info.
 				 */
 				if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP)
 				    && offset == 0) {
 					u_int16_t x =
 					    (cmd->opcode == O_IP_SRCPORT) ?
 						src_port : dst_port ;
 					u_int16_t *p =
 					    ((ipfw_insn_u16 *)cmd)->ports;
 					int i;
 
 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
 						match = (x>=p[0] && x<=p[1]);
 				}
 				break;
 
 			case O_ICMPTYPE:
 				match = (offset == 0 && proto==IPPROTO_ICMP &&
 				    icmptype_match(ip, (ipfw_insn_u32 *)cmd) );
 				break;
 
 			case O_IPOPT:
 				match = (hlen > 0 && ipopts_match(ip, cmd) );
 				break;
 
 			case O_IPVER:
 				match = (hlen > 0 && cmd->arg1 == ip->ip_v);
 				break;
 
 			case O_IPID:
 			case O_IPLEN:
 			case O_IPTTL:
 				if (hlen > 0) {	/* only for IP packets */
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
 
 				    if (cmd->opcode == O_IPLEN)
 					x = ip_len;
 				    else if (cmd->opcode == O_IPTTL)
 					x = ip->ip_ttl;
 				    else /* must be IPID */
 					x = ntohs(ip->ip_id);
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
 				    }
 				    /* otherwise we have ranges */
 				    p = ((ipfw_insn_u16 *)cmd)->ports;
 				    i = cmdlen - 1;
 				    for (; !match && i>0; i--, p += 2)
 					match = (x >= p[0] && x <= p[1]);
 				}
 				break;
 
 			case O_IPPRECEDENCE:
 				match = (hlen > 0 &&
 				    (cmd->arg1 == (ip->ip_tos & 0xe0)) );
 				break;
 
 			case O_IPTOS:
 				match = (hlen > 0 &&
 				    flags_match(cmd, ip->ip_tos));
 				break;
 
 			case O_TCPFLAGS:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    flags_match(cmd,
 					L3HDR(struct tcphdr,ip)->th_flags));
 				break;
 
 			case O_TCPOPTS:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    tcpopts_match(ip, cmd));
 				break;
 
 			case O_TCPSEQ:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    ((ipfw_insn_u32 *)cmd)->d[0] ==
 					L3HDR(struct tcphdr,ip)->th_seq);
 				break;
 
 			case O_TCPACK:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    ((ipfw_insn_u32 *)cmd)->d[0] ==
 					L3HDR(struct tcphdr,ip)->th_ack);
 				break;
 
 			case O_TCPWIN:
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    cmd->arg1 ==
 					L3HDR(struct tcphdr,ip)->th_win);
 				break;
 
 			case O_ESTAB:
 				/* reject packets which have SYN only */
 				/* XXX should i also check for TH_ACK ? */
 				match = (proto == IPPROTO_TCP && offset == 0 &&
 				    (L3HDR(struct tcphdr,ip)->th_flags &
 				     (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
 				break;
 
 			case O_LOG:
 				if (fw_verbose)
 					ipfw_log(f, hlen, args->eh, m, oif);
 				match = 1;
 				break;
 
 			case O_PROB:
 				match = (random()<((ipfw_insn_u32 *)cmd)->d[0]);
 				break;
 
 			case O_VERREVPATH:
 				/* Outgoing packets automatically pass/match */
 				match = ((oif != NULL) ||
 				    (m->m_pkthdr.rcvif == NULL) ||
 				    verify_rev_path(src_ip, m->m_pkthdr.rcvif));
 				break;
 
 			case O_IPSEC:
 #ifdef FAST_IPSEC
 				match = (m_tag_find(m,
 				    PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
 #endif
 #ifdef IPSEC
 				match = (ipsec_getnhist(m) != NULL);
 #endif
 				/* otherwise no match */
 				break;
 
 			/*
 			 * The second set of opcodes represents 'actions',
 			 * i.e. the terminal part of a rule once the packet
 			 * matches all previous patterns.
 			 * Typically there is only one action for each rule,
 			 * and the opcode is stored at the end of the rule
 			 * (but there are exceptions -- see below).
 			 *
 			 * In general, here we set retval and terminate the
 			 * outer loop (would be a 'break 3' in some language,
 			 * but we need to do a 'goto done').
 			 *
 			 * Exceptions:
 			 * O_COUNT and O_SKIPTO actions:
 			 *   instead of terminating, we jump to the next rule
 			 *   ('goto next_rule', equivalent to a 'break 2'),
 			 *   or to the SKIPTO target ('goto again' after
 			 *   having set f, cmd and l), respectively.
 			 *
 			 * O_LIMIT and O_KEEP_STATE: these opcodes are
 			 *   not real 'actions', and are stored right
 			 *   before the 'action' part of the rule.
 			 *   These opcodes try to install an entry in the
 			 *   state tables; if successful, we continue with
 			 *   the next opcode (match=1; break;), otherwise
 			 *   the packet *   must be dropped
 			 *   ('goto done' after setting retval);
 			 *
 			 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
 			 *   cause a lookup of the state table, and a jump
 			 *   to the 'action' part of the parent rule
 			 *   ('goto check_body') if an entry is found, or
 			 *   (CHECK_STATE only) a jump to the next rule if
 			 *   the entry is not found ('goto next_rule').
 			 *   The result of the lookup is cached to make
 			 *   further instances of these opcodes are
 			 *   effectively NOPs.
 			 */
 			case O_LIMIT:
 			case O_KEEP_STATE:
 				if (install_state(f,
 				    (ipfw_insn_limit *)cmd, args)) {
 					retval = IP_FW_PORT_DENY_FLAG;
 					goto done; /* error/limit violation */
 				}
 				match = 1;
 				break;
 
 			case O_PROBE_STATE:
 			case O_CHECK_STATE:
 				/*
 				 * dynamic rules are checked at the first
 				 * keep-state or check-state occurrence,
 				 * with the result being stored in dyn_dir.
 				 * The compiler introduces a PROBE_STATE
 				 * instruction for us when we have a
 				 * KEEP_STATE (because PROBE_STATE needs
 				 * to be run first).
 				 */
 				if (dyn_dir == MATCH_UNKNOWN &&
 				    (q = lookup_dyn_rule(&args->f_id,
 				     &dyn_dir, proto == IPPROTO_TCP ?
 					L3HDR(struct tcphdr, ip) : NULL))
 					!= NULL) {
 					/*
 					 * Found dynamic entry, update stats
 					 * and jump to the 'action' part of
 					 * the parent rule.
 					 */
 					q->pcnt++;
 					q->bcnt += pktlen;
 					f = q->rule;
 					cmd = ACTION_PTR(f);
 					l = f->cmd_len - f->act_ofs;
 					IPFW_DYN_UNLOCK();
 					goto check_body;
 				}
 				/*
 				 * Dynamic entry not found. If CHECK_STATE,
 				 * skip to next rule, if PROBE_STATE just
 				 * ignore and continue with next opcode.
 				 */
 				if (cmd->opcode == O_CHECK_STATE)
 					goto next_rule;
 				match = 1;
 				break;
 
 			case O_ACCEPT:
 				retval = 0;	/* accept */
 				goto done;
 
 			case O_PIPE:
 			case O_QUEUE:
 				args->rule = f; /* report matching rule */
 				retval = cmd->arg1 | IP_FW_PORT_DYNT_FLAG;
 				goto done;
 
 			case O_DIVERT:
 			case O_TEE:
 				if (args->eh) /* not on layer 2 */
 					break;
 				args->divert_rule = f->rulenum;
 				retval = (cmd->opcode == O_DIVERT) ?
 				    cmd->arg1 :
 				    cmd->arg1 | IP_FW_PORT_TEE_FLAG;
 				goto done;
 
 			case O_COUNT:
 			case O_SKIPTO:
 				f->pcnt++;	/* update stats */
 				f->bcnt += pktlen;
 				f->timestamp = time_second;
 				if (cmd->opcode == O_COUNT)
 					goto next_rule;
 				/* handle skipto */
 				if (f->next_rule == NULL)
 					lookup_next_rule(f);
 				f = f->next_rule;
 				goto again;
 
 			case O_REJECT:
 				/*
 				 * Drop the packet and send a reject notice
 				 * if the packet is not ICMP (or is an ICMP
 				 * query), and it is not multicast/broadcast.
 				 */
 				if (hlen > 0 &&
 				    (proto != IPPROTO_ICMP ||
 				     is_icmp_query(ip)) &&
 				    !(m->m_flags & (M_BCAST|M_MCAST)) &&
 				    !IN_MULTICAST(dst_ip.s_addr)) {
 					send_reject(args, cmd->arg1,
 					    offset,ip_len);
 					m = args->m;
 				}
 				/* FALLTHROUGH */
 			case O_DENY:
 				retval = IP_FW_PORT_DENY_FLAG;
 				goto done;
 
 			case O_FORWARD_IP:
 				if (args->eh)	/* not valid on layer2 pkts */
 					break;
 				if (!q || dyn_dir == MATCH_FORWARD)
 					args->next_hop =
 					    &((ipfw_insn_sa *)cmd)->sa;
 				retval = 0;
 				goto done;
 
 			default:
 				panic("-- unknown opcode %d\n", cmd->opcode);
 			} /* end of switch() on opcodes */
 
 			if (cmd->len & F_NOT)
 				match = !match;
 
 			if (match) {
 				if (cmd->len & F_OR)
 					skip_or = 1;
 			} else {
 				if (!(cmd->len & F_OR)) /* not an OR block, */
 					break;		/* try next rule    */
 			}
 
 		}	/* end of inner for, scan opcodes */
 
 next_rule:;		/* try next rule		*/
 
 	}		/* end of outer for, scan rules */
 	printf("ipfw: ouch!, skip past end of rules, denying packet\n");
 	IPFW_UNLOCK(chain);
 	return(IP_FW_PORT_DENY_FLAG);
 
 done:
 	/* Update statistics */
 	f->pcnt++;
 	f->bcnt += pktlen;
 	f->timestamp = time_second;
 	IPFW_UNLOCK(chain);
 	return retval;
 
 pullup_failed:
 	if (fw_verbose)
 		printf("ipfw: pullup failed\n");
 	return(IP_FW_PORT_DENY_FLAG);
 }
 
 /*
  * When a rule is added/deleted, clear the next_rule pointers in all rules.
  * These will be reconstructed on the fly as packets are matched.
  */
 static void
 flush_rule_ptrs(struct ip_fw_chain *chain)
 {
 	struct ip_fw *rule;
 
 	IPFW_LOCK_ASSERT(chain);
 
 	for (rule = chain->rules; rule; rule = rule->next)
 		rule->next_rule = NULL;
 }
 
 /*
  * When pipes/queues are deleted, clear the "pipe_ptr" pointer to a given
  * pipe/queue, or to all of them (match == NULL).
  */
 void
 flush_pipe_ptrs(struct dn_flow_set *match)
 {
 	struct ip_fw *rule;
 
 	IPFW_LOCK(&layer3_chain);
 	for (rule = layer3_chain.rules; rule; rule = rule->next) {
 		ipfw_insn_pipe *cmd = (ipfw_insn_pipe *)ACTION_PTR(rule);
 
 		if (cmd->o.opcode != O_PIPE && cmd->o.opcode != O_QUEUE)
 			continue;
 		/*
 		 * XXX Use bcmp/bzero to handle pipe_ptr to overcome
 		 * possible alignment problems on 64-bit architectures.
 		 * This code is seldom used so we do not worry too
 		 * much about efficiency.
 		 */
 		if (match == NULL ||
 		    !bcmp(&cmd->pipe_ptr, &match, sizeof(match)) )
 			bzero(&cmd->pipe_ptr, sizeof(cmd->pipe_ptr));
 	}
 	IPFW_UNLOCK(&layer3_chain);
 }
 
 /*
  * Add a new rule to the list. Copy the rule into a malloc'ed area, then
  * possibly create a rule number and add the rule to the list.
  * Update the rule_number in the input struct so the caller knows it as well.
  */
 static int
 add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
 {
 	struct ip_fw *rule, *f, *prev;
 	int l = RULESIZE(input_rule);
 
 	if (chain->rules == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE)
 		return (EINVAL);
 
 	rule = malloc(l, M_IPFW, M_NOWAIT | M_ZERO);
 	if (rule == NULL)
 		return (ENOSPC);
 
 	bcopy(input_rule, rule, l);
 
 	rule->next = NULL;
 	rule->next_rule = NULL;
 
 	rule->pcnt = 0;
 	rule->bcnt = 0;
 	rule->timestamp = 0;
 
 	IPFW_LOCK(chain);
 
 	if (chain->rules == NULL) {	/* default rule */
 		chain->rules = rule;
 		goto done;
         }
 
 	/*
 	 * If rulenum is 0, find highest numbered rule before the
 	 * default rule, and add autoinc_step
 	 */
 	if (autoinc_step < 1)
 		autoinc_step = 1;
 	else if (autoinc_step > 1000)
 		autoinc_step = 1000;
 	if (rule->rulenum == 0) {
 		/*
 		 * locate the highest numbered rule before default
 		 */
 		for (f = chain->rules; f; f = f->next) {
 			if (f->rulenum == IPFW_DEFAULT_RULE)
 				break;
 			rule->rulenum = f->rulenum;
 		}
 		if (rule->rulenum < IPFW_DEFAULT_RULE - autoinc_step)
 			rule->rulenum += autoinc_step;
 		input_rule->rulenum = rule->rulenum;
 	}
 
 	/*
 	 * Now insert the new rule in the right place in the sorted list.
 	 */
 	for (prev = NULL, f = chain->rules; f; prev = f, f = f->next) {
 		if (f->rulenum > rule->rulenum) { /* found the location */
 			if (prev) {
 				rule->next = f;
 				prev->next = rule;
 			} else { /* head insert */
 				rule->next = chain->rules;
 				chain->rules = rule;
 			}
 			break;
 		}
 	}
 	flush_rule_ptrs(chain);
 done:
 	static_count++;
 	static_len += l;
 	IPFW_UNLOCK(chain);
 	DEB(printf("ipfw: installed rule %d, static count now %d\n",
 		rule->rulenum, static_count);)
 	return (0);
 }
 
 /**
  * Remove a static rule (including derived * dynamic rules)
  * and place it on the ``reap list'' for later reclamation.
  * The caller is in charge of clearing rule pointers to avoid
  * dangling pointers.
  * @return a pointer to the next entry.
  * Arguments are not checked, so they better be correct.
  */
 static struct ip_fw *
 remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule, struct ip_fw *prev)
 {
 	struct ip_fw *n;
 	int l = RULESIZE(rule);
 
 	IPFW_LOCK_ASSERT(chain);
 
 	n = rule->next;
 	IPFW_DYN_LOCK();
 	remove_dyn_rule(rule, NULL /* force removal */);
 	IPFW_DYN_UNLOCK();
 	if (prev == NULL)
 		chain->rules = n;
 	else
 		prev->next = n;
 	static_count--;
 	static_len -= l;
 
 	rule->next = chain->reap;
 	chain->reap = rule;
 
 	return n;
 }
 
 /**
  * Reclaim storage associated with a list of rules.  This is
  * typically the list created using remove_rule.
  */
 static void
 reap_rules(struct ip_fw *head)
 {
 	struct ip_fw *rule;
 
 	while ((rule = head) != NULL) {
 		head = head->next;
 		if (DUMMYNET_LOADED)
 			ip_dn_ruledel_ptr(rule);
 		free(rule, M_IPFW);
 	}
 }
 
 /*
  * Remove all rules from a chain (except rules in set RESVD_SET
  * unless kill_default = 1).  The caller is responsible for
  * reclaiming storage for the rules left in chain->reap.
  */
 static void
 free_chain(struct ip_fw_chain *chain, int kill_default)
 {
 	struct ip_fw *prev, *rule;
 
 	IPFW_LOCK_ASSERT(chain);
 
 	flush_rule_ptrs(chain); /* more efficient to do outside the loop */
 	for (prev = NULL, rule = chain->rules; rule ; )
 		if (kill_default || rule->set != RESVD_SET)
 			rule = remove_rule(chain, rule, prev);
 		else {
 			prev = rule;
 			rule = rule->next;
 		}
 }
 
 /**
  * Remove all rules with given number, and also do set manipulation.
  * Assumes chain != NULL && *chain != NULL.
  *
  * The argument is an u_int32_t. The low 16 bit are the rule or set number,
  * the next 8 bits are the new set, the top 8 bits are the command:
  *
  *	0	delete rules with given number
  *	1	delete rules with given set number
  *	2	move rules with given number to new set
  *	3	move rules with given set number to new set
  *	4	swap sets with given numbers
  */
 static int
 del_entry(struct ip_fw_chain *chain, u_int32_t arg)
 {
 	struct ip_fw *prev = NULL, *rule;
 	u_int16_t rulenum;	/* rule or old_set */
 	u_int8_t cmd, new_set;
 
 	rulenum = arg & 0xffff;
 	cmd = (arg >> 24) & 0xff;
 	new_set = (arg >> 16) & 0xff;
 
 	if (cmd > 4)
 		return EINVAL;
 	if (new_set > RESVD_SET)
 		return EINVAL;
 	if (cmd == 0 || cmd == 2) {
 		if (rulenum >= IPFW_DEFAULT_RULE)
 			return EINVAL;
 	} else {
 		if (rulenum > RESVD_SET)	/* old_set */
 			return EINVAL;
 	}
 
 	IPFW_LOCK(chain);
 	rule = chain->rules;
 	chain->reap = NULL;
 	switch (cmd) {
 	case 0:	/* delete rules with given number */
 		/*
 		 * locate first rule to delete
 		 */
 		for (; rule->rulenum < rulenum; prev = rule, rule = rule->next)
 			;
 		if (rule->rulenum != rulenum) {
 			IPFW_UNLOCK(chain);
 			return EINVAL;
 		}
 
 		/*
 		 * flush pointers outside the loop, then delete all matching
 		 * rules. prev remains the same throughout the cycle.
 		 */
 		flush_rule_ptrs(chain);
 		while (rule->rulenum == rulenum)
 			rule = remove_rule(chain, rule, prev);
 		break;
 
 	case 1:	/* delete all rules with given set number */
 		flush_rule_ptrs(chain);
 		rule = chain->rules;
 		while (rule->rulenum < IPFW_DEFAULT_RULE)
 			if (rule->set == rulenum)
 				rule = remove_rule(chain, rule, prev);
 			else {
 				prev = rule;
 				rule = rule->next;
 			}
 		break;
 
 	case 2:	/* move rules with given number to new set */
 		rule = chain->rules;
 		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
 			if (rule->rulenum == rulenum)
 				rule->set = new_set;
 		break;
 
 	case 3: /* move rules with given set number to new set */
 		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
 			if (rule->set == rulenum)
 				rule->set = new_set;
 		break;
 
 	case 4: /* swap two sets */
 		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
 			if (rule->set == rulenum)
 				rule->set = new_set;
 			else if (rule->set == new_set)
 				rule->set = rulenum;
 		break;
 	}
 	/*
 	 * Look for rules to reclaim.  We grab the list before
 	 * releasing the lock then reclaim them w/o the lock to
 	 * avoid a LOR with dummynet.
 	 */
 	rule = chain->reap;
 	chain->reap = NULL;
 	IPFW_UNLOCK(chain);
 	if (rule)
 		reap_rules(rule);
 	return 0;
 }
 
 /*
  * Clear counters for a specific rule.
  * The enclosing "table" is assumed locked.
  */
 static void
 clear_counters(struct ip_fw *rule, int log_only)
 {
 	ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);
 
 	if (log_only == 0) {
 		rule->bcnt = rule->pcnt = 0;
 		rule->timestamp = 0;
 	}
 	if (l->o.opcode == O_LOG)
 		l->log_left = l->max_log;
 }
 
 /**
  * Reset some or all counters on firewall rules.
  * @arg frwl is null to clear all entries, or contains a specific
  * rule number.
  * @arg log_only is 1 if we only want to reset logs, zero otherwise.
  */
 static int
 zero_entry(struct ip_fw_chain *chain, int rulenum, int log_only)
 {
 	struct ip_fw *rule;
 	char *msg;
 
 	IPFW_LOCK(chain);
 	if (rulenum == 0) {
 		norule_counter = 0;
 		for (rule = chain->rules; rule; rule = rule->next)
 			clear_counters(rule, log_only);
 		msg = log_only ? "ipfw: All logging counts reset.\n" :
 				"ipfw: Accounting cleared.\n";
 	} else {
 		int cleared = 0;
 		/*
 		 * We can have multiple rules with the same number, so we
 		 * need to clear them all.
 		 */
 		for (rule = chain->rules; rule; rule = rule->next)
 			if (rule->rulenum == rulenum) {
 				while (rule && rule->rulenum == rulenum) {
 					clear_counters(rule, log_only);
 					rule = rule->next;
 				}
 				cleared = 1;
 				break;
 			}
 		if (!cleared) {	/* we did not find any matching rules */
 			IPFW_UNLOCK(chain);
 			return (EINVAL);
 		}
 		msg = log_only ? "ipfw: Entry %d logging count reset.\n" :
 				"ipfw: Entry %d cleared.\n";
 	}
 	IPFW_UNLOCK(chain);
 
 	if (fw_verbose)
 		log(LOG_SECURITY | LOG_NOTICE, msg, rulenum);
 	return (0);
 }
 
 /*
  * Check validity of the structure before insert.
  * Fortunately rules are simple, so this mostly need to check rule sizes.
  */
 static int
 check_ipfw_struct(struct ip_fw *rule, int size)
 {
 	int l, cmdlen = 0;
 	int have_action=0;
 	ipfw_insn *cmd;
 
 	if (size < sizeof(*rule)) {
 		printf("ipfw: rule too short\n");
 		return (EINVAL);
 	}
 	/* first, check for valid size */
 	l = RULESIZE(rule);
 	if (l != size) {
 		printf("ipfw: size mismatch (have %d want %d)\n", size, l);
 		return (EINVAL);
 	}
 	/*
 	 * Now go for the individual checks. Very simple ones, basically only
 	 * instruction sizes.
 	 */
 	for (l = rule->cmd_len, cmd = rule->cmd ;
 			l > 0 ; l -= cmdlen, cmd += cmdlen) {
 		cmdlen = F_LEN(cmd);
 		if (cmdlen > l) {
 			printf("ipfw: opcode %d size truncated\n",
 			    cmd->opcode);
 			return EINVAL;
 		}
 		DEB(printf("ipfw: opcode %d\n", cmd->opcode);)
 		switch (cmd->opcode) {
 		case O_PROBE_STATE:
 		case O_KEEP_STATE:
 		case O_PROTO:
 		case O_IP_SRC_ME:
 		case O_IP_DST_ME:
 		case O_LAYER2:
 		case O_IN:
 		case O_FRAG:
 		case O_IPOPT:
 		case O_IPTOS:
 		case O_IPPRECEDENCE:
 		case O_IPVER:
 		case O_TCPWIN:
 		case O_TCPFLAGS:
 		case O_TCPOPTS:
 		case O_ESTAB:
 		case O_VERREVPATH:
 		case O_IPSEC:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 			break;
 
 		case O_UID:
 		case O_GID:
 		case O_IP_SRC:
 		case O_IP_DST:
 		case O_TCPSEQ:
 		case O_TCPACK:
 		case O_PROB:
 		case O_ICMPTYPE:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
 				goto bad_size;
 			break;
 
 		case O_LIMIT:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
 				goto bad_size;
 			break;
 
 		case O_LOG:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
 				goto bad_size;
 
 			((ipfw_insn_log *)cmd)->log_left =
 			    ((ipfw_insn_log *)cmd)->max_log;
 
 			break;
 
 		case O_IP_SRC_MASK:
 		case O_IP_DST_MASK:
 			/* only odd command lengths */
 			if ( !(cmdlen & 1) || cmdlen > 31)
 				goto bad_size;
 			break;
 
 		case O_IP_SRC_SET:
 		case O_IP_DST_SET:
 			if (cmd->arg1 == 0 || cmd->arg1 > 256) {
 				printf("ipfw: invalid set size %d\n",
 					cmd->arg1);
 				return EINVAL;
 			}
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
 			    (cmd->arg1+31)/32 )
 				goto bad_size;
 			break;
 
 		case O_MACADDR2:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
 				goto bad_size;
 			break;
 
 		case O_NOP:
 		case O_IPID:
 		case O_IPTTL:
 		case O_IPLEN:
 			if (cmdlen < 1 || cmdlen > 31)
 				goto bad_size;
 			break;
 
 		case O_MAC_TYPE:
 		case O_IP_SRCPORT:
 		case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
 			if (cmdlen < 2 || cmdlen > 31)
 				goto bad_size;
 			break;
 
 		case O_RECV:
 		case O_XMIT:
 		case O_VIA:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
 				goto bad_size;
 			break;
 
 		case O_PIPE:
 		case O_QUEUE:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe))
 				goto bad_size;
 			goto check_action;
 
 		case O_FORWARD_IP:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_sa))
 				goto bad_size;
 			goto check_action;
 
 		case O_FORWARD_MAC: /* XXX not implemented yet */
 		case O_CHECK_STATE:
 		case O_COUNT:
 		case O_ACCEPT:
 		case O_DENY:
 		case O_REJECT:
 		case O_SKIPTO:
 		case O_DIVERT:
 		case O_TEE:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn))
 				goto bad_size;
 check_action:
 			if (have_action) {
 				printf("ipfw: opcode %d, multiple actions"
 					" not allowed\n",
 					cmd->opcode);
 				return EINVAL;
 			}
 			have_action = 1;
 			if (l != cmdlen) {
 				printf("ipfw: opcode %d, action must be"
 					" last opcode\n",
 					cmd->opcode);
 				return EINVAL;
 			}
 			break;
 		default:
 			printf("ipfw: opcode %d, unknown opcode\n",
 				cmd->opcode);
 			return EINVAL;
 		}
 	}
 	if (have_action == 0) {
 		printf("ipfw: missing action\n");
 		return EINVAL;
 	}
 	return 0;
 
 bad_size:
 	printf("ipfw: opcode %d size %d wrong\n",
 		cmd->opcode, cmdlen);
 	return EINVAL;
 }
 
 /*
  * Copy the static and dynamic rules to the supplied buffer
  * and return the amount of space actually used.
  */
 static size_t
 ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
 {
 	char *bp = buf;
 	char *ep = bp + space;
 	struct ip_fw *rule;
 	int i;
 
 	/* XXX this can take a long time and locking will block packet flow */
 	IPFW_LOCK(chain);
 	for (rule = chain->rules; rule ; rule = rule->next) {
 		/*
 		 * Verify the entry fits in the buffer in case the
 		 * rules changed between calculating buffer space and
 		 * now.  This would be better done using a generation
 		 * number but should suffice for now.
 		 */
 		i = RULESIZE(rule);
 		if (bp + i <= ep) {
 			bcopy(rule, bp, i);
 			bcopy(&set_disable, &(((struct ip_fw *)bp)->next_rule),
 			    sizeof(set_disable));
 			bp += i;
 		}
 	}
 	IPFW_UNLOCK(chain);
 	if (ipfw_dyn_v) {
 		ipfw_dyn_rule *p, *last = NULL;
 
 		IPFW_DYN_LOCK();
 		for (i = 0 ; i < curr_dyn_buckets; i++)
 			for (p = ipfw_dyn_v[i] ; p != NULL; p = p->next) {
 				if (bp + sizeof *p <= ep) {
 					ipfw_dyn_rule *dst =
 						(ipfw_dyn_rule *)bp;
 					bcopy(p, dst, sizeof *p);
 					bcopy(&(p->rule->rulenum), &(dst->rule),
 					    sizeof(p->rule->rulenum));
 					/*
 					 * store a non-null value in "next".
 					 * The userland code will interpret a
 					 * NULL here as a marker
 					 * for the last dynamic rule.
 					 */
 					bcopy(&dst, &dst->next, sizeof(dst));
 					last = dst;
 					dst->expire =
 					    TIME_LEQ(dst->expire, time_second) ?
 						0 : dst->expire - time_second ;
 					bp += sizeof(ipfw_dyn_rule);
 				}
 			}
 		IPFW_DYN_UNLOCK();
 		if (last != NULL) /* mark last dynamic rule */
 			bzero(&last->next, sizeof(last));
 	}
 	return (bp - (char *)buf);
 }
 
 
 /**
  * {set|get}sockopt parser.
  */
 static int
 ipfw_ctl(struct sockopt *sopt)
 {
 #define	RULE_MAXSIZE	(256*sizeof(u_int32_t))
 	int error, rule_num;
 	size_t size;
 	struct ip_fw *buf, *rule;
 	u_int32_t rulenum[2];
 
 	/*
 	 * Disallow modifications in really-really secure mode, but still allow
 	 * the logging counters to be reset.
 	 */
 	if (sopt->sopt_name == IP_FW_ADD ||
 	    (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) {
 #if __FreeBSD_version >= 500034
 		error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
 		if (error)
 			return (error);
 #else /* FreeBSD 4.x */
 		if (securelevel >= 3)
 			return (EPERM);
 #endif
 	}
 
 	error = 0;
 
 	switch (sopt->sopt_name) {
 	case IP_FW_GET:
 		/*
 		 * pass up a copy of the current rules. Static rules
 		 * come first (the last of which has number IPFW_DEFAULT_RULE),
 		 * followed by a possibly empty list of dynamic rule.
 		 * The last dynamic rule has NULL in the "next" field.
 		 *
 		 * Note that the calculated size is used to bound the
 		 * amount of data returned to the user.  The rule set may
 		 * change between calculating the size and returning the
 		 * data in which case we'll just return what fits.
 		 */
 		size = static_len;	/* size of static rules */
 		if (ipfw_dyn_v)		/* add size of dyn.rules */
 			size += (dyn_count * sizeof(ipfw_dyn_rule));
 
 		/*
 		 * XXX todo: if the user passes a short length just to know
 		 * how much room is needed, do not bother filling up the
 		 * buffer, just jump to the sooptcopyout.
 		 */
 		buf = malloc(size, M_TEMP, M_WAITOK);
 		error = sooptcopyout(sopt, buf,
 				ipfw_getrules(&layer3_chain, buf, size));
 		free(buf, M_TEMP);
 		break;
 
 	case IP_FW_FLUSH:
 		/*
 		 * Normally we cannot release the lock on each iteration.
 		 * We could do it here only because we start from the head all
 		 * the times so there is no risk of missing some entries.
 		 * On the other hand, the risk is that we end up with
 		 * a very inconsistent ruleset, so better keep the lock
 		 * around the whole cycle.
 		 *
 		 * XXX this code can be improved by resetting the head of
 		 * the list to point to the default rule, and then freeing
 		 * the old list without the need for a lock.
 		 */
 
 		IPFW_LOCK(&layer3_chain);
 		layer3_chain.reap = NULL;
 		free_chain(&layer3_chain, 0 /* keep default rule */);
 		rule = layer3_chain.reap, layer3_chain.reap = NULL;
 		IPFW_UNLOCK(&layer3_chain);
 		if (layer3_chain.reap != NULL)
 			reap_rules(rule);
 		break;
 
 	case IP_FW_ADD:
 		rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK);
 		error = sooptcopyin(sopt, rule, RULE_MAXSIZE,
 			sizeof(struct ip_fw) );
 		if (error == 0)
 			error = check_ipfw_struct(rule, sopt->sopt_valsize);
 		if (error == 0) {
 			error = add_rule(&layer3_chain, rule);
 			size = RULESIZE(rule);
 			if (!error && sopt->sopt_dir == SOPT_GET)
 				error = sooptcopyout(sopt, rule, size);
 		}
 		free(rule, M_TEMP);
 		break;
 
 	case IP_FW_DEL:
 		/*
 		 * IP_FW_DEL is used for deleting single rules or sets,
 		 * and (ab)used to atomically manipulate sets. Argument size
 		 * is used to distinguish between the two:
 		 *    sizeof(u_int32_t)
 		 *	delete single rule or set of rules,
 		 *	or reassign rules (or sets) to a different set.
 		 *    2*sizeof(u_int32_t)
 		 *	atomic disable/enable sets.
 		 *	first u_int32_t contains sets to be disabled,
 		 *	second u_int32_t contains sets to be enabled.
 		 */
 		error = sooptcopyin(sopt, rulenum,
 			2*sizeof(u_int32_t), sizeof(u_int32_t));
 		if (error)
 			break;
 		size = sopt->sopt_valsize;
 		if (size == sizeof(u_int32_t))	/* delete or reassign */
 			error = del_entry(&layer3_chain, rulenum[0]);
 		else if (size == 2*sizeof(u_int32_t)) /* set enable/disable */
 			set_disable =
 			    (set_disable | rulenum[0]) & ~rulenum[1] &
 			    ~(1<<RESVD_SET); /* set RESVD_SET always enabled */
 		else
 			error = EINVAL;
 		break;
 
 	case IP_FW_ZERO:
 	case IP_FW_RESETLOG: /* argument is an int, the rule number */
 		rule_num = 0;
 		if (sopt->sopt_val != 0) {
 		    error = sooptcopyin(sopt, &rule_num,
 			    sizeof(int), sizeof(int));
 		    if (error)
 			break;
 		}
 		error = zero_entry(&layer3_chain, rule_num,
 			sopt->sopt_name == IP_FW_RESETLOG);
 		break;
 
 	default:
 		printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name);
 		error = EINVAL;
 	}
 
 	return (error);
 #undef RULE_MAXSIZE
 }
 
 /**
  * dummynet needs a reference to the default rule, because rules can be
  * deleted while packets hold a reference to them. When this happens,
  * dummynet changes the reference to the default rule (it could well be a
  * NULL pointer, but this way we do not need to check for the special
  * case, plus here he have info on the default behaviour).
  */
 struct ip_fw *ip_fw_default_rule;
 
 /*
  * This procedure is only used to handle keepalives. It is invoked
  * every dyn_keepalive_period
  */
 static void
 ipfw_tick(void * __unused unused)
 {
 	int i;
 	ipfw_dyn_rule *q;
 
 	if (dyn_keepalive == 0 || ipfw_dyn_v == NULL || dyn_count == 0)
 		goto done;
 
 	IPFW_DYN_LOCK();
 	for (i = 0 ; i < curr_dyn_buckets ; i++) {
 		for (q = ipfw_dyn_v[i] ; q ; q = q->next ) {
 			if (q->dyn_type == O_LIMIT_PARENT)
 				continue;
 			if (q->id.proto != IPPROTO_TCP)
 				continue;
 			if ( (q->state & BOTH_SYN) != BOTH_SYN)
 				continue;
 			if (TIME_LEQ( time_second+dyn_keepalive_interval,
 			    q->expire))
 				continue;	/* too early */
 			if (TIME_LEQ(q->expire, time_second))
 				continue;	/* too late, rule expired */
 
 			send_pkt(&(q->id), q->ack_rev - 1, q->ack_fwd, TH_SYN);
 			send_pkt(&(q->id), q->ack_fwd - 1, q->ack_rev, 0);
 		}
 	}
 	IPFW_DYN_UNLOCK();
 done:
 	callout_reset(&ipfw_timeout, dyn_keepalive_period*hz, ipfw_tick, NULL);
 }
 
 static int
 ipfw_init(void)
 {
 	struct ip_fw default_rule;
 	int error;
 
 	layer3_chain.rules = NULL;
 	IPFW_LOCK_INIT(&layer3_chain);
 	IPFW_DYN_LOCK_INIT();
 	callout_init(&ipfw_timeout, debug_mpsafenet ? CALLOUT_MPSAFE : 0);
 
 	bzero(&default_rule, sizeof default_rule);
 
 	default_rule.act_ofs = 0;
 	default_rule.rulenum = IPFW_DEFAULT_RULE;
 	default_rule.cmd_len = 1;
 	default_rule.set = RESVD_SET;
 
 	default_rule.cmd[0].len = 1;
 	default_rule.cmd[0].opcode =
 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
 				1 ? O_ACCEPT :
 #endif
 				O_DENY;
 
 	error = add_rule(&layer3_chain, &default_rule);
 	if (error != 0) {
 		printf("ipfw2: error %u initializing default rule "
 			"(support disabled)\n", error);
 		IPFW_DYN_LOCK_DESTROY();
 		IPFW_LOCK_DESTROY(&layer3_chain);
 		return (error);
 	}
 
 	ip_fw_default_rule = layer3_chain.rules;
 	printf("ipfw2 initialized, divert %s, "
 		"rule-based forwarding enabled, default to %s, logging ",
 #ifdef IPDIVERT
 		"enabled",
 #else
 		"disabled",
 #endif
 		default_rule.cmd[0].opcode == O_ACCEPT ? "accept" : "deny");
 
 #ifdef IPFIREWALL_VERBOSE
 	fw_verbose = 1;
 #endif
 #ifdef IPFIREWALL_VERBOSE_LIMIT
 	verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
 #endif
 	if (fw_verbose == 0)
 		printf("disabled\n");
 	else if (verbose_limit == 0)
 		printf("unlimited\n");
 	else
 		printf("limited to %d packets/entry by default\n",
 		    verbose_limit);
 
 	ip_fw_chk_ptr = ipfw_chk;
 	ip_fw_ctl_ptr = ipfw_ctl;
 	callout_reset(&ipfw_timeout, hz, ipfw_tick, NULL);
 
 	return (0);
 }
 
 static void
 ipfw_destroy(void)
 {
 	struct ip_fw *reap;
 
 	IPFW_LOCK(&layer3_chain);
 	callout_stop(&ipfw_timeout);
 	ip_fw_chk_ptr = NULL;
 	ip_fw_ctl_ptr = NULL;
 	layer3_chain.reap = NULL;
 	free_chain(&layer3_chain, 1 /* kill default rule */);
 	reap = layer3_chain.reap, layer3_chain.reap = NULL;
 	IPFW_UNLOCK(&layer3_chain);
 	if (reap != NULL)
 		reap_rules(reap);
 
 	IPFW_DYN_LOCK_DESTROY();
 	IPFW_LOCK_DESTROY(&layer3_chain);
 	printf("IP firewall unloaded\n");
 }
 
 static int
 ipfw_modevent(module_t mod, int type, void *unused)
 {
 	int err = 0;
 
 	switch (type) {
 	case MOD_LOAD:
 		if (IPFW_LOADED) {
 			printf("IP firewall already loaded\n");
 			err = EEXIST;
 		} else {
 			err = ipfw_init();
 		}
 		break;
 
 	case MOD_UNLOAD:
 		ipfw_destroy();
 		err = 0;
 		break;
 	default:
 		break;
 	}
 	return err;
 }
 
 static moduledata_t ipfwmod = {
 	"ipfw",
 	ipfw_modevent,
 	0
 };
 DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(ipfw, 1);
 #endif /* IPFW2 */