Index: head/sbin/ifconfig/ifconfig.8
===================================================================
--- head/sbin/ifconfig/ifconfig.8	(revision 366932)
+++ head/sbin/ifconfig/ifconfig.8	(revision 366933)
@@ -1,3121 +1,3127 @@
 .\" Copyright (c) 1983, 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"     From: @(#)ifconfig.8	8.3 (Berkeley) 1/5/94
 .\" $FreeBSD$
 .\"
-.Dd September 17, 2020
+.Dd October 21, 2020
 .Dt IFCONFIG 8
 .Os
 .Sh NAME
 .Nm ifconfig
 .Nd configure network interface parameters
 .Sh SYNOPSIS
 .Nm
 .Op Fl f Ar type:format Ns Op Ar ,type:format
 .Op Fl L
 .Op Fl k
 .Op Fl m
 .Op Fl n
 .Ar interface
 .Op Cm create
 .Ar address_family
 .Oo
 .Ar address
 .Op Ar dest_address
 .Oc
 .Op Ar parameters
 .Nm
 .Ar interface
 .Cm destroy
 .Nm
 .Fl a
 .Op Fl L
 .Op Fl d
 .Op Fl [gG] Ar groupname
 .Op Fl m
 .Op Fl u
 .Op Fl v
 .Op Ar address_family
 .Nm
 .Fl l
 .Op Fl d
 .Op Fl u
 .Op Ar address_family
 .Nm
 .Op Fl L
 .Op Fl d
 .Op Fl k
 .Op Fl m
 .Op Fl u
 .Op Fl v
 .Op Fl C
 .Nm
 .Op Fl g Ar groupname
 .Sh DESCRIPTION
 The
 .Nm
 utility is used to assign an address
 to a network interface and/or configure
 network interface parameters.
 The
 .Nm
 utility must be used at boot time to define the network address
 of each interface present on a machine; it may also be used at
 a later time to redefine an interface's address
 or other operating parameters.
 .Pp
 The following options are available:
 .Bl -tag -width indent
 .It Ar address
 For the
 .Tn DARPA Ns -Internet
 family,
 the address is either a host name present in the host name data
 base,
 .Xr hosts 5 ,
 or a
 .Tn DARPA
 Internet address expressed in the Internet standard
 .Dq dot notation .
 .Pp
 It is also possible to use the CIDR notation (also known as the
 slash notation) to include the netmask.
 That is, one can specify an address like
 .Li 192.168.0.1/16 .
 .Pp
 For the
 .Dq inet6
 family, it is also possible to specify the prefix length using the slash
 notation, like
 .Li ::1/128 .
 See the
 .Cm prefixlen
 parameter below for more information.
 .\" For the Xerox Network Systems(tm) family,
 .\" addresses are
 .\" .Ar net:a.b.c.d.e.f ,
 .\" where
 .\" .Ar net
 .\" is the assigned network number (in decimal),
 .\" and each of the six bytes of the host number,
 .\" .Ar a
 .\" through
 .\" .Ar f ,
 .\" are specified in hexadecimal.
 .\" The host number may be omitted on IEEE 802 protocol
 .\" (Ethernet, FDDI, and Token Ring) interfaces,
 .\" which use the hardware physical address,
 .\" and on interfaces other than the first.
 .\" For the
 .\" .Tn ISO
 .\" family, addresses are specified as a long hexadecimal string,
 .\" as in the Xerox family.
 .\" However, two consecutive dots imply a zero
 .\" byte, and the dots are optional, if the user wishes to (carefully)
 .\" count out long strings of digits in network byte order.
 .Pp
 The link-level
 .Pq Dq link
 address
 is specified as a series of colon-separated hex digits.
 This can be used to, for example,
 set a new MAC address on an Ethernet interface, though the
 mechanism used is not Ethernet specific.
 Use the
 .Pq Dq random
 keyword to set a randomly generated MAC address.
 A randomly-generated MAC address might be the same as one already in use
 in the network.
 Such duplications are extremely unlikely.
 If the interface is already
 up when this option is used, it will be briefly brought down and
 then brought back up again in order to ensure that the receive
 filter in the underlying Ethernet hardware is properly reprogrammed.
 .It Ar address_family
 Specify the
 address family
 which affects interpretation of the remaining parameters.
 Since an interface can receive transmissions in differing protocols
 with different naming schemes, specifying the address family is recommended.
 The address or protocol families currently
 supported are
 .Dq inet ,
 .Dq inet6 ,
 and
 .Dq link .
 The default if available is
 .Dq inet
 or otherwise
 .Dq link .
 .Dq ether
 and
 .Dq lladdr
 are synonyms for
 .Dq link .
 When using the
 .Fl l
 flag, the
 .Dq ether
 address family has special meaning and is no longer synonymous with
 .Dq link
 or
 .Dq lladdr .
 Specifying
 .Fl l Dq ether
 will list only Ethernet interfaces, excluding all other interface types,
 including the loopback interface.
 .It Ar dest_address
 Specify the address of the correspondent on the other end
 of a point to point link.
 .It Ar interface
 This
 parameter is a string of the form
 .Dq name unit ,
 for example,
 .Dq Li em0 .
 .It Ar groupname
 List the interfaces in the given group.
 .El
 .Pp
 The output format of
 .Nm
 can be controlled using the
 .Fl f
 flag or the
 .Ev IFCONFIG_FORMAT
 environment variable.
 The format is specified as a comma separated list of
 .Sy type:format
 pairs.
 See the
 .Sx EXAMPLES
 section for more information.
 The
 .Sy types
 and their associated
 .Sy format
 strings are:
 .Bl -tag -width ether
 .It Sy addr
 Adjust the display of inet and inet6 addresses
 .Bl -tag -width default
 .It Sy default
 Display inet and inet6 addresses in the default format,
 .Sy numeric
 .It Sy fqdn
 Display inet and inet6 addresses as fully qualified domain names
 .Pq FQDN
 .It Sy host
 Display inet and inet6 addresses as unqualified hostnames
 .It Sy numeric
 Display inet and inet6 addresses in numeric format
 .El
 .It Sy ether
 Adjust the display of link-level ethernet (MAC) addresses
 .Bl -tag -width default
 .It Sy colon
 Separate address segments with a colon
 .It Sy dash
 Separate address segments with a dash
 .It Sy default
 Display ethernet addresses in the default format,
 .Sy colon
 .El
 .It Sy inet
 Adjust the display of inet address subnet masks:
 .Bl -tag -width default
 .It Sy cidr
 Display subnet masks in CIDR notation, for example:
 .br
 10.0.0.0/8 or 203.0.113.224/26
 .It Sy default
 Display subnet masks in the default format,
 .Sy hex
 .It Sy dotted
 Display subnet masks in dotted quad notation, for example:
 .br
 255.255.0.0 or 255.255.255.192
 .It Sy hex
 Display subnet masks in hexadecimal, for example:
 .br
 0xffff0000 or 0xffffffc0
 .El
 .It Sy inet6
 Adjust the display of inet6 address prefixes (subnet masks):
 .Bl -tag -width default
 .It Sy cidr
 Display subnet prefix in CIDR notation, for example:
 .br
 ::1/128 or fe80::1%lo0/64
 .It Sy default
 Display subnet prefix in the default format,
 .Sy numeric
 .It Sy numeric
 Display subnet prefix in integer format, for example:
 .br
 prefixlen 64
 .El
 .El
 .Pp
 The following parameters may be set with
 .Nm :
 .Bl -tag -width indent
 .It Cm add
 Another name for the
 .Cm alias
 parameter.
 Introduced for compatibility
 with
 .Bsx .
 .It Cm alias
 Establish an additional network address for this interface.
 This is sometimes useful when changing network numbers, and
 one wishes to accept packets addressed to the old interface.
 If the address is on the same subnet as the first network address
 for this interface, a non-conflicting netmask must be given.
 Usually
 .Li 0xffffffff
 is most appropriate.
 .It Fl alias
 Remove the network address specified.
 This would be used if you incorrectly specified an alias, or it
 was no longer needed.
 If you have incorrectly set an NS address having the side effect
 of specifying the host portion, removing all NS addresses will
 allow you to respecify the host portion.
 .It Cm anycast
 (Inet6 only.)
 Specify that the address configured is an anycast address.
 Based on the current specification,
 only routers may configure anycast addresses.
 An anycast address will not be used as the source address of any outgoing
 IPv6 packets.
 .It Cm arp
 Enable the use of the Address Resolution Protocol
 .Pq Xr arp 4
 in mapping
 between network level addresses and link level addresses (default).
 This is currently implemented for mapping between
 .Tn DARPA
 Internet
 addresses and
 .Tn IEEE
 802 48-bit MAC addresses (Ethernet, FDDI, and Token Ring addresses).
 .It Fl arp
 Disable the use of the Address Resolution Protocol
 .Pq Xr arp 4 .
 .It Cm staticarp
 If the Address Resolution Protocol is enabled,
 the host will only reply to requests for its addresses,
 and will never send any requests.
 .It Fl staticarp
 If the Address Resolution Protocol is enabled,
 the host will perform normally,
 sending out requests and listening for replies.
 .It Cm broadcast
 (Inet only.)
 Specify the address to use to represent broadcasts to the
 network.
 The default broadcast address is the address with a host part of all 1's.
 .It Cm debug
 Enable driver dependent debugging code; usually, this turns on
 extra console error logging.
 .It Fl debug
 Disable driver dependent debugging code.
 .It Cm promisc
 Put interface into permanently promiscuous mode.
 .It Fl promisc
 Disable permanently promiscuous mode.
 .It Cm delete
 Another name for the
 .Fl alias
 parameter.
 .It Cm description Ar value , Cm descr Ar value
 Specify a description of the interface.
 This can be used to label interfaces in situations where they may
 otherwise be difficult to distinguish.
 .It Cm -description , Cm -descr
 Clear the interface description.
 .It Cm down
 Mark an interface
 .Dq down .
 When an interface is marked
 .Dq down ,
 the system will not attempt to
 transmit messages through that interface.
 If possible, the interface will be reset to disable reception as well.
 This action does not automatically disable routes using the interface.
 .It Cm group Ar groupname
 Assign the interface to a
 .Dq group .
 Any interface can be in multiple groups.
 .Pp
 Cloned interfaces are members of their interface family group by default.
 For example, a PPP interface such as
 .Em ppp0
 is a member of the PPP interface family group,
 .Em ppp .
 .\" The interface(s) the default route(s) point to are members of the
 .\" .Em egress
 .\" interface group.
 .It Cm -group Ar groupname
 Remove the interface from the given
 .Dq group .
 .It Cm eui64
 (Inet6 only.)
 Fill interface index
 (lowermost 64 bits of an IPv6 address)
 automatically.
 .It Cm fib Ar fib_number
 Specify interface FIB.
 A FIB
 .Ar fib_number
 is assigned to all frames or packets received on that interface.
 The FIB is not inherited, e.g., vlans or other sub-interfaces will use
 the default FIB (0) irrespective of the parent interface's FIB.
 The kernel needs to be tuned to support more than the default FIB
 using the
 .Va ROUTETABLES
 kernel configuration option, or the
 .Va net.fibs
 tunable.
 .It Cm tunnelfib Ar fib_number
 Specify tunnel FIB.
 A FIB
 .Ar fib_number
 is assigned to all packets encapsulated by tunnel interface, e.g.,
 .Xr gif 4
 and
 .Xr gre 4 .
 .It Cm maclabel Ar label
 If Mandatory Access Control support is enabled in the kernel,
 set the MAC label to
 .Ar label .
 .\" (see
 .\" .Xr maclabel 7 ) .
 .It Cm media Ar type
 If the driver supports the media selection system, set the media type
 of the interface to
 .Ar type .
 Some interfaces support the mutually exclusive use of one of several
 different physical media connectors.
 For example, a 10Mbit/s Ethernet
 interface might support the use of either
 .Tn AUI
 or twisted pair connectors.
 Setting the media type to
 .Cm 10base5/AUI
 would change the currently active connector to the AUI port.
 Setting it to
 .Cm 10baseT/UTP
 would activate twisted pair.
 Refer to the interfaces' driver
 specific documentation or man page for a complete list of the
 available types.
 .It Cm mediaopt Ar opts
 If the driver supports the media selection system, set the specified
 media options on the interface.
 The
 .Ar opts
 argument
 is a comma delimited list of options to apply to the interface.
 Refer to the interfaces' driver specific man page for a complete
 list of available options.
 .It Fl mediaopt Ar opts
 If the driver supports the media selection system, disable the
 specified media options on the interface.
 .It Cm mode Ar mode
 If the driver supports the media selection system, set the specified
 operating mode on the interface to
 .Ar mode .
 For IEEE 802.11 wireless interfaces that support multiple operating modes
 this directive is used to select between 802.11a
 .Pq Cm 11a ,
 802.11b
 .Pq Cm 11b ,
 and 802.11g
 .Pq Cm 11g
 operating modes.
 .It Cm txrtlmt
 Set if the driver supports TX rate limiting.
 .It Cm inst Ar minst , Cm instance Ar minst
 Set the media instance to
 .Ar minst .
 This is useful for devices which have multiple physical layer interfaces
 .Pq PHYs .
 .It Cm name Ar name
 Set the interface name to
 .Ar name .
 .It Cm rxcsum , txcsum , rxcsum6 , txcsum6
 If the driver supports user-configurable checksum offloading,
 enable receive (or transmit) checksum offloading on the interface.
 The feature can be turned on selectively per protocol family.
 Use
 .Cm rxcsum6 , txcsum6
 for
 .Xr ip6 4
 or
 .Cm rxcsum , txcsum
 otherwise.
 Some drivers may not be able to enable these flags independently
 of each other, so setting one may also set the other.
 The driver will offload as much checksum work as it can reliably
 support; the exact level of offloading varies between drivers.
 .It Fl rxcsum , txcsum , rxcsum6 , txcsum6
 If the driver supports user-configurable checksum offloading,
 disable receive (or transmit) checksum offloading on the interface.
 The feature can be turned off selectively per protocol family.
 Use
 .Fl rxcsum6 , txcsum6
 for
 .Xr ip6 4
 or
 .Fl rxcsum , txcsum
 otherwise.
 These settings may not always be independent of each other.
 .It Cm tso
 If the driver supports
 .Xr tcp 4
 segmentation offloading, enable TSO on the interface.
 Some drivers may not be able to support TSO for
 .Xr ip 4
 and
 .Xr ip6 4
 packets, so they may enable only one of them.
 .It Fl tso
 If the driver supports
 .Xr tcp 4
 segmentation offloading, disable TSO on the interface.
 It will always disable TSO for
 .Xr ip 4
 and
 .Xr ip6 4 .
 .It Cm tso6 , tso4
 If the driver supports
 .Xr tcp 4
 segmentation offloading for
 .Xr ip6 4
 or
 .Xr ip 4
 use one of these to selectively enable it only for one protocol family.
 .It Fl tso6 , tso4
 If the driver supports
 .Xr tcp 4
 segmentation offloading for
 .Xr ip6 4
 or
 .Xr ip 4
 use one of these to selectively disable it only for one protocol family.
 .It Cm lro
 If the driver supports
 .Xr tcp 4
 large receive offloading, enable LRO on the interface.
 .It Fl lro
 If the driver supports
 .Xr tcp 4
 large receive offloading, disable LRO on the interface.
 .It Cm txtls
 Transmit TLS offload encrypts Transport Layer Security (TLS) records and
 segments the encrypted record into one or more
 .Xr tcp 4
 segments over either
 .Xr ip 4
 or
 .Xr ip6 4 .
 If the driver supports transmit TLS offload,
 enable transmit TLS offload on the interface.
 Some drivers may not be able to support transmit TLS offload for
 .Xr ip 4
 and
 .Xr ip6 4
 packets, so they may enable only one of them.
 .It Fl txtls
 If the driver supports transmit TLS offload,
 disable transmit TLS offload on the interface.
 It will always disable TLS for
 .Xr ip 4
 and
 .Xr ip6 4 .
 .It Cm nomap
 If the driver supports unmapped network buffers,
 enable them on the interface.
 .It Fl nomap
 If the driver supports unmapped network buffers,
 disable them on the interface.
 .It Cm wol , wol_ucast , wol_mcast , wol_magic
 Enable Wake On Lan (WOL) support, if available.
 WOL is a facility whereby a machine in a low power state may be woken
 in response to a received packet.
 There are three types of packets that may wake a system:
 ucast (directed solely to the machine's mac address),
 mcast (directed to a broadcast or multicast address),
 or
 magic (unicast or multicast frames with ``magic contents'').
 Not all devices support WOL; those that do indicate the mechanisms
 they support in their capabilities.
 .Cm wol
 is a synonym for enabling all available WOL mechanisms.
 To disable WOL use
 .Fl wol .
 .It Cm vlanmtu , vlanhwtag , vlanhwfilter , vlanhwcsum , vlanhwtso
 If the driver offers user-configurable VLAN support, enable
 reception of extended frames, tag processing in hardware,
 frame filtering in hardware, checksum offloading, or TSO on VLAN,
 respectively.
 Note that this must be configured on a physical interface associated with
 .Xr vlan 4 ,
 not on a
 .Xr vlan 4
 interface itself.
 .It Fl vlanmtu , vlanhwtag , vlanhwfilter , vlanhwtso
 If the driver offers user-configurable VLAN support, disable
 reception of extended frames, tag processing in hardware,
 frame filtering in hardware, or TSO on VLAN,
 respectively.
 .It Cm vxlanhwcsum , vxlanhwtso
 If the driver offers user-configurable VXLAN support, enable inner checksum
 offloading (receive and transmit) or TSO on VXLAN, respectively.
 Note that this must be configured on a physical interface associated with
 .Xr vxlan 4 ,
 not on a
 .Xr vxlan 4
 interface itself.
 The physical interface is either the interface specified as the vxlandev
 or the interface hosting the vxlanlocal address.
 The driver will offload as much checksum work and TSO as it can reliably
 support; the exact level of offloading may vary between drivers.
 .It Fl vxlanhwcsum , vxlanhwtso
 If the driver offers user-configurable VXLAN support, disable checksum
 offloading (receive and transmit) or TSO on VXLAN, respectively.
 .It Cm vnet Ar jail
 Move the interface to the
 .Xr jail 8 ,
 specified by name or JID.
 If the jail has a virtual network stack, the interface will disappear
 from the current environment and become visible to the jail.
 .It Fl vnet Ar jail
 Reclaim the interface from the
 .Xr jail 8 ,
 specified by name or JID.
 If the jail has a virtual network stack, the interface will disappear
 from the jail, and become visible to the current network environment.
 .It Cm polling
 Turn on
 .Xr polling 4
 feature and disable interrupts on the interface, if the driver supports
 this mode.
 .It Fl polling
 Turn off
 .Xr polling 4
 feature and enable interrupt mode on the interface.
 .It Cm create
 Create the specified network pseudo-device.
 If the interface is given without a unit number, try to create a new
 device with an arbitrary unit number.
 If creation of an arbitrary device is successful, the new device name is
 printed to standard output unless the interface is renamed or destroyed
 in the same
 .Nm
 invocation.
 .It Cm destroy
 Destroy the specified network pseudo-device.
 .It Cm plumb
 Another name for the
 .Cm create
 parameter.
 Included for
 .Tn Solaris
 compatibility.
 .It Cm unplumb
 Another name for the
 .Cm destroy
 parameter.
 Included for
 .Tn Solaris
 compatibility.
 .It Cm metric Ar n
 Set the routing metric of the interface to
 .Ar n ,
 default 0.
 The routing metric is used by the routing protocol
 .Pq Xr routed 8 .
 Higher metrics have the effect of making a route
 less favorable; metrics are counted as additional hops
 to the destination network or host.
 .It Cm mtu Ar n
 Set the maximum transmission unit of the interface to
 .Ar n ,
 default is interface specific.
 The MTU is used to limit the size of packets that are transmitted on an
 interface.
 Not all interfaces support setting the MTU, and some interfaces have
 range restrictions.
 .It Cm netmask Ar mask
 .\" (Inet and ISO.)
 (Inet only.)
 Specify how much of the address to reserve for subdividing
 networks into sub-networks.
 The mask includes the network part of the local address
 and the subnet part, which is taken from the host field of the address.
 The mask can be specified as a single hexadecimal number
 with a leading
 .Ql 0x ,
 with a dot-notation Internet address,
 or with a pseudo-network name listed in the network table
 .Xr networks 5 .
 The mask contains 1's for the bit positions in the 32-bit address
 which are to be used for the network and subnet parts,
 and 0's for the host part.
 The mask should contain at least the standard network portion,
 and the subnet field should be contiguous with the network
 portion.
 .Pp
 The netmask can also be specified in CIDR notation after the address.
 See the
 .Ar address
 option above for more information.
 .It Cm prefixlen Ar len
 (Inet6 only.)
 Specify that
 .Ar len
 bits are reserved for subdividing networks into sub-networks.
 The
 .Ar len
 must be an integer, and for syntactical reasons it must be between 0 and 128.
 It is almost always 64 under the current IPv6 assignment rule.
 If the parameter is omitted, 64 is used.
 .Pp
 The prefix can also be specified using the slash notation after the address.
 See the
 .Ar address
 option above for more information.
 .It Cm remove
 Another name for the
 .Fl alias
 parameter.
 Introduced for compatibility
 with
 .Bsx .
 .Sm off
 .It Cm link Op Cm 0 No - Cm 2
 .Sm on
 Enable special processing of the link level of the interface.
 These three options are interface specific in actual effect, however,
 they are in general used to select special modes of operation.
 An example
 of this is to enable SLIP compression, or to select the connector type
 for some Ethernet cards.
 Refer to the man page for the specific driver
 for more information.
 .Sm off
 .It Fl link Op Cm 0 No - Cm 2
 .Sm on
 Disable special processing at the link level with the specified interface.
 .It Cm monitor
 Put the interface in monitor mode.
 No packets are transmitted, and received packets are discarded after
 .Xr bpf 4
 processing.
 .It Fl monitor
 Take the interface out of monitor mode.
 .It Cm pcp Ar priority_code_point
 Priority code point
 .Pq Dv PCP
 is a 3-bit field which refers to the IEEE 802.1p
 class of service and maps to the frame priority level.
 .It Fl pcp
 Stop tagging packets on the interface with the priority code point.
 .It Cm up
 Mark an interface
 .Dq up .
 This may be used to enable an interface after an
 .Dq Nm Cm down .
 It happens automatically when setting the first address on an interface.
 If the interface was reset when previously marked down,
 the hardware will be re-initialized.
 .El
 .Pp
 The following parameters are for ICMPv6 Neighbor Discovery Protocol.
 Note that the address family keyword
 .Dq Li inet6
 is needed for them:
 .Bl -tag -width indent
 .It Cm accept_rtadv
 Set a flag to enable accepting ICMPv6 Router Advertisement messages.
 The
 .Xr sysctl 8
 variable
 .Va net.inet6.ip6.accept_rtadv
 controls whether this flag is set by default or not.
 .It Cm -accept_rtadv
 Clear a flag
 .Cm accept_rtadv .
 .It Cm no_radr
 Set a flag to control whether routers from which the system accepts
 Router Advertisement messages will be added to the Default Router List
 or not.
 When the
 .Cm accept_rtadv
 flag is disabled, this flag has no effect.
 The
 .Xr sysctl 8
 variable
 .Va net.inet6.ip6.no_radr
 controls whether this flag is set by default or not.
 .It Cm -no_radr
 Clear a flag
 .Cm no_radr .
 .It Cm auto_linklocal
 Set a flag to perform automatic link-local address configuration when
 the interface becomes available.
 The
 .Xr sysctl 8
 variable
 .Va net.inet6.ip6.auto_linklocal
 controls whether this flag is set by default or not.
 .It Cm -auto_linklocal
 Clear a flag
 .Cm auto_linklocal .
 .It Cm defaultif
 Set the specified interface as the default route when there is no
 default router.
 .It Cm -defaultif
 Clear a flag
 .Cm defaultif .
 .It Cm ifdisabled
 Set a flag to disable all IPv6 network communications on the
 specified interface.
 Note that if there are already configured IPv6
 addresses on that interface, all of them are marked as
 .Dq tentative
 and DAD will be performed when this flag is cleared.
 .It Cm -ifdisabled
 Clear a flag
 .Cm ifdisabled .
 When this flag is cleared and
 .Cm auto_linklocal
 flag is enabled, automatic configuration of a link-local address is
 performed.
 .It Cm nud
 Set a flag to enable Neighbor Unreachability Detection.
 .It Cm -nud
 Clear a flag
 .Cm nud .
 .It Cm no_prefer_iface
 Set a flag to not honor rule 5 of source address selection in RFC 3484.
 In practice this means the address on the outgoing interface will not be
 preferred, effectively yielding the decision to the address selection
 policy table, configurable with
 .Xr ip6addrctl 8 .
 .It Cm -no_prefer_iface
 Clear a flag
 .Cm no_prefer_iface .
 .It Cm no_dad
 Set a flag to disable Duplicate Address Detection.
 .It Cm -no_dad
 Clear a flag
 .Cm no_dad .
 .El
 .Pp
 The following parameters are specific for IPv6 addresses.
 Note that the address family keyword
 .Dq Li inet6
 is needed for them:
 .Bl -tag -width indent
 .It Cm autoconf
 Set the IPv6 autoconfigured address bit.
 .It Fl autoconf
 Clear the IPv6 autoconfigured address bit.
 .It Cm deprecated
 Set the IPv6 deprecated address bit.
 .It Fl deprecated
 Clear the IPv6 deprecated address bit.
 .It Cm pltime Ar n
 Set preferred lifetime for the address.
 .It Cm prefer_source
 Set a flag to prefer address as a candidate of the source address for
 outgoing packets.
 .It Cm -prefer_source
 Clear a flag
 .Cm prefer_source .
 .It Cm vltime Ar n
 Set valid lifetime for the address.
 .El
 .Pp
 The following parameters are specific to cloning
 IEEE 802.11 wireless interfaces with the
 .Cm create
 request:
 .Bl -tag -width indent
 .It Cm wlandev Ar device
 Use
 .Ar device
 as the parent for the cloned device.
 .It Cm wlanmode Ar mode
 Specify the operating mode for this cloned device.
 .Ar mode
 is one of
 .Cm sta ,
 .Cm ahdemo
 (or
 .Cm adhoc-demo ) ,
 .Cm ibss
 (or
 .Cm adhoc ) ,
 .Cm ap
 (or
 .Cm hostap ) ,
 .Cm wds ,
 .Cm tdma ,
 .Cm mesh ,
 and
 .Cm monitor .
 The operating mode of a cloned interface cannot be changed.
 The
 .Cm tdma
 mode is actually implemented as an
 .Cm adhoc-demo
 interface with special properties.
 .It Cm wlanbssid Ar bssid
 The 802.11 mac address to use for the bssid.
 This must be specified at create time for a legacy
 .Cm wds
 device.
 .It Cm wlanaddr Ar address
 The local mac address.
 If this is not specified then a mac address will automatically be assigned
 to the cloned device.
 Typically this address is the same as the address of the parent device
 but if the
 .Cm bssid
 parameter is specified then the driver will craft a unique address for
 the device (if supported).
 .It Cm wdslegacy
 Mark a
 .Cm wds
 device as operating in ``legacy mode''.
 Legacy
 .Cm wds
 devices have a fixed peer relationship and do not, for example, roam
 if their peer stops communicating.
 For completeness a Dynamic WDS (DWDS) interface may be marked as
 .Fl wdslegacy .
 .It Cm bssid
 Request a unique local mac address for the cloned device.
 This is only possible if the device supports multiple mac addresses.
 To force use of the parent's mac address use
 .Fl bssid .
 .It Cm beacons
 Mark the cloned interface as depending on hardware support to
 track received beacons.
 To have beacons tracked in software use
 .Fl beacons .
 For
 .Cm hostap
 mode
 .Fl beacons
 can also be used to indicate no beacons should
 be transmitted; this can be useful when creating a WDS configuration but
 .Cm wds
 interfaces can only be created as companions to an access point.
 .El
 .Pp
 The following parameters are specific to IEEE 802.11 wireless interfaces
 cloned with a
 .Cm create
 operation:
 .Bl -tag -width indent
 .It Cm ampdu
 Enable sending and receiving AMPDU frames when using 802.11n (default).
 The 802.11n specification states a compliant station must be capable
 of receiving AMPDU frames but transmission is optional.
 Use
 .Fl ampdu
 to disable all use of AMPDU with 802.11n.
 For testing and/or to work around interoperability problems one can use
 .Cm ampdutx
 and
 .Cm ampdurx
 to control use of AMPDU in one direction.
 .It Cm ampdudensity Ar density
 Set the AMPDU density parameter used when operating with 802.11n.
 This parameter controls the inter-packet gap for AMPDU frames.
 The sending device normally controls this setting but a receiving station
 may request wider gaps.
 Legal values for
 .Ar density
 are 0, .25, .5, 1, 2, 4, 8, and 16 (microseconds).
 A value of
 .Cm -
 is treated the same as 0.
 .It Cm ampdulimit Ar limit
 Set the limit on packet size for receiving AMPDU frames when operating
 with 802.11n.
 Legal values for
 .Ar limit
 are 8192, 16384, 32768, and 65536 but one can also specify
 just the unique prefix: 8, 16, 32, 64.
 Note the sender may limit the size of AMPDU frames to be less
 than the maximum specified by the receiving station.
 .It Cm amsdu
 Enable sending and receiving AMSDU frames when using 802.11n.
 By default AMSDU is received but not transmitted.
 Use
 .Fl amsdu
 to disable all use of AMSDU with 802.11n.
 For testing and/or to work around interoperability problems one can use
 .Cm amsdutx
 and
 .Cm amsdurx
 to control use of AMSDU in one direction.
 .It Cm amsdulimit Ar limit
 Set the limit on packet size for sending and receiving AMSDU frames
 when operating with 802.11n.
 Legal values for
 .Ar limit
 are 7935 and 3839 (bytes).
 Note the sender may limit the size of AMSDU frames to be less
 than the maximum specified by the receiving station.
 Note also that devices are not required to support the 7935 limit,
 only 3839 is required by the specification and the larger value
 may require more memory to be dedicated to support functionality
 that is rarely used.
 .It Cm apbridge
 When operating as an access point, pass packets between
 wireless clients directly (default).
 To instead let them pass up through the
 system and be forwarded using some other mechanism, use
 .Fl apbridge .
 Disabling the internal bridging
 is useful when traffic is to be processed with
 packet filtering.
 .It Cm authmode Ar mode
 Set the desired authentication mode in infrastructure mode.
 Not all adapters support all modes.
 The set of
 valid modes is
 .Cm none , open , shared
 (shared key),
 .Cm 8021x
 (IEEE 802.1x),
 and
 .Cm wpa
 (IEEE WPA/WPA2/802.11i).
 The
 .Cm 8021x
 and
 .Cm wpa
 modes are only useful when using an authentication service
 (a supplicant for client operation or an authenticator when
 operating as an access point).
 Modes are case insensitive.
 .It Cm bgscan
 Enable background scanning when operating as a station.
 Background scanning is a technique whereby a station associated to
 an access point will temporarily leave the channel to scan for
 neighboring stations.
 This allows a station to maintain a cache of nearby access points
 so that roaming between access points can be done without
 a lengthy scan operation.
 Background scanning is done only when a station is not busy and
 any outbound traffic will cancel a scan operation.
 Background scanning should never cause packets to be lost though
 there may be some small latency if outbound traffic interrupts a
 scan operation.
 By default background scanning is enabled if the device is capable.
 To disable background scanning, use
 .Fl bgscan .
 Background scanning is controlled by the
 .Cm bgscanidle
 and
 .Cm bgscanintvl
 parameters.
 Background scanning must be enabled for roaming; this is an artifact
 of the current implementation and may not be required in the future.
 .It Cm bgscanidle Ar idletime
 Set the minimum time a station must be idle (not transmitting or
 receiving frames) before a background scan is initiated.
 The
 .Ar idletime
 parameter is specified in milliseconds.
 By default a station must be idle at least 250 milliseconds before
 a background scan is initiated.
 The idle time may not be set to less than 100 milliseconds.
 .It Cm bgscanintvl Ar interval
 Set the interval at which background scanning is attempted.
 The
 .Ar interval
 parameter is specified in seconds.
 By default a background scan is considered every 300 seconds (5 minutes).
 The
 .Ar interval
 may not be set to less than 15 seconds.
 .It Cm bintval Ar interval
 Set the interval at which beacon frames are sent when operating in
 ad-hoc or ap mode.
 The
 .Ar interval
 parameter is specified in TU's (1024 usecs).
 By default beacon frames are transmitted every 100 TU's.
 .It Cm bmissthreshold Ar count
 Set the number of consecutive missed beacons at which the station
 will attempt to roam (i.e., search for a new access point).
 The
 .Ar count
 parameter must be in the range 1 to 255; though the
 upper bound may be reduced according to device capabilities.
 The default threshold is 7 consecutive missed beacons; but
 this may be overridden by the device driver.
 Another name for the
 .Cm bmissthreshold
 parameter is
 .Cm bmiss .
 .It Cm bssid Ar address
 Specify the MAC address of the access point to use when operating
 as a station in a BSS network.
 This overrides any automatic selection done by the system.
 To disable a previously selected access point, supply
 .Cm any , none ,
 or
 .Cm -
 for the address.
 This option is useful when more than one access point uses the same SSID.
 Another name for the
 .Cm bssid
 parameter is
 .Cm ap .
 .It Cm burst
 Enable packet bursting.
 Packet bursting is a transmission technique whereby the wireless
 medium is acquired once to send multiple frames and the interframe
 spacing is reduced.
 This technique can significantly increase throughput by reducing
 transmission overhead.
 Packet bursting is supported by the 802.11e QoS specification
 and some devices that do not support QoS may still be capable.
 By default packet bursting is enabled if a device is capable
 of doing it.
 To disable packet bursting, use
 .Fl burst .
 .It Cm chanlist Ar channels
 Set the desired channels to use when scanning for access
 points, neighbors in an IBSS network, or looking for unoccupied
 channels when operating as an access point.
 The set of channels is specified as a comma-separated list with
 each element in the list representing either a single channel number or a range
 of the form
 .Dq Li a-b .
 Channel numbers must be in the range 1 to 255 and be permissible
 according to the operating characteristics of the device.
 .It Cm channel Ar number
 Set a single desired channel.
 Channels range from 1 to 255, but the exact selection available
 depends on the region your adaptor was manufactured for.
 Setting
 the channel to
 .Li any ,
 or
 .Cm -
 will clear any desired channel and, if the device is marked up,
 force a scan for a channel to operate on.
 Alternatively the frequency, in megahertz, may be specified
 instead of the channel number.
 .Pp
 When there are several ways to use a channel the channel
 number/frequency may be appended with attributes to clarify.
 For example, if a device is capable of operating on channel 6
 with 802.11n and 802.11g then one can specify that g-only use
 should be used by specifying ``6:g''.
 Similarly the channel width can be specified by appending it
 with ``/''; e.g., ``6/40'' specifies a 40MHz wide channel.
 These attributes can be combined as in: ``6:ht/40''.
 The full set of flags specified following a ``:'' are:
 .Cm a
 (802.11a),
 .Cm b
 (802.11b),
 .Cm d
 (Atheros Dynamic Turbo mode),
 .Cm g
 (802.11g),
 .Cm h
 or
 .Cm n
 (802.11n aka HT),
 .Cm s
 (Atheros Static Turbo mode),
 and
 .Cm t
 (Atheros Dynamic Turbo mode, or appended to ``st'' and ``dt'').
 The full set of channel widths following a ``/'' are:
 .Cm 5
 (5MHz aka quarter-rate channel),
 .Cm 10
 (10MHz aka half-rate channel),
 .Cm 20
 (20MHz mostly for use in specifying ht20),
 and
 .Cm 40
 (40MHz mostly for use in specifying ht40).
 In addition,
 a 40MHz HT channel specification may include the location
 of the extension channel by appending ``+'' or ``-'' for above and below,
 respectively; e.g., ``2437:ht/40+'' specifies 40MHz wide HT operation
 with the center channel at frequency 2437 and the extension channel above.
 .It Cm country Ar name
 Set the country code to use in calculating the regulatory constraints
 for operation.
 In particular the set of available channels, how the wireless device
 will operate on the channels, and the maximum transmit power that
 can be used on a channel are defined by this setting.
 Country/Region codes are specified as a 2-character abbreviation
 defined by ISO 3166 or using a longer, but possibly ambiguous, spelling;
 e.g., "ES" and "Spain".
 The set of country codes is taken from
 .Pa /etc/regdomain.xml
 and can also
 be viewed with the ``list countries'' request.
 Note that not all devices support changing the country code from a default
 setting; typically stored in EEPROM.
 See also
 .Cm regdomain ,
 .Cm indoor ,
 .Cm outdoor ,
 and
 .Cm anywhere .
 .It Cm dfs
 Enable Dynamic Frequency Selection (DFS) as specified in 802.11h.
 DFS embodies several facilities including detection of overlapping
 radar signals, dynamic transmit power control, and channel selection
 according to a least-congested criterion.
 DFS support is mandatory for some 5GHz frequencies in certain
 locales (e.g., ETSI).
 By default DFS is enabled according to the regulatory definitions
 specified in
 .Pa /etc/regdomain.xml
 and the current country code, regdomain,
 and channel.
 Note the underlying device (and driver) must support radar detection
 for full DFS support to work.
 To be fully compliant with the local regulatory agency, frequencies that
 require DFS should not be used unless it is fully supported.
 Use
 .Fl dfs
 to disable this functionality for testing.
 .It Cm dotd
 Enable support for the 802.11d specification (default).
 When this support is enabled in station mode, beacon frames that advertise
 a country code different than the currently configured country code will
 cause an event to be dispatched to user applications.
 This event can be used by the station to adopt that country code and
 operate according to the associated regulatory constraints.
 When operating as an access point with 802.11d enabled the beacon and
 probe response frames transmitted will advertise the current regulatory
 domain settings.
 To disable 802.11d use
 .Fl dotd .
 .It Cm doth
 Enable 802.11h support including spectrum management.
 When 802.11h is enabled beacon and probe response frames will have
 the SpectrumMgt bit set in the capabilities field and
 country and power constraint information elements will be present.
 802.11h support also includes handling Channel Switch Announcements (CSA)
 which are a mechanism to coordinate channel changes by an access point.
 By default 802.11h is enabled if the device is capable.
 To disable 802.11h use
 .Fl doth .
 .It Cm deftxkey Ar index
 Set the default key to use for transmission.
 Typically this is only set when using WEP encryption.
 Note that you must set a default transmit key
 for the system to know which key to use in encrypting outbound traffic.
 The
 .Cm weptxkey
 is an alias for this request; it is provided for backwards compatibility.
 .It Cm dtimperiod Ar period
 Set the
 DTIM
 period for transmitting buffered multicast data frames when
 operating in ap mode.
 The
 .Ar period
 specifies the number of beacon intervals between DTIM
 and must be in the range 1 to 15.
 By default DTIM is 1 (i.e., DTIM occurs at each beacon).
 .It Cm quiet
 Enable the use of quiet IE.
 Hostap will use this to silence other
 stations to reduce interference for radar detection when
 operating on 5GHz frequency and doth support is enabled.
 Use
 .Fl quiet
 to disable this functionality.
 .It Cm quiet_period Ar period
 Set the QUIET
 .Ar period
 to the number of beacon intervals between the start of regularly
 scheduled quiet intervals defined by the Quiet element.
 .It Cm quiet_count Ar count
 Set the QUIET
 .Ar count
 to the number of TBTTs until the beacon interval during which the
 next quiet interval shall start.
 A value of 1 indicates the quiet
 interval will start during the beacon interval starting at the next
 TBTT.
 A value of 0 is reserved.
 .It Cm quiet_offset Ar offset
 Set the QUIET
 .Ar offset
 to the offset of the start of the quiet interval from the TBTT
 specified by the Quiet count, expressed in TUs.
 The value of the
 .Ar offset
 shall be less than one beacon interval.
 .It Cm quiet_duration Ar dur
 Set the QUIET
 .Ar dur
 to the duration of the Quiet interval, expressed in TUs.
 The value should be less than one beacon interval.
 .It Cm dturbo
 Enable the use of Atheros Dynamic Turbo mode when communicating with
 another Dynamic Turbo-capable station.
 Dynamic Turbo mode is an Atheros-specific mechanism by which
 stations switch between normal 802.11 operation and a ``boosted''
 mode in which a 40MHz wide channel is used for communication.
 Stations using Dynamic Turbo mode operate boosted only when the
 channel is free of non-dturbo stations; when a non-dturbo station
 is identified on the channel all stations will automatically drop
 back to normal operation.
 By default, Dynamic Turbo mode is not enabled, even if the device is capable.
 Note that turbo mode (dynamic or static) is only allowed on some
 channels depending on the regulatory constraints; use the
 .Cm list chan
 command to identify the channels where turbo mode may be used.
 To disable Dynamic Turbo mode use
 .Fl dturbo .
 .It Cm dwds
 Enable Dynamic WDS (DWDS) support.
 DWDS is a facility by which 4-address traffic can be carried between
 stations operating in infrastructure mode.
 A station first associates to an access point and authenticates using
 normal procedures (e.g., WPA).
 Then 4-address frames are passed to carry traffic for stations
 operating on either side of the wireless link.
 DWDS extends the normal WDS mechanism by leveraging existing security
 protocols and eliminating static binding.
 .Pp
 When DWDS is enabled on an access point 4-address frames received from
 an authorized station will generate a ``DWDS discovery'' event to user
 applications.
 This event should be used to create a WDS interface that is bound
 to the remote station (and usually plumbed into a bridge).
 Once the WDS interface is up and running 4-address traffic then logically
 flows through that interface.
 .Pp
 When DWDS is enabled on a station, traffic with a destination address
 different from the peer station is encapsulated in a 4-address frame
 and transmitted to the peer.
 All 4-address traffic uses the security information of the stations
 (e.g., cryptographic keys).
 A station that is associated using 802.11n facilities may transport
 4-address traffic using these same mechanisms; this depends on available
 resources and capabilities of the device.
 The DWDS implementation guards against layer 2 routing loops of
 multicast traffic.
 .It Cm ff
 Enable the use of Atheros Fast Frames when communicating with
 another Fast Frames-capable station.
 Fast Frames are an encapsulation technique by which two 802.3
 frames are transmitted in a single 802.11 frame.
 This can noticeably improve throughput but requires that the
 receiving station understand how to decapsulate the frame.
 Fast frame use is negotiated using the Atheros 802.11 vendor-specific
 protocol extension so enabling use is safe when communicating with
 non-Atheros devices.
 By default, use of fast frames is enabled if the device is capable.
 To explicitly disable fast frames, use
 .Fl ff .
 .It Cm fragthreshold Ar length
 Set the threshold for which transmitted frames are broken into fragments.
 The
 .Ar length
 argument is the frame size in bytes and must be in the range 256 to 2346.
 Setting
 .Ar length
 to
 .Li 2346 ,
 .Cm any ,
 or
 .Cm -
 disables transmit fragmentation.
 Not all adapters honor the fragmentation threshold.
 .It Cm hidessid
 When operating as an access point, do not broadcast the SSID
 in beacon frames or respond to probe request frames unless
 they are directed to the ap (i.e., they include the ap's SSID).
 By default, the SSID is included in beacon frames and
 undirected probe request frames are answered.
 To re-enable the broadcast of the SSID etc., use
 .Fl hidessid .
 .It Cm ht
 Enable use of High Throughput (HT) when using 802.11n (default).
 The 802.11n specification includes mechanisms for operation
 on 20MHz and 40MHz wide channels using different signalling mechanisms
 than specified in 802.11b, 802.11g, and 802.11a.
 Stations negotiate use of these facilities, termed HT20 and HT40,
 when they associate.
 To disable all use of 802.11n use
 .Fl ht .
 To disable use of HT20 (e.g., to force only HT40 use) use
 .Fl ht20 .
 To disable use of HT40 use
 .Fl ht40 .
 .Pp
 HT configuration is used to ``auto promote'' operation
 when several choices are available.
 For example, if a station associates to an 11n-capable access point
 it controls whether the station uses legacy operation, HT20, or HT40.
 When an 11n-capable device is setup as an access point and
 Auto Channel Selection is used to locate a channel to operate on,
 HT configuration controls whether legacy, HT20, or HT40 operation is setup
 on the selected channel.
 If a fixed channel is specified for a station then HT configuration can
 be given as part of the channel specification; e.g., 6:ht/20 to setup
 HT20 operation on channel 6.
 .It Cm htcompat
 Enable use of compatibility support for pre-802.11n devices (default).
 The 802.11n protocol specification went through several incompatible iterations.
 Some vendors implemented 11n support to older specifications that
 will not interoperate with a purely 11n-compliant station.
 In particular the information elements included in management frames
 for old devices are different.
 When compatibility support is enabled both standard and compatible data
 will be provided.
 Stations that associate using the compatibility mechanisms are flagged
 in ``list sta''.
 To disable compatibility support use
 .Fl htcompat .
 .It Cm htprotmode Ar technique
 For interfaces operating in 802.11n, use the specified
 .Ar technique
 for protecting HT frames in a mixed legacy/HT network.
 The set of valid techniques is
 .Cm off
 and
 .Cm rts
 (RTS/CTS, default).
 Technique names are case insensitive.
 .It Cm inact
 Enable inactivity processing for stations associated to an
 access point (default).
 When operating as an access point the 802.11 layer monitors
 the activity of each associated station.
 When a station is inactive for 5 minutes it will send several
 ``probe frames'' to see if the station is still present.
 If no response is received then the station is deauthenticated.
 Applications that prefer to handle this work can disable this
 facility by using
 .Fl inact .
 .It Cm indoor
 Set the location to use in calculating regulatory constraints.
 The location is also advertised in beacon and probe response frames
 when 802.11d is enabled with
 .Cm dotd .
 See also
 .Cm outdoor ,
 .Cm anywhere ,
 .Cm country ,
 and
 .Cm regdomain .
 .It Cm list active
 Display the list of channels available for use taking into account
 any restrictions set with the
 .Cm chanlist
 directive.
 See the description of
 .Cm list chan
 for more information.
 .It Cm list caps
 Display the adaptor's capabilities, including the operating
 modes supported.
 .It Cm list chan
 Display the list of channels available for use.
 Channels are shown with their IEEE channel number, equivalent
 frequency, and usage modes.
 Channels identified as
 .Ql 11g
 are also usable in
 .Ql 11b
 mode.
 Channels identified as
 .Ql 11a Turbo
 may be used only for Atheros' Static Turbo mode
 (specified with
 .Cm mediaopt turbo ) .
 Channels marked with a
 .Ql *
 have a regulatory constraint that they be passively scanned.
 This means a station is not permitted to transmit on the channel until
 it identifies the channel is being used for 802.11 communication;
 typically by hearing a beacon frame from an access point operating
 on the channel.
 .Cm list freq
 is another way of requesting this information.
 By default a compacted list of channels is displayed; if the
 .Fl v
 option is specified then all channels are shown.
 .It Cm list countries
 Display the set of country codes and regulatory domains that can be
 used in regulatory configuration.
 .It Cm list mac
 Display the current MAC Access Control List state.
 Each address is prefixed with a character that indicates the
 current policy applied to it:
 .Ql +
 indicates the address is allowed access,
 .Ql -
 indicates the address is denied access,
 .Ql *
 indicates the address is present but the current policy is open
 (so the ACL is not consulted).
 .It Cm list mesh
 Displays the mesh routing table, used for forwarding packets on a mesh
 network.
 .It Cm list regdomain
 Display the current regulatory settings including the available channels
 and transmit power caps.
 .It Cm list roam
 Display the parameters that govern roaming operation.
 .It Cm list txparam
 Display the parameters that govern transmit operation.
 .It Cm list txpower
 Display the transmit power caps for each channel.
 .It Cm list scan
 Display the access points and/or ad-hoc neighbors
 located in the vicinity.
 This information may be updated automatically by the adapter
 with a
 .Cm scan
 request or through background scanning.
 Depending on the capabilities of the stations the following
 flags can be included in the output:
 .Bl -tag -width 3n
 .It Li A
 Channel agility.
 .It Li B
 PBCC modulation.
 .It Li C
 Poll request capability.
 .It Li D
 DSSS/OFDM capability.
 .It Li E
 Extended Service Set (ESS).
 .It Li I
 Independent Basic Service Set (IBSS).
 .It Li P
 Privacy capability.
 The station requires authentication.
 .It Li R
 Robust Secure Network (RSN).
 .It Li S
 Short Preamble.
 Indicates that the station is doing short preamble to optionally
 improve throughput performance with 802.11g and 802.11b.
 .It Li c
 Pollable capability.
 .It Li s
 Short slot time capability.
 .El
 .Pp
 By default interesting information elements captured from the neighboring
 stations are displayed at the end of each row.
 Possible elements include:
 .Cm WME
 (station supports WME),
 .Cm WPA
 (station supports WPA),
 .Cm WPS
 (station supports WPS),
 .Cm RSN
 (station supports 802.11i/RSN),
 .Cm HTCAP
 (station supports 802.11n/HT communication),
 .Cm ATH
 (station supports Atheros protocol extensions),
 .Cm VEN
 (station supports unknown vendor-specific extensions).
 If the
 .Fl v
 flag is used all the information elements and their
 contents will be shown.
 Specifying the
 .Fl v
 flag also enables display of long SSIDs.
 The
 .Cm list ap
 command is another way of requesting this information.
 .It Cm list sta
 When operating as an access point display the stations that are
 currently associated.
 When operating in ad-hoc mode display stations identified as
 neighbors in the IBSS.
 When operating in mesh mode display stations identified as
 neighbors in the MBSS.
 When operating in station mode display the access point.
 Capabilities advertised by the stations are described under
 the
 .Cm scan
 request.
 The following flags can be included in the output:
 .Bl -tag -width 3n
 .It Li A
 Authorized.
 Indicates that the station is permitted to send/receive data frames.
 .It Li E
 Extended Rate Phy (ERP).
 Indicates that the station is operating in an 802.11g network
 using extended transmit rates.
 .It Li H
 High Throughput (HT).
 Indicates that the station is using HT transmit rates.
 If a
 .Sq Li +
 follows immediately after then the station associated
 using deprecated mechanisms supported only when
 .Cm htcompat
 is enabled.
 .It Li P
 Power Save.
 Indicates that the station is operating in power save mode.
 .It Li Q
 Quality of Service (QoS).
 Indicates that the station is using QoS encapsulation for
 data frames.
 QoS encapsulation is enabled only when WME mode is enabled.
 .It Li S
 Short GI in HT 40MHz mode enabled.
 If a
 .Sq Li +
 follows immediately after then short GI in HT 20MHz mode is enabled as well.
 .It Li T
 Transitional Security Network (TSN).
 Indicates that the station associated using TSN; see also
 .Cm tsn
 below.
 .It Li W
 Wi-Fi Protected Setup (WPS).
 Indicates that the station associated using WPS.
 .It Li s
 Short GI in HT 20MHz mode enabled.
 .El
 .Pp
 By default information elements received from associated stations
 are displayed in a short form; the
 .Fl v
 flag causes this information to be displayed symbolically.
 .It Cm list wme
 Display the current channel parameters to use when operating in WME mode.
 If the
 .Fl v
 option is specified then both channel and BSS parameters are displayed
 for each AC (first channel, then BSS).
 When WME mode is enabled for an adaptor this information will be
 displayed with the regular status; this command is mostly useful
 for examining parameters when WME mode is disabled.
 See the description of the
 .Cm wme
 directive for information on the various parameters.
 .It Cm maxretry Ar count
 Set the maximum number of tries to use in sending unicast frames.
 The default setting is 6 but drivers may override this with a value
 they choose.
 .It Cm mcastrate Ar rate
 Set the rate for transmitting multicast/broadcast frames.
 Rates are specified as megabits/second in decimal; e.g.,\& 5.5 for 5.5 Mb/s.
 This rate should be valid for the current operating conditions;
 if an invalid rate is specified drivers are free to choose an
 appropriate rate.
 .It Cm mgtrate Ar rate
 Set the rate for transmitting management and/or control frames.
 Rates are specified as megabits/second in decimal; e.g.,\& 5.5 for 5.5 Mb/s.
 .It Cm outdoor
 Set the location to use in calculating regulatory constraints.
 The location is also advertised in beacon and probe response frames
 when 802.11d is enabled with
 .Cm dotd .
 See also
 .Cm anywhere ,
 .Cm country ,
 .Cm indoor ,
 and
 .Cm regdomain .
 .It Cm powersave
 Enable powersave operation.
 When operating as a client, the station will conserve power by
 periodically turning off the radio and listening for
 messages from the access point telling it there are packets waiting.
 The station must then retrieve the packets.
 Not all devices support power save operation as a client.
 The 802.11 specification requires that all access points support
 power save but some drivers do not.
 Use
 .Fl powersave
 to disable powersave operation when operating as a client.
 .It Cm powersavesleep Ar sleep
 Set the desired max powersave sleep time in TU's (1024 usecs).
 By default the max powersave sleep time is 100 TU's.
 .It Cm protmode Ar technique
 For interfaces operating in 802.11g, use the specified
 .Ar technique
 for protecting OFDM frames in a mixed 11b/11g network.
 The set of valid techniques is
 .Cm off , cts
 (CTS to self),
 and
 .Cm rtscts
 (RTS/CTS).
 Technique names are case insensitive.
 Not all devices support
 .Cm cts
 as a protection technique.
 .It Cm pureg
 When operating as an access point in 802.11g mode allow only
 11g-capable stations to associate (11b-only stations are not
 permitted to associate).
 To allow both 11g and 11b-only stations to associate, use
 .Fl pureg .
 .It Cm puren
 When operating as an access point in 802.11n mode allow only
 HT-capable stations to associate (legacy stations are not
 permitted to associate).
 To allow both HT and legacy stations to associate, use
 .Fl puren .
 .It Cm regdomain Ar sku
 Set the regulatory domain to use in calculating the regulatory constraints
 for operation.
 In particular the set of available channels, how the wireless device
 will operate on the channels, and the maximum transmit power that
 can be used on a channel are defined by this setting.
 Regdomain codes (SKU's) are taken from
 .Pa /etc/regdomain.xml
 and can also
 be viewed with the ``list countries'' request.
 Note that not all devices support changing the regdomain from a default
 setting; typically stored in EEPROM.
 See also
 .Cm country ,
 .Cm indoor ,
 .Cm outdoor ,
 and
 .Cm anywhere .
 .It Cm rifs
 Enable use of Reduced InterFrame Spacing (RIFS) when operating in 802.11n
 on an HT channel.
 Note that RIFS must be supported by both the station and access point
 for it to be used.
 To disable RIFS use
 .Fl rifs .
 .It Cm roam:rate Ar rate
 Set the threshold for controlling roaming when operating in a BSS.
 The
 .Ar rate
 parameter specifies the transmit rate in megabits
 at which roaming should be considered.
 If the current transmit rate drops below this setting and background scanning
 is enabled, then the system will check if a more desirable access point is
 available and switch over to it.
 The current scan cache contents are used if they are considered
 valid according to the
 .Cm scanvalid
 parameter; otherwise a background scan operation is triggered before
 any selection occurs.
 Each channel type has a separate rate threshold; the default values are:
 12 Mb/s (11a), 2 Mb/s (11b), 2 Mb/s (11g), MCS 1 (11na, 11ng).
 .It Cm roam:rssi Ar rssi
 Set the threshold for controlling roaming when operating in a BSS.
 The
 .Ar rssi
 parameter specifies the receive signal strength in dBm units
 at which roaming should be considered.
 If the current rssi drops below this setting and background scanning
 is enabled, then the system will check if a more desirable access point is
 available and switch over to it.
 The current scan cache contents are used if they are considered
 valid according to the
 .Cm scanvalid
 parameter; otherwise a background scan operation is triggered before
 any selection occurs.
 Each channel type has a separate rssi threshold; the default values are
 all 7 dBm.
 .It Cm roaming Ar mode
 When operating as a station, control how the system will
 behave when communication with the current access point
 is broken.
 The
 .Ar mode
 argument may be one of
 .Cm device
 (leave it to the hardware device to decide),
 .Cm auto
 (handle either in the device or the operating system\[em]as appropriate),
 .Cm manual
 (do nothing until explicitly instructed).
 By default, the device is left to handle this if it is
 capable; otherwise, the operating system will automatically
 attempt to reestablish communication.
 Manual mode is used by applications such as
 .Xr wpa_supplicant 8
 that want to control the selection of an access point.
 .It Cm rtsthreshold Ar length
 Set the threshold for which
 transmitted frames are preceded by transmission of an
 RTS
 control frame.
 The
 .Ar length
 argument
 is the frame size in bytes and must be in the range 1 to 2346.
 Setting
 .Ar length
 to
 .Li 2346 ,
 .Cm any ,
 or
 .Cm -
 disables transmission of RTS frames.
 Not all adapters support setting the RTS threshold.
 .It Cm scan
 Initiate a scan of neighboring stations, wait for it to complete, and
 display all stations found.
 Only the super-user can initiate a scan.
 See
 .Cm list scan
 for information on the display.
 By default a background scan is done; otherwise a foreground
 scan is done and the station may roam to a different access point.
 The
 .Cm list scan
 request can be used to show recent scan results without
 initiating a new scan.
 .It Cm scanvalid Ar threshold
 Set the maximum time the scan cache contents are considered valid;
 i.e., will be used without first triggering a scan operation to
 refresh the data.
 The
 .Ar threshold
 parameter is specified in seconds and defaults to 60 seconds.
 The minimum setting for
 .Ar threshold
 is 10 seconds.
 One should take care setting this threshold; if it is set too low
 then attempts to roam to another access point may trigger unnecessary
 background scan operations.
 .It Cm shortgi
 Enable use of Short Guard Interval when operating in 802.11n
 on an HT channel.
 NB: this currently enables Short GI on both HT40 and HT20 channels.
 To disable Short GI use
 .Fl shortgi .
 .It Cm smps
 Enable use of Static Spatial Multiplexing Power Save (SMPS)
 when operating in 802.11n.
 A station operating with Static SMPS maintains only a single
 receive chain active (this can significantly reduce power consumption).
 To disable SMPS use
 .Fl smps .
 .It Cm smpsdyn
 Enable use of Dynamic Spatial Multiplexing Power Save (SMPS)
 when operating in 802.11n.
 A station operating with Dynamic SMPS maintains only a single
 receive chain active but switches to multiple receive chains when it
 receives an RTS frame (this can significantly reduce power consumption).
 Note that stations cannot distinguish between RTS/CTS intended to
 enable multiple receive chains and those used for other purposes.
 To disable SMPS use
 .Fl smps .
 .It Cm ssid Ar ssid
 Set the desired Service Set Identifier (aka network name).
 The SSID is a string up to 32 characters
 in length and may be specified as either a normal string or in
 hexadecimal when preceded by
 .Ql 0x .
 Additionally, the SSID may be cleared by setting it to
 .Ql - .
 .It Cm tdmaslot Ar slot
 When operating with TDMA, use the specified
 .Ar slot
 configuration.
 The
 .Ar slot
 is a number between 0 and the maximum number of slots in the BSS.
 Note that a station configured as slot 0 is a master and
 will broadcast beacon frames advertising the BSS;
 stations configured to use other slots will always
 scan to locate a master before they ever transmit.
 By default
 .Cm tdmaslot
 is set to 1.
 .It Cm tdmaslotcnt Ar cnt
 When operating with TDMA, setup a BSS with
 .Ar cnt
 slots.
 The slot count may be at most 8.
 The current implementation is only tested with two stations
 (i.e., point to point applications).
 This setting is only meaningful when a station is configured as slot 0;
 other stations adopt this setting from the BSS they join.
 By default
 .Cm tdmaslotcnt
 is set to 2.
 .It Cm tdmaslotlen Ar len
 When operating with TDMA, setup a BSS such that each station has a slot
 .Ar len
 microseconds long.
 The slot length must be at least 150 microseconds (1/8 TU)
 and no more than 65 milliseconds.
 Note that setting too small a slot length may result in poor channel
 bandwidth utilization due to factors such as timer granularity and
 guard time.
 This setting is only meaningful when a station is configured as slot 0;
 other stations adopt this setting from the BSS they join.
 By default
 .Cm tdmaslotlen
 is set to 10 milliseconds.
 .It Cm tdmabintval Ar intval
 When operating with TDMA, setup a BSS such that beacons are transmitted every
 .Ar intval
 superframes to synchronize the TDMA slot timing.
 A superframe is defined as the number of slots times the slot length; e.g.,
 a BSS with two slots of 10 milliseconds has a 20 millisecond superframe.
 The beacon interval may not be zero.
 A lower setting of
 .Cm tdmabintval
 causes the timers to be resynchronized more often; this can help if
 significant timer drift is observed.
 By default
 .Cm tdmabintval
 is set to 5.
 .It Cm tsn
 When operating as an access point with WPA/802.11i allow legacy
 stations to associate using static key WEP and open authentication.
 To disallow legacy station use of WEP, use
 .Fl tsn .
 .It Cm txpower Ar power
 Set the power used to transmit frames.
 The
 .Ar power
 argument is specified in .5 dBm units.
 Out of range values are truncated.
 Typically only a few discrete power settings are available and
 the driver will use the setting closest to the specified value.
 Not all adapters support changing the transmit power.
 .It Cm ucastrate Ar rate
 Set a fixed rate for transmitting unicast frames.
 Rates are specified as megabits/second in decimal; e.g.,\& 5.5 for 5.5 Mb/s.
 This rate should be valid for the current operating conditions;
 if an invalid rate is specified drivers are free to choose an
 appropriate rate.
 .It Cm wepmode Ar mode
 Set the desired WEP mode.
 Not all adapters support all modes.
 The set of valid modes is
 .Cm off , on ,
 and
 .Cm mixed .
 The
 .Cm mixed
 mode explicitly tells the adaptor to allow association with access
 points which allow both encrypted and unencrypted traffic.
 On these adapters,
 .Cm on
 means that the access point must only allow encrypted connections.
 On other adapters,
 .Cm on
 is generally another name for
 .Cm mixed .
 Modes are case insensitive.
 .It Cm weptxkey Ar index
 Set the WEP key to be used for transmission.
 This is the same as setting the default transmission key with
 .Cm deftxkey .
 .It Cm wepkey Ar key Ns | Ns Ar index : Ns Ar key
 Set the selected WEP key.
 If an
 .Ar index
 is not given, key 1 is set.
 A WEP key will be either 5 or 13
 characters (40 or 104 bits) depending on the local network and the
 capabilities of the adaptor.
 It may be specified either as a plain
 string or as a string of hexadecimal digits preceded by
 .Ql 0x .
 For maximum portability, hex keys are recommended;
 the mapping of text keys to WEP encryption is usually driver-specific.
 In particular, the
 .Tn Windows
 drivers do this mapping differently to
 .Fx .
 A key may be cleared by setting it to
 .Ql - .
 If WEP is supported then there are at least four keys.
 Some adapters support more than four keys.
 If that is the case, then the first four keys
 (1-4) will be the standard temporary keys and any others will be adaptor
 specific keys such as permanent keys stored in NVRAM.
 .Pp
 Note that you must set a default transmit key with
 .Cm deftxkey
 for the system to know which key to use in encrypting outbound traffic.
 .It Cm wme
 Enable Wireless Multimedia Extensions (WME) support, if available,
 for the specified interface.
 WME is a subset of the IEEE 802.11e standard to support the
 efficient communication of realtime and multimedia data.
 To disable WME support, use
 .Fl wme .
 Another name for this parameter is
 .Cm wmm .
 .Pp
 The following parameters are meaningful only when WME support is in use.
 Parameters are specified per-AC (Access Category) and
 split into those that are used by a station when acting
 as an access point and those for client stations in the BSS.
 The latter are received from the access point and may not be changed
 (at the station).
 The following Access Categories are recognized:
 .Pp
 .Bl -tag -width ".Cm AC_BK" -compact
 .It Cm AC_BE
 (or
 .Cm BE )
 best effort delivery,
 .It Cm AC_BK
 (or
 .Cm BK )
 background traffic,
 .It Cm AC_VI
 (or
 .Cm VI )
 video traffic,
 .It Cm AC_VO
 (or
 .Cm VO )
 voice traffic.
 .El
 .Pp
 AC parameters are case-insensitive.
 Traffic classification is done in the operating system using the
 vlan priority associated with data frames or the
 ToS (Type of Service) indication in IP-encapsulated frames.
 If neither information is present, traffic is assigned to the
 Best Effort (BE) category.
 .Bl -tag -width indent
 .It Cm ack Ar ac
 Set the ACK policy for QoS transmissions by the local station;
 this controls whether or not data frames transmitted by a station
 require an ACK response from the receiving station.
 To disable waiting for an ACK use
 .Fl ack .
 This parameter is applied only to the local station.
 .It Cm acm Ar ac
 Enable the Admission Control Mandatory (ACM) mechanism
 for transmissions by the local station.
 To disable the ACM use
 .Fl acm .
 On stations in a BSS this parameter is read-only and indicates
 the setting received from the access point.
 NB: ACM is not supported right now.
 .It Cm aifs Ar ac Ar count
 Set the Arbitration Inter Frame Spacing (AIFS)
 channel access parameter to use for transmissions
 by the local station.
 On stations in a BSS this parameter is read-only and indicates
 the setting received from the access point.
 .It Cm cwmin Ar ac Ar count
 Set the CWmin channel access parameter to use for transmissions
 by the local station.
 On stations in a BSS this parameter is read-only and indicates
 the setting received from the access point.
 .It Cm cwmax Ar ac Ar count
 Set the CWmax channel access parameter to use for transmissions
 by the local station.
 On stations in a BSS this parameter is read-only and indicates
 the setting received from the access point.
 .It Cm txoplimit Ar ac Ar limit
 Set the Transmission Opportunity Limit channel access parameter
 to use for transmissions by the local station.
 This parameter defines an interval of time when a WME station
 has the right to initiate transmissions onto the wireless medium.
 On stations in a BSS this parameter is read-only and indicates
 the setting received from the access point.
 .It Cm bss:aifs Ar ac Ar count
 Set the AIFS channel access parameter to send to stations in a BSS.
 This parameter is meaningful only when operating in ap mode.
 .It Cm bss:cwmin Ar ac Ar count
 Set the CWmin channel access parameter to send to stations in a BSS.
 This parameter is meaningful only when operating in ap mode.
 .It Cm bss:cwmax Ar ac Ar count
 Set the CWmax channel access parameter to send to stations in a BSS.
 This parameter is meaningful only when operating in ap mode.
 .It Cm bss:txoplimit Ar ac Ar limit
 Set the TxOpLimit channel access parameter to send to stations in a BSS.
 This parameter is meaningful only when operating in ap mode.
 .El
 .It Cm wps
 Enable Wireless Privacy Subscriber support.
 Note that WPS support requires a WPS-capable supplicant.
 To disable this function use
 .Fl wps .
 .El
 .Pp
 The following parameters support an optional access control list
 feature available with some adapters when operating in ap mode; see
 .Xr wlan_acl 4 .
 This facility allows an access point to accept/deny association
 requests based on the MAC address of the station.
 Note that this feature does not significantly enhance security
 as MAC address spoofing is easy to do.
 .Bl -tag -width indent
 .It Cm mac:add Ar address
 Add the specified MAC address to the database.
 Depending on the policy setting association requests from the
 specified station will be allowed or denied.
 .It Cm mac:allow
 Set the ACL policy to permit association only by
 stations registered in the database.
 .It Cm mac:del Ar address
 Delete the specified MAC address from the database.
 .It Cm mac:deny
 Set the ACL policy to deny association only by
 stations registered in the database.
 .It Cm mac:kick Ar address
 Force the specified station to be deauthenticated.
 This typically is done to block a station after updating the
 address database.
 .It Cm mac:open
 Set the ACL policy to allow all stations to associate.
 .It Cm mac:flush
 Delete all entries in the database.
 .It Cm mac:radius
 Set the ACL policy to permit association only by
 stations approved by a RADIUS server.
 Note that this feature requires the
 .Xr hostapd 8
 program be configured to do the right thing
 as it handles the RADIUS processing
 (and marks stations as authorized).
 .El
 .Pp
 The following parameters are related to a wireless interface operating in mesh
 mode:
 .Bl -tag -width indent
 .It Cm meshid Ar meshid
 Set the desired Mesh Identifier.
 The Mesh ID is a string up to 32 characters in length.
 A mesh interface must have a Mesh Identifier specified
 to reach an operational state.
 .It Cm meshttl Ar ttl
 Set the desired ``time to live'' for mesh forwarded packets;
 this is the number of hops a packet may be forwarded before
 it is discarded.
 The default setting for
 .Cm meshttl
 is 31.
 .It Cm meshpeering
 Enable or disable peering with neighbor mesh stations.
 Stations must peer before any data packets can be exchanged.
 By default
 .Cm meshpeering
 is enabled.
 .It Cm meshforward
 Enable or disable forwarding packets by a mesh interface.
 By default
 .Cm meshforward
 is enabled.
 .It Cm meshgate
 This attribute specifies whether or not the mesh STA activates mesh gate
 announcements.
 By default
 .Cm meshgate
 is disabled.
 .It Cm meshmetric Ar protocol
 Set the specified
 .Ar protocol
 as the link metric protocol used on a mesh network.
 The default protocol is called
 .Ar AIRTIME .
 The mesh interface will restart after changing this setting.
 .It Cm meshpath Ar protocol
 Set the specified
 .Ar protocol
 as the path selection protocol used on a mesh network.
 The only available protocol at the moment is called
 .Ar HWMP
 (Hybrid Wireless Mesh Protocol).
 The mesh interface will restart after changing this setting.
 .It Cm hwmprootmode Ar mode
 Stations on a mesh network can operate as ``root nodes.''
 Root nodes try to find paths to all mesh nodes and advertise themselves
 regularly.
 When there is a root mesh node on a network, other mesh nodes can setup
 paths between themselves faster because they can use the root node
 to find the destination.
 This path may not be the best, but on-demand
 routing will eventually find the best path.
 The following modes are recognized:
 .Pp
 .Bl -tag -width ".Cm PROACTIVE" -compact
 .It Cm DISABLED
 Disable root mode.
 .It Cm NORMAL
 Send broadcast path requests every two seconds.
 Nodes on the mesh without a path to this root mesh station will try to
 discover a path to us.
 .It Cm PROACTIVE
 Send broadcast path requests every two seconds and every node must reply
 with a path reply even if it already has a path to this root mesh station.
 .It Cm RANN
 Send broadcast root announcement (RANN) frames.
 Nodes on the mesh without a path to this root mesh station will try to
 discover a path to us.
 .El
 By default
 .Cm hwmprootmode
 is set to
 .Ar DISABLED .
 .It Cm hwmpmaxhops Ar cnt
 Set the maximum number of hops allowed in an HWMP path to
 .Ar cnt .
 The default setting for
 .Cm hwmpmaxhops
 is 31.
 .El
 .Pp
 The following parameters are for compatibility with other systems:
 .Bl -tag -width indent
 .It Cm nwid Ar ssid
 Another name for the
 .Cm ssid
 parameter.
 Included for
 .Nx
 compatibility.
 .It Cm stationname Ar name
 Set the name of this station.
 The station name is not part of the IEEE 802.11
 protocol though some interfaces support it.
 As such it only
 seems to be meaningful to identical or virtually identical equipment.
 Setting the station name is identical in syntax to setting the SSID.
 One can also use
 .Cm station
 for
 .Bsx
 compatibility.
 .It Cm wep
 Another way of saying
 .Cm wepmode on .
 Included for
 .Bsx
 compatibility.
 .It Fl wep
 Another way of saying
 .Cm wepmode off .
 Included for
 .Bsx
 compatibility.
 .It Cm nwkey key
 Another way of saying:
 .Dq Li "wepmode on weptxkey 1 wepkey 1:key wepkey 2:- wepkey 3:- wepkey 4:-" .
 Included for
 .Nx
 compatibility.
 .It Cm nwkey Xo
 .Sm off
 .Ar n : k1 , k2 , k3 , k4
 .Sm on
 .Xc
 Another way of saying
 .Dq Li "wepmode on weptxkey n wepkey 1:k1 wepkey 2:k2 wepkey 3:k3 wepkey 4:k4" .
 Included for
 .Nx
 compatibility.
 .It Fl nwkey
 Another way of saying
 .Cm wepmode off .
 Included for
 .Nx
 compatibility.
 .El
 .Pp
 The following parameters are specific to bridge interfaces:
 .Bl -tag -width indent
 .It Cm addm Ar interface
 Add the interface named by
 .Ar interface
 as a member of the bridge.
 The interface is put into promiscuous mode
 so that it can receive every packet sent on the network.
 .It Cm deletem Ar interface
 Remove the interface named by
 .Ar interface
 from the bridge.
 Promiscuous mode is disabled on the interface when
 it is removed from the bridge.
 .It Cm maxaddr Ar size
 Set the size of the bridge address cache to
 .Ar size .
 The default is 2000 entries.
 .It Cm timeout Ar seconds
 Set the timeout of address cache entries to
 .Ar seconds
 seconds.
 If
 .Ar seconds
 is zero, then address cache entries will not be expired.
 The default is 1200 seconds.
 .It Cm addr
 Display the addresses that have been learned by the bridge.
 .It Cm static Ar interface-name Ar address
 Add a static entry into the address cache pointing to
 .Ar interface-name .
 Static entries are never aged out of the cache or replaced, even if the
 address is seen on a different interface.
 .It Cm deladdr Ar address
 Delete
 .Ar address
 from the address cache.
 .It Cm flush
 Delete all dynamically-learned addresses from the address cache.
 .It Cm flushall
 Delete all addresses, including static addresses, from the address cache.
 .It Cm discover Ar interface
 Mark an interface as a
 .Dq discovering
 interface.
 When the bridge has no address cache entry
 (either dynamic or static)
 for the destination address of a packet,
 the bridge will forward the packet to all
 member interfaces marked as
 .Dq discovering .
 This is the default for all interfaces added to a bridge.
 .It Cm -discover Ar interface
 Clear the
 .Dq discovering
 attribute on a member interface.
 For interfaces without the
 .Dq discovering
 attribute, the only packets forwarded on the interface are broadcast
 or multicast packets and packets for which the destination address
 is known to be on the interface's segment.
 .It Cm learn Ar interface
 Mark an interface as a
 .Dq learning
 interface.
 When a packet arrives on such an interface, the source
 address of the packet is entered into the address cache as being a
 destination address on the interface's segment.
 This is the default for all interfaces added to a bridge.
 .It Cm -learn Ar interface
 Clear the
 .Dq learning
 attribute on a member interface.
 .It Cm sticky Ar interface
 Mark an interface as a
 .Dq sticky
 interface.
 Dynamically learned address entries are treated as static once entered into
 the cache.
 Sticky entries are never aged out of the cache or replaced, even if the
 address is seen on a different interface.
 .It Cm -sticky Ar interface
 Clear the
 .Dq sticky
 attribute on a member interface.
 .It Cm private Ar interface
 Mark an interface as a
 .Dq private
 interface.
 A private interface does not forward any traffic to any other port that is also
 a private interface.
 .It Cm -private Ar interface
 Clear the
 .Dq private
 attribute on a member interface.
 .It Cm span Ar interface
 Add the interface named by
 .Ar interface
 as a span port on the bridge.
 Span ports transmit a copy of every frame received by the bridge.
 This is most useful for snooping a bridged network passively on
 another host connected to one of the span ports of the bridge.
 .It Cm -span Ar interface
 Delete the interface named by
 .Ar interface
 from the list of span ports of the bridge.
 .It Cm stp Ar interface
 Enable Spanning Tree protocol on
 .Ar interface .
 The
 .Xr if_bridge 4
 driver has support for the IEEE 802.1D Spanning Tree protocol (STP).
 Spanning Tree is used to detect and remove loops in a network topology.
 .It Cm -stp Ar interface
 Disable Spanning Tree protocol on
 .Ar interface .
 This is the default for all interfaces added to a bridge.
 .It Cm edge Ar interface
 Set
 .Ar interface
 as an edge port.
 An edge port connects directly to end stations and cannot create bridging
 loops in the network; this allows it to transition straight to forwarding.
 .It Cm -edge Ar interface
 Disable edge status on
 .Ar interface .
 .It Cm autoedge Ar interface
 Allow
 .Ar interface
 to automatically detect edge status.
 This is the default for all interfaces added to a bridge.
 .It Cm -autoedge Ar interface
 Disable automatic edge status on
 .Ar interface .
 .It Cm ptp Ar interface
 Set the
 .Ar interface
 as a point to point link.
 This is required for straight transitions to forwarding and
 should be enabled on a direct link to another RSTP capable switch.
 .It Cm -ptp Ar interface
 Disable point to point link status on
 .Ar interface .
 This should be disabled for a half duplex link and for an interface
 connected to a shared network segment,
 like a hub or a wireless network.
 .It Cm autoptp Ar interface
 Automatically detect the point to point status on
 .Ar interface
 by checking the full duplex link status.
 This is the default for interfaces added to the bridge.
 .It Cm -autoptp Ar interface
 Disable automatic point to point link detection on
 .Ar interface .
 .It Cm maxage Ar seconds
 Set the time that a Spanning Tree protocol configuration is valid.
 The default is 20 seconds.
 The minimum is 6 seconds and the maximum is 40 seconds.
 .It Cm fwddelay Ar seconds
 Set the time that must pass before an interface begins forwarding
 packets when Spanning Tree is enabled.
 The default is 15 seconds.
 The minimum is 4 seconds and the maximum is 30 seconds.
 .It Cm hellotime Ar seconds
 Set the time between broadcasting of Spanning Tree protocol
 configuration messages.
 The hello time may only be changed when operating in legacy stp mode.
 The default is 2 seconds.
 The minimum is 1 second and the maximum is 2 seconds.
 .It Cm priority Ar value
 Set the bridge priority for Spanning Tree.
 The default is 32768.
 The minimum is 0 and the maximum is 61440.
 .It Cm proto Ar value
 Set the Spanning Tree protocol.
 The default is rstp.
 The available options are stp and rstp.
 .It Cm holdcnt Ar value
 Set the transmit hold count for Spanning Tree.
 This is the number of packets transmitted before being rate limited.
 The default is 6.
 The minimum is 1 and the maximum is 10.
 .It Cm ifpriority Ar interface Ar value
 Set the Spanning Tree priority of
 .Ar interface
 to
 .Ar value .
 The default is 128.
 The minimum is 0 and the maximum is 240.
 .It Cm ifpathcost Ar interface Ar value
 Set the Spanning Tree path cost of
 .Ar interface
 to
 .Ar value .
 The default is calculated from the link speed.
 To change a previously selected path cost back to automatic, set the
 cost to 0.
 The minimum is 1 and the maximum is 200000000.
 .It Cm ifmaxaddr Ar interface Ar size
 Set the maximum number of hosts allowed from an interface; packets with unknown
 source addresses are dropped until an existing host cache entry expires or is
 removed.
 Set to 0 to disable.
 .El
 .Pp
 The following parameters are specific to lagg interfaces:
 .Bl -tag -width indent
+.It Cm laggtype Ar type
+When creating a lagg interface the type can be specified as either
+.Cm ethernet
+or
+.Cm infiniband .
+If not specified ethernet is the default lagg type.
 .It Cm laggport Ar interface
 Add the interface named by
 .Ar interface
 as a port of the aggregation interface.
 .It Cm -laggport Ar interface
 Remove the interface named by
 .Ar interface
 from the aggregation interface.
 .It Cm laggproto Ar proto
 Set the aggregation protocol.
 The default is
 .Li failover .
 The available options are
 .Li failover ,
 .Li lacp ,
 .Li loadbalance ,
 .Li roundrobin ,
 .Li broadcast
 and
 .Li none .
 .It Cm lagghash Ar option Ns Oo , Ns Ar option Oc
 Set the packet layers to hash for aggregation protocols which load balance.
 The default is
 .Dq l2,l3,l4 .
 The options can be combined using commas.
 .Pp
 .Bl -tag -width ".Cm l2" -compact
 .It Cm l2
 src/dst mac address and optional vlan number.
 .It Cm l3
 src/dst address for IPv4 or IPv6.
 .It Cm l4
 src/dst port for TCP/UDP/SCTP.
 .El
 .It Cm -use_flowid
 Enable local hash computation for RSS hash on the interface.
 The
 .Li loadbalance
 and
 .Li lacp
 modes will use the RSS hash from the network card if available
 to avoid computing one, this may give poor traffic distribution
 if the hash is invalid or uses less of the protocol header information.
 .Cm -use_flowid
 disables use of RSS hash from the network card.
 The default value can be set via the
 .Va net.link.lagg.default_use_flowid
 .Xr sysctl 8
 variable.
 .Li 0
 means
 .Dq disabled
 and
 .Li 1
 means
 .Dq enabled .
 .It Cm use_flowid
 Use the RSS hash from the network card if available.
 .It Cm flowid_shift Ar number
 Set a shift parameter for RSS local hash computation.
 Hash is calculated by using flowid bits in a packet header mbuf
 which are shifted by the number of this parameter.
 .It Cm use_numa
 Enable selection of egress ports based on the native
 .Xr NUMA 4
 domain for the packets being transmitted.
 This is currently only implemented for lacp mode.
 This works only on
 .Xr NUMA 4
 hardware, running a kernel compiled with the
 .Xr NUMA 4
 option, and when interfaces from multiple
 .Xr NUMA 4
 domains are ports of the aggregation interface.
 .It Cm -use_numa
 Disable selection of egress ports based on the native
 .Xr NUMA 4
 domain for the packets being transmitted.
 .It Cm lacp_fast_timeout
 Enable lacp fast-timeout on the interface.
 .It Cm -lacp_fast_timeout
 Disable lacp fast-timeout on the interface.
 .It Cm lacp_strict
 Enable lacp strict compliance on the interface.
 The default value can be set via the
 .Va net.link.lagg.lacp.default_strict_mode
 .Xr sysctl 8
 variable.
 .Li 0
 means
 .Dq disabled
 and
 .Li 1
 means
 .Dq enabled .
 .It Cm -lacp_strict
 Disable lacp strict compliance on the interface.
 .It Cm rr_limit Ar number
 Configure a stride for an interface in round-robin mode.
 The default stride is 1.
 .El
 .Pp
 The following parameters apply to IP tunnel interfaces,
 .Xr gif 4 :
 .Bl -tag -width indent
 .It Cm tunnel Ar src_addr dest_addr
 Configure the physical source and destination address for IP tunnel
 interfaces.
 The arguments
 .Ar src_addr
 and
 .Ar dest_addr
 are interpreted as the outer source/destination for the encapsulating
 IPv4/IPv6 header.
 .It Fl tunnel
 Unconfigure the physical source and destination address for IP tunnel
 interfaces previously configured with
 .Cm tunnel .
 .It Cm deletetunnel
 Another name for the
 .Fl tunnel
 parameter.
 .It Cm accept_rev_ethip_ver
 Set a flag to accept both correct EtherIP packets and ones
 with reversed version field.
 Enabled by default.
 This is for backward compatibility with
 .Fx 6.1 ,
 6.2, 6.3, 7.0, and 7.1.
 .It Cm -accept_rev_ethip_ver
 Clear a flag
 .Cm accept_rev_ethip_ver .
 .It Cm ignore_source
 Set a flag to accept encapsulated packets destined to this host
 independently from source address.
 This may be useful for hosts, that receive encapsulated packets
 from the load balancers.
 .It Cm -ignore_source
 Clear a flag
 .Cm ignore_source .
 .It Cm send_rev_ethip_ver
 Set a flag to send EtherIP packets with reversed version
 field intentionally.
 Disabled by default.
 This is for backward compatibility with
 .Fx 6.1 ,
 6.2, 6.3, 7.0, and 7.1.
 .It Cm -send_rev_ethip_ver
 Clear a flag
 .Cm send_rev_ethip_ver .
 .El
 .Pp
 The following parameters apply to GRE tunnel interfaces,
 .Xr gre 4 :
 .Bl -tag -width indent
 .It Cm tunnel Ar src_addr dest_addr
 Configure the physical source and destination address for GRE tunnel
 interfaces.
 The arguments
 .Ar src_addr
 and
 .Ar dest_addr
 are interpreted as the outer source/destination for the encapsulating
 IPv4/IPv6 header.
 .It Fl tunnel
 Unconfigure the physical source and destination address for GRE tunnel
 interfaces previously configured with
 .Cm tunnel .
 .It Cm deletetunnel
 Another name for the
 .Fl tunnel
 parameter.
 .It Cm grekey Ar key
 Configure the GRE key to be used for outgoing packets.
 Note that
 .Xr gre 4
 will always accept GRE packets with invalid or absent keys.
 This command will result in a four byte MTU reduction on the interface.
 .El
 .Pp
 The following parameters are specific to
 .Xr pfsync 4
 interfaces:
 .Bl -tag -width indent
 .It Cm syncdev Ar iface
 Use the specified interface
 to send and receive pfsync state synchronisation messages.
 .It Fl syncdev
 Stop sending pfsync state synchronisation messages over the network.
 .It Cm syncpeer Ar peer_address
 Make the pfsync link point-to-point rather than using
 multicast to broadcast the state synchronisation messages.
 The peer_address is the IP address of the other host taking part in
 the pfsync cluster.
 .It Fl syncpeer
 Broadcast the packets using multicast.
 .It Cm maxupd Ar n
 Set the maximum number of updates for a single state which
 can be collapsed into one.
 This is an 8-bit number; the default value is 128.
 .It Cm defer
 Defer transmission of the first packet in a state until a peer has
 acknowledged that the associated state has been inserted.
 .It Fl defer
 Do not defer the first packet in a state.
 This is the default.
 .El
 .Pp
 The following parameters are specific to
 .Xr vlan 4
 interfaces:
 .Bl -tag -width indent
 .It Cm vlan Ar vlan_tag
 Set the VLAN tag value to
 .Ar vlan_tag .
 This value is a 12-bit VLAN Identifier (VID) which is used to create an 802.1Q
 or 802.1ad VLAN header for packets sent from the
 .Xr vlan 4
 interface.
 Note that
 .Cm vlan
 and
 .Cm vlandev
 must both be set at the same time.
 .It Cm vlanproto Ar vlan_proto
 Set the VLAN encapsulation protocol to
 .Ar vlan_proto .
 Supported encapsulation protocols are currently
 .Dq 802.1Q
 and
 .Dq 802.1ad .
 The default encapsulation protocol is
 .Dq 802.1Q .
 .It Cm vlanpcp Ar priority_code_point
 Priority code point
 .Pq Dv PCP
 is a 3-bit field which refers to the IEEE 802.1p
 class of service and maps to the frame priority level.
 .Pp
 Values in order of priority are:
 .Cm 1
 .Pq Dv Background (lowest) ,
 .Cm 0
 .Pq Dv Best effort (default) ,
 .Cm 2
 .Pq Dv Excellent effort ,
 .Cm 3
 .Pq Dv Critical applications ,
 .Cm 4
 .Pq Dv Video, < 100ms latency ,
 .Cm 5
 .Pq Dv Video, < 10ms latency ,
 .Cm 6
 .Pq Dv Internetwork control ,
 .Cm 7
 .Pq Dv Network control (highest) .
 .It Cm vlandev Ar iface
 Associate the physical interface
 .Ar iface
 with a
 .Xr vlan 4
 interface.
 Packets transmitted through the
 .Xr vlan 4
 interface will be
 diverted to the specified physical interface
 .Ar iface
 with 802.1Q VLAN encapsulation.
 Packets with 802.1Q encapsulation received
 by the parent interface with the correct VLAN Identifier will be diverted to
 the associated
 .Xr vlan 4
 pseudo-interface.
 The
 .Xr vlan 4
 interface is assigned a
 copy of the parent interface's flags and the parent's Ethernet address.
 The
 .Cm vlandev
 and
 .Cm vlan
 must both be set at the same time.
 If the
 .Xr vlan 4
 interface already has
 a physical interface associated with it, this command will fail.
 To
 change the association to another physical interface, the existing
 association must be cleared first.
 .Pp
 Note: if the hardware tagging capability
 is set on the parent interface, the
 .Xr vlan 4
 pseudo
 interface's behavior changes:
 the
 .Xr vlan 4
 interface recognizes that the
 parent interface supports insertion and extraction of VLAN tags on its
 own (usually in firmware) and that it should pass packets to and from
 the parent unaltered.
 .It Fl vlandev Op Ar iface
 If the driver is a
 .Xr vlan 4
 pseudo device, disassociate the parent interface from it.
 This breaks the link between the
 .Xr vlan 4
 interface and its parent,
 clears its VLAN Identifier, flags and its link address and shuts the interface
 down.
 The
 .Ar iface
 argument is useless and hence deprecated.
 .El
 .Pp
 The following parameters are used to configure
 .Xr vxlan 4
 interfaces.
 .Bl -tag -width indent
 .It Cm vxlanid Ar identifier
 This value is a 24-bit VXLAN Network Identifier (VNI) that identifies the
 virtual network segment membership of the interface.
 .It Cm vxlanlocal Ar address
 The source address used in the encapsulating IPv4/IPv6 header.
 The address should already be assigned to an existing interface.
 When the interface is configured in unicast mode, the listening socket
 is bound to this address.
 .It Cm vxlanremote Ar address
 The interface can be configured in a unicast, or point-to-point, mode
 to create a tunnel between two hosts.
 This is the IP address of the remote end of the tunnel.
 .It Cm vxlangroup Ar address
 The interface can be configured in a multicast mode
 to create a virtual network of hosts.
 This is the IP multicast group address the interface will join.
 .It Cm vxlanlocalport Ar port
 The port number the interface will listen on.
 The default port number is 4789.
 .It Cm vxlanremoteport Ar port
 The destination port number used in the encapsulating IPv4/IPv6 header.
 The remote host should be listening on this port.
 The default port number is 4789.
 Note some other implementations, such as Linux,
 do not default to the IANA assigned port,
 but instead listen on port 8472.
 .It Cm vxlanportrange Ar low high
 The range of source ports used in the encapsulating IPv4/IPv6 header.
 The port selected within the range is based on a hash of the inner frame.
 A range is useful to provide entropy within the outer IP header
 for more effective load balancing.
 The default range is between the
 .Xr sysctl 8
 variables
 .Va net.inet.ip.portrange.first
 and
 .Va net.inet.ip.portrange.last .
 .It Cm vxlantimeout Ar timeout
 The maximum time, in seconds, before an entry in the forwarding table
 is pruned.
 The default is 1200 seconds (20 minutes).
 .It Cm vxlanmaxaddr Ar max
 The maximum number of entries in the forwarding table.
 The default is 2000.
 .It Cm vxlandev Ar dev
 When the interface is configured in multicast mode, the
 .Ar dev
 interface is used to transmit IP multicast packets.
 .It Cm vxlanttl Ar ttl
 The TTL used in the encapsulating IPv4/IPv6 header.
 The default is 64.
 .It Cm vxlanlearn
 The source IP address and inner source Ethernet MAC address of
 received packets are used to dynamically populate the forwarding table.
 When in multicast mode, an entry in the forwarding table allows the
 interface to send the frame directly to the remote host instead of
 broadcasting the frame to the multicast group.
 This is the default.
 .It Fl vxlanlearn
 The forwarding table is not populated by received packets.
 .It Cm vxlanflush
 Delete all dynamically-learned addresses from the forwarding table.
 .It Cm vxlanflushall
 Delete all addresses, including static addresses, from the forwarding table.
 .El
 .Pp
 The following parameters are used to configure
 .Xr carp 4
 protocol on an interface:
 .Bl -tag -width indent
 .It Cm vhid Ar n
 Set the virtual host ID.
 This is a required setting to initiate
 .Xr carp 4 .
 If the virtual host ID does not exist yet, it is created and attached to the
 interface, otherwise configuration of an existing vhid is adjusted.
 If the
 .Cm vhid
 keyword is supplied along with an
 .Dq inet6
 or
 .Dq inet
 address, then this address is configured to be run under control of the
 specified vhid.
 Whenever the last address that refers to a particular vhid is removed from an
 interface, the vhid is automatically removed from the interface and destroyed.
 Any other configuration parameters for the
 .Xr carp 4
 protocol should be supplied along with the
 .Cm vhid
 keyword.
 Acceptable values for vhid are 1 to 255.
 .It Cm advbase Ar seconds
 Specifies the base of the advertisement interval in seconds.
 The acceptable values are 1 to 255.
 The default value is 1.
 .It Cm advskew Ar interval
 Specifies the skew to add to the base advertisement interval to
 make one host advertise slower than another host.
 It is specified in 1/256 of seconds.
 The acceptable values are 0 to 254.
 The default value is 0.
 .It Cm pass Ar phrase
 Set the authentication key to
 .Ar phrase .
 .It Cm state Ar MASTER|BACKUP
 Forcibly change state of a given vhid.
 .El
 .Pp
 The
 .Nm
 utility displays the current configuration for a network interface
 when no optional parameters are supplied.
 If a protocol family is specified,
 .Nm
 will report only the details specific to that protocol family.
 .Pp
 If the
 .Fl m
 flag is passed before an interface name,
 .Nm
 will display the capability list and all
 of the supported media for the specified interface.
 If the
 .Fl L
 flag is supplied, address lifetime is displayed for IPv6 addresses,
 as time offset string.
 .Pp
 Optionally, the
 .Fl a
 flag may be used instead of an interface name.
 This flag instructs
 .Nm
 to display information about all interfaces in the system.
 The
 .Fl d
 flag limits this to interfaces that are down,
 .Fl u
 limits this to interfaces that are up,
 .Fl g
 limits this to members of the specified group of interfaces, and
 .Fl G
 excludes members of the specified group from the list.
 Both
 .Fl g
 and
 .Fl G
 flags may be specified to apply both conditions.
 Only one
 .Fl g
 option should be specified, as later ones override previous ones
 (same for
 .Fl G ) .
 .Sy groupname
 may contain shell patterns in which case it should be quoted.
 When no arguments are given,
 .Fl a
 is implied.
 .Pp
 The
 .Fl l
 flag may be used to list all available interfaces on the system, with
 no other additional information.
 If an
 .Ar address_family
 is specified, only interfaces of that type will be listed.
 .Fl l Dq ether
 will list only Ethernet adapters, excluding the loopback interface.
 Use of this flag is mutually exclusive
 with all other flags and commands, except for
 .Fl d
 (only list interfaces that are down)
 and
 .Fl u
 (only list interfaces that are up).
 .Pp
 The
 .Fl v
 flag may be used to get more verbose status for an interface.
 .Pp
 The
 .Fl C
 flag may be used to list all of the interface cloners available on
 the system, with no additional information.
 Use of this flag is mutually exclusive with all other flags and commands.
 .Pp
 The
 .Fl k
 flag causes keying information for the interface, if available, to be
 printed.
 For example, the values of 802.11 WEP keys and
 .Xr carp 4
 passphrases will be printed, if accessible to the current user.
 This information is not printed by default, as it may be considered
 sensitive.
 .Pp
 If the network interface driver is not present in the kernel then
 .Nm
 will attempt to load it.
 The
 .Fl n
 flag disables this behavior.
 .Pp
 Only the super-user may modify the configuration of a network interface.
 .Sh EXAMPLES
 Assign the IPv4 address
 .Li 192.0.2.10 ,
 with a network mask of
 .Li 255.255.255.0 ,
 to the interface
 .Li em0 :
 .Dl # ifconfig em0 inet 192.0.2.10 netmask 255.255.255.0
 .Pp
 Add the IPv4 address
 .Li 192.0.2.45 ,
 with the CIDR network prefix
 .Li /28 ,
 to the interface
 .Li em0 ,
 using
 .Cm add
 as a synonym for the canonical form of the option
 .Cm alias :
 .Dl # ifconfig em0 inet 192.0.2.45/28 add
 .Pp
 Remove the IPv4 address
 .Li 192.0.2.45
 from the interface
 .Li em0 :
 .Dl # ifconfig em0 inet 192.0.2.45 -alias
 .Pp
 Enable IPv6 functionality of the interface:
 .Dl # ifconfig em0 inet6 -ifdisabled
 .Pp
 Add the IPv6 address
 .Li 2001:DB8:BDBD::123/48
 to the interface
 .Li em0 :
 .Dl # ifconfig em0 inet6 2001:db8:bdbd::123 prefixlen 48 alias
 Note that lower case hexadecimal IPv6 addresses are acceptable.
 .Pp
 Remove the IPv6 address added in the above example,
 using the
 .Li /
 character as shorthand for the network prefix,
 and using
 .Cm delete
 as a synonym for the canonical form of the option
 .Fl alias :
 .Dl # ifconfig em0 inet6 2001:db8:bdbd::123/48 delete
 .Pp
 Configure a single CARP redundant address on igb0, and then switch it
 to be master:
 .Dl # ifconfig igb0 vhid 1 10.0.0.1/24 pass foobar up
 .Dl # ifconfig igb0 vhid 1 state master
 .Pp
 Configure the interface
 .Li xl0 ,
 to use 100baseTX, full duplex Ethernet media options:
 .Dl # ifconfig xl0 media 100baseTX mediaopt full-duplex
 .Pp
 Label the em0 interface as an uplink:
 .Dl # ifconfig em0 description \&"Uplink to Gigabit Switch 2\&"
 .Pp
 Create the software network interface
 .Li gif1 :
 .Dl # ifconfig gif1 create
 .Pp
 Destroy the software network interface
 .Li gif1 :
 .Dl # ifconfig gif1 destroy
 .Pp
 Display available wireless networks using
 .Li wlan0 :
 .Dl # ifconfig wlan0 list scan
 .Pp
 Display inet and inet6 address subnet masks in CIDR notation:
 .Dl # ifconfig -f inet:cidr,inet6:cidr
 .Pp
 Display interfaces that are up with the exception of loopback:
 .Dl # ifconfig -a -u -G lo
 .Sh DIAGNOSTICS
 Messages indicating the specified interface does not exist, the
 requested address is unknown, or the user is not privileged and
 tried to alter an interface's configuration.
 .Sh SEE ALSO
 .Xr netstat 1 ,
 .Xr carp 4 ,
 .Xr gif 4 ,
 .Xr netintro 4 ,
 .Xr pfsync 4 ,
 .Xr polling 4 ,
 .Xr vlan 4 ,
 .Xr vxlan 4 ,
 .Xr devd.conf 5 ,
 .\" .Xr eon 5 ,
 .Xr devd 8 ,
 .Xr jail 8 ,
 .Xr rc 8 ,
 .Xr routed 8 ,
 .Xr sysctl 8
 .Sh HISTORY
 The
 .Nm
 utility appeared in
 .Bx 4.2 .
 .Sh BUGS
 Basic IPv6 node operation requires a link-local address on each
 interface configured for IPv6.
 Normally, such an address is automatically configured by the
 kernel on each interface added to the system or enabled; this behavior may
 be disabled by setting per-interface flag
 .Cm -auto_linklocal .
 The default value of this flag is 1 and can be disabled by using the sysctl
 MIB variable
 .Va net.inet6.ip6.auto_linklocal .
 .Pp
 Do not configure IPv6 addresses with no link-local address by using
 .Nm .
 It can result in unexpected behaviors of the kernel.
Index: head/sbin/ifconfig/iflagg.c
===================================================================
--- head/sbin/ifconfig/iflagg.c	(revision 366932)
+++ head/sbin/ifconfig/iflagg.c	(revision 366933)
@@ -1,338 +1,367 @@
 /*-
  */
 
 #ifndef lint
 static const char rcsid[] =
   "$FreeBSD$";
 #endif /* not lint */
 
 #include <sys/param.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 
 #include <stdlib.h>
 #include <unistd.h>
 
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_lagg.h>
 #include <net/ieee8023ad_lacp.h>
 #include <net/route.h>
 
 #include <ctype.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <err.h>
 #include <errno.h>
 
 #include "ifconfig.h"
 
-char lacpbuf[120];	/* LACP peer '[(a,a,a),(p,p,p)]' */
+static struct iflaggparam params = {	/* creation parameters handed to SIOCIFCREATE2 by lagg_create() */
+	.lagg_type = LAGG_TYPE_DEFAULT,	/* replaced by setlaggtype() when "laggtype" is given */
+};
 
+static char lacpbuf[120];	/* LACP peer '[(a,a,a),(p,p,p)]' */
+
 static void
 setlaggport(const char *val, int d, int s, const struct afswtch *afp)
 {
 	struct lagg_reqport rp;
 
 	bzero(&rp, sizeof(rp));
 	strlcpy(rp.rp_ifname, name, sizeof(rp.rp_ifname));
 	strlcpy(rp.rp_portname, val, sizeof(rp.rp_portname));
 
 	/*
 	 * Do not exit with an error here.  Doing so permits a
 	 * failed NIC to take down an entire lagg.
 	 *
 	 * Don't error at all if the port is already in the lagg.
 	 */
 	if (ioctl(s, SIOCSLAGGPORT, &rp) && errno != EEXIST) {
 		warnx("%s %s: SIOCSLAGGPORT: %s",
 		    name, val, strerror(errno));
 		exit_code = 1;
 	}
 }
 
 static void
 unsetlaggport(const char *val, int d, int s, const struct afswtch *afp)
 {
 	struct lagg_reqport rp;
 
 	bzero(&rp, sizeof(rp));
 	strlcpy(rp.rp_ifname, name, sizeof(rp.rp_ifname));
 	strlcpy(rp.rp_portname, val, sizeof(rp.rp_portname));
 
 	if (ioctl(s, SIOCSLAGGDELPORT, &rp))
 		err(1, "SIOCSLAGGDELPORT");
 }
 
 static void
 setlaggproto(const char *val, int d, int s, const struct afswtch *afp)
 {
 	struct lagg_protos lpr[] = LAGG_PROTOS;
 	struct lagg_reqall ra;
 	int i;
 
 	bzero(&ra, sizeof(ra));
 	ra.ra_proto = LAGG_PROTO_MAX;
 
 	for (i = 0; i < nitems(lpr); i++) {
 		if (strcmp(val, lpr[i].lpr_name) == 0) {
 			ra.ra_proto = lpr[i].lpr_proto;
 			break;
 		}
 	}
 	if (ra.ra_proto == LAGG_PROTO_MAX)
 		errx(1, "Invalid aggregation protocol: %s", val);
 
 	strlcpy(ra.ra_ifname, name, sizeof(ra.ra_ifname));
 	if (ioctl(s, SIOCSLAGG, &ra) != 0)
 		err(1, "SIOCSLAGG");
 }
 
 /*
  * "flowid_shift" handler: parse val as a decimal shift count and set it
  * on the lagg interface being configured.
  *
  * The text must be a complete decimal number whose value has no bits
  * outside LAGG_OPT_FLOWIDSHIFT_MASK; anything else is a fatal error.
  */
 static void
 setlaggflowidshift(const char *val, int d, int s, const struct afswtch *afp)
 {
 	struct lagg_reqopts ro;
 	char *end;
 	long shift;
 
 	/*
 	 * Validate the full-width value before narrowing it to int, so
 	 * that non-numeric input, trailing garbage and values that would
 	 * only pass the mask test after truncation are all rejected.
 	 */
 	errno = 0;
 	shift = strtol(val, &end, 10);
 	if (end == val || *end != '\0' || errno == ERANGE ||
 	    (shift & ~(long)LAGG_OPT_FLOWIDSHIFT_MASK) != 0)
 		errx(1, "Invalid flowid_shift option: %s", val);
 
 	bzero(&ro, sizeof(ro));
 	ro.ro_opts = LAGG_OPT_FLOWIDSHIFT;
 	strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname));
 	ro.ro_flowid_shift = (int)shift;
 
 	if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0)
 		err(1, "SIOCSLAGGOPTS");
 }
 
 /*
  * "rr_limit" handler: parse val as the round-robin stride and set it on
  * the lagg interface being configured.
  *
  * The text must be a complete, non-zero decimal number that fits the
  * 32-bit ro_bkt field; anything else is a fatal error.
  */
 static void
 setlaggrr_limit(const char *val, int d, int s, const struct afswtch *afp)
 {
 	struct lagg_reqopts ro;
 	char *end;
 	unsigned long stride;
 
 	/*
 	 * Check the parse result before narrowing: reject empty or
 	 * non-numeric input, trailing garbage, overflow, zero, and values
 	 * that would change when stored in a uint32_t.
 	 */
 	errno = 0;
 	stride = strtoul(val, &end, 10);
 	if (end == val || *end != '\0' || errno == ERANGE ||
 	    stride == 0 || stride != (unsigned long)(uint32_t)stride)
 		errx(1, "Invalid round-robin stride: %s", val);
 
 	bzero(&ro, sizeof(ro));
 	strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname));
 	ro.ro_opts = LAGG_OPT_RR_LIMIT;
 	ro.ro_bkt = (uint32_t)stride;
 
 	if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0)
 		err(1, "SIOCSLAGGOPTS");
 }
 
 /*
  * Shared handler for the boolean lagg option keywords (use_flowid,
  * use_numa, lacp_strict, lacp_txtest, lacp_rxtest, lacp_fast_timeout).
  * d is the value from the command table: LAGG_OPT_* for the plain
  * keyword and its negation for the "-keyword" form.  It is passed to
  * the kernel unchanged in ro_opts.
  */
 static void
 setlaggsetopt(const char *val, int d, int s, const struct afswtch *afp)
 {
 	struct lagg_reqopts ro;
 
 	bzero(&ro, sizeof(ro));
 	ro.ro_opts = d;
 	switch (ro.ro_opts) {
 	case LAGG_OPT_USE_FLOWID:
 	case -LAGG_OPT_USE_FLOWID:
 	case LAGG_OPT_USE_NUMA:
 	case -LAGG_OPT_USE_NUMA:
 	case LAGG_OPT_LACP_STRICT:
 	case -LAGG_OPT_LACP_STRICT:
 	case LAGG_OPT_LACP_TXTEST:
 	case -LAGG_OPT_LACP_TXTEST:
 	case LAGG_OPT_LACP_RXTEST:
 	case -LAGG_OPT_LACP_RXTEST:
 	case LAGG_OPT_LACP_FAST_TIMO:
 	case -LAGG_OPT_LACP_FAST_TIMO:
 		break;
 	default:
 		/*
 		 * No system call has failed here, so errno is meaningless;
 		 * use errx() so no unrelated strerror() text is appended.
 		 */
 		errx(1, "Invalid lagg option");
 	}
 	strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname));
 
 	if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0)
 		err(1, "SIOCSLAGGOPTS");
 }
 
 /*
  * "lagghash" handler: val is a comma-separated list drawn from "l2",
  * "l3" and "l4".  The matching LAGG_F_HASHL* flags are combined and set
  * on the lagg interface being configured.  An unknown token, an empty
  * selection, or a failed ioctl is fatal.
  */
 static void
 setlagghash(const char *val, int d, int s, const struct afswtch *afp)
 {
 	struct lagg_reqflags rf;
 	char *str, *tmp, *tok;
 
 	/* Zero the whole request, as the other lagg setters do. */
 	bzero(&rf, sizeof(rf));
 
 	/* strsep() modifies its input, so tokenize a private copy. */
 	str = tmp = strdup(val);
 	if (str == NULL)
 		err(1, "strdup");
 	while ((tok = strsep(&tmp, ",")) != NULL) {
 		if (strcmp(tok, "l2") == 0)
 			rf.rf_flags |= LAGG_F_HASHL2;
 		else if (strcmp(tok, "l3") == 0)
 			rf.rf_flags |= LAGG_F_HASHL3;
 		else if (strcmp(tok, "l4") == 0)
 			rf.rf_flags |= LAGG_F_HASHL4;
 		else
 			errx(1, "Invalid lagghash option: %s", tok);
 	}
 	free(str);
 	if (rf.rf_flags == 0)
 		errx(1, "No lagghash options supplied");
 
 	strlcpy(rf.rf_ifname, name, sizeof(rf.rf_ifname));
 	if (ioctl(s, SIOCSLAGGHASH, &rf))
 		err(1, "SIOCSLAGGHASH");
 }
 
 /*
  * Format the six octets at mac into buf (at most buflen bytes) as
  * upper-case two-digit hex values separated by dashes.  Returns buf.
  */
 static char *
 lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen)
 {
 	int octet[6];
 	int i;
 
 	for (i = 0; i < 6; i++)
 		octet[i] = (int)mac[i];
 
 	snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X",
 	    octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);
 
 	return (buf);
 }
 
 /*
  * Format the actor and partner tuples of req as
  * "[(prio,mac,key,portprio,portno),<sep>(...)]" into the file-scope
  * lacpbuf and return it.  The buffer is shared, so each call overwrites
  * the previous result.
  */
 static char *
 lacp_format_peer(struct lacp_opreq *req, const char *sep)
 {
 	char actor_mac[20], partner_mac[20];
 
 	lacp_format_mac(req->actor_mac, actor_mac, sizeof(actor_mac));
 	lacp_format_mac(req->partner_mac, partner_mac, sizeof(partner_mac));
 
 	snprintf(lacpbuf, sizeof(lacpbuf),
 	    "[(%04X,%s,%04X,%04X,%04X),%s(%04X,%s,%04X,%04X,%04X)]",
 	    req->actor_prio, actor_mac,
 	    req->actor_key, req->actor_portprio, req->actor_portno,
 	    sep,
 	    req->partner_prio, partner_mac,
 	    req->partner_key, req->partner_portprio, req->partner_portno);
 
 	return (lacpbuf);
 }
 
 /*
  * Print the lagg-specific part of an interface's status: the
  * aggregation protocol, the hash layers in use and one line per member
  * port.  With the verbose flag set it also prints the option flags,
  * statistics and LACP identifiers.  Nothing is printed when SIOCGLAGG
  * fails.
  */
 static void
 lagg_status(int s)
 {
 	struct lagg_protos lpr[] = LAGG_PROTOS;
 	struct lagg_reqport rpbuf[LAGG_MAX_PORTS];
 	struct lagg_reqall ra;
 	struct lagg_reqopts ro;
 	struct lagg_reqflags rf;
 	struct lacp_opreq *lp;
 	const char *proto = "<unknown>";
 	int i;
 
 	bzero(&ra, sizeof(ra));
 	bzero(&ro, sizeof(ro));
 
 	/* The kernel fills rpbuf with the per-port records. */
 	strlcpy(ra.ra_ifname, name, sizeof(ra.ra_ifname));
 	ra.ra_size = sizeof(rpbuf);
 	ra.ra_port = rpbuf;
 
 	/* Result deliberately unchecked: ro was zeroed above. */
 	strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname));
 	ioctl(s, SIOCGLAGGOPTS, &ro);
 
 	/* Treat a failed flags query as "no hash flags set". */
 	strlcpy(rf.rf_ifname, name, sizeof(rf.rf_ifname));
 	if (ioctl(s, SIOCGLAGGFLAGS, &rf) != 0)
 		rf.rf_flags = 0;
 
 	if (ioctl(s, SIOCGLAGG, &ra) == 0) {
 		lp = (struct lacp_opreq *)&ra.ra_lacpreq;
 
 		/* Map the protocol number to its name; else "<unknown>". */
 		for (i = 0; i < nitems(lpr); i++) {
 			if (ra.ra_proto == lpr[i].lpr_proto) {
 				proto = lpr[i].lpr_name;
 				break;
 			}
 		}
 
 		printf("\tlaggproto %s", proto);
 		if (rf.rf_flags & LAGG_F_HASHMASK) {
 			const char *sep = "";
 
 			printf(" lagghash ");
 			if (rf.rf_flags & LAGG_F_HASHL2) {
 				printf("%sl2", sep);
 				sep = ",";
 			}
 			if (rf.rf_flags & LAGG_F_HASHL3) {
 				printf("%sl3", sep);
 				sep = ",";
 			}
 			if (rf.rf_flags & LAGG_F_HASHL4) {
 				printf("%sl4", sep);
 				sep = ",";
 			}
 		}
 		putchar('\n');
 		if (verbose) {
 			printf("\tlagg options:\n");
 			printb("\t\tflags", ro.ro_opts, LAGG_OPT_BITS);
 			putchar('\n');
 			printf("\t\tflowid_shift: %d\n", ro.ro_flowid_shift);
 			/* ro_bkt holds a uint32_t, so print it unsigned. */
 			if (ra.ra_proto == LAGG_PROTO_ROUNDROBIN)
 				printf("\t\trr_limit: %u\n", ro.ro_bkt);
 			printf("\tlagg statistics:\n");
 			printf("\t\tactive ports: %d\n", ro.ro_active);
 			printf("\t\tflapping: %u\n", ro.ro_flapping);
 			if (ra.ra_proto == LAGG_PROTO_LACP) {
 				printf("\tlag id: %s\n",
 				    lacp_format_peer(lp, "\n\t\t "));
 			}
 		}
 
 		/* One line per member port, plus LACP detail if verbose. */
 		for (i = 0; i < ra.ra_ports; i++) {
 			lp = (struct lacp_opreq *)&rpbuf[i].rp_lacpreq;
 			printf("\tlaggport: %s ", rpbuf[i].rp_portname);
 			printb("flags", rpbuf[i].rp_flags, LAGG_PORT_BITS);
 			if (verbose && ra.ra_proto == LAGG_PROTO_LACP)
 				printb(" state", lp->actor_state,
 				    LACP_STATE_BITS);
 			putchar('\n');
 			if (verbose && ra.ra_proto == LAGG_PROTO_LACP)
 				printf("\t\t%s\n",
 				    lacp_format_peer(lp, "\n\t\t "));
 		}
 
 		/* Intentionally disabled listing of known protocols. */
 		if (0 /* XXX */) {
 			printf("\tsupported aggregation protocols:\n");
 			for (i = 0; i < nitems(lpr); i++)
 				printf("\t\tlaggproto %s\n", lpr[i].lpr_name);
 		}
 	}
 }
 
+static
+DECL_CMD_FUNC(setlaggtype, arg, d)	/* "laggtype" handler: look arg up in the LAGG_TYPES table */
+{
+	static const struct lagg_types lt[] = LAGG_TYPES;
+	int i;
+
+	for (i = 0; i < nitems(lt); i++) {
+		if (strcmp(arg, lt[i].lt_name) == 0) {
+			params.lagg_type = lt[i].lt_value;	/* stored in params, which lagg_create() passes to the kernel */
+			return;
+		}
+	}
+	errx(1, "invalid lagg type: %s", arg);	/* reached only when no table entry matched arg */
+}
+
+static void
+lagg_create(int s, struct ifreq *ifr)	/* clone callback registered for "lagg" by lagg_ctor() */
+{
+	ifr->ifr_data = (caddr_t) &params;	/* attach the file-scope creation parameters to the request */
+	if (ioctl(s, SIOCIFCREATE2, ifr) < 0)
+		err(1, "SIOCIFCREATE2");
+}
+
 /*
  * Command keywords provided by this module; each entry is registered
  * with cmd_register() by lagg_ctor().  The "-keyword" variants of the
  * boolean options pass the negated LAGG_OPT_* value to setlaggsetopt().
  */
 static struct cmd lagg_cmds[] = {
+	DEF_CLONE_CMD_ARG("laggtype",   setlaggtype),
 	DEF_CMD_ARG("laggport",		setlaggport),
 	DEF_CMD_ARG("-laggport",	unsetlaggport),
 	DEF_CMD_ARG("laggproto",	setlaggproto),
 	DEF_CMD_ARG("lagghash",		setlagghash),
 	DEF_CMD("use_flowid",	LAGG_OPT_USE_FLOWID,	setlaggsetopt),
 	DEF_CMD("-use_flowid",	-LAGG_OPT_USE_FLOWID,	setlaggsetopt),
 	DEF_CMD("use_numa",	LAGG_OPT_USE_NUMA,	setlaggsetopt),
 	DEF_CMD("-use_numa",	-LAGG_OPT_USE_NUMA,	setlaggsetopt),
 	DEF_CMD("lacp_strict",	LAGG_OPT_LACP_STRICT,	setlaggsetopt),
 	DEF_CMD("-lacp_strict",	-LAGG_OPT_LACP_STRICT,	setlaggsetopt),
 	DEF_CMD("lacp_txtest",	LAGG_OPT_LACP_TXTEST,	setlaggsetopt),
 	DEF_CMD("-lacp_txtest",	-LAGG_OPT_LACP_TXTEST,	setlaggsetopt),
 	DEF_CMD("lacp_rxtest",	LAGG_OPT_LACP_RXTEST,	setlaggsetopt),
 	DEF_CMD("-lacp_rxtest",	-LAGG_OPT_LACP_RXTEST,	setlaggsetopt),
 	DEF_CMD("lacp_fast_timeout",	LAGG_OPT_LACP_FAST_TIMO,	setlaggsetopt),
 	DEF_CMD("-lacp_fast_timeout",	-LAGG_OPT_LACP_FAST_TIMO,	setlaggsetopt),
 	DEF_CMD_ARG("flowid_shift",	setlaggflowidshift),
 	DEF_CMD_ARG("rr_limit",		setlaggrr_limit),
 };
 /*
  * Address-family switch entry registered by lagg_ctor(); it carries no
  * real address family (AF_UNSPEC) and only supplies lagg_status() as the
  * extra status routine.
  */
 static struct afswtch af_lagg = {
 	.af_name	= "af_lagg",
 	.af_af		= AF_UNSPEC,
 	.af_other_status = lagg_status,
 };
 
 /*
  * Constructor: registers every lagg command keyword, the lagg status
  * hook, and lagg_create() as the clone callback for "lagg" interfaces.
  */
 static __constructor void
 lagg_ctor(void)
 {
 	int i;
 
 	for (i = 0; i < nitems(lagg_cmds);  i++)
 		cmd_register(&lagg_cmds[i]);
 	af_register(&af_lagg);
+	clone_setdefcallback("lagg", lagg_create);
 }
Index: head/share/man/man4/lagg.4
===================================================================
--- head/share/man/man4/lagg.4	(revision 366932)
+++ head/share/man/man4/lagg.4	(revision 366933)
@@ -1,220 +1,229 @@
 .\"	$OpenBSD: trunk.4,v 1.18 2006/06/09 13:53:34 jmc Exp $
 .\"
 .\" Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
 .\"
 .\" Permission to use, copy, modify, and distribute this software for any
 .\" purpose with or without fee is hereby granted, provided that the above
 .\" copyright notice and this permission notice appear in all copies.
 .\"
 .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd November 18, 2017
+.Dd October 21, 2020
 .Dt LAGG 4
 .Os
 .Sh NAME
 .Nm lagg
 .Nd link aggregation and link failover interface
 .Sh SYNOPSIS
 To compile this driver into the kernel,
 place the following line in your
 kernel configuration file:
 .Bd -ragged -offset indent
 .Cd "device lagg"
 .Ed
 .Pp
 Alternatively, to load the driver as a
 module at boot time, place the following line in
 .Xr loader.conf 5 :
 .Bd -literal -offset indent
 if_lagg_load="YES"
 .Ed
 .Sh DESCRIPTION
 The
 .Nm
 interface allows aggregation of multiple network interfaces as one virtual
 .Nm
 interface for the purpose of providing fault-tolerance and high-speed links.
 .Pp
 A
 .Nm
 interface can be created using the
 .Ic ifconfig lagg Ns Ar N Ic create
 command.
 It can use different link aggregation protocols specified
 using the
 .Ic laggproto Ar proto
 option.
 Child interfaces can be added using the
 .Ic laggport Ar child-iface
 option and removed using the
 .Ic -laggport Ar child-iface
 option.
 .Pp
 The driver currently supports the aggregation protocols
 .Ic failover
 (the default),
 .Ic lacp ,
 .Ic loadbalance ,
 .Ic roundrobin ,
 .Ic broadcast ,
 and
 .Ic none .
 The protocols determine which ports are used for outgoing traffic
 and whether a specific port accepts incoming traffic.
 The interface link state is used to validate if the port is active or
 not.
 .Bl -tag -width loadbalance
 .It Ic failover
 Sends traffic only through the active port.
 If the master port becomes unavailable,
 the next active port is used.
 The first interface added is the master port;
 any interfaces added after that are used as failover devices.
 .Pp
 By default, received traffic is only accepted when it is received
 through the active port.
 This constraint can be relaxed by setting the
 .Va net.link.lagg.failover_rx_all
 .Xr sysctl 8
 variable to a nonzero value,
 which is useful for certain bridged network setups.
 .It Ic lacp
 Supports the IEEE 802.1AX (formerly 802.3ad) Link Aggregation Control Protocol
 (LACP) and the Marker Protocol.
 LACP will negotiate a set of aggregable links with the peer into one or more
 Link Aggregated Groups.
 Each LAG is composed of ports of the same speed, set to full-duplex operation.
 The traffic will be balanced across the ports in the LAG with the greatest
 total speed; in most cases there will only be one LAG which contains all ports.
 In the event of changes in physical connectivity, Link Aggregation will quickly
 converge to a new configuration.
 .It Ic loadbalance
 Balances outgoing traffic across the active ports based on hashed
 protocol header information and accepts incoming traffic from
 any active port.
 This is a static setup and does not negotiate aggregation with the peer or
 exchange frames to monitor the link.
 The hash includes the Ethernet source and destination address, and, if
 available, the VLAN tag, and the IP source and destination address.
 .It Ic roundrobin
 Distributes outgoing traffic using a round-robin scheduler
 through all active ports and accepts incoming traffic from
 any active port.
 Using
 .Ic roundrobin
 mode can cause unordered packet arrival at the client.
 Throughput might be limited as the client performs CPU-intensive packet
 reordering.
 .It Ic broadcast
 Sends frames to all ports of the LAG and receives frames on
 any port of the LAG.
 .It Ic none
 This protocol is intended to do nothing: it disables any traffic without
 disabling the
 .Nm
 interface itself.
 .El
 .Pp
 Each
 .Nm
 interface is created at runtime using interface cloning.
 This is
 most easily done with the
 .Xr ifconfig 8
 .Cm create
 command or using the
 .Va cloned_interfaces
 variable in
 .Xr rc.conf 5 .
 .Pp
 The MTU of the first interface to be added is used as the lagg MTU.
 All additional interfaces are required to have exactly the same value.
 .Pp
 The
 .Ic loadbalance
 and
 .Ic lacp
 modes will use the RSS hash from the network card if available to avoid
 computing one; this may give poor traffic distribution if the hash is invalid
 or uses less of the protocol header information.
 Local hash computation can be forced per interface by setting the
 .Cm -use_flowid
 .Xr ifconfig 8
 flag.
 The default for new interfaces is set via the
 .Va net.link.lagg.default_use_flowid
 .Xr sysctl 8 .
 .Sh EXAMPLES
 Create a link aggregation using LACP with two
 .Xr bge 4
 Gigabit Ethernet interfaces:
 .Bd -literal -offset indent
 # ifconfig bge0 up
 # ifconfig bge1 up
 # ifconfig lagg0 create
 # ifconfig lagg0 laggproto lacp laggport bge0 laggport bge1 \e
 	192.168.1.1 netmask 255.255.255.0
 .Ed
 .Pp
 Create a link aggregation using ROUNDROBIN with two
 .Xr bge 4
 Gigabit Ethernet interfaces and set a stride of 500 packets
 per interface:
 .Bd -literal -offset indent
 # ifconfig bge0 up
 # ifconfig bge1 up
 # ifconfig lagg0 create
 # ifconfig lagg0 laggproto roundrobin laggport bge0 laggport bge1 \e
 	192.168.1.1 netmask 255.255.255.0
 # ifconfig lagg0 rr_limit 500
 .Ed
 .Pp
 The following example uses an active failover interface to set up roaming
 between wired and wireless networks using two network devices.
 Whenever the wired master interface is unplugged, the wireless failover
 device will be used:
 .Bd -literal -offset indent
 # ifconfig em0 up
 # ifconfig ath0 ether 00:11:22:33:44:55
 # ifconfig wlan0 create wlandev ath0 ssid my_net up
 # ifconfig lagg0 create
 # ifconfig lagg0 laggproto failover laggport em0 laggport wlan0 \e
 	192.168.1.1 netmask 255.255.255.0
 .Ed
 .Pp
 (Note the MAC address of the wireless device is forced to match the wired
 device as a workaround.)
+.Pp
+The following example shows how to create an infiniband failover interface.
+.Bd -literal -offset indent
+# ifconfig ib0 up
+# ifconfig ib1 up
+# ifconfig lagg0 create laggtype infiniband
+# ifconfig lagg0 laggproto failover laggport ib0 laggport ib1 \e
+	1.1.1.1 netmask 255.255.255.0
+.Ed
 .Sh SEE ALSO
 .Xr ng_one2many 4 ,
 .Xr ifconfig 8 ,
 .Xr sysctl 8
 .Sh HISTORY
 The
 .Nm
 device first appeared in
 .Fx 6.3 .
 .Sh AUTHORS
 .An -nosplit
 The
 .Nm
 driver was written under the name
 .Nm trunk
 by
 .An Reyk Floeter Aq Mt reyk@openbsd.org .
 The LACP implementation was written by
 .An YAMAMOTO Takashi
 for
 .Nx .
 .Sh BUGS
 There is no way to configure LACP administrative variables, including system
 and port priorities.
 The current implementation always performs active-mode LACP and uses 0x8000 as
 system and port priorities.
Index: head/sys/net/ieee8023ad_lacp.c
===================================================================
--- head/sys/net/ieee8023ad_lacp.c	(revision 366932)
+++ head/sys/net/ieee8023ad_lacp.c	(revision 366933)
@@ -1,2202 +1,2203 @@
 /*	$NetBSD: ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-NetBSD
  *
  * Copyright (c)2005 YAMAMOTO Takashi,
  * Copyright (c)2008 Andrew Thompson <thompsa@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_kern_tls.h"
 #include "opt_ratelimit.h"
 
 #include <sys/param.h>
 #include <sys/callout.h>
 #include <sys/eventhandler.h>
 #include <sys/mbuf.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h> /* hz */
 #include <sys/socket.h> /* for net/if.h */
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <machine/stdarg.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/taskqueue.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/ethernet.h>
+#include <net/infiniband.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 
 #include <net/if_lagg.h>
 #include <net/ieee8023ad_lacp.h>
 
 /*
  * actor system priority and port priority.
  * XXX should be configurable.
  */
 
 #define	LACP_SYSTEM_PRIO	0x8000
 #define	LACP_PORT_PRIO		0x8000
 
 /*
  * Destination MAC address that a received LACPDU must carry;
  * lacp_pdu_input() rejects frames whose ether_dhost differs from it.
  */
 const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] =
     { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
 
 /*
  * TLV templates.  Each entry pairs a TLV type with its size (TLV header
  * plus payload structure).  lacp_info_tlv_template is handed to
  * tlv_check() by lacp_pdu_input().
  * NOTE(review): the { 0, 0 } entry presumably terminates the list --
  * confirm against tlv_check().
  */
 static const struct tlv_template lacp_info_tlv_template[] = {
 	{ LACP_TYPE_ACTORINFO,
 	    sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
 	{ LACP_TYPE_PARTNERINFO,
 	    sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
 	{ LACP_TYPE_COLLECTORINFO,
 	    sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) },
 	{ 0, 0 },
 };
 
 /* Template for a marker PDU carrying a marker information TLV. */
 static const struct tlv_template marker_info_tlv_template[] = {
 	{ MARKER_TYPE_INFO,
 	    sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) },
 	{ 0, 0 },
 };
 
 /* Template for a marker PDU carrying a marker response TLV. */
 static const struct tlv_template marker_response_tlv_template[] = {
 	{ MARKER_TYPE_RESPONSE,
 	    sizeof(struct tlvhdr) + sizeof(struct lacp_markerinfo) },
 	{ 0, 0 },
 };
 
 typedef void (*lacp_timer_func_t)(struct lacp_port *);
 
 static void	lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *);
 static void	lacp_fill_markerinfo(struct lacp_port *,
 		    struct lacp_markerinfo *);
 
 static uint64_t	lacp_aggregator_bandwidth(struct lacp_aggregator *);
 static void	lacp_suppress_distributing(struct lacp_softc *,
 		    struct lacp_aggregator *);
 static void	lacp_transit_expire(void *);
 static void	lacp_update_portmap(struct lacp_softc *);
 static void	lacp_select_active_aggregator(struct lacp_softc *);
 static uint16_t	lacp_compose_key(struct lacp_port *);
 static int	tlv_check(const void *, size_t, const struct tlvhdr *,
 		    const struct tlv_template *, boolean_t);
 static void	lacp_tick(void *);
 
 static void	lacp_fill_aggregator_id(struct lacp_aggregator *,
 		    const struct lacp_port *);
 static void	lacp_fill_aggregator_id_peer(struct lacp_peerinfo *,
 		    const struct lacp_peerinfo *);
 static int	lacp_aggregator_is_compatible(const struct lacp_aggregator *,
 		    const struct lacp_port *);
 static int	lacp_peerinfo_is_compatible(const struct lacp_peerinfo *,
 		    const struct lacp_peerinfo *);
 
 static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *,
 		    struct lacp_port *);
 static void	lacp_aggregator_addref(struct lacp_softc *,
 		    struct lacp_aggregator *);
 static void	lacp_aggregator_delref(struct lacp_softc *,
 		    struct lacp_aggregator *);
 
 /* receive machine */
 
 static int	lacp_pdu_input(struct lacp_port *, struct mbuf *);
 static int	lacp_marker_input(struct lacp_port *, struct mbuf *);
 static void	lacp_sm_rx(struct lacp_port *, const struct lacpdu *);
 static void	lacp_sm_rx_timer(struct lacp_port *);
 static void	lacp_sm_rx_set_expired(struct lacp_port *);
 static void	lacp_sm_rx_update_ntt(struct lacp_port *,
 		    const struct lacpdu *);
 static void	lacp_sm_rx_record_pdu(struct lacp_port *,
 		    const struct lacpdu *);
 static void	lacp_sm_rx_update_selected(struct lacp_port *,
 		    const struct lacpdu *);
 static void	lacp_sm_rx_record_default(struct lacp_port *);
 static void	lacp_sm_rx_update_default_selected(struct lacp_port *);
 static void	lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *,
 		    const struct lacp_peerinfo *);
 
 /* mux machine */
 
 static void	lacp_sm_mux(struct lacp_port *);
 static void	lacp_set_mux(struct lacp_port *, enum lacp_mux_state);
 static void	lacp_sm_mux_timer(struct lacp_port *);
 
 /* periodic transmit machine */
 
 static void	lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t);
 static void	lacp_sm_ptx_tx_schedule(struct lacp_port *);
 static void	lacp_sm_ptx_timer(struct lacp_port *);
 
 /* transmit machine */
 
 static void	lacp_sm_tx(struct lacp_port *);
 static void	lacp_sm_assert_ntt(struct lacp_port *);
 
 static void	lacp_run_timers(struct lacp_port *);
 static int	lacp_compare_peerinfo(const struct lacp_peerinfo *,
 		    const struct lacp_peerinfo *);
 static int	lacp_compare_systemid(const struct lacp_systemid *,
 		    const struct lacp_systemid *);
 static void	lacp_port_enable(struct lacp_port *);
 static void	lacp_port_disable(struct lacp_port *);
 static void	lacp_select(struct lacp_port *);
 static void	lacp_unselect(struct lacp_port *);
 static void	lacp_disable_collecting(struct lacp_port *);
 static void	lacp_enable_collecting(struct lacp_port *);
 static void	lacp_disable_distributing(struct lacp_port *);
 static void	lacp_enable_distributing(struct lacp_port *);
 static int	lacp_xmit_lacpdu(struct lacp_port *);
 static int	lacp_xmit_marker(struct lacp_port *);
 
 /* Debugging */
 
 static void	lacp_dump_lacpdu(const struct lacpdu *);
 static const char *lacp_format_partner(const struct lacp_peerinfo *, char *,
 		    size_t);
 static const char *lacp_format_lagid(const struct lacp_peerinfo *,
 		    const struct lacp_peerinfo *, char *, size_t);
 static const char *lacp_format_lagid_aggregator(const struct lacp_aggregator *,
 		    char *, size_t);
 static const char *lacp_format_state(uint8_t, char *, size_t);
 static const char *lacp_format_mac(const uint8_t *, char *, size_t);
 static const char *lacp_format_systemid(const struct lacp_systemid *, char *,
 		    size_t);
 static const char *lacp_format_portid(const struct lacp_portid *, char *,
 		    size_t);
 static void	lacp_dprintf(const struct lacp_port *, const char *, ...)
 		    __attribute__((__format__(__printf__, 2, 3)));
 
 /*
  * Debug sysctls and logging macros.  The debug value is tested as a
  * bit mask by the macros below: 0x01 enables LACP_DPRINTF, 0x02 enables
  * LACP_TRACE and 0x04 enables LACP_TPRINTF.  (The sysctl description
  * only mentions the values 1 and 2.)
  */
 VNET_DEFINE_STATIC(int, lacp_debug);
 #define	V_lacp_debug	VNET(lacp_debug)
 SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
     "ieee802.3ad");
 SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET,
     &VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)");
 
 VNET_DEFINE_STATIC(int, lacp_default_strict_mode) = 1;
 SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, default_strict_mode,
     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(lacp_default_strict_mode), 0,
     "LACP strict protocol compliance default");
 /*
  * LACP_DPRINTF and LACP_TPRINTF take a parenthesized lacp_dprintf()
  * argument list; LACP_TRACE takes the port and logs the calling
  * function's name.
  * NOTE(review): these expand to a bare "if (...) { ... }", so using one
  * as the body of an unbraced if/else can mis-pair the else.
  */
 #define LACP_DPRINTF(a) if (V_lacp_debug & 0x01) { lacp_dprintf a ; }
 #define LACP_TRACE(a) if (V_lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); }
 #define LACP_TPRINTF(a) if (V_lacp_debug & 0x04) { lacp_dprintf a ; }
 
 /*
  * partner administration variables.
  * XXX should be configurable.
  */
 
 /*
  * Optimistic defaults: the partner state has the SYNC, AGGREGATION,
  * COLLECTING and DISTRIBUTING bits set.
  */
 static const struct lacp_peerinfo lacp_partner_admin_optimistic = {
 	.lip_systemid = { .lsi_prio = 0xffff },
 	.lip_portid = { .lpi_prio = 0xffff },
 	.lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
 	    LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
 };
 
 /* Strict defaults: same priorities, but no partner state bits set. */
 static const struct lacp_peerinfo lacp_partner_admin_strict = {
 	.lip_systemid = { .lsi_prio = 0xffff },
 	.lip_portid = { .lpi_prio = 0xffff },
 	.lip_state = 0,
 };
 
 /*
  * Per-timer handlers, indexed by LACP_TIMER_* constant: the receive,
  * periodic-transmit and mux machine timer functions respectively.
  */
 static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
 	[LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer,
 	[LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer,
 	[LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer,
 };
 
 /*
  * Dispatch a received slow-protocols frame by the subtype octet that
  * follows the Ethernet header.  LACP and marker frames are handed to
  * their input routines and NULL is returned, as it is for frames too
  * short to hold a subtype (which are freed here).  Any other frame is
  * returned to the caller untouched.
  */
 struct mbuf *
 lacp_input(struct lagg_port *lgp, struct mbuf *m)
 {
 	struct lacp_port *port = LACP_PORT(lgp);
 	uint8_t subtype;
 
 	/* Too short to carry a subtype octet: drop it. */
 	if (m->m_pkthdr.len < sizeof(struct ether_header) + sizeof(subtype)) {
 		m_freem(m);
 		return (NULL);
 	}
 
 	m_copydata(m, sizeof(struct ether_header), sizeof(subtype), &subtype);
 
 	if (subtype == SLOWPROTOCOLS_SUBTYPE_LACP) {
 		lacp_pdu_input(port, m);
 		return (NULL);
 	}
 	if (subtype == SLOWPROTOCOLS_SUBTYPE_MARKER) {
 		lacp_marker_input(port, m);
 		return (NULL);
 	}
 
 	/* Not a subtype we are interested in */
 	return (m);
 }
 
 /*
  * lacp_pdu_input: validate a received LACPDU and run it through the
  * receive state machine.
  *
  * Returns 0 when the PDU was processed, EINVAL when it was rejected and
  * ENOMEM when the mbuf could not be made contiguous.  The mbuf is freed
  * here on the success path and on every rejection path.
  */
 static int
 lacp_pdu_input(struct lacp_port *lp, struct mbuf *m)
 {
 	struct lacp_softc *lsc = lp->lp_lsc;
 	struct lacpdu *du;
 
 	/* Only exact-size frames flagged as multicast are acceptable. */
 	if (m->m_pkthdr.len != sizeof(*du) || (m->m_flags & M_MCAST) == 0)
 		goto bad;
 
 	/* The structure overlay below needs the whole PDU contiguous. */
 	if (m->m_len < sizeof(*du) &&
 	    (m = m_pullup(m, sizeof(*du))) == NULL)
 		return (ENOMEM);
 
 	du = mtod(m, struct lacpdu *);
 
 	/* The destination must be the slow-protocols multicast address. */
 	if (memcmp(&du->ldu_eh.ether_dhost,
 	    &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN) != 0)
 		goto bad;
 
 	/*
 	 * The protocol version is deliberately not checked so that PDUs
 	 * from future protocol revisions are still accepted.
 	 */
 #if 0
 	if (du->ldu_sph.sph_version != 1) {
 		goto bad;
 	}
 #endif
 
 	/*
 	 * TLV types are not enforced either (last argument FALSE), again
 	 * for compatibility with future protocol revisions.
 	 */
 	if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor,
 	    lacp_info_tlv_template, FALSE))
 		goto bad;
 
 	if (V_lacp_debug > 0) {
 		lacp_dprintf(lp, "lacpdu receive\n");
 		lacp_dump_lacpdu(du);
 	}
 
 	/* Test hook: drop the PDU if this unit's bit is set in lsc_rx_test. */
 	if (((1 << lp->lp_ifp->if_dunit) &
 	    lp->lp_lsc->lsc_debug.lsc_rx_test) != 0) {
 		LACP_TPRINTF((lp, "Dropping RX PDU\n"));
 		goto bad;
 	}
 
 	LACP_LOCK(lsc);
 	lacp_sm_rx(lp, du);
 	LACP_UNLOCK(lsc);
 
 	m_freem(m);
 	return (0);
 
 bad:
 	m_freem(m);
 	return (EINVAL);
 }
 
 static void
 lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info)
 {
 	struct lagg_port *lgp = lp->lp_lagg;
 	struct lagg_softc *sc = lgp->lp_softc;
 
 	info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO);
 	memcpy(&info->lip_systemid.lsi_mac,
 	    IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
 	info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO);
 	info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index);
 	info->lip_state = lp->lp_state;
 }
 
 /*
  * lacp_fill_markerinfo: initialize the marker request information for a
  * port: requester port index, requester system (the MAC held in
  * lp_systemid) and a transaction id starting at zero.
  */
 static void
 lacp_fill_markerinfo(struct lacp_port *lp, struct lacp_markerinfo *info)
 {
 	/* Port index of the member interface, in network byte order. */
 	info->mi_rq_port = htons(lp->lp_ifp->if_index);
 
 	/* The system id is encoded as the MAC address. */
 	memcpy(&info->mi_rq_system, lp->lp_systemid.lsi_mac, ETHER_ADDR_LEN);
 
 	info->mi_rq_xid = htonl(0);
 }
 
 static int
 lacp_xmit_lacpdu(struct lacp_port *lp)
 {
 	struct lagg_port *lgp = lp->lp_lagg;
 	struct mbuf *m;
 	struct lacpdu *du;
 	int error;
 
 	LACP_LOCK_ASSERT(lp->lp_lsc);
 
 	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL) {
 		return (ENOMEM);
 	}
 	m->m_len = m->m_pkthdr.len = sizeof(*du);
 
 	du = mtod(m, struct lacpdu *);
 	memset(du, 0, sizeof(*du));
 
 	memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
 	    ETHER_ADDR_LEN);
 	memcpy(&du->ldu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN);
 	du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW);
 
 	du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
 	du->ldu_sph.sph_version = 1;
 
 	TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor));
 	du->ldu_actor = lp->lp_actor;
 
 	TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO,
 	    sizeof(du->ldu_partner));
 	du->ldu_partner = lp->lp_partner;
 
 	TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO,
 	    sizeof(du->ldu_collector));
 	du->ldu_collector.lci_maxdelay = 0;
 
 	if (V_lacp_debug > 0) {
 		lacp_dprintf(lp, "lacpdu transmit\n");
 		lacp_dump_lacpdu(du);
 	}
 
 	m->m_flags |= M_MCAST;
 
 	/*
 	 * XXX should use higher priority queue.
 	 * otherwise network congestion can break aggregation.
 	 */
 
 	error = lagg_enqueue(lp->lp_ifp, m);
 	return (error);
 }
 
 /*
  * lacp_xmit_marker: build a marker PDU carrying the port's marker
  * information (with the transaction id advanced by one) and enqueue it on
  * the member interface.
  *
  * Must be called with the LACP lock held.  Returns ENOMEM if no mbuf is
  * available, otherwise whatever lagg_enqueue() returns.
  */
 static int
 lacp_xmit_marker(struct lacp_port *lp)
 {
 	struct lagg_port *lgp = lp->lp_lagg;
 	struct markerdu *pdu;
 	struct mbuf *m;
 
 	LACP_LOCK_ASSERT(lp->lp_lsc);
 
 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
 		return (ENOMEM);
 	m->m_pkthdr.len = sizeof(*pdu);
 	m->m_len = m->m_pkthdr.len;
 
 	pdu = mtod(m, struct markerdu *);
 	memset(pdu, 0, sizeof(*pdu));
 
 	/* Ethernet header: slow-protocols multicast, port's own address. */
 	memcpy(&pdu->mdu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
 	    ETHER_ADDR_LEN);
 	memcpy(&pdu->mdu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN);
 	pdu->mdu_eh.ether_type = htons(ETHERTYPE_SLOW);
 
 	/* Slow-protocols header. */
 	pdu->mdu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_MARKER;
 	pdu->mdu_sph.sph_version = 1;
 
 	/* The xid is kept in network order; advance it, then copy the info. */
 	lp->lp_marker.mi_rq_xid = htonl(ntohl(lp->lp_marker.mi_rq_xid) + 1);
 	TLV_SET(&pdu->mdu_tlv, MARKER_TYPE_INFO, sizeof(pdu->mdu_info));
 	pdu->mdu_info = lp->lp_marker;
 
 	LACP_DPRINTF((lp, "marker transmit, port=%u, sys=%6D, id=%u\n",
 	    ntohs(pdu->mdu_info.mi_rq_port), pdu->mdu_info.mi_rq_system, ":",
 	    ntohl(pdu->mdu_info.mi_rq_xid)));
 
 	m->m_flags |= M_MCAST;
 	return (lagg_enqueue(lp->lp_ifp, m));
 }
 
 /*
  * lacp_linkstate: re-evaluate a port after a link or media change.
  *
  * Queries the member interface for its active media (extended ioctl first,
  * legacy ioctl as fallback), enables or disables the port for aggregation
  * accordingly, recomputes its key and marks the port UNSELECTED if either
  * the state or the key changed.  Returns silently if the media cannot be
  * queried.
  */
 void
 lacp_linkstate(struct lagg_port *lgp)
 {
 	struct lacp_port *lp = LACP_PORT(lgp);
 	struct lacp_softc *lsc = lp->lp_lsc;
 	struct ifnet *ifp = lgp->lp_ifp;
 	struct ifmediareq ifmr;
 	uint16_t prev_key;
 	uint8_t prev_state;
 	u_int media;
 	int can_aggregate;
 	int error;
 
 	bzero((char *)&ifmr, sizeof(ifmr));
 	error = (*ifp->if_ioctl)(ifp, SIOCGIFXMEDIA, (caddr_t)&ifmr);
 	if (error != 0) {
 		/* Fall back to the legacy media request. */
 		bzero((char *)&ifmr, sizeof(ifmr));
 		error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
 		if (error != 0)
 			return;
 	}
 
 	LACP_LOCK(lsc);
 	media = ifmr.ifm_active;
 	LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x, ether = %d, fdx = %d, "
 	    "link = %d\n", lp->lp_media, media, IFM_TYPE(media) == IFM_ETHER,
 	    (media & IFM_FDX) != 0, ifp->if_link_state == LINK_STATE_UP));
 	prev_state = lp->lp_state;
 	prev_key = lp->lp_key;
 
 	lp->lp_media = media;
 
 	/*
 	 * Only an active, full duplex Ethernet link can be aggregated.
 	 */
 	can_aggregate = IFM_TYPE(media) == IFM_ETHER &&
 	    (media & IFM_FDX) != 0 &&
 	    ifp->if_link_state == LINK_STATE_UP;
 	if (can_aggregate) {
 		lacp_port_enable(lp);
 	} else {
 		lacp_port_disable(lp);
 	}
 	lp->lp_key = lacp_compose_key(lp);
 
 	if (prev_state != lp->lp_state || prev_key != lp->lp_key) {
 		LACP_DPRINTF((lp, "-> UNSELECTED\n"));
 		lp->lp_selected = LACP_UNSELECTED;
 	}
 	LACP_UNLOCK(lsc);
 }
 
 /*
  * lacp_tick: periodic callout handler.
  *
  * For every port with the AGGREGATION state bit set, runs the port timers,
  * aggregator selection and the mux, transmit and periodic-transmit state
  * machines in the port's vnet context.  Re-arms itself to fire again in hz
  * ticks.
  */
 static void
 lacp_tick(void *arg)
 {
 	struct lacp_softc *lsc = arg;
 	struct lacp_port *port;
 
 	LIST_FOREACH(port, &lsc->lsc_ports, lp_next) {
 		if ((port->lp_state & LACP_STATE_AGGREGATION) != 0) {
 			CURVNET_SET(port->lp_ifp->if_vnet);
 			lacp_run_timers(port);
 
 			lacp_select(port);
 			lacp_sm_mux(port);
 			lacp_sm_tx(port);
 			lacp_sm_ptx_tx_schedule(port);
 			CURVNET_RESTORE();
 		}
 	}
 	callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
 }
 
 /*
  * lacp_port_create: attach LACP state to a lagg member port.
  *
  * Joins the slow-protocols multicast group on the member interface,
  * allocates and initializes the per-port LACP state, links it onto the
  * softc's port list and finally evaluates the current link state.
  *
  * Returns 0 on success, the if_addmulti() error if the multicast join
  * fails, or ENOMEM if the port structure cannot be allocated.  On failure
  * no multicast membership is left behind.
  */
 int
 lacp_port_create(struct lagg_port *lgp)
 {
 	struct lagg_softc *sc = lgp->lp_softc;
 	struct lacp_softc *lsc = LACP_SOFTC(sc);
 	struct lacp_port *lp;
 	struct ifnet *ifp = lgp->lp_ifp;
 	struct sockaddr_dl sdl;
 	struct ifmultiaddr *rifma = NULL;
 	int error;
 
 	/* Build a link-level sockaddr for the slow-protocols group. */
 	link_init_sdl(ifp, (struct sockaddr *)&sdl, IFT_ETHER);
 	sdl.sdl_alen = ETHER_ADDR_LEN;
 
 	bcopy(&ethermulticastaddr_slowprotocols,
 	    LLADDR(&sdl), ETHER_ADDR_LEN);
 	error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
 	if (error) {
 		printf("%s: ADDMULTI failed on %s\n", __func__,
 		    lgp->lp_ifp->if_xname);
 		return (error);
 	}
 
 	lp = malloc(sizeof(struct lacp_port),
 	    M_DEVBUF, M_NOWAIT|M_ZERO);
 	if (lp == NULL) {
 		/* Do not leak the multicast membership taken above. */
 		if_delmulti_ifma(rifma);
 		return (ENOMEM);
 	}
 
 	LACP_LOCK(lsc);
 	lgp->lp_psc = lp;
 	lp->lp_ifp = ifp;
 	lp->lp_lagg = lgp;
 	lp->lp_lsc = lsc;
 	lp->lp_ifma = rifma;
 
 	LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next);
 
 	lacp_fill_actorinfo(lp, &lp->lp_actor);
 	lacp_fill_markerinfo(lp, &lp->lp_marker);
 	lp->lp_state = LACP_STATE_ACTIVITY;
 	lp->lp_aggregator = NULL;
 	lacp_sm_rx_set_expired(lp);
 	LACP_UNLOCK(lsc);
 
 	/* lacp_linkstate() takes the LACP lock itself. */
 	lacp_linkstate(lgp);
 
 	return (0);
 }
 
 /*
  * lacp_port_destroy: tear down the LACP state of a lagg member port.
  *
  * Under the LACP lock: disarms every port timer, stops collecting and
  * distributing, releases the aggregator and unlinks the port.  Afterwards
  * the multicast membership is dropped (unless the port is detaching) and
  * the port structure is freed.
  */
 void
 lacp_port_destroy(struct lagg_port *lgp)
 {
 	struct lacp_port *port = LACP_PORT(lgp);
 	struct lacp_softc *lsc = port->lp_lsc;
 	int timer;
 
 	LACP_LOCK(lsc);
 
 	timer = 0;
 	while (timer < LACP_NTIMER) {
 		LACP_TIMER_DISARM(port, timer);
 		timer++;
 	}
 
 	lacp_disable_collecting(port);
 	lacp_disable_distributing(port);
 	lacp_unselect(port);
 
 	LIST_REMOVE(port, lp_next);
 
 	LACP_UNLOCK(lsc);
 
 	/* The address may have already been removed by if_purgemaddrs() */
 	if (!lgp->lp_detaching) {
 		if_delmulti_ifma(port->lp_ifma);
 	}
 
 	free(port, M_DEVBUF);
 }
 
 /*
  * lacp_req: report the active aggregator's actor and partner parameters.
  *
  * "data" points to a struct lacp_opreq, which is always zeroed first.  If
  * the lagg has no LACP softc or there is no active aggregator, the request
  * is returned zero-filled.  Otherwise the fields are copied out, with
  * 16-bit values converted to host byte order.
  */
 void
 lacp_req(struct lagg_softc *sc, void *data)
 {
 	struct lacp_opreq *req = (struct lacp_opreq *)data;
 	struct lacp_softc *lsc = LACP_SOFTC(sc);
 	struct lacp_aggregator *la;
 
 	bzero(req, sizeof(struct lacp_opreq));
 
 	/*
 	 * If the LACP softc is NULL, return with the opreq structure full of
 	 * zeros.  It is normal for the softc to be NULL while the lagg is
 	 * being destroyed.
 	 */
 	if (NULL == lsc)
 		return;
 
 	LACP_LOCK(lsc);
 	/*
 	 * Sample the active aggregator only once the lock is held, so the
 	 * pointer cannot go stale between the read and the copies below.
 	 */
 	la = lsc->lsc_active_aggregator;
 	if (la != NULL) {
 		req->actor_prio = ntohs(la->la_actor.lip_systemid.lsi_prio);
 		memcpy(&req->actor_mac, &la->la_actor.lip_systemid.lsi_mac,
 		    ETHER_ADDR_LEN);
 		req->actor_key = ntohs(la->la_actor.lip_key);
 		req->actor_portprio = ntohs(la->la_actor.lip_portid.lpi_prio);
 		req->actor_portno = ntohs(la->la_actor.lip_portid.lpi_portno);
 		req->actor_state = la->la_actor.lip_state;
 
 		req->partner_prio = ntohs(la->la_partner.lip_systemid.lsi_prio);
 		memcpy(&req->partner_mac, &la->la_partner.lip_systemid.lsi_mac,
 		    ETHER_ADDR_LEN);
 		req->partner_key = ntohs(la->la_partner.lip_key);
 		req->partner_portprio = ntohs(la->la_partner.lip_portid.lpi_prio);
 		req->partner_portno = ntohs(la->la_partner.lip_portid.lpi_portno);
 		req->partner_state = la->la_partner.lip_state;
 	}
 	LACP_UNLOCK(lsc);
 }
 
 void
 lacp_portreq(struct lagg_port *lgp, void *data)
 {
 	struct lacp_opreq *req = (struct lacp_opreq *)data;
 	struct lacp_port *lp = LACP_PORT(lgp);
 	struct lacp_softc *lsc = lp->lp_lsc;
 
 	LACP_LOCK(lsc);
 	req->actor_prio = ntohs(lp->lp_actor.lip_systemid.lsi_prio);
 	memcpy(&req->actor_mac, &lp->lp_actor.lip_systemid.lsi_mac,
 	    ETHER_ADDR_LEN);
 	req->actor_key = ntohs(lp->lp_actor.lip_key);
 	req->actor_portprio = ntohs(lp->lp_actor.lip_portid.lpi_prio);
 	req->actor_portno = ntohs(lp->lp_actor.lip_portid.lpi_portno);
 	req->actor_state = lp->lp_actor.lip_state;
 
 	req->partner_prio = ntohs(lp->lp_partner.lip_systemid.lsi_prio);
 	memcpy(&req->partner_mac, &lp->lp_partner.lip_systemid.lsi_mac,
 	    ETHER_ADDR_LEN);
 	req->partner_key = ntohs(lp->lp_partner.lip_key);
 	req->partner_portprio = ntohs(lp->lp_partner.lip_portid.lpi_prio);
 	req->partner_portno = ntohs(lp->lp_partner.lip_portid.lpi_portno);
 	req->partner_state = lp->lp_partner.lip_state;
 	LACP_UNLOCK(lsc);
 }
 
 /* Clear the COLLECTING bit in the port's actor state. */
 static void
 lacp_disable_collecting(struct lacp_port *lp)
 {
 	LACP_DPRINTF((lp, "collecting disabled\n"));
 	lp->lp_state = lp->lp_state & ~LACP_STATE_COLLECTING;
 }
 
 /* Set the COLLECTING bit in the port's actor state. */
 static void
 lacp_enable_collecting(struct lacp_port *lp)
 {
 	LACP_DPRINTF((lp, "collecting enabled\n"));
 	lp->lp_state = lp->lp_state | LACP_STATE_COLLECTING;
 }
 
 /*
  * lacp_disable_distributing: stop distributing frames on a port.
  *
  * Does nothing if the port has no aggregator or is not currently
  * distributing.  Otherwise the port is removed from its aggregator's
  * distribution list, the aggregator's port count and the lagg's active
  * count are updated, and the lagg link state is reported UP or DOWN
  * depending on whether any active ports remain.  Requires the LACP lock.
  */
 static void
 lacp_disable_distributing(struct lacp_port *lp)
 {
 	struct lacp_aggregator *la = lp->lp_aggregator;
 	struct lacp_softc *lsc = lp->lp_lsc;
 	struct lagg_softc *sc = lsc->lsc_softc;
 	char buf[LACP_LAGIDSTR_MAX+1];
 
 	LACP_LOCK_ASSERT(lsc);
 
 	if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) {
 		return;
 	}
 
 	/* A distributing port must be accounted for by its aggregator. */
 	KASSERT(!TAILQ_EMPTY(&la->la_ports), ("no aggregator ports"));
 	KASSERT(la->la_nports > 0, ("nports invalid (%d)", la->la_nports));
 	KASSERT(la->la_refcnt >= la->la_nports, ("aggregator refcnt invalid"));
 
 	LACP_DPRINTF((lp, "disable distributing on aggregator %s, "
 	    "nports %d -> %d\n",
 	    lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
 	    la->la_nports, la->la_nports - 1));
 
 	TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
 	la->la_nports--;
 	sc->sc_active = la->la_nports;
 
 	/*
 	 * Losing a port of the active aggregator: suppress distribution for
 	 * a while, then reconsider which aggregator should be active.
 	 */
 	if (lsc->lsc_active_aggregator == la) {
 		lacp_suppress_distributing(lsc, la);
 		lacp_select_active_aggregator(lsc);
 		/* regenerate the port map, the active aggregator has changed */
 		lacp_update_portmap(lsc);
 	}
 
 	lp->lp_state &= ~LACP_STATE_DISTRIBUTING;
 	if_link_state_change(sc->sc_ifp,
 	    sc->sc_active ? LINK_STATE_UP : LINK_STATE_DOWN);
 }
 
 /*
  * lacp_enable_distributing: start distributing frames on a port.
  *
  * Does nothing if the port is already distributing.  Otherwise the port is
  * added to its aggregator's distribution list, the aggregator's port count
  * and the lagg's active count are updated, and the lagg link state is
  * reported.  Requires the LACP lock.
  *
  * NOTE(review): lp->lp_aggregator is dereferenced without a NULL check;
  * callers presumably guarantee the port has an aggregator -- confirm.
  */
 static void
 lacp_enable_distributing(struct lacp_port *lp)
 {
 	struct lacp_aggregator *la = lp->lp_aggregator;
 	struct lacp_softc *lsc = lp->lp_lsc;
 	struct lagg_softc *sc = lsc->lsc_softc;
 	char buf[LACP_LAGIDSTR_MAX+1];
 
 	LACP_LOCK_ASSERT(lsc);
 
 	if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) {
 		return;
 	}
 
 	LACP_DPRINTF((lp, "enable distributing on aggregator %s, "
 	    "nports %d -> %d\n",
 	    lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
 	    la->la_nports, la->la_nports + 1));
 
 	/* The port already holds a reference but is not yet counted. */
 	KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
 	TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
 	la->la_nports++;
 	sc->sc_active = la->la_nports;
 
 	lp->lp_state |= LACP_STATE_DISTRIBUTING;
 
 	/*
 	 * If this aggregator is already the active one, suppress
 	 * distribution for a while and rebuild the port map to include the
 	 * new port; otherwise see whether it should become active now.
 	 */
 	if (lsc->lsc_active_aggregator == la) {
 		lacp_suppress_distributing(lsc, la);
 		lacp_update_portmap(lsc);
 	} else
 		/* try to become the active aggregator */
 		lacp_select_active_aggregator(lsc);
 
 	if_link_state_change(sc->sc_ifp,
 	    sc->sc_active ? LINK_STATE_UP : LINK_STATE_DOWN);
 }
 
 /*
  * lacp_transit_expire: transit callout handler; ends the period during
  * which distribution is suppressed.  Expects the LACP lock to be held.
  */
 static void
 lacp_transit_expire(void *vp)
 {
 	struct lacp_softc *lsc;
 
 	lsc = (struct lacp_softc *)vp;
 	LACP_LOCK_ASSERT(lsc);
 
 	/* The trace macro reads a per-vnet variable, so set the context. */
 	CURVNET_SET(lsc->lsc_softc->sc_ifp->if_vnet);
 	LACP_TRACE(NULL);
 	CURVNET_RESTORE();
 
 	lsc->lsc_suppress_distributing = FALSE;
 }
 
 /*
  * lacp_attach: allocate and initialize the LACP softc for a lagg interface
  * and hook it up as the lagg's protocol state.  If the lagg interface is
  * already running, LACP is started immediately.
  */
 void
 lacp_attach(struct lagg_softc *sc)
 {
 	struct lacp_softc *lsc;
 
 	/* M_WAITOK: the allocation result is used without a NULL check. */
 	lsc = malloc(sizeof(*lsc), M_DEVBUF, M_WAITOK | M_ZERO);
 
 	/* Cross-link the lagg and LACP softcs. */
 	sc->sc_psc = lsc;
 	lsc->lsc_softc = sc;
 
 	lsc->lsc_hashkey = m_ether_tcpip_hash_init();
 	lsc->lsc_active_aggregator = NULL;
 	lsc->lsc_strict_mode = VNET(lacp_default_strict_mode);
 
 	LACP_LOCK_INIT(lsc);
 	TAILQ_INIT(&lsc->lsc_aggregators);
 	LIST_INIT(&lsc->lsc_ports);
 
 	/* Both callouts are tied to the LACP mutex. */
 	callout_init_mtx(&lsc->lsc_transit_callout, &lsc->lsc_mtx, 0);
 	callout_init_mtx(&lsc->lsc_callout, &lsc->lsc_mtx, 0);
 
 	/* if the lagg is already up then do the same */
 	if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 		lacp_init(sc);
 	}
 }
 
 /*
  * lacp_detach: release the LACP softc.  All aggregators must already be
  * gone.  Both callouts are drained before the lock is destroyed and the
  * softc is freed.
  */
 void
 lacp_detach(void *psc)
 {
 	struct lacp_softc *lsc;
 
 	lsc = (struct lacp_softc *)psc;
 
 	KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
 	    ("aggregators still active"));
 	KASSERT(lsc->lsc_active_aggregator == NULL,
 	    ("aggregator still attached"));
 
 	/* Drain the callouts before tearing anything down. */
 	callout_drain(&lsc->lsc_transit_callout);
 	callout_drain(&lsc->lsc_callout);
 
 	LACP_LOCK_DESTROY(lsc);
 	free(lsc, M_DEVBUF);
 }
 
 /* lacp_init: start LACP by scheduling the first tick in hz ticks. */
 void
 lacp_init(struct lagg_softc *sc)
 {
 	struct lacp_softc *lsc;
 
 	lsc = LACP_SOFTC(sc);
 
 	LACP_LOCK(lsc);
 	callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
 	LACP_UNLOCK(lsc);
 }
 
 /* lacp_stop: stop both LACP callouts (transit and periodic tick). */
 void
 lacp_stop(struct lagg_softc *sc)
 {
 	struct lacp_softc *lsc;
 
 	lsc = LACP_SOFTC(sc);
 
 	LACP_LOCK(lsc);
 	callout_stop(&lsc->lsc_transit_callout);
 	callout_stop(&lsc->lsc_callout);
 	LACP_UNLOCK(lsc);
 }
 
 /*
  * lacp_select_tx_port_by_hash: pick the lagg port to transmit on for a
  * given flow hash.
  *
  * Returns NULL while distribution is suppressed or when the active port
  * map is empty.  Otherwise the port is map[hash % count], where the map
  * is either the global one or, when NUMA support is compiled in and
  * enabled for this lagg, the per-domain one for numa_domain (if that
  * domain has any ports).
  */
 struct lagg_port *
 lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t hash, uint8_t numa_domain)
 {
 	struct lacp_softc *lsc = LACP_SOFTC(sc);
 	struct lacp_portmap *pm;
 	struct lacp_port *lp;
 	struct lacp_port **map;
 	int count;
 
 	if (__predict_false(lsc->lsc_suppress_distributing)) {
 		LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
 		return (NULL);
 	}
 
 	/* Use whichever of the port maps is currently marked active. */
 	pm = &lsc->lsc_pmap[lsc->lsc_activemap];
 	if (pm->pm_count == 0) {
 		LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
 		return (NULL);
 	}
 
 #ifdef NUMA
 	/*
 	 * Prefer ports local to the requested domain, but only when the
 	 * option is set, ports span more than one domain and the domain
 	 * index is within range.
 	 */
 	if ((sc->sc_opts & LAGG_OPT_USE_NUMA) &&
 	    pm->pm_num_dom > 1 && numa_domain < MAXMEMDOM) {
 		count = pm->pm_numa[numa_domain].count;
 		if (count > 0) {
 			map = pm->pm_numa[numa_domain].map;
 		} else {
 			/* No ports on this domain; use global hash. */
 			map = pm->pm_map;
 			count = pm->pm_count;
 		}
 	} else
 #endif
 	{
 		map = pm->pm_map;
 		count = pm->pm_count;
 	}
 
 	/* count is non-zero on every path that reaches this point. */
 	hash %= count;
 	lp = map[hash];
 
 	KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0,
 	    ("aggregated port is not distributing"));
 
 	return (lp->lp_lagg);
 }
 
 /*
  * lacp_select_tx_port: pick the lagg port to transmit mbuf m on.
  *
  * The flow hash is the packet's flowid (shifted by the lagg's flowid
  * shift) when the lagg uses flowids and the mbuf carries a hash type,
  * otherwise it is computed from the packet headers.  The selection itself
  * is done by lacp_select_tx_port_by_hash().
  */
 struct lagg_port *
 lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
 {
 	struct lacp_softc *lsc = LACP_SOFTC(sc);
 	uint32_t flow;
 	int have_flowid;
 
 	have_flowid = (sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
 	    M_HASHTYPE_GET(m) != M_HASHTYPE_NONE;
 
 	if (have_flowid) {
 		flow = m->m_pkthdr.flowid >> sc->flowid_shift;
 	} else {
 		flow = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
 	}
 
 	return (lacp_select_tx_port_by_hash(sc, flow,
 	    m->m_pkthdr.numa_domain));
 }
 
 /*
  * lacp_suppress_distributing: drop transmit packets for a while
  * to preserve packet ordering.
  *
  * Only acts when "la" is the active aggregator.  A marker frame is sent
  * on every port and the transit callout is armed to lift the suppression
  * after LACP_TRANSIT_DELAY milliseconds.
  */
 
 static void
 lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la)
 {
 	struct lacp_port *port;
 
 	if (la != lsc->lsc_active_aggregator) {
 		return;
 	}
 
 	LACP_TRACE(NULL);
 
 	lsc->lsc_suppress_distributing = TRUE;
 
 	/* send a marker frame down each port to verify the queues are empty */
 	LIST_FOREACH(port, &lsc->lsc_ports, lp_next) {
 		port->lp_flags |= LACP_PORT_MARK;
 		lacp_xmit_marker(port);
 	}
 
 	/* set a timeout for the marker frames */
 	callout_reset(&lsc->lsc_transit_callout,
 	    LACP_TRANSIT_DELAY * hz / 1000, lacp_transit_expire, lsc);
 }
 
 /*
  * Compare two peerinfo structures bytewise, covering everything that
  * precedes the lip_state member.  Returns the memcmp() result.
  */
 static int
 lacp_compare_peerinfo(const struct lacp_peerinfo *a,
     const struct lacp_peerinfo *b)
 {
 	int diff;
 
 	diff = memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state));
 	return (diff);
 }
 
 /* Compare two system ids bytewise.  Returns the memcmp() result. */
 static int
 lacp_compare_systemid(const struct lacp_systemid *a,
     const struct lacp_systemid *b)
 {
 	int diff;
 
 	diff = memcmp(a, b, sizeof(*a));
 	return (diff);
 }
 
 #if 0	/* unused */
 /* Compare two port ids bytewise.  Returns the memcmp() result. */
 static int
 lacp_compare_portid(const struct lacp_portid *a,
     const struct lacp_portid *b)
 {
 	return (memcmp(a, b, sizeof(*a)));
 }
 #endif
 
 /*
  * lacp_aggregator_bandwidth: estimate an aggregator's total bandwidth as
  * the baudrate of its first distributing port multiplied by the number of
  * distributing ports.  Returns 0 if the aggregator has no such ports.
  */
 static uint64_t
 lacp_aggregator_bandwidth(struct lacp_aggregator *la)
 {
 	struct lacp_port *first;
 	uint64_t total;
 
 	first = TAILQ_FIRST(&la->la_ports);
 	if (first == NULL)
 		return (0);
 
 	/* The first port's media stands in for every port. */
 	total = ifmedia_baudrate(first->lp_media);
 	total *= la->la_nports;
 
 	if (total == 0) {
 		LACP_DPRINTF((first, "speed 0? media=0x%x nports=%d\n",
 		    first->lp_media, la->la_nports));
 	}
 	return (total);
 }
 
 /*
  * lacp_select_active_aggregator: select an aggregator to be used to transmit
  * packets from lagg(4) interface.
  *
  * Walks all aggregators that have at least one distributing port and picks
  * the best candidate.  If the choice differs from the current active
  * aggregator, the active aggregator is switched, the port map is rebuilt
  * and, when there is a new active aggregator, distribution is suppressed
  * for a while.
  */
 
 static void
 lacp_select_active_aggregator(struct lacp_softc *lsc)
 {
 	struct lacp_aggregator *la;
 	struct lacp_aggregator *best_la = NULL;
 	uint64_t best_speed = 0;
 	char buf[LACP_LAGIDSTR_MAX+1];
 
 	LACP_TRACE(NULL);
 
 	TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
 		uint64_t speed;
 
 		/* Aggregators without distributing ports cannot be chosen. */
 		if (la->la_nports == 0) {
 			continue;
 		}
 
 		speed = lacp_aggregator_bandwidth(la);
 		LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n",
 		    lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
 		    speed, la->la_nports));
 
 		/*
 		 * This aggregator is chosen if the partner has a better
 		 * system priority or, the total aggregated speed is higher
 		 * or, it is already the chosen aggregator
 		 */
 		if ((best_la != NULL && LACP_SYS_PRI(la->la_partner) <
 		    LACP_SYS_PRI(best_la->la_partner)) ||
 		    speed > best_speed ||
 		    (speed == best_speed &&
 		    la == lsc->lsc_active_aggregator)) {
 			best_la = la;
 			best_speed = speed;
 		}
 	}
 
 	KASSERT(best_la == NULL || best_la->la_nports > 0,
 	    ("invalid aggregator refcnt"));
 	KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports),
 	    ("invalid aggregator list"));
 
 	/* Debug output only; the actual switch happens further down. */
 	if (lsc->lsc_active_aggregator != best_la) {
 		LACP_DPRINTF((NULL, "active aggregator changed\n"));
 		LACP_DPRINTF((NULL, "old %s\n",
 		    lacp_format_lagid_aggregator(lsc->lsc_active_aggregator,
 		    buf, sizeof(buf))));
 	} else {
 		LACP_DPRINTF((NULL, "active aggregator not changed\n"));
 	}
 	LACP_DPRINTF((NULL, "new %s\n",
 	    lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
 
 	if (lsc->lsc_active_aggregator != best_la) {
 		lsc->lsc_active_aggregator = best_la;
 		lacp_update_portmap(lsc);
 		if (best_la) {
 			lacp_suppress_distributing(lsc, best_la);
 		}
 	}
 }
 
 /*
  * Update the inactive portmap array with the new list of ports and
  * make it live.
  *
  * Two port maps are kept; the one not currently active is rebuilt from the
  * active aggregator's distribution list and then published by storing its
  * index into lsc_activemap.  The lagg interface's baudrate is set to the
  * aggregator's bandwidth, or 0 when there is no usable active aggregator.
  */
 static void
 lacp_update_portmap(struct lacp_softc *lsc)
 {
 	struct lagg_softc *sc = lsc->lsc_softc;
 	struct lacp_aggregator *la;
 	struct lacp_portmap *p;
 	struct lacp_port *lp;
 	uint64_t speed;
 	u_int newmap;
 	int i;
 #ifdef NUMA
 	int count;
 	uint8_t domain;
 #endif
 
 	/* Pick the map that is not in use and start from a clean slate. */
 	newmap = lsc->lsc_activemap == 0 ? 1 : 0;
 	p = &lsc->lsc_pmap[newmap];
 	la = lsc->lsc_active_aggregator;
 	speed = 0;
 	bzero(p, sizeof(struct lacp_portmap));
 
 	if (la != NULL && la->la_nports > 0) {
 		p->pm_count = la->la_nports;
 		i = 0;
 		TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) {
 			p->pm_map[i++] = lp;
 #ifdef NUMA
 			/* Also file the port under its NUMA domain, if valid. */
 			domain = lp->lp_ifp->if_numa_domain;
 			if (domain >= MAXMEMDOM)
 				continue;
 			count = p->pm_numa[domain].count;
 			p->pm_numa[domain].map[count] = lp;
 			p->pm_numa[domain].count++;
 #endif
 		}
 		KASSERT(i == p->pm_count, ("Invalid port count"));
 
 #ifdef NUMA
 		/* Count how many domains ended up with at least one port. */
 		for (i = 0; i < MAXMEMDOM; i++) {
 			if (p->pm_numa[i].count != 0)
 				p->pm_num_dom++;
 		}
 #endif
 		speed = lacp_aggregator_bandwidth(la);
 	}
 	sc->sc_ifp->if_baudrate = speed;
 
 	/* switch the active portmap over */
 	atomic_store_rel_int(&lsc->lsc_activemap, newmap);
 	LACP_DPRINTF((NULL, "Set table %d with %d ports\n",
 		    lsc->lsc_activemap,
 		    lsc->lsc_pmap[lsc->lsc_activemap].pm_count));
 }
 
 /*
  * lacp_compose_key: compute the LACP key for a port.
  *
  * The key is returned in network byte order.  For a port that cannot be
  * aggregated it is the member interface index with bit 15 set.  For an
  * aggregatable port, bits 0..4 hold a representative media subtype for the
  * port's speed class (so that all media of the same speed yield the same
  * value) and bits 5..14 hold low bits of the lagg interface index; bit 15
  * is clear.
  */
 static uint16_t
 lacp_compose_key(struct lacp_port *lp)
 {
 	struct lagg_port *lgp = lp->lp_lagg;
 	struct lagg_softc *sc = lgp->lp_softc;
 	u_int media = lp->lp_media;
 	uint16_t key;
 
 	if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) {
 		/*
 		 * non-aggregatable links should have unique keys.
 		 *
 		 * XXX this isn't really unique as if_index is 16 bit.
 		 */
 
 		/* bit 0..14:	(some bits of) if_index of this port */
 		key = lp->lp_ifp->if_index;
 		/* bit 15:	1 */
 		key |= 0x8000;
 	} else {
 		u_int subtype = IFM_SUBTYPE(media);
 
 		KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid media type"));
 		KASSERT((media & IFM_FDX) != 0, ("aggregating HDX interface"));
 
 		/* bit 0..4:	IFM_SUBTYPE modulo speed */
 		switch (subtype) {
 		/* 10 Mb/s */
 		case IFM_10_T:
 		case IFM_10_2:
 		case IFM_10_5:
 		case IFM_10_STP:
 		case IFM_10_FL:
 			key = IFM_10_T;
 			break;
 		/* 100 Mb/s */
 		case IFM_100_TX:
 		case IFM_100_FX:
 		case IFM_100_T4:
 		case IFM_100_VG:
 		case IFM_100_T2:
 		case IFM_100_T:
 		case IFM_100_SGMII:
 			key = IFM_100_TX;
 			break;
 		/* 1 Gb/s */
 		case IFM_1000_SX:
 		case IFM_1000_LX:
 		case IFM_1000_CX:
 		case IFM_1000_T:
 		case IFM_1000_KX:
 		case IFM_1000_SGMII:
 		case IFM_1000_CX_SGMII:
 			key = IFM_1000_SX;
 			break;
 		/* 10 Gb/s */
 		case IFM_10G_LR:
 		case IFM_10G_SR:
 		case IFM_10G_CX4:
 		case IFM_10G_TWINAX:
 		case IFM_10G_TWINAX_LONG:
 		case IFM_10G_LRM:
 		case IFM_10G_T:
 		case IFM_10G_KX4:
 		case IFM_10G_KR:
 		case IFM_10G_CR1:
 		case IFM_10G_ER:
 		case IFM_10G_SFI:
 		case IFM_10G_AOC:
 			key = IFM_10G_LR;
 			break;
 		/* 20 Gb/s */
 		case IFM_20G_KR2:
 			key = IFM_20G_KR2;
 			break;
 		/* 2.5 Gb/s */
 		case IFM_2500_KX:
 		case IFM_2500_T:
 		case IFM_2500_X:
 			key = IFM_2500_KX;
 			break;
 		/* 5 Gb/s */
 		case IFM_5000_T:
 		case IFM_5000_KR:
 		case IFM_5000_KR_S:
 		case IFM_5000_KR1:
 			key = IFM_5000_T;
 			break;
 		/* 50 Gb/s */
 		case IFM_50G_PCIE:
 		case IFM_50G_CR2:
 		case IFM_50G_KR2:
 		case IFM_50G_KR4:
 		case IFM_50G_SR2:
 		case IFM_50G_LR2:
 		case IFM_50G_LAUI2_AC:
 		case IFM_50G_LAUI2:
 		case IFM_50G_AUI2_AC:
 		case IFM_50G_AUI2:
 		case IFM_50G_CP:
 		case IFM_50G_SR:
 		case IFM_50G_LR:
 		case IFM_50G_FR:
 		case IFM_50G_KR_PAM4:
 		case IFM_50G_AUI1_AC:
 		case IFM_50G_AUI1:
 			key = IFM_50G_PCIE;
 			break;
 		/* 56 Gb/s */
 		case IFM_56G_R4:
 			key = IFM_56G_R4;
 			break;
 		/* 25 Gb/s */
 		case IFM_25G_PCIE:
 		case IFM_25G_CR:
 		case IFM_25G_KR:
 		case IFM_25G_SR:
 		case IFM_25G_LR:
 		case IFM_25G_ACC:
 		case IFM_25G_AOC:
 		case IFM_25G_T:
 		case IFM_25G_CR_S:
 		case IFM_25G_CR1:
 		case IFM_25G_KR_S:
 		case IFM_25G_AUI:
 		case IFM_25G_KR1:
 			key = IFM_25G_PCIE;
 			break;
 		/* 40 Gb/s */
 		case IFM_40G_CR4:
 		case IFM_40G_SR4:
 		case IFM_40G_LR4:
 		case IFM_40G_LM4:
 		case IFM_40G_XLPPI:
 		case IFM_40G_KR4:
 		case IFM_40G_XLAUI:
 		case IFM_40G_XLAUI_AC:
 		case IFM_40G_ER4:
 			key = IFM_40G_CR4;
 			break;
 		/* 100 Gb/s */
 		case IFM_100G_CR4:
 		case IFM_100G_SR4:
 		case IFM_100G_KR4:
 		case IFM_100G_LR4:
 		case IFM_100G_CAUI4_AC:
 		case IFM_100G_CAUI4:
 		case IFM_100G_AUI4_AC:
 		case IFM_100G_AUI4:
 		case IFM_100G_CR_PAM4:
 		case IFM_100G_KR_PAM4:
 		case IFM_100G_CP2:
 		case IFM_100G_SR2:
 		case IFM_100G_DR:
 		case IFM_100G_KR2_PAM4:
 		case IFM_100G_CAUI2_AC:
 		case IFM_100G_CAUI2:
 		case IFM_100G_AUI2_AC:
 		case IFM_100G_AUI2:
 			key = IFM_100G_CR4;
 			break;
 		/* 200 Gb/s */
 		case IFM_200G_CR4_PAM4:
 		case IFM_200G_SR4:
 		case IFM_200G_FR4:
 		case IFM_200G_LR4:
 		case IFM_200G_DR4:
 		case IFM_200G_KR4_PAM4:
 		case IFM_200G_AUI4_AC:
 		case IFM_200G_AUI4:
 		case IFM_200G_AUI8_AC:
 		case IFM_200G_AUI8:
 			key = IFM_200G_CR4_PAM4;
 			break;
 		/* 400 Gb/s */
 		case IFM_400G_FR8:
 		case IFM_400G_LR8:
 		case IFM_400G_DR4:
 		case IFM_400G_AUI8_AC:
 		case IFM_400G_AUI8:
 			key = IFM_400G_FR8;
 			break;
 		/* Anything not listed above keeps its own subtype value. */
 		default:
 			key = subtype;
 			break;
 		}
 		/* bit 5..14:	(some bits of) if_index of lagg device */
 		key |= 0x7fe0 & ((sc->sc_ifp->if_index) << 5);
 		/* bit 15:	0 */
 	}
 	return (htons(key));
 }
 
 /*
  * lacp_aggregator_addref: take an additional reference on an aggregator
  * that is already referenced at least once.
  */
 static void
 lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la)
 {
 	char lagid[LACP_LAGIDSTR_MAX+1];
 
 	LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
 	    __func__,
 	    lacp_format_lagid(&la->la_actor, &la->la_partner,
 	    lagid, sizeof(lagid)),
 	    la->la_refcnt, la->la_refcnt + 1));
 
 	KASSERT(la->la_refcnt > 0, ("refcount <= 0"));
 	la->la_refcnt += 1;
 
 	/* References must always outnumber the distributing ports. */
 	KASSERT(la->la_refcnt > la->la_nports, ("invalid refcount"));
 }
 
 /*
  * lacp_aggregator_delref: drop a reference on an aggregator.  When the
  * last reference goes away the aggregator is unlinked from the softc's
  * aggregator list and freed; it must not be the active aggregator then.
  */
 static void
 lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la)
 {
 	char lagid[LACP_LAGIDSTR_MAX+1];
 
 	LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
 	    __func__,
 	    lacp_format_lagid(&la->la_actor, &la->la_partner,
 	    lagid, sizeof(lagid)),
 	    la->la_refcnt, la->la_refcnt - 1));
 
 	KASSERT(la->la_refcnt > la->la_nports, ("invalid refcnt"));
 
 	/* Still referenced by somebody else: nothing more to do. */
 	if (--la->la_refcnt > 0)
 		return;
 
 	KASSERT(la->la_refcnt == 0, ("refcount not zero"));
 	KASSERT(lsc->lsc_active_aggregator != la, ("aggregator active"));
 
 	TAILQ_REMOVE(&lsc->lsc_aggregators, la, la_q);
 	free(la, M_DEVBUF);
 }
 
 /*
  * lacp_aggregator_get: allocate an aggregator.
  *
  * The new aggregator starts with one reference, no distributing ports and
  * nothing pending, and is appended to the softc's aggregator list.
  * Returns NULL if the allocation fails.  The actor and partner
  * identifiers are not set up here.
  */
 
 static struct lacp_aggregator *
 lacp_aggregator_get(struct lacp_softc *lsc, struct lacp_port *lp)
 {
 	struct lacp_aggregator *la;
 
 	la = malloc(sizeof(*la), M_DEVBUF, M_NOWAIT);
 	if (la == NULL)
 		return (NULL);
 
 	la->la_refcnt = 1;
 	la->la_nports = 0;
 	la->la_pending = 0;
 	TAILQ_INIT(&la->la_ports);
 	TAILQ_INSERT_TAIL(&lsc->lsc_aggregators, la, la_q);
 
 	return (la);
 }
 
 /*
  * lacp_fill_aggregator_id: setup a newly allocated aggregator from a port.
  *
  * Copies the identifying parts of the port's actor and partner
  * information; the aggregator's actor state then carries only the port's
  * AGGREGATION bit.
  */
 
 static void
 lacp_fill_aggregator_id(struct lacp_aggregator *la, const struct lacp_port *lp)
 {
 	const struct lacp_peerinfo *port_partner = &lp->lp_partner;
 	const struct lacp_peerinfo *port_actor = &lp->lp_actor;
 
 	lacp_fill_aggregator_id_peer(&la->la_partner, port_partner);
 	lacp_fill_aggregator_id_peer(&la->la_actor, port_actor);
 
 	la->la_actor.lip_state = lp->lp_state & LACP_STATE_AGGREGATION;
 }
 
 /*
  * Initialize an aggregator's peerinfo from a port's: everything is
  * zeroed except the system id and the key, which are copied over.
  */
 static void
 lacp_fill_aggregator_id_peer(struct lacp_peerinfo *lpi_aggr,
     const struct lacp_peerinfo *lpi_port)
 {
 	memset(lpi_aggr, 0, sizeof(struct lacp_peerinfo));
 
 	lpi_aggr->lip_key = lpi_port->lip_key;
 	lpi_aggr->lip_systemid = lpi_port->lip_systemid;
 }
 
 /*
  * lacp_aggregator_is_compatible: check if a port can join to an aggregator.
  *
  * Returns 1 when the port, its partner and the aggregator's actor all have
  * the AGGREGATION bit set and both the partner and actor identifiers
  * (system id and key) match; returns 0 otherwise.
  */
 
 static int
 lacp_aggregator_is_compatible(const struct lacp_aggregator *la,
     const struct lacp_port *lp)
 {
 	/* Both ends of the link must be aggregatable. */
 	if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
 		return (0);
 	if ((lp->lp_partner.lip_state & LACP_STATE_AGGREGATION) == 0)
 		return (0);
 
 	/* So must the aggregator itself. */
 	if ((la->la_actor.lip_state & LACP_STATE_AGGREGATION) == 0)
 		return (0);
 
 	/* Partner first, then actor: system id and key have to agree. */
 	return (lacp_peerinfo_is_compatible(&la->la_partner,
 	    &lp->lp_partner) &&
 	    lacp_peerinfo_is_compatible(&la->la_actor, &lp->lp_actor));
 }
 
 /*
  * Two peerinfo structures are compatible when their system ids and keys
  * are bytewise identical.  Returns 1 if compatible, 0 otherwise.
  */
 static int
 lacp_peerinfo_is_compatible(const struct lacp_peerinfo *a,
     const struct lacp_peerinfo *b)
 {
 	return (memcmp(&a->lip_systemid, &b->lip_systemid,
 	    sizeof(a->lip_systemid)) == 0 &&
 	    memcmp(&a->lip_key, &b->lip_key, sizeof(a->lip_key)) == 0);
 }
 
 /* Mark the port as aggregatable by setting its AGGREGATION state bit. */
 static void
 lacp_port_enable(struct lacp_port *lp)
 {
 	lp->lp_state = lp->lp_state | LACP_STATE_AGGREGATION;
 }
 
 /*
  * lacp_port_disable: take a port out of aggregation.
  *
  * The mux is forced to DETACHED first, then the port's AGGREGATION bit is
  * cleared, the port is marked UNSELECTED, the partner information is
  * reset via lacp_sm_rx_record_default(), and finally the partner's
  * AGGREGATION bit and the port's EXPIRED bit are cleared.
  */
 static void
 lacp_port_disable(struct lacp_port *lp)
 {
 	lacp_set_mux(lp, LACP_MUX_DETACHED);
 
 	lp->lp_state &= ~LACP_STATE_AGGREGATION;
 	lp->lp_selected = LACP_UNSELECTED;
 	lacp_sm_rx_record_default(lp);
 	/* Applied after the defaults were recorded, so it overrides them. */
 	lp->lp_partner.lip_state &= ~LACP_STATE_AGGREGATION;
 	lp->lp_state &= ~LACP_STATE_EXPIRED;
 }
 
 /*
  * lacp_select: select an aggregator.  create one if necessary.
  *
  * Nothing is done if the port already has an aggregator or is still
  * running on defaulted partner information.  Otherwise the first
  * compatible aggregator is joined (unless its reference count has reached
  * LACP_MAX_PORTS), or a new one is created from the port's identifiers.
  * On success the port is marked SELECTED.
  */
 static void
 lacp_select(struct lacp_port *lp)
 {
 	struct lacp_softc *lsc = lp->lp_lsc;
 	struct lacp_aggregator *la;
 	char lagid[LACP_LAGIDSTR_MAX+1];
 
 	/*
 	 * Already attached to an aggregator, or we haven't heard from our
 	 * peer yet: skip this step.
 	 */
 	if (lp->lp_aggregator != NULL ||
 	    (lp->lp_state & LACP_STATE_DEFAULTED) != 0)
 		return;
 
 	KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
 	    ("timer_wait_while still active"));
 
 	LACP_DPRINTF((lp, "port lagid=%s\n",
 	    lacp_format_lagid(&lp->lp_actor, &lp->lp_partner,
 	    lagid, sizeof(lagid))));
 
 	/* Look for an existing aggregator this port fits into. */
 	TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
 		if (lacp_aggregator_is_compatible(la, lp))
 			break;
 	}
 
 	if (la != NULL) {
 		LACP_DPRINTF((lp, "compatible aggregator found\n"));
 		if (la->la_refcnt == LACP_MAX_PORTS)
 			return;
 		lacp_aggregator_addref(lsc, la);
 	} else {
 		la = lacp_aggregator_get(lsc, lp);
 		if (la == NULL) {
 			LACP_DPRINTF((lp, "aggregator creation failed\n"));
 
 			/*
 			 * will retry on the next tick.
 			 */
 			return;
 		}
 		lacp_fill_aggregator_id(la, lp);
 		LACP_DPRINTF((lp, "aggregator created\n"));
 	}
 
 	LACP_DPRINTF((lp, "aggregator lagid=%s\n",
 	    lacp_format_lagid(&la->la_actor, &la->la_partner,
 	    lagid, sizeof(lagid))));
 
 	lp->lp_aggregator = la;
 	lp->lp_selected = LACP_SELECTED;
 }
 
 /*
  * lacp_unselect: finish unselect/detach process.
  *
  * Detaches the port from its aggregator, if it has one, and drops the
  * port's reference on it.  The wait-while timer must not be armed.
  */
 
 static void
 lacp_unselect(struct lacp_port *lp)
 {
 	struct lacp_softc *lsc = lp->lp_lsc;
 	struct lacp_aggregator *old;
 
 	KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
 	    ("timer_wait_while still active"));
 
 	old = lp->lp_aggregator;
 	if (old != NULL) {
 		lp->lp_aggregator = NULL;
 		lacp_aggregator_delref(lsc, old);
 	}
 }
 
 /* mux machine */
 
 /*
  * lacp_sm_mux: run the mux state machine for a port.
  *
  * The selection state and the partner's SYNC/COLLECTING bits are sampled
  * once on entry.  The machine then steps through states, applying each
  * transition with lacp_set_mux() and re-evaluating, until no further
  * transition applies.
  */
 static void
 lacp_sm_mux(struct lacp_port *lp)
 {
 	struct lagg_port *lgp = lp->lp_lagg;
 	struct lagg_softc *sc = lgp->lp_softc;
 	enum lacp_mux_state new_state;
 	boolean_t p_sync =
 		    (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0;
 	boolean_t p_collecting =
 	    (lp->lp_partner.lip_state & LACP_STATE_COLLECTING) != 0;
 	enum lacp_selected selected = lp->lp_selected;
 	struct lacp_aggregator *la;
 
 	if (V_lacp_debug > 1)
 		lacp_dprintf(lp, "%s: state= 0x%x, selected= 0x%x, "
 		    "p_sync= 0x%x, p_collecting= 0x%x\n", __func__,
 		    lp->lp_mux_state, selected, p_sync, p_collecting);
 
 re_eval:
 	/* Re-read on every pass; it is not kept across lacp_set_mux(). */
 	la = lp->lp_aggregator;
 	KASSERT(lp->lp_mux_state == LACP_MUX_DETACHED || la != NULL,
 	    ("MUX not detached"));
 	new_state = lp->lp_mux_state;
 	switch (lp->lp_mux_state) {
 	case LACP_MUX_DETACHED:
 		/* Leave DETACHED as soon as the port is no longer UNSELECTED. */
 		if (selected != LACP_UNSELECTED) {
 			new_state = LACP_MUX_WAITING;
 		}
 		break;
 	case LACP_MUX_WAITING:
 		KASSERT(la->la_pending > 0 ||
 		    !LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
 		    ("timer_wait_while still active"));
 		/* Attach once nothing is pending on the aggregator. */
 		if (selected == LACP_SELECTED && la->la_pending == 0) {
 			new_state = LACP_MUX_ATTACHED;
 		} else if (selected == LACP_UNSELECTED) {
 			new_state = LACP_MUX_DETACHED;
 		}
 		break;
 	case LACP_MUX_ATTACHED:
 		/* Start collecting once the partner reports SYNC. */
 		if (selected == LACP_SELECTED && p_sync) {
 			new_state = LACP_MUX_COLLECTING;
 		} else if (selected != LACP_SELECTED) {
 			new_state = LACP_MUX_DETACHED;
 		}
 		break;
 	case LACP_MUX_COLLECTING:
 		/* Start distributing once the partner is collecting too. */
 		if (selected == LACP_SELECTED && p_sync && p_collecting) {
 			new_state = LACP_MUX_DISTRIBUTING;
 		} else if (selected != LACP_SELECTED || !p_sync) {
 			new_state = LACP_MUX_ATTACHED;
 		}
 		break;
 	case LACP_MUX_DISTRIBUTING:
 		/* Falling out of DISTRIBUTING is logged and counted as a flap. */
 		if (selected != LACP_SELECTED || !p_sync || !p_collecting) {
 			new_state = LACP_MUX_COLLECTING;
 			lacp_dprintf(lp, "Interface stopped DISTRIBUTING, possible flapping\n");
 			sc->sc_flapping++;
 		}
 		break;
 	default:
 		panic("%s: unknown state", __func__);
 	}
 
 	/* No transition applies any more: the machine has settled. */
 	if (lp->lp_mux_state == new_state) {
 		return;
 	}
 
 	lacp_set_mux(lp, new_state);
 	goto re_eval;
 }
 
 /*
  * Move the port's mux machine to new_state.
  *
  * Runs the entry actions for new_state and then stores it in
  * lp->lp_mux_state.  Returns immediately if the port is already in
  * new_state.  Panics on an unknown state value.
  *
  * `la' is read once on entry.
  * NOTE(review): the WAITING case dereferences `la' without a NULL check, so
  * the caller is presumably expected to have an aggregator set by then.
  */
 static void
 lacp_set_mux(struct lacp_port *lp, enum lacp_mux_state new_state)
 {
 	struct lacp_aggregator *la = lp->lp_aggregator;
 
 	if (lp->lp_mux_state == new_state) {
 		return;
 	}
 
 	switch (new_state) {
 	case LACP_MUX_DETACHED:
 		lp->lp_state &= ~LACP_STATE_SYNC;
 		lacp_disable_distributing(lp);
 		lacp_disable_collecting(lp);
 		lacp_sm_assert_ntt(lp);
 		/* cancel timer */
 		/* An armed wait-while timer still counts in la_pending. */
 		if (LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)) {
 			KASSERT(la->la_pending > 0,
 			    ("timer_wait_while not active"));
 			la->la_pending--;
 		}
 		LACP_TIMER_DISARM(lp, LACP_TIMER_WAIT_WHILE);
 		lacp_unselect(lp);
 		break;
 	case LACP_MUX_WAITING:
 		/* Start the wait-while timer and account for it. */
 		LACP_TIMER_ARM(lp, LACP_TIMER_WAIT_WHILE,
 		    LACP_AGGREGATE_WAIT_TIME);
 		la->la_pending++;
 		break;
 	case LACP_MUX_ATTACHED:
 		lp->lp_state |= LACP_STATE_SYNC;
 		lacp_disable_collecting(lp);
 		lacp_sm_assert_ntt(lp);
 		break;
 	case LACP_MUX_COLLECTING:
 		lacp_enable_collecting(lp);
 		lacp_disable_distributing(lp);
 		lacp_sm_assert_ntt(lp);
 		break;
 	case LACP_MUX_DISTRIBUTING:
 		lacp_enable_distributing(lp);
 		break;
 	default:
 		panic("%s: unknown state", __func__);
 	}
 
 	LACP_DPRINTF((lp, "mux_state %d -> %d\n", lp->lp_mux_state, new_state));
 
 	lp->lp_mux_state = new_state;
 }
 
 /*
  * Wait-while timer handler: one pending wait on the port's aggregator has
  * finished, so decrement the aggregator's la_pending count.
  */
 static void
 lacp_sm_mux_timer(struct lacp_port *lp)
 {
 	char lagid[LACP_LAGIDSTR_MAX+1];
 	struct lacp_aggregator *aggr;
 
 	aggr = lp->lp_aggregator;
 	KASSERT(aggr->la_pending > 0, ("no pending event"));
 
 	LACP_DPRINTF((lp, "%s: aggregator %s, pending %d -> %d\n", __func__,
 	    lacp_format_lagid(&aggr->la_actor, &aggr->la_partner,
 	    lagid, sizeof(lagid)),
 	    aggr->la_pending, aggr->la_pending - 1));
 
 	aggr->la_pending--;
 }
 
 /* periodic transmit machine */
 
 /*
  * React to a change in the partner's LACP_STATE_TIMEOUT bit.
  *
  * oldpstate is the partner state before it was updated.  When its TIMEOUT
  * bit differs from the current partner state, the periodic timer is disarmed
  * and, if the bit is now set, NTT is asserted.  Nothing happens when the bit
  * is unchanged.
  */
 static void
 lacp_sm_ptx_update_timeout(struct lacp_port *lp, uint8_t oldpstate)
 {
 	if (!LACP_STATE_EQ(oldpstate, lp->lp_partner.lip_state,
 	    LACP_STATE_TIMEOUT)) {
 		LACP_DPRINTF((lp, "partner timeout changed\n"));
 
 		/*
 		 * Either FAST_PERIODIC -> SLOW_PERIODIC, or
 		 * SLOW_PERIODIC (-> PERIODIC_TX) -> FAST_PERIODIC.
 		 *
 		 * Disarm here and let lacp_sm_ptx_tx_schedule() update the
 		 * timeout.
 		 */
 		LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
 
 		/* If the timeout has been shortened, assert NTT. */
 		if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) != 0) {
 			lacp_sm_assert_ntt(lp);
 		}
 	}
 }
 
 /*
  * Arm the periodic transmit timer if it is needed and not already running.
  *
  * With neither the port nor its partner in active mode the timer is
  * disarmed.  Otherwise an idle timer is armed with the fast or slow period,
  * chosen by the partner's LACP_STATE_TIMEOUT bit.
  */
 static void
 lacp_sm_ptx_tx_schedule(struct lacp_port *lp)
 {
 	int period;
 
 	/* NO_PERIODIC: nobody is active on this link. */
 	if ((lp->lp_state & LACP_STATE_ACTIVITY) == 0 &&
 	    (lp->lp_partner.lip_state & LACP_STATE_ACTIVITY) == 0) {
 		LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
 		return;
 	}
 
 	/* A running timer is left alone. */
 	if (LACP_TIMER_ISARMED(lp, LACP_TIMER_PERIODIC)) {
 		return;
 	}
 
 	if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) != 0) {
 		period = LACP_FAST_PERIODIC_TIME;
 	} else {
 		period = LACP_SLOW_PERIODIC_TIME;
 	}
 
 	LACP_TIMER_ARM(lp, LACP_TIMER_PERIODIC, period);
 }
 
 /*
  * Periodic transmit timer handler: flag the port as needing to transmit.
  */
 static void
 lacp_sm_ptx_timer(struct lacp_port *lp)
 {
 	lacp_sm_assert_ntt(lp);
 }
 
 /*
  * Receive machine: process one received LACPDU for the port.
  *
  * The PDU is ignored when aggregation is disabled on the port or when the
  * PDU's actor system id compares equal to the port's own (loopback).
  * Otherwise the selection state, NTT and recorded partner information are
  * updated from the PDU, the current-while timer is re-armed, the EXPIRED
  * flag is cleared and the transmit machine is run.
  */
 static void
 lacp_sm_rx(struct lacp_port *lp, const struct lacpdu *du)
 {
 	int timeout;
 
 	/*
 	 * check LACP_DISABLED first
 	 */
 
 	if (!(lp->lp_state & LACP_STATE_AGGREGATION)) {
 		return;
 	}
 
 	/*
 	 * check loopback condition.
 	 * (lacp_compare_systemid() returning 0 is treated as a match.)
 	 */
 
 	if (!lacp_compare_systemid(&du->ldu_actor.lip_systemid,
 	    &lp->lp_actor.lip_systemid)) {
 		return;
 	}
 
 	/*
 	 * EXPIRED, DEFAULTED, CURRENT -> CURRENT
 	 */
 
 	/*
 	 * The selected/NTT updates run before lacp_sm_rx_record_pdu()
 	 * overwrites lp->lp_partner with the PDU's actor information.
 	 */
 	lacp_sm_rx_update_selected(lp, du);
 	lacp_sm_rx_update_ntt(lp, du);
 	lacp_sm_rx_record_pdu(lp, du);
 
 	/* The timeout length follows our own TIMEOUT bit, not the partner's. */
 	timeout = (lp->lp_state & LACP_STATE_TIMEOUT) ?
 	    LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME;
 	LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, timeout);
 
 	lp->lp_state &= ~LACP_STATE_EXPIRED;
 
 	/*
 	 * kick the transmit machine without waiting for the next tick.
 	 */
 
 	lacp_sm_tx(lp);
 }
 
 /*
  * Put the receive machine into the EXPIRED condition: the partner is marked
  * out of sync and using the short timeout, the current-while timer is armed
  * with the short timeout, and the port's EXPIRED flag is set.
  */
 static void
 lacp_sm_rx_set_expired(struct lacp_port *lp)
 {
 	lp->lp_partner.lip_state =
 	    (lp->lp_partner.lip_state & ~LACP_STATE_SYNC) | LACP_STATE_TIMEOUT;
 	lp->lp_state |= LACP_STATE_EXPIRED;
 	LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, LACP_SHORT_TIMEOUT_TIME);
 }
 
 /*
  * Current-while timer handler.
  *
  * The first expiry moves the port from CURRENT to EXPIRED.  An expiry while
  * already EXPIRED falls back to the default partner information and clears
  * the EXPIRED flag.
  */
 static void
 lacp_sm_rx_timer(struct lacp_port *lp)
 {
 	if ((lp->lp_state & LACP_STATE_EXPIRED) != 0) {
 		/* EXPIRED -> DEFAULTED */
 		LACP_DPRINTF((lp, "%s: EXPIRED -> DEFAULTED\n", __func__));
 		lacp_sm_rx_update_default_selected(lp);
 		lacp_sm_rx_record_default(lp);
 		lp->lp_state &= ~LACP_STATE_EXPIRED;
 		return;
 	}
 
 	/* CURRENT -> EXPIRED */
 	LACP_DPRINTF((lp, "%s: CURRENT -> EXPIRED\n", __func__));
 	lacp_sm_rx_set_expired(lp);
 }
 
 /*
  * Record the actor information of a received LACPDU as the port's partner
  * information and derive the partner's SYNC bit.
  *
  * Also clears the port's DEFAULTED flag and, through
  * lacp_sm_ptx_update_timeout(), reacts to a change of the partner's
  * TIMEOUT bit.
  */
 static void
 lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du)
 {
 	boolean_t active;
 	uint8_t oldpstate;
 	char buf[LACP_STATESTR_MAX+1];
 
 	LACP_TRACE(lp);
 
 	/* Kept for the debug output and timeout-change check below. */
 	oldpstate = lp->lp_partner.lip_state;
 
 	/*
 	 * The link is "active" if the sender is active, or if we are active
 	 * and the sender's view of us says so as well.
 	 */
 	active = (du->ldu_actor.lip_state & LACP_STATE_ACTIVITY)
 	    || ((lp->lp_state & LACP_STATE_ACTIVITY) &&
 	    (du->ldu_partner.lip_state & LACP_STATE_ACTIVITY));
 
 	lp->lp_partner = du->ldu_actor;
 	/*
 	 * The first branch is taken when the link is active and either the
 	 * sender's view of us (peer info and AGGREGATION bit) agrees with our
 	 * own, or the sender reports us as not aggregatable.
 	 */
 	if (active &&
 	    ((LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
 	    LACP_STATE_AGGREGATION) &&
 	    !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner))
 	    || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) {
 		/*
 		 * XXX Maintain legacy behavior of leaving the
 		 * LACP_STATE_SYNC bit unchanged from the partner's
 		 * advertisement if lsc_strict_mode is false.
 		 * TODO: We should re-examine the concept of the "strict mode"
 		 * to ensure it makes sense to maintain a non-strict mode.
 		 */
 		if (lp->lp_lsc->lsc_strict_mode)
 			lp->lp_partner.lip_state |= LACP_STATE_SYNC;
 	} else {
 		lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
 	}
 
 	lp->lp_state &= ~LACP_STATE_DEFAULTED;
 
 	if (oldpstate != lp->lp_partner.lip_state) {
 		LACP_DPRINTF((lp, "old pstate %s\n",
 		    lacp_format_state(oldpstate, buf, sizeof(buf))));
 		LACP_DPRINTF((lp, "new pstate %s\n",
 		    lacp_format_state(lp->lp_partner.lip_state, buf,
 		    sizeof(buf))));
 	}
 
 	lacp_sm_ptx_update_timeout(lp, oldpstate);
 }
 
 /*
  * Assert NTT when the partner's view of us, as carried in the received
  * LACPDU, disagrees with our actor information or with our ACTIVITY, SYNC
  * or AGGREGATION state bits.
  */
 static void
 lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du)
 {
 
 	LACP_TRACE(lp);
 
 	/* Nothing to do when the partner already has us right. */
 	if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) == 0 &&
 	    LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
 	    LACP_STATE_ACTIVITY | LACP_STATE_SYNC | LACP_STATE_AGGREGATION)) {
 		return;
 	}
 
 	LACP_DPRINTF((lp, "%s: assert ntt\n", __func__));
 	lacp_sm_assert_ntt(lp);
 }
 
 static void
 lacp_sm_rx_record_default(struct lacp_port *lp)
 {
 	uint8_t oldpstate;
 
 	LACP_TRACE(lp);
 
 	oldpstate = lp->lp_partner.lip_state;
 	if (lp->lp_lsc->lsc_strict_mode)
 		lp->lp_partner = lacp_partner_admin_strict;
 	else
 		lp->lp_partner = lacp_partner_admin_optimistic;
 	lp->lp_state |= LACP_STATE_DEFAULTED;
 	lacp_sm_ptx_update_timeout(lp, oldpstate);
 }
 
 /*
  * Mark the port UNSELECTED when `info' differs from the currently recorded
  * partner, either in its peer information or in its AGGREGATION state bit.
  * lp->lp_aggregator is left untouched here.
  */
 static void
 lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp,
     const struct lacp_peerinfo *info)
 {
 
 	LACP_TRACE(lp);
 
 	/* Same partner as before: the selection stands. */
 	if (lacp_compare_peerinfo(&lp->lp_partner, info) == 0 &&
 	    LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state,
 	    LACP_STATE_AGGREGATION)) {
 		return;
 	}
 
 	/* mux machine will clean up lp->lp_aggregator */
 	lp->lp_selected = LACP_UNSELECTED;
 }
 
 /*
  * Re-check the port's selection against the actor information carried in a
  * received LACPDU.
  */
 static void
 lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du)
 {
 
 	LACP_TRACE(lp);
 
 	lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor);
 }
 
 /*
  * Re-check the port's selection against the administrative default partner
  * information (strict or optimistic, depending on lsc_strict_mode).
  */
 static void
 lacp_sm_rx_update_default_selected(struct lacp_port *lp)
 {
 	const struct lacp_peerinfo *admin;
 
 	LACP_TRACE(lp);
 
 	admin = lp->lp_lsc->lsc_strict_mode ?
 	    &lacp_partner_admin_strict : &lacp_partner_admin_optimistic;
 	lacp_sm_rx_update_selected_from_peerinfo(lp, admin);
 }
 
 /* transmit machine */
 
 static void
 lacp_sm_tx(struct lacp_port *lp)
 {
 	int error = 0;
 
 	if (!(lp->lp_state & LACP_STATE_AGGREGATION)
 #if 1
 	    || (!(lp->lp_state & LACP_STATE_ACTIVITY)
 	    && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY))
 #endif
 	    ) {
 		lp->lp_flags &= ~LACP_PORT_NTT;
 	}
 
 	if (!(lp->lp_flags & LACP_PORT_NTT)) {
 		return;
 	}
 
 	/* Rate limit to 3 PDUs per LACP_FAST_PERIODIC_TIME */
 	if (ppsratecheck(&lp->lp_last_lacpdu, &lp->lp_lacpdu_sent,
 		    (3 / LACP_FAST_PERIODIC_TIME)) == 0) {
 		LACP_DPRINTF((lp, "rate limited pdu\n"));
 		return;
 	}
 
 	if (((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_tx_test) == 0) {
 		error = lacp_xmit_lacpdu(lp);
 	} else {
 		LACP_TPRINTF((lp, "Dropping TX PDU\n"));
 	}
 
 	if (error == 0) {
 		lp->lp_flags &= ~LACP_PORT_NTT;
 	} else {
 		LACP_DPRINTF((lp, "lacpdu transmit failure, error %d\n",
 		    error));
 	}
 }
 
 /*
  * Set the port's LACP_PORT_NTT flag.  lacp_sm_tx() tests this flag before
  * sending an LACPDU.
  */
 static void
 lacp_sm_assert_ntt(struct lacp_port *lp)
 {
 
 	lp->lp_flags |= LACP_PORT_NTT;
 }
 
 static void
 lacp_run_timers(struct lacp_port *lp)
 {
 	int i;
 
 	for (i = 0; i < LACP_NTIMER; i++) {
 		KASSERT(lp->lp_timer[i] >= 0,
 		    ("invalid timer value %d", lp->lp_timer[i]));
 		if (lp->lp_timer[i] == 0) {
 			continue;
 		} else if (--lp->lp_timer[i] <= 0) {
 			if (lacp_timer_funcs[i]) {
 				(*lacp_timer_funcs[i])(lp);
 			}
 		}
 	}
 }
 
 /*
  * Handle a received marker protocol frame on a LACP port.
  *
  * The frame must be exactly sizeof(struct markerdu) long, flagged M_MCAST,
  * addressed to the slow-protocols multicast address and carry version 1.
  *
  * - MARKER_TYPE_INFO: the frame is turned into a marker response in place
  *   and handed to lagg_enqueue() on the receiving interface.
  * - MARKER_TYPE_RESPONSE: if it matches the last marker sent on this port
  *   (lp->lp_marker), the port's LACP_PORT_MARK flag is cleared, and
  *   distribution is un-suppressed once no port is still waiting.  The mbuf
  *   is freed.
  *
  * Returns ENOMEM if m_pullup() fails, EINVAL for a malformed or unexpected
  * frame (the mbuf is freed), and otherwise the lagg_enqueue() result or 0.
  */
 int
 lacp_marker_input(struct lacp_port *lp, struct mbuf *m)
 {
 	struct lacp_softc *lsc = lp->lp_lsc;
 	struct lagg_port *lgp = lp->lp_lagg;
 	struct lacp_port *lp2;
 	struct markerdu *mdu;
 	int error = 0;
 	int pending = 0;
 
 	if (m->m_pkthdr.len != sizeof(*mdu)) {
 		goto bad;
 	}
 
 	if ((m->m_flags & M_MCAST) == 0) {
 		goto bad;
 	}
 
 	/* Make the whole marker PDU contiguous before casting to it. */
 	if (m->m_len < sizeof(*mdu)) {
 		m = m_pullup(m, sizeof(*mdu));
 		if (m == NULL) {
 			return (ENOMEM);
 		}
 	}
 
 	mdu = mtod(m, struct markerdu *);
 
 	if (memcmp(&mdu->mdu_eh.ether_dhost,
 	    &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
 		goto bad;
 	}
 
 	if (mdu->mdu_sph.sph_version != 1) {
 		goto bad;
 	}
 
 	switch (mdu->mdu_tlv.tlv_type) {
 	case MARKER_TYPE_INFO:
 		if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
 		    marker_info_tlv_template, TRUE)) {
 			goto bad;
 		}
 		/* Reuse the received frame as the response. */
 		mdu->mdu_tlv.tlv_type = MARKER_TYPE_RESPONSE;
 		memcpy(&mdu->mdu_eh.ether_dhost,
 		    &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN);
 		memcpy(&mdu->mdu_eh.ether_shost,
 		    lgp->lp_lladdr, ETHER_ADDR_LEN);
 		error = lagg_enqueue(lp->lp_ifp, m);
 		break;
 
 	case MARKER_TYPE_RESPONSE:
 		if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
 		    marker_response_tlv_template, TRUE)) {
 			goto bad;
 		}
 		LACP_DPRINTF((lp, "marker response, port=%u, sys=%6D, id=%u\n",
 		    ntohs(mdu->mdu_info.mi_rq_port), mdu->mdu_info.mi_rq_system,
 		    ":", ntohl(mdu->mdu_info.mi_rq_xid)));
 
 		/* Verify that it is the last marker we sent out */
 		if (memcmp(&mdu->mdu_info, &lp->lp_marker,
 		    sizeof(struct lacp_markerinfo)))
 			goto bad;
 
 		LACP_LOCK(lsc);
 		lp->lp_flags &= ~LACP_PORT_MARK;
 
 		if (lsc->lsc_suppress_distributing) {
 			/* Check if any ports are waiting for a response */
 			LIST_FOREACH(lp2, &lsc->lsc_ports, lp_next) {
 				if (lp2->lp_flags & LACP_PORT_MARK) {
 					pending = 1;
 					break;
 				}
 			}
 
 			if (pending == 0) {
 				/* All interface queues are clear */
 				LACP_DPRINTF((NULL, "queue flush complete\n"));
 				lsc->lsc_suppress_distributing = FALSE;
 			}
 		}
 		LACP_UNLOCK(lsc);
 		m_freem(m);
 		break;
 
 	default:
 		goto bad;
 	}
 
 	return (error);
 
 bad:
 	LACP_DPRINTF((lp, "bad marker frame\n"));
 	m_freem(m);
 	return (EINVAL);
 }
 
 /*
  * Walk a chain of TLV headers and compare it against a template array.
  *
  * p/size describe the enclosing buffer, tlv is the first TLV header inside
  * it, and tmpl is an array of expected (type, length) pairs terminated by
  * an entry of type 0.  Every TLV header must lie within the buffer and its
  * length must equal the template's; types are compared only when
  * check_type is true.
  *
  * Returns 0 when every template entry matched, EINVAL otherwise.
  */
 static int
 tlv_check(const void *p, size_t size, const struct tlvhdr *tlv,
     const struct tlv_template *tmpl, boolean_t check_type)
 {
 	const char *base = p;
 	const char *cur;
 
 	for (;; tmpl++) {
 		cur = (const char *)tlv;
 
 		/* The TLV header itself must fit inside the buffer. */
 		if (cur - base + sizeof(*tlv) > size) {
 			return (EINVAL);
 		}
 		if (check_type && tlv->tlv_type != tmpl->tmpl_type) {
 			return (EINVAL);
 		}
 		if (tlv->tlv_length != tmpl->tmpl_length) {
 			return (EINVAL);
 		}
 
 		/* The type-0 template entry ends the list. */
 		if (tmpl->tmpl_type == 0) {
 			return (0);
 		}
 
 		tlv = (const struct tlvhdr *)(cur + tlv->tlv_length);
 	}
 }
 
 /* Debugging */
 /*
  * Format the six bytes at `mac' into buf as upper-case hexadecimal octets
  * separated by dashes ("XX-XX-XX-XX-XX-XX").  Output is bounded by buflen
  * through snprintf().  Returns buf.
  */
 const char *
 lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen)
 {
 	int octet[6];
 	int idx;
 
 	for (idx = 0; idx < 6; idx++)
 		octet[idx] = (int)mac[idx];
 
 	snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X",
 	    octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);
 
 	return (buf);
 }
 
 /*
  * Format a LACP system id into buf as "PPPP,<mac>": the priority as four
  * upper-case hex digits, then the MAC address as produced by
  * lacp_format_mac().  Returns buf.
  */
 const char *
 lacp_format_systemid(const struct lacp_systemid *sysid,
     char *buf, size_t buflen)
 {
 	char macstr[LACP_MACSTR_MAX+1];
 	const char *mactext;
 
 	mactext = lacp_format_mac(sysid->lsi_mac, macstr, sizeof(macstr));
 	snprintf(buf, buflen, "%04X,%s", ntohs(sysid->lsi_prio), mactext);
 
 	return (buf);
 }
 
 /*
  * Format a LACP port id into buf as "PPPP,NNNN": priority and port number,
  * each converted with ntohs() and printed as four upper-case hex digits.
  * Returns buf.
  */
 const char *
 lacp_format_portid(const struct lacp_portid *portid, char *buf, size_t buflen)
 {
 	uint16_t prio, portno;
 
 	prio = ntohs(portid->lpi_prio);
 	portno = ntohs(portid->lpi_portno);
 	snprintf(buf, buflen, "%04X,%04X", prio, portno);
 
 	return (buf);
 }
 
 /*
  * Format a peer description into buf as "(<systemid>,KKKK,<portid>)", where
  * KKKK is the key as four upper-case hex digits.  Returns buf.
  */
 const char *
 lacp_format_partner(const struct lacp_peerinfo *peer, char *buf, size_t buflen)
 {
 	char sysstr[LACP_SYSTEMIDSTR_MAX+1];
 	char portstr[LACP_PORTIDSTR_MAX+1];
 	const char *systext, *porttext;
 
 	systext = lacp_format_systemid(&peer->lip_systemid, sysstr,
 	    sizeof(sysstr));
 	porttext = lacp_format_portid(&peer->lip_portid, portstr,
 	    sizeof(portstr));
 	snprintf(buf, buflen, "(%s,%04X,%s)", systext, ntohs(peer->lip_key),
 	    porttext);
 
 	return (buf);
 }
 
 /*
  * Format a LAG id into buf as "[<a>,<b>]", with each peer rendered by
  * lacp_format_partner().  The peers are printed in the order given; the
  * code that would put the smaller peer first is compiled out.
  * Returns buf.
  */
 const char *
 lacp_format_lagid(const struct lacp_peerinfo *a,
     const struct lacp_peerinfo *b, char *buf, size_t buflen)
 {
 	char astr[LACP_PARTNERSTR_MAX+1];
 	char bstr[LACP_PARTNERSTR_MAX+1];
 
 #if 0
 	/*
 	 * there's a convention to display small numbered peer
 	 * in the left.
 	 */
 
 	if (lacp_compare_peerinfo(a, b) > 0) {
 		const struct lacp_peerinfo *t;
 
 		t = a;
 		a = b;
 		b = t;
 	}
 #endif
 
 	snprintf(buf, buflen, "[%s,%s]",
 	    lacp_format_partner(a, astr, sizeof(astr)),
 	    lacp_format_partner(b, bstr, sizeof(bstr)));
 
 	return (buf);
 }
 
 /*
  * Format the LAG id of an aggregator (actor and partner) into buf.
  * For a NULL aggregator the constant string "(none)" is returned and buf is
  * not written.
  */
 const char *
 lacp_format_lagid_aggregator(const struct lacp_aggregator *la,
     char *buf, size_t buflen)
 {
 	return (la == NULL ? "(none)" :
 	    lacp_format_lagid(&la->la_actor, &la->la_partner, buf, buflen));
 }
 
 /*
  * Format a LACP state byte into buf using the "%b" conversion, with
  * LACP_STATE_BITS as the bit-name description.  Returns buf.
  */
 const char *
 lacp_format_state(uint8_t state, char *buf, size_t buflen)
 {
 	snprintf(buf, buflen, "%b", state, LACP_STATE_BITS);
 	return (buf);
 }
 
 /*
  * Print the actor and partner information, their state bytes and the
  * collector max delay of an LACPDU with printf().
  */
 static void
 lacp_dump_lacpdu(const struct lacpdu *du)
 {
 	char peerstr[LACP_PARTNERSTR_MAX+1];
 	char statestr[LACP_STATESTR_MAX+1];
 
 	/* Actor half of the PDU. */
 	printf("actor=%s\n",
 	    lacp_format_partner(&du->ldu_actor, peerstr, sizeof(peerstr)));
 	printf("actor.state=%s\n",
 	    lacp_format_state(du->ldu_actor.lip_state, statestr,
 	    sizeof(statestr)));
 
 	/* Partner half of the PDU; the scratch buffers are reused. */
 	printf("partner=%s\n",
 	    lacp_format_partner(&du->ldu_partner, peerstr, sizeof(peerstr)));
 	printf("partner.state=%s\n",
 	    lacp_format_state(du->ldu_partner.lip_state, statestr,
 	    sizeof(statestr)));
 
 	printf("maxdelay=%d\n", ntohs(du->ldu_collector.lci_maxdelay));
 }
 
 static void
 lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...)
 {
 	va_list va;
 
 	if (lp) {
 		printf("%s: ", lp->lp_ifp->if_xname);
 	}
 
 	va_start(va, fmt);
 	vprintf(fmt, va);
 	va_end(va);
 }
Index: head/sys/net/if_ethersubr.c
===================================================================
--- head/sys/net/if_ethersubr.c	(revision 366932)
+++ head/sys/net/if_ethersubr.c	(revision 366933)
@@ -1,1476 +1,1476 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_ethersubr.c	8.1 (Berkeley) 6/10/93
  * $FreeBSD$
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_netgraph.h"
 #include "opt_mbuf_profiling.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/devctl.h>
 #include <sys/eventhandler.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
 #include <sys/priv.h>
 #include <sys/random.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/uuid.h>
 
 #include <net/ieee_oui.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/if_llc.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if_bridgevar.h>
 #include <net/if_vlan_var.h>
 #include <net/if_llatbl.h>
 #include <net/pfil.h>
 #include <net/rss_config.h>
 #include <net/vnet.h>
 
 #include <netpfil/pf/pf_mtag.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip_carp.h>
 #include <netinet/ip_var.h>
 #endif
 #ifdef INET6
 #include <netinet6/nd6.h>
 #endif
 #include <security/mac/mac_framework.h>
 
 #include <crypto/sha1.h>
 
 #ifdef CTASSERT
 CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
 CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
 #endif
 
 VNET_DEFINE(pfil_head_t, link_pfil_head);	/* Packet filter hooks */
 
 /* netgraph node hooks for ng_ether(4) */
 void	(*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
 void	(*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
 int	(*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp);
 void	(*ng_ether_attach_p)(struct ifnet *ifp);
 void	(*ng_ether_detach_p)(struct ifnet *ifp);
 
 void	(*vlan_input_p)(struct ifnet *, struct mbuf *);
 
 /* if_bridge(4) support */
 void	(*bridge_dn_p)(struct mbuf *, struct ifnet *);
 
 /* if_lagg(4) support */
-struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); 
+struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *); 
 
 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
 static	int ether_resolvemulti(struct ifnet *, struct sockaddr **,
 		struct sockaddr *);
 #ifdef VIMAGE
 static	void ether_reassign(struct ifnet *, struct vnet *, char *);
 #endif
 static	int ether_requestencap(struct ifnet *, struct if_encap_req *);
 
 #define senderr(e) do { error = (e); goto bad;} while (0)
 
 /*
  * Translate the checksum flags set on `src' into the matching "already
  * checked/valid" flags and OR them into `dst'.  When CSUM_DATA_VALID ends up
  * among the added flags, dst's csum_data is set to 0xffff.
  *
  * src and dst may be the same mbuf: all of src's flags are read before dst
  * is written.
  */
 static void
 update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
 {
 	int valid;
 
 	valid = 0;
 	if ((src->m_pkthdr.csum_flags & CSUM_IP) != 0)
 		valid |= CSUM_IP_CHECKED | CSUM_IP_VALID;
 	if ((src->m_pkthdr.csum_flags & CSUM_DELAY_DATA) != 0)
 		valid |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 	if ((src->m_pkthdr.csum_flags & CSUM_SCTP) != 0)
 		valid |= CSUM_SCTP_VALID;
 
 	dst->m_pkthdr.csum_flags |= valid;
 	if ((valid & CSUM_DATA_VALID) != 0)
 		dst->m_pkthdr.csum_data = 0xffff;
 }
 
 /*
  * Handle link-layer encapsulation requests.
  *
  * Builds an Ethernet header in req->buf for the address family given in
  * req->family (AF_INET, AF_INET6 or AF_ARP) and sets req->bufsize to the
  * header size.  For AF_ARP the hardware type of the ARP header in
  * req->hdata is set to Ethernet, and the destination becomes the interface
  * broadcast address when IFENCAP_FLAG_BROADCAST is requested.
  *
  * Returns 0 on success, EOPNOTSUPP for request types other than
  * IFENCAP_LL, ENOMEM if the buffer cannot hold an Ethernet header, and
  * EAFNOSUPPORT for any other address family.
  */
 static int
 ether_requestencap(struct ifnet *ifp, struct if_encap_req *req)
 {
 	struct ether_header *hdr;
 	struct arphdr *arp;
 	const u_char *dstaddr;
 	uint16_t proto;
 
 	if (req->rtype != IFENCAP_LL)
 		return (EOPNOTSUPP);
 	if (req->bufsize < ETHER_HDR_LEN)
 		return (ENOMEM);
 
 	hdr = (struct ether_header *)req->buf;
 	dstaddr = req->lladdr;
 	req->lladdr_off = 0;
 
 	if (req->family == AF_INET) {
 		proto = htons(ETHERTYPE_IP);
 	} else if (req->family == AF_INET6) {
 		proto = htons(ETHERTYPE_IPV6);
 	} else if (req->family == AF_ARP) {
 		arp = (struct arphdr *)req->hdata;
 		arp->ar_hrd = htons(ARPHRD_ETHER);
 
 		/* Reverse ARP has its own ethertype; all else is ARP. */
 		switch (ntohs(arp->ar_op)) {
 		case ARPOP_REVREQUEST:
 		case ARPOP_REVREPLY:
 			proto = htons(ETHERTYPE_REVARP);
 			break;
 		default:
 			proto = htons(ETHERTYPE_ARP);
 			break;
 		}
 
 		if (req->flags & IFENCAP_FLAG_BROADCAST)
 			dstaddr = ifp->if_broadcastaddr;
 	} else {
 		return (EAFNOSUPPORT);
 	}
 
 	memcpy(&hdr->ether_type, &proto, sizeof(hdr->ether_type));
 	memcpy(hdr->ether_dhost, dstaddr, ETHER_ADDR_LEN);
 	memcpy(hdr->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
 	req->bufsize = sizeof(struct ether_header);
 
 	return (0);
 }
 
 /*
  * Fill `phdr' with the Ethernet header needed to send `m' to `dst'.
  *
  * Unicast IPv4/IPv6 destinations are resolved through arpresolve() /
  * nd6_resolve(); broadcast and multicast headers are built directly.
  * If plle is non-NULL it is set to NULL first and then passed on to the
  * resolver.
  *
  * On success *pflags is set to RT_MAY_LOOP, plus RT_L2_ME when the resolver
  * reported LLE_IFADDR, and 0 is returned.  For an unsupported address
  * family the mbuf is freed and EAFNOSUPPORT is returned.  A resolver error
  * is returned as is, except that EHOSTDOWN becomes EHOSTUNREACH when the
  * route goes through a gateway.
  */
 static int
 ether_resolve_addr(struct ifnet *ifp, struct mbuf *m,
 	const struct sockaddr *dst, struct route *ro, u_char *phdr,
 	uint32_t *pflags, struct llentry **plle)
 {
 	struct ether_header *eh;
 	uint32_t lleflags = 0;
 	int error = 0;
 #if defined(INET) || defined(INET6)
 	uint16_t etype;
 #endif
 
 	if (plle)
 		*plle = NULL;
 	eh = (struct ether_header *)phdr;
 
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
 			error = arpresolve(ifp, 0, m, dst, phdr, &lleflags,
 			    plle);
 		else {
 			/* No resolution needed: build the header by hand. */
 			if (m->m_flags & M_BCAST)
 				memcpy(eh->ether_dhost, ifp->if_broadcastaddr,
 				    ETHER_ADDR_LEN);
 			else {
 				const struct in_addr *a;
 				a = &(((const struct sockaddr_in *)dst)->sin_addr);
 				ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost);
 			}
 			etype = htons(ETHERTYPE_IP);
 			memcpy(&eh->ether_type, &etype, sizeof(etype));
 			memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
 		}
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		if ((m->m_flags & M_MCAST) == 0)
 			error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags,
 			    plle);
 		else {
 			const struct in6_addr *a6;
 			a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
 			ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost);
 			etype = htons(ETHERTYPE_IPV6);
 			memcpy(&eh->ether_type, &etype, sizeof(etype));
 			memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
 		}
 		break;
 #endif
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		if (m != NULL)
 			m_freem(m);
 		return (EAFNOSUPPORT);
 	}
 
 	/* Behind a gateway, "host down" is reported as "host unreachable". */
 	if (error == EHOSTDOWN) {
 		if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0)
 			error = EHOSTUNREACH;
 	}
 
 	if (error != 0)
 		return (error);
 
 	*pflags = RT_MAY_LOOP;
 	if (lleflags & LLE_IFADDR)
 		*pflags |= RT_L2_ME;
 
 	return (0);
 }
 
 /*
  * Ethernet output routine.
  * Encapsulate a packet of type family for the local net.
  * Use trailer local net encapsulation if enough data in first
  * packet leaves a multiple of 512 bytes of data in remainder.
  *
  * The link-layer header comes from, in order of preference: the route's
  * ro_prepend data, a valid cached link-layer entry (ro_lle) for unicast
  * packets, or ether_resolve_addr().  Packets addressed to the interface
  * itself (RT_L2_ME) are looped back through if_simloop().  Otherwise the
  * header is prepended and the frame is passed on to the bridge, carp and
  * ng_ether hooks where present, and finally to ether_output_frame().
  *
  * Returns 0 or an errno value.  An EWOULDBLOCK result from address
  * resolution is reported as 0.
  */
 int
 ether_output(struct ifnet *ifp, struct mbuf *m,
 	const struct sockaddr *dst, struct route *ro)
 {
 	int error = 0;
 	char linkhdr[ETHER_HDR_LEN], *phdr;
 	struct ether_header *eh;
 	struct pf_mtag *t;
 	int loop_copy = 1;
 	int hlen;	/* link layer header length */
 	uint32_t pflags;
 	struct llentry *lle = NULL;
 	int addref = 0;
 
 	phdr = NULL;
 	pflags = 0;
 	if (ro != NULL) {
 		/* XXX BPF uses ro_prepend */
 		if (ro->ro_prepend != NULL) {
 			phdr = ro->ro_prepend;
 			hlen = ro->ro_plen;
 		} else if (!(m->m_flags & (M_BCAST | M_MCAST))) {
 			if ((ro->ro_flags & RT_LLE_CACHE) != 0) {
 				lle = ro->ro_lle;
 				/* Drop a cached entry that is no longer valid. */
 				if (lle != NULL &&
 				    (lle->la_flags & LLE_VALID) == 0) {
 					LLE_FREE(lle);
 					lle = NULL;	/* redundant */
 					ro->ro_lle = NULL;
 				}
 				if (lle == NULL) {
 					/* if we lookup, keep cache */
 					addref = 1;
 				} else
 					/*
 					 * Notify LLE code that
 					 * the entry was used
 					 * by datapath.
 					 */
 					llentry_mark_used(lle);
 			}
 			if (lle != NULL) {
 				phdr = lle->r_linkdata;
 				hlen = lle->r_hdrlen;
 				pflags = lle->r_flags;
 			}
 		}
 	}
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		senderr(error);
 #endif
 
 	M_PROFILE(m);
 	if (ifp->if_flags & IFF_MONITOR)
 		senderr(ENETDOWN);
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
 		senderr(ENETDOWN);
 
 	if (phdr == NULL) {
 		/* No prepend data supplied. Try to calculate ourselves. */
 		phdr = linkhdr;
 		hlen = ETHER_HDR_LEN;
 		error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags,
 		    addref ? &lle : NULL);
 		/* Remember a freshly looked-up entry in the route cache. */
 		if (addref && lle != NULL)
 			ro->ro_lle = lle;
 		if (error != 0)
 			return (error == EWOULDBLOCK ? 0 : error);
 	}
 
 	/* Destination is this interface's own address: loop back. */
 	if ((pflags & RT_L2_ME) != 0) {
 		update_mbuf_csumflags(m, m);
 		return (if_simloop(ifp, m, dst->sa_family, 0));
 	}
 	loop_copy = pflags & RT_MAY_LOOP;
 
 	/*
 	 * Add local net header.  If no space in first mbuf,
 	 * allocate another.
 	 *
 	 * Note that we do prepend regardless of RT_HAS_HEADER flag.
 	 * This is done because BPF code shifts m_data pointer
 	 * to the end of ethernet header prior to calling if_output().
 	 */
 	M_PREPEND(m, hlen, M_NOWAIT);
 	if (m == NULL)
 		senderr(ENOBUFS);
 	if ((pflags & RT_HAS_HEADER) == 0) {
 		eh = mtod(m, struct ether_header *);
 		memcpy(eh, phdr, hlen);
 	}
 
 	/*
 	 * If a simplex interface, and the packet is being sent to our
 	 * Ethernet address or a broadcast address, loopback a copy.
 	 * XXX To make a simplex device behave exactly like a duplex
 	 * device, we should copy in the case of sending to our own
 	 * ethernet address (thus letting the original actually appear
 	 * on the wire). However, we don't do that here for security
 	 * reasons and compatibility with the original behavior.
 	 */
 	if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) &&
 	    ((t = pf_find_mtag(m)) == NULL || !t->routed)) {
 		struct mbuf *n;
 
 		/*
 		 * Because if_simloop() modifies the packet, we need a
 		 * writable copy through m_dup() instead of a readonly
 		 * one as m_copy[m] would give us. The alternative would
 		 * be to modify if_simloop() to handle the readonly mbuf,
 		 * but performancewise it is mostly equivalent (trading
 		 * extra data copying vs. extra locking).
 		 *
 		 * XXX This is a local workaround.  A number of less
 		 * often used kernel parts suffer from the same bug.
 		 * See PR kern/105943 for a proposed general solution.
 		 */
 		if ((n = m_dup(m, M_NOWAIT)) != NULL) {
 			update_mbuf_csumflags(m, n);
 			(void)if_simloop(ifp, n, dst->sa_family, hlen);
 		} else
 			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 	}
 
 	/*
 	 * Bridges require special output handling.
 	 */
 	if (ifp->if_bridge) {
 		BRIDGE_OUTPUT(ifp, m, error);
 		return (error);
 	}
 
 #if defined(INET) || defined(INET6)
 	if (ifp->if_carp &&
 	    (error = (*carp_output_p)(ifp, m, dst)))
 		goto bad;
 #endif
 
 	/* Handle ng_ether(4) processing, if any */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_output_p != NULL,
 		    ("ng_ether_output_p is NULL"));
 		if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
 			/*
 			 * Common error exit, also reached via senderr()
 			 * and the carp failure path above: free the mbuf,
 			 * if any, and return the error.
 			 */
 bad:			if (m != NULL)
 				m_freem(m);
 			return (error);
 		}
 		/* The hook took the packet without reporting an error. */
 		if (m == NULL)
 			return (0);
 	}
 
 	/* Continue with link-layer output */
 	return ether_output_frame(ifp, m);
 }
 
 /*
  * Make sure an outgoing frame carries an 802.1Q header with priority `pcp'.
  *
  * A frame whose ethertype is already ETHERTYPE_VLAN or ETHERTYPE_QINQ is
  * left alone.  Otherwise a tag with VLAN id 0 and the given PCP is added
  * through ether_8021q_frame().
  *
  * Returns true if the frame can be sent on.  Returns false, after counting
  * an output error on ifp, if ether_8021q_frame() failed.
  */
 static bool
 ether_set_pcp(struct mbuf **mp, struct ifnet *ifp, uint8_t pcp)
 {
 	struct ether_8021q_tag qtag;
 	struct ether_header *eh;
 
 	eh = mtod(*mp, struct ether_header *);
 	switch (ntohs(eh->ether_type)) {
 	case ETHERTYPE_VLAN:
 	case ETHERTYPE_QINQ:
 		/* Already tagged. */
 		return (true);
 	default:
 		break;
 	}
 
 	qtag.vid = 0;
 	qtag.pcp = pcp;
 	qtag.proto = ETHERTYPE_VLAN;
 	if (!ether_8021q_frame(mp, ifp, ifp, &qtag)) {
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		return (false);
 	}
 	return (true);
 }
 
 /*
  * Ethernet link layer output routine to send a raw frame to the device.
  *
  * This assumes that the 14 byte Ethernet header is present and contiguous
  * in the first mbuf (if BRIDGE'ing).
  *
  * Applies the interface's default PCP where one is configured, runs the
  * link-layer output pfil hooks, and hands the frame to ifp->if_transmit.
  *
  * Returns the if_transmit result, EACCES if a pfil hook dropped the frame,
  * and 0 if a hook consumed it or if adding the PCP tag failed.
  */
 int
 ether_output_frame(struct ifnet *ifp, struct mbuf *m)
 {
 	uint8_t pcp;
 
 	/*
 	 * Not done for IFT_L2VLAN interfaces.  A tagging failure is counted
 	 * as an output error inside ether_set_pcp() and still reported as
 	 * success here.
 	 */
 	pcp = ifp->if_pcp;
 	if (pcp != IFNET_PCP_NONE && ifp->if_type != IFT_L2VLAN &&
 	    !ether_set_pcp(&m, ifp, pcp))
 		return (0);
 
 	if (PFIL_HOOKED_OUT(V_link_pfil_head))
 		switch (pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_OUT,
 		    NULL)) {
 		case PFIL_DROPPED:
 			return (EACCES);
 		case PFIL_CONSUMED:
 			return (0);
 		}
 
 #ifdef EXPERIMENTAL
 #if defined(INET6) && defined(INET)
 	/* draft-ietf-6man-ipv6only-flag */
 	/* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */
 	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) {
 		struct ether_header *eh;
 
 		eh = mtod(m, struct ether_header *);
 		switch (ntohs(eh->ether_type)) {
 		case ETHERTYPE_IP:
 		case ETHERTYPE_ARP:
 		case ETHERTYPE_REVARP:
 			m_freem(m);
 			return (EAFNOSUPPORT);
 			/* NOTREACHED */
 			break;
 		};
 	}
 #endif
 #endif
 
 	/*
 	 * Queue message on interface, update output statistics if
 	 * successful, and start output if interface not yet active.
 	 */
 	return ((ifp->if_transmit)(ifp, m));
 }
 
 /*
  * Process a received Ethernet packet; the packet is in the
  * mbuf chain m with the ethernet header at the front.
  */
 static void
 ether_input_internal(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ether_header *eh;
 	u_short etype;
 
 	/* Frames arriving on an interface that is not IFF_UP are dropped. */
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return;
 	}
 #ifdef DIAGNOSTIC
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n");
 		m_freem(m);
 		return;
 	}
 #endif
 	/* The Ethernet header must sit in the first mbuf of the chain. */
 	if (m->m_len < ETHER_HDR_LEN) {
 		/* XXX maybe should pullup? */
 		if_printf(ifp, "discard frame w/o leading ethernet "
 				"header (len %u pkt len %u)\n",
 				m->m_len, m->m_pkthdr.len);
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		return;
 	}
 	eh = mtod(m, struct ether_header *);
 	/* etype is read once here, in host order, and not refreshed below. */
 	etype = ntohs(eh->ether_type);
 	random_harvest_queue_ether(m, sizeof(*m));
 
 #ifdef EXPERIMENTAL
 #if defined(INET6) && defined(INET)
 	/* draft-ietf-6man-ipv6only-flag */
 	/* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */
 	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) {
 		switch (etype) {
 		case ETHERTYPE_IP:
 		case ETHERTYPE_ARP:
 		case ETHERTYPE_REVARP:
 			m_freem(m);
 			return;
 			/* NOTREACHED */
 			break;
 		};
 	}
 #endif
 #endif
 
 	/* From here on every return path is paired with CURVNET_RESTORE(). */
 	CURVNET_SET_QUIET(ifp->if_vnet);
 
 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
 		if (ETHER_IS_BROADCAST(eh->ether_dhost))
 			m->m_flags |= M_BCAST;
 		else
 			m->m_flags |= M_MCAST;
 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 	}
 
 #ifdef MAC
 	/*
 	 * Tag the mbuf with an appropriate MAC label before any other
 	 * consumers can get to it.
 	 */
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * Give bpf a chance at the packet.
 	 */
 	ETHER_BPF_MTAP(ifp, m);
 
 	/*
 	 * If the CRC is still on the packet, trim it off. We do this once
 	 * and once only in case we are re-entered. Nothing else on the
 	 * Ethernet receive path expects to see the FCS.
 	 */
 	if (m->m_flags & M_HASFCS) {
 		m_adj(m, -ETHER_CRC_LEN);
 		m->m_flags &= ~M_HASFCS;
 	}
 
 	/* Byte counting is skipped when IFCAP_HWSTATS is enabled. */
 	if (!(ifp->if_capenable & IFCAP_HWSTATS))
 		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 
 	/* Allow monitor mode to claim this frame, after stats are updated. */
 	if (ifp->if_flags & IFF_MONITOR) {
 		m_freem(m);
 		CURVNET_RESTORE();
 		return;
 	}
 
 	/* Handle input from a lagg(4) port */
 	if (ifp->if_type == IFT_IEEE8023ADLAG) {
-		KASSERT(lagg_input_p != NULL,
+		KASSERT(lagg_input_ethernet_p != NULL,
 		    ("%s: if_lagg not loaded!", __func__));
-		m = (*lagg_input_p)(ifp, m);
+		m = (*lagg_input_ethernet_p)(ifp, m);
 		/* A non-NULL result names the interface to continue on. */
 		if (m != NULL)
 			ifp = m->m_pkthdr.rcvif;
 		else {
 			CURVNET_RESTORE();
 			return;
 		}
 	}
 
 	/*
 	 * If the hardware did not process an 802.1Q tag, do this now,
 	 * to allow 802.1P priority frames to be passed to the main input
 	 * path correctly.
 	 */
 	if ((m->m_flags & M_VLANTAG) == 0 &&
 	    ((etype == ETHERTYPE_VLAN) || (etype == ETHERTYPE_QINQ))) {
 		struct ether_vlan_header *evl;
 
 		if (m->m_len < sizeof(*evl) &&
 		    (m = m_pullup(m, sizeof(*evl))) == NULL) {
 #ifdef DIAGNOSTIC
 			if_printf(ifp, "cannot pullup VLAN header\n");
 #endif
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			CURVNET_RESTORE();
 			return;
 		}
 
 		evl = mtod(m, struct ether_vlan_header *);
 		m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
 		m->m_flags |= M_VLANTAG;
 
 		/*
 		 * Slide the two address fields forward by
 		 * ETHER_VLAN_ENCAP_LEN bytes, then trim that many bytes off
 		 * the front so a plain Ethernet header leads the mbuf.
 		 */
 		bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
 		    ETHER_HDR_LEN - ETHER_TYPE_LEN);
 		m_adj(m, ETHER_VLAN_ENCAP_LEN);
 		eh = mtod(m, struct ether_header *);
 	}
 
 	M_SETFIB(m, ifp->if_fib);
 
 	/* Allow ng_ether(4) to claim this frame. */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_input_p != NULL,
 		    ("%s: ng_ether_input_p is NULL", __func__));
 		m->m_flags &= ~M_PROMISC;
 		(*ng_ether_input_p)(ifp, &m);
 		if (m == NULL) {
 			CURVNET_RESTORE();
 			return;
 		}
 		/* The hook received &m, so re-derive the header pointer. */
 		eh = mtod(m, struct ether_header *);
 	}
 
 	/*
 	 * Allow if_bridge(4) to claim this frame.
 	 * The BRIDGE_INPUT() macro will update ifp if the bridge changed it
 	 * and the frame should be delivered locally.
 	 */
 	if (ifp->if_bridge != NULL) {
 		m->m_flags &= ~M_PROMISC;
 		BRIDGE_INPUT(ifp, m);
 		if (m == NULL) {
 			CURVNET_RESTORE();
 			return;
 		}
 		eh = mtod(m, struct ether_header *);
 	}
 
 #if defined(INET) || defined(INET6)
 	/*
 	 * Clear M_PROMISC on frame so that carp(4) will see it when the
 	 * mbuf flows up to Layer 3.
 	 * FreeBSD's implementation of carp(4) uses the inprotosw
 	 * to dispatch IPPROTO_CARP. carp(4) also allocates its own
 	 * Ethernet addresses of the form 00:00:5e:00:01:xx, which
 	 * is outside the scope of the M_PROMISC test below.
 	 * TODO: Maintain a hash table of ethernet addresses other than
 	 * ether_dhost which may be active on this ifp.
 	 */
 	if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) {
 		m->m_flags &= ~M_PROMISC;
 	} else
 #endif
 	{
 		/*
 		 * If the frame received was not for our MAC address, set the
 		 * M_PROMISC flag on the mbuf chain. The frame may need to
 		 * be seen by the rest of the Ethernet input path in case of
 		 * re-entry (e.g. bridge, vlan, netgraph) but should not be
 		 * seen by upper protocol layers.
 		 */
 		if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
 		    bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
 			m->m_flags |= M_PROMISC;
 	}
 
 	ether_demux(ifp, m);
 	CURVNET_RESTORE();
 }
 
 /*
  * Ethernet input dispatch; by default, direct dispatch here regardless of
  * global configuration.  However, if RSS is enabled, hook up RSS affinity
  * so that when deferred or hybrid dispatch is enabled, we can redistribute
  * load based on RSS.
  *
  * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or
  * not it had already done work distribution via multi-queue.  Then we could
  * direct dispatch in the event load balancing was already complete and
  * handle the case of interfaces with different capabilities better.
  *
  * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions
  * at multiple layers?
  *
  * XXXRW: For now, enable all this only if RSS is compiled in, although it
  * works fine without RSS.  Need to characterise the performance overhead
  * of the detour through the netisr code in the event the result is always
  * direct dispatch.
  */
 static void
 ether_nh_input(struct mbuf *m)
 {
 	struct ifnet *rcvif;
 
 	/* Only the mbuf is passed in; the interface comes from its header. */
 	M_ASSERTPKTHDR(m);
 	rcvif = m->m_pkthdr.rcvif;
 	KASSERT(rcvif != NULL, ("%s: NULL interface pointer", __func__));
 	ether_input_internal(rcvif, m);
 }
 
 /* netisr registration record for NETISR_ETHER; dispatch is always direct. */
 static struct netisr_handler	ether_nh = {
 	.nh_name = "ether",
 	.nh_proto = NETISR_ETHER,
 	.nh_handler = ether_nh_input,
 	.nh_dispatch = NETISR_DISPATCH_DIRECT,
 #ifdef RSS
 	.nh_policy = NETISR_POLICY_CPU,
 	.nh_m2cpuid = rss_m2cpuid,
 #else
 	.nh_policy = NETISR_POLICY_SOURCE,
 #endif
 };
 
 static void
 ether_init(__unused void *arg)
 {
 	/* Register the Ethernet netisr handler. */
 	netisr_register(&ether_nh);
 }
 SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
 
 static void
 vnet_ether_init(__unused void *arg)
 {
 	struct pfil_head_args args;
 
 	/* Describe the link-layer pfil head, then register it. */
 	args.pa_headname = PFIL_ETHER_NAME;
 	args.pa_type = PFIL_TYPE_ETHERNET;
 	args.pa_flags = PFIL_IN | PFIL_OUT;
 	args.pa_version = PFIL_VERSION;
 	V_link_pfil_head = pfil_head_register(&args);
 
 #ifdef VIMAGE
 	/* With VIMAGE the netisr handler is also registered per vnet. */
 	netisr_register_vnet(&ether_nh);
 #endif
 }
 VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
     vnet_ether_init, NULL);
 
 #ifdef VIMAGE
 static void
 vnet_ether_pfil_destroy(__unused void *arg)
 {
 	/* Undo the pfil_head_register() done in vnet_ether_init(). */
 	pfil_head_unregister(V_link_pfil_head);
 }
 VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY,
     vnet_ether_pfil_destroy, NULL);
 
 static void
 vnet_ether_destroy(__unused void *arg)
 {
 	/* Undo the netisr_register_vnet() done in vnet_ether_init(). */
 	netisr_unregister_vnet(&ether_nh);
 }
 VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
     vnet_ether_destroy, NULL);
 #endif
 
 /*
  * if_input entry point for Ethernet interfaces.  Accepts either a single
  * packet or a chain linked through m_nextpkt and queues each packet to
  * netisr individually.
  */
 static void
 ether_input(struct ifnet *ifp, struct mbuf *m)
 {
 	struct epoch_tracker et;
 	struct mbuf *next;
 	bool enter_epoch;
 
 	/* Interfaces without IFF_KNOWSEPOCH get the epoch entered here. */
 	enter_epoch = (ifp->if_flags & IFF_KNOWSEPOCH) == 0;
 
 	CURVNET_SET_QUIET(ifp->if_vnet);
 	if (__predict_false(enter_epoch))
 		NET_EPOCH_ENTER(et);
 
 	/*
 	 * Drivers may hand over several packets at once, linked with
 	 * m_nextpkt, to amortize their receive lock.  Unlink each one
 	 * before passing it up.
 	 */
 	for (; m != NULL; m = next) {
 		next = m->m_nextpkt;
 		m->m_nextpkt = NULL;
 
 		/*
 		 * The deferred handler takes the interface from rcvif, so
 		 * check that it matches before dispatching.
 		 */
 		MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
 		KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p "
 		    "rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp));
 		netisr_dispatch(NETISR_ETHER, m);
 	}
 
 	if (__predict_false(enter_epoch))
 		NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
 }
 
 /*
  * Upper layer processing for a received Ethernet packet.
  */
 void
 ether_demux(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ether_header *eh;
 	int i, isr;
 	u_short ether_type;
 
 	NET_EPOCH_ASSERT();
 	KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
 
 	/* Do not grab PROMISC frames in case we are re-entered. */
 	if (PFIL_HOOKED_IN(V_link_pfil_head) && !(m->m_flags & M_PROMISC)) {
 		i = pfil_run_hooks(V_link_pfil_head, &m, ifp, PFIL_IN, NULL);
 		if (i != 0 || m == NULL)
 			return;
 	}
 
 	eh = mtod(m, struct ether_header *);
 	ether_type = ntohs(eh->ether_type);
 
 	/*
 	 * If this frame has a VLAN tag other than 0, call vlan_input()
 	 * if its module is loaded. Otherwise, drop.
 	 */
 	if ((m->m_flags & M_VLANTAG) &&
 	    EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) {
 		if (ifp->if_vlantrunk == NULL) {
 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 			m_freem(m);
 			return;
 		}
 		KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!",
 		    __func__));
 		/* Clear before possibly re-entering ether_input(). */
 		m->m_flags &= ~M_PROMISC;
 		(*vlan_input_p)(ifp, m);
 		return;
 	}
 
 	/*
 	 * Pass promiscuously received frames to the upper layer if the user
 	 * requested this by setting IFF_PPROMISC. Otherwise, drop them.
 	 */
 	if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) {
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Reset layer specific mbuf flags to avoid confusing upper layers.
 	 * Strip off Ethernet header.
 	 */
 	m->m_flags &= ~M_VLANTAG;
 	m_clrprotoflags(m);
 	m_adj(m, ETHER_HDR_LEN);
 
 	/*
 	 * Dispatch frame to upper layer.
 	 */
 	switch (ether_type) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		isr = NETISR_IP;
 		break;
 
 	case ETHERTYPE_ARP:
 		if (ifp->if_flags & IFF_NOARP) {
 			/* Discard packet if ARP is disabled on interface */
 			m_freem(m);
 			return;
 		}
 		isr = NETISR_ARP;
 		break;
 #endif
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 	default:
 		goto discard;
 	}
 	netisr_dispatch(isr, m);
 	return;
 
 discard:
 	/*
 	 * Packet is to be discarded.  If netgraph is present,
 	 * hand the packet to it for last chance processing;
 	 * otherwise dispose of it.
 	 */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_input_orphan_p != NULL,
 		    ("ng_ether_input_orphan_p is NULL"));
 		/*
 		 * Put back the ethernet header so netgraph has a
 		 * consistent view of inbound packets.
 		 */
 		M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
 		(*ng_ether_input_orphan_p)(ifp, m);
 		return;
 	}
 	m_freem(m);
 }
 
 /*
  * Convert Ethernet address to printable (loggable) representation.
  * This routine is for compatibility; it's better to just use
  *
  *	printf("%6D", <pointer to address>, ":");
  *
  * since there's no static buffer involved.
  */
 char *
 ether_sprintf(const u_char *ap)
 {
 	/* Shared by every caller; each call overwrites the previous text. */
 	static char etherbuf[18];
 
 	snprintf(etherbuf, sizeof(etherbuf), "%6D", ap, ":");
 	return (etherbuf);
 }
 
 /*
  * Perform common duties while attaching to interface list
  */
 void
 ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
 {
 	struct sockaddr_dl *sdl;
 	struct ifaddr *ifa;
 	int idx;
 
 	/* Ethernet link-layer parameters, set before if_attach() runs. */
 	ifp->if_addrlen = ETHER_ADDR_LEN;
 	ifp->if_hdrlen = ETHER_HDR_LEN;
 	ifp->if_mtu = ETHERMTU;
 	if_attach(ifp);
 
 	/* Install the Ethernet method table. */
 	ifp->if_output = ether_output;
 	ifp->if_input = ether_input;
 	ifp->if_resolvemulti = ether_resolvemulti;
 	ifp->if_requestencap = ether_requestencap;
 #ifdef VIMAGE
 	ifp->if_reassign = ether_reassign;
 #endif
 	/* A driver that left the baud rate unset gets a 10 Mbps default. */
 	if (ifp->if_baudrate == 0)
 		ifp->if_baudrate = IF_Mbps(10);		/* just a default */
 	ifp->if_broadcastaddr = etherbroadcastaddr;
 
 	/* Store the supplied address in the interface's link-level sockaddr. */
 	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_ETHER;
 	sdl->sdl_alen = ifp->if_addrlen;
 	bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
 
 	if (ifp->if_hw_addr != NULL)
 		bcopy(lla, ifp->if_hw_addr, ifp->if_addrlen);
 
 	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 	if (ng_ether_attach_p != NULL)
 		(*ng_ether_attach_p)(ifp);
 
 	/* Announce Ethernet MAC address if non-zero. */
 	idx = 0;
 	while (idx < ifp->if_addrlen && lla[idx] == 0)
 		idx++;
 	if (idx != ifp->if_addrlen)
 		if_printf(ifp, "Ethernet address: %6D\n", lla, ":");
 
 	uuid_ether_add(LLADDR(sdl));
 
 	/* All necessary bits are set up; announce it now. */
 	EVENTHANDLER_INVOKE(ether_ifattach_event, ifp);
 	if (IS_DEFAULT_VNET(curvnet))
 		devctl_notify("ETHERNET", ifp->if_xname, "IFATTACH", NULL);
 }
 
 /*
  * Perform common duties while detaching an Ethernet interface
  */
 void
 ether_ifdetach(struct ifnet *ifp)
 {
 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr);
 
 	/* Mirror of the uuid_ether_add() done at attach time. */
 	uuid_ether_del(LLADDR(sdl));
 
 	/* Detach netgraph first if the interface has netgraph state. */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_detach_p != NULL,
 		    ("ng_ether_detach_p is NULL"));
 		(*ng_ether_detach_p)(ifp);
 	}
 
 	bpfdetach(ifp);
 	if_detach(ifp);
 }
 
 #ifdef VIMAGE
 void
 ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused)
 {
 	/* Drop the existing netgraph attachment, if there is one. */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_detach_p != NULL,
 		    ("ng_ether_detach_p is NULL"));
 		(*ng_ether_detach_p)(ifp);
 	}
 
 	/* Without an attach hook there is nothing to re-create. */
 	if (ng_ether_attach_p == NULL)
 		return;
 
 	/* Re-attach with new_vnet as the current vnet. */
 	CURVNET_SET_QUIET(new_vnet);
 	(*ng_ether_attach_p)(ifp);
 	CURVNET_RESTORE();
 }
 #endif
 
 /* Sysctl node "ether" under the _net_link parent. */
 SYSCTL_DECL(_net_link);
 SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Ethernet");
 
 #if 0
 /*
  * This is for reference.  We have a table-driven version
  * of the little-endian crc32 generator, which is faster
  * than the double-loop.
  */
 uint32_t
 ether_crc32_le(const uint8_t *buf, size_t len)
 {
 	size_t i;
 	uint32_t crc, carry;	/* carry was previously used undeclared */
 	int bit;
 	uint8_t data;
 
 	crc = 0xffffffff;	/* initial value */
 
 	/* Feed each byte in one bit at a time, lowest bit first. */
 	for (i = 0; i < len; i++) {
 		for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
 			carry = (crc ^ data) & 1;
 			crc >>= 1;
 			if (carry)
 				crc = (crc ^ ETHER_CRC_POLY_LE);
 		}
 	}
 
 	return (crc);
 }
 #else
 uint32_t
 ether_crc32_le(const uint8_t *buf, size_t len)
 {
 	/* Lookup table indexed by one 4-bit nibble. */
 	static const uint32_t nibble_tab[16] = {
 		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
 		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
 		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
 		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
 	};
 	const uint8_t *cur = buf;
 	size_t remaining;
 	uint32_t acc = 0xffffffff;	/* initial value */
 
 	/* Each input byte is folded in as two table steps. */
 	for (remaining = len; remaining > 0; remaining--) {
 		acc ^= *cur++;
 		acc = (acc >> 4) ^ nibble_tab[acc & 0xf];
 		acc = (acc >> 4) ^ nibble_tab[acc & 0xf];
 	}
 
 	/* The result is returned as-is; no final inversion is applied. */
 	return (acc);
 }
 #endif
 
 /*
  * Bit-at-a-time CRC over buf[0..len-1] using ETHER_CRC_POLY_BE.  Input
  * bits are taken lowest bit first; the register is shifted left.
  */
 uint32_t
 ether_crc32_be(const uint8_t *buf, size_t len)
 {
 	uint32_t crc = 0xffffffff;	/* initial value */
 	uint32_t carry;
 	uint8_t data;
 	size_t pos;
 	int bit;
 
 	for (pos = 0; pos < len; pos++) {
 		data = buf[pos];
 		for (bit = 0; bit < 8; bit++) {
 			/* Top register bit XOR next input bit. */
 			carry = ((crc >> 31) & 1) ^ (data & 0x01);
 			crc <<= 1;
 			if (carry != 0)
 				crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
 			data >>= 1;
 		}
 	}
 
 	return (crc);
 }
 
 /*
  * Common Ethernet ioctl handling.  Returns 0 on success or an errno
  * value; commands not listed here yield EINVAL.
  */
 int
 ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	/* The same argument is viewed as either type, depending on command. */
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct ifaddr *ifa = (struct ifaddr *)data;
 	int error;
 
 	error = 0;
 	switch (command) {
 	case SIOCSIFADDR:
 		/* Setting an address also marks the interface up. */
 		ifp->if_flags |= IFF_UP;
 
 		switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 		case AF_INET:
 			ifp->if_init(ifp->if_softc);	/* before arpwhohas */
 			arp_ifinit(ifp, ifa);
 			break;
 #endif
 		default:
 			ifp->if_init(ifp->if_softc);
 			break;
 		}
 		break;
 
 	case SIOCGIFADDR:
 		/* Return the current link-level address. */
 		bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
 		    ETHER_ADDR_LEN);
 		break;
 
 	case SIOCSIFMTU:
 		/* Values above ETHERMTU are refused. */
 		if (ifr->ifr_mtu > ETHERMTU) {
 			error = EINVAL;
 			break;
 		}
 		ifp->if_mtu = ifr->ifr_mtu;
 		break;
 
 	case SIOCSLANPCP:
 		error = priv_check(curthread, PRIV_NET_SETLANPCP);
 		if (error != 0)
 			break;
 		/* Accept 0..7, or the IFNET_PCP_NONE sentinel. */
 		if (ifr->ifr_lan_pcp > 7 &&
 		    ifr->ifr_lan_pcp != IFNET_PCP_NONE) {
 			error = EINVAL;
 			break;
 		}
 		ifp->if_pcp = ifr->ifr_lan_pcp;
 		/* broadcast event about PCP change */
 		EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP);
 		break;
 
 	case SIOCGLANPCP:
 		ifr->ifr_lan_pcp = ifp->if_pcp;
 		break;
 
 	default:
 		error = EINVAL;			/* XXX netbsd has ENOTTY??? */
 		break;
 	}
 	return (error);
 }
 
 /*
  * Map a multicast address in sa to its Ethernet form.
  *
  * On success returns 0.  *llsa is set to NULL when no separate link-layer
  * address is produced, otherwise to the sockaddr_dl filled in through
  * link_init_sdl().  Returns EADDRNOTAVAIL for a non-multicast address and
  * EAFNOSUPPORT for an unhandled address family.
  */
 static int
 ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
 	struct sockaddr *sa)
 {
 	struct sockaddr_dl *sdl;
 	u_char *e_addr;
 
 	switch (sa->sa_family) {
 	case AF_LINK:
 		/* Already a link-layer address; only validate it. */
 		sdl = (struct sockaddr_dl *)sa;
 		e_addr = LLADDR(sdl);
 		if (!ETHER_IS_MULTICAST(e_addr))
 			return (EADDRNOTAVAIL);
 		*llsa = NULL;
 		return (0);
 
 #ifdef INET
 	case AF_INET: {
 		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
 
 		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 			return (EADDRNOTAVAIL);
 		sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
 		sdl->sdl_alen = ETHER_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return (0);
 	}
 #endif
 #ifdef INET6
 	case AF_INET6: {
 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
 
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 			/*
 			 * An IP6 address of 0 means listen to all
 			 * of the Ethernet multicast address used for IP6.
 			 * (This is used for multicast routers.)
 			 */
 			ifp->if_flags |= IFF_ALLMULTI;
 			*llsa = NULL;
 			return (0);
 		}
 		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 			return (EADDRNOTAVAIL);
 		sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
 		sdl->sdl_alen = ETHER_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return (0);
 	}
 #endif
 
 	default:
 		/*
 		 * Well, the text isn't quite right, but it's the name
 		 * that counts...
 		 */
 		return (EAFNOSUPPORT);
 	}
 }
 
 /* Module descriptor for "ether"; only the name field is set. */
 static moduledata_t ether_mod = {
 	.name = "ether",
 };
 
 /*
  * Tap a packet whose VLAN tag lives in the mbuf header (M_VLANTAG) so
  * that the tap sees an in-line 802.1Q header.  The mbuf is adjusted
  * temporarily and restored before returning.
  *
  * data/dlen - optional caller-supplied data placed ahead of the VLAN
  *             header; pass data == NULL for none.
  */
 void
 ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen)
 {
 	struct ether_vlan_header vlan;
 	struct mbuf mv, mb;
 
 	KASSERT((m->m_flags & M_VLANTAG) != 0,
 	    ("%s: vlan information not present", __func__));
 	KASSERT(m->m_len >= sizeof(struct ether_header),
 	    ("%s: mbuf not large enough for header", __func__));
 
 	/* Build the tagged header on the stack from the untagged one. */
 	bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header));
 	vlan.evl_proto = vlan.evl_encap_proto;
 	vlan.evl_encap_proto = htons(ETHERTYPE_VLAN);
 	vlan.evl_tag = htons(m->m_pkthdr.ether_vtag);
 
 	/* Hide the original header from the tap for the moment. */
 	m->m_data += sizeof(struct ether_header);
 	m->m_len -= sizeof(struct ether_header);
 
 	if (data == NULL) {
 		/* No extra data: submit the vlan header plus packet. */
 		bpf_mtap2(bp, &vlan, sizeof(vlan), m);
 	} else {
 		/*
 		 * Chain two stack mbufs in front of the packet:
 		 * mb (caller data) -> mv (vlan header) -> m (payload).
 		 */
 		mb.m_data = data;
 		mb.m_len = dlen;
 		mb.m_next = &mv;
 		mv.m_data = (caddr_t)&vlan;
 		mv.m_len = sizeof(vlan);
 		mv.m_next = m;
 		bpf_mtap(bp, &mb);
 	}
 
 	/* Put the original header back in view. */
 	m->m_data -= sizeof(struct ether_header);
 	m->m_len += sizeof(struct ether_header);
 }
 
 /*
  * Insert an in-line VLAN header with the given tag and encapsulation
  * protocol in front of the payload of m.  Returns the resulting mbuf,
  * which may differ from the one passed in, or NULL on failure.
  */
 struct mbuf *
 ether_vlanencap_proto(struct mbuf *m, uint16_t tag, uint16_t proto)
 {
 	struct ether_vlan_header *evl;
 	char *base;
 
 	/* M_PREPEND takes care of m_len, m_pkthdr.len for us */
 	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
 	if (m == NULL)
 		return (NULL);
 
 	/* The whole encapsulated header must be contiguous. */
 	if (m->m_len < sizeof(*evl) &&
 	    (m = m_pullup(m, sizeof(*evl))) == NULL)
 		return (NULL);
 
 	/*
 	 * Move the address fields back to the new front of the mbuf,
 	 * then fill in the encapsulation fields that follow them.
 	 */
 	evl = mtod(m, struct ether_vlan_header *);
 	base = (char *)evl;
 	bcopy(base + ETHER_VLAN_ENCAP_LEN, base,
 	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
 	evl->evl_encap_proto = htons(proto);
 	evl->evl_tag = htons(tag);
 	return (m);
 }
 
 /* Sysctl node "vlan" under _net_link, and "link" beneath it. */
 static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "IEEE 802.1Q VLAN");
 static SYSCTL_NODE(_net_link_vlan, PF_LINK, link,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "for consistency");
 
 /* Per-vnet knob checked by ether_8021q_frame() before padding. */
 VNET_DEFINE_STATIC(int, soft_pad);
 #define	V_soft_pad	VNET(soft_pad)
 SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET,
     &VNET_NAME(soft_pad), 0,
     "pad short frames before tagging");
 
 /*
  * For now, make preserving PCP via an mbuf tag optional, as it increases
  * per-packet memory allocations and frees.  In the future, it would be
  * preferable to reuse ether_vtag for this, or similar.
  */
 int vlan_mtag_pcp = 0;
 SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW,
     &vlan_mtag_pcp, 0,
     "Retain VLAN PCP information as packets are passed up the stack");
 
 bool
 ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p,
     struct ether_8021q_tag *qtag)
 {
 	struct m_tag *mtag;
 	int n;
 	uint16_t tag;
 	static const char pad[8];	/* just zeros */
 
 	/*
 	 * Pad the frame to the minimum size allowed if told to.
 	 * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
 	 * paragraph C.4.4.3.b.  It can help to work around buggy
 	 * bridges that violate paragraph C.4.4.3.a from the same
 	 * document, i.e., fail to pad short frames after untagging.
 	 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
 	 * untagging it will produce a 62-byte frame, which is a runt
 	 * and requires padding.  There are VLAN-enabled network
 	 * devices that just discard such runts instead or mishandle
 	 * them somehow.
 	 */
 	if (V_soft_pad && p->if_type == IFT_ETHER) {
 		for (n = ETHERMIN + ETHER_HDR_LEN - (*mp)->m_pkthdr.len;
 		     n > 0; n -= sizeof(pad)) {
 			if (!m_append(*mp, min(n, sizeof(pad)), pad))
 				break;
 		}
 		if (n > 0) {
 			m_freem(*mp);
 			*mp = NULL;
 			if_printf(ife, "cannot pad short frame");
 			return (false);
 		}
 	}
 
 	/*
 	 * If PCP is set in mbuf, use it
 	 */
 	if ((*mp)->m_flags & M_VLANTAG) {
 		qtag->pcp = EVL_PRIOFTAG((*mp)->m_pkthdr.ether_vtag);
 	}
 
 	/*
 	 * If underlying interface can do VLAN tag insertion itself,
 	 * just pass the packet along. However, we need some way to
 	 * tell the interface where the packet came from so that it
 	 * knows how to find the VLAN tag to use, so we attach a
 	 * packet tag that holds it.
 	 */
 	if (vlan_mtag_pcp && (mtag = m_tag_locate(*mp, MTAG_8021Q,
 	    MTAG_8021Q_PCP_OUT, NULL)) != NULL)
 		tag = EVL_MAKETAG(qtag->vid, *(uint8_t *)(mtag + 1), 0);
 	else
 		tag = EVL_MAKETAG(qtag->vid, qtag->pcp, 0);
 	if ((p->if_capenable & IFCAP_VLAN_HWTAGGING) &&
 	    (qtag->proto == ETHERTYPE_VLAN)) {
 		(*mp)->m_pkthdr.ether_vtag = tag;
 		(*mp)->m_flags |= M_VLANTAG;
 	} else {
 		*mp = ether_vlanencap_proto(*mp, tag, qtag->proto);
 		if (*mp == NULL) {
 			if_printf(ife, "unable to prepend 802.1Q header");
 			return (false);
 		}
 	}
 	return (true);
 }
 
 /*
  * Allocate an address from the FreeBSD Foundation OUI.  This uses a
  * cryptographic hash function on the containing jail's name, UUID and the
  * interface name to attempt to provide a unique but stable address.
  * Pseudo-interfaces which require a MAC address should use this function to
  * allocate non-locally-administered addresses.
  */
 void
 ether_gen_addr(struct ifnet *ifp, struct ether_addr *hwaddr)
 {
 	SHA1_CTX ctx;
 	char *buf;
 	char uuid[HOSTUUIDLEN + 1];
 	uint64_t addr;
 	int i, sz;
 	char digest[SHA1_RESULTLEN];
 	char jailname[MAXHOSTNAMELEN];
 
 	getcredhostuuid(curthread->td_ucred, uuid, sizeof(uuid));
 	/* If each (vnet) jail would also have a unique hostuuid this would not
 	 * be necessary. */
 	getjailname(curthread->td_ucred, jailname, sizeof(jailname));
 	sz = asprintf(&buf, M_TEMP, "%s-%s-%s", uuid, if_name(ifp),
 	    jailname);
 	if (sz < 0) {
 		/* Fall back to a random mac address. */
 		arc4rand(hwaddr, sizeof(*hwaddr), 0);
 		hwaddr->octet[0] = 0x02;
 		return;
 	}
 
 	SHA1Init(&ctx);
 	SHA1Update(&ctx, buf, sz);
 	SHA1Final(digest, &ctx);
 	free(buf, M_TEMP);
 
 	addr = ((digest[0] << 16) | (digest[1] << 8) | digest[2]) &
 	    OUI_FREEBSD_GENERATED_MASK;
 	addr = OUI_FREEBSD(addr);
 	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
 		hwaddr->octet[i] = addr >> ((ETHER_ADDR_LEN - i - 1) * 8) &
 		    0xFF;
 	}
 }
 
 /* Declare the "ether" module from ether_mod and give it version 1. */
 DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
 MODULE_VERSION(ether, 1);
Index: head/sys/net/if_infiniband.c
===================================================================
--- head/sys/net/if_infiniband.c	(revision 366932)
+++ head/sys/net/if_infiniband.c	(revision 366933)
@@ -1,538 +1,552 @@
 /*-
  * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/systm.h>
 #include <sys/eventhandler.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/devctl.h>
 #include <sys/module.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/ethernet.h>
 #include <net/infiniband.h>
 #include <net/bpf.h>
 #include <net/if_llatbl.h>
 #include <net/netisr.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_media.h>
+#include <net/if_lagg.h>
 
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip6.h>
 
 #include <netinet6/in6_var.h>
 #include <netinet6/nd6.h>
 
 #include <security/mac/mac_framework.h>
 
+/*
+ * if_lagg(4) support: input hook called by infiniband_input() for
+ * interfaces of type IFT_INFINIBANDLAG.  It has no initializer here.
+ */
+struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *); 
+
 #ifdef INET
 /*
  * Fill buf[0..19] with the link-layer multicast address for the IPv4
  * address addr (network byte order).  Three bytes are taken from
  * broadcast: the low nibble of byte 5, and bytes 8 and 9.
  */
 static inline void
 infiniband_ipv4_multicast_map(uint32_t addr,
     const uint8_t *broadcast, uint8_t *buf)
 {
 	const uint32_t haddr = ntohl(addr);
 	const uint8_t scope = broadcast[5] & 0xF;
 	int i;
 
 	buf[0] = 0;
 	for (i = 1; i <= 4; i++)
 		buf[i] = 0xff;
 	buf[5] = 0x10 | scope;
 	buf[6] = 0x40;
 	buf[7] = 0x1b;
 	buf[8] = broadcast[8];
 	buf[9] = broadcast[9];
 	for (i = 10; i <= 15; i++)
 		buf[i] = 0;
 	/* Last four bytes: the IPv4 address, most significant byte first. */
 	for (i = 0; i < 4; i++)
 		buf[16 + i] = (haddr >> (24 - 8 * i)) & 0xff;
 }
 #endif
 
 #ifdef INET6
 /*
  * Fill buf[0..19] with the link-layer multicast address for the IPv6
  * address *addr.  Three bytes are taken from broadcast (low nibble of
  * byte 5, bytes 8 and 9); the final ten bytes are copied from
  * addr->s6_addr[6..15].
  */
 static inline void
 infiniband_ipv6_multicast_map(const struct in6_addr *addr,
     const uint8_t *broadcast, uint8_t *buf)
 {
 	const uint8_t scope = broadcast[5] & 0xF;
 	int i;
 
 	buf[0] = 0;
 	for (i = 1; i <= 4; i++)
 		buf[i] = 0xff;
 	buf[5] = 0x10 | scope;
 	buf[6] = 0x60;
 	buf[7] = 0x1b;
 	buf[8] = broadcast[8];
 	buf[9] = broadcast[9];
 	memcpy(&buf[10], &addr->s6_addr[6], 10);
 }
 #endif
 
 /*
  * This is for clients that have an infiniband_header in the mbuf.
  */
 void
 infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb)
 {
 	struct infiniband_header *ibh;
 	struct ether_header eh;
 
 	/* The tap is skipped unless the first mbuf holds a full header. */
 	if (mb->m_len < sizeof(*ibh))
 		return;
 	ibh = mtod(mb, struct infiniband_header *);
 
 	/*
 	 * Build an Ethernet-shaped header for the tap: zero source, and
 	 * ETHER_ADDR_LEN bytes of the hardware address, starting at
 	 * offset 4, as destination.
 	 */
 	memset(eh.ether_shost, 0, ETHER_ADDR_LEN);
 	memcpy(eh.ether_dhost, ibh->ib_hwaddr + 4, ETHER_ADDR_LEN);
 	eh.ether_type = ibh->ib_protocol;
 
 	/* Step over the real header while tapping, then step back. */
 	mb->m_data += sizeof(*ibh);
 	mb->m_len -= sizeof(*ibh);
 	mb->m_pkthdr.len -= sizeof(*ibh);
 
 	bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
 
 	mb->m_data -= sizeof(*ibh);
 	mb->m_len += sizeof(*ibh);
 	mb->m_pkthdr.len += sizeof(*ibh);
 }
 
 /*
  * Infiniband output routine.
  */
 static int
 infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	uint8_t edst[INFINIBAND_ADDR_LEN];
 #if defined(INET) || defined(INET6)
 	/*
 	 * NOTE(review): lle is never assigned in this function, so the
 	 * "lle != NULL" branches below cannot be taken as written.
 	 */
 	struct llentry *lle = NULL;
 #endif
 	struct infiniband_header *ibh;
 	int error = 0;
 	uint16_t type;
 	bool is_gw;
 
 	NET_EPOCH_ASSERT();
 
 	is_gw = ((ro != NULL) && (ro->ro_flags & RT_HAS_GW) != 0);
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		goto bad;
 #endif
 
 	M_PROFILE(m);
 	/* Nothing is transmitted in monitor mode or while not up/running. */
 	if (ifp->if_flags & IFF_MONITOR) {
 		error = ENETDOWN;
 		goto bad;
 	}
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
 		error = ENETDOWN;
 		goto bad;
 	}
 
 	/* Pick the destination hardware address (edst) and protocol type. */
 	switch (dst->sa_family) {
 	case AF_LINK:
 		/* Sent as-is: no header is prepended on this path. */
 		goto output;
 #ifdef INET
 	case AF_INET:
 		if (lle != NULL && (lle->la_flags & LLE_VALID)) {
 			memcpy(edst, lle->ll_addr, sizeof(edst));
 		} else if (m->m_flags & M_MCAST) {
 			infiniband_ipv4_multicast_map(
 			    ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
 			    ifp->if_broadcastaddr, edst);
 		} else {
 			error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
 			if (error) {
 				/* EWOULDBLOCK is reported to the caller as 0. */
 				if (error == EWOULDBLOCK)
 					error = 0;
 				m = NULL;	/* mbuf is consumed by resolver */
 				goto bad;
 			}
 		}
 		type = htons(ETHERTYPE_IP);
 		break;
 	case AF_ARP: {
 		struct arphdr *ah;
 
 		if (m->m_len < sizeof(*ah)) {
 			error = EINVAL;
 			goto bad;
 		}
 
 		ah = mtod(m, struct arphdr *);
 
 		if (m->m_len < arphdr_len(ah)) {
 			error = EINVAL;
 			goto bad;
 		}
 		ah->ar_hrd = htons(ARPHRD_INFINIBAND);
 
 		switch (ntohs(ah->ar_op)) {
 		case ARPOP_REVREQUEST:
 		case ARPOP_REVREPLY:
 			type = htons(ETHERTYPE_REVARP);
 			break;
 		case ARPOP_REQUEST:
 		case ARPOP_REPLY:
 		default:
 			type = htons(ETHERTYPE_ARP);
 			break;
 		}
 
 		if (m->m_flags & M_BCAST) {
 			memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
 		} else {
 			/* The target address is only usable at full length. */
 			if (ah->ar_hln != INFINIBAND_ADDR_LEN) {
 				error = EINVAL;
 				goto bad;
 			}
 			memcpy(edst, ar_tha(ah), INFINIBAND_ADDR_LEN);
 		}
 		break;
 	}
 #endif
 #ifdef INET6
 	case AF_INET6: {
 		const struct ip6_hdr *ip6;
 
 		ip6 = mtod(m, const struct ip6_hdr *);
 		if (m->m_len < sizeof(*ip6)) {
 			error = EINVAL;
 			goto bad;
 		} else if (lle != NULL && (lle->la_flags & LLE_VALID)) {
 			memcpy(edst, lle->ll_addr, sizeof(edst));
 		} else if (m->m_flags & M_MCAST) {
 			infiniband_ipv6_multicast_map(
 			    &((const struct sockaddr_in6 *)dst)->sin6_addr,
 			    ifp->if_broadcastaddr, edst);
 		} else if (ip6->ip6_nxt == IPPROTO_ICMPV6) {
 			/* Non-multicast ICMPv6 goes to the broadcast address. */
 			memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
 		} else {
 			error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
 			if (error) {
 				if (error == EWOULDBLOCK)
 					error = 0;
 				m = NULL;	/* mbuf is consumed by resolver */
 				goto bad;
 			}
 		}
 		type = htons(ETHERTYPE_IPV6);
 		break;
 	}
 #endif
 	default:
 		error = EAFNOSUPPORT;
 		goto bad;
 	}
 
 	/*
 	 * Add local net header.  If no space in first mbuf,
 	 * allocate another.
 	 */
 	M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto bad;
 	}
 	ibh = mtod(m, struct infiniband_header *);
 
 	ibh->ib_protocol = type;
 	memcpy(ibh->ib_hwaddr, edst, sizeof(edst));
 
 	/*
 	 * Queue message on interface, update output statistics if
 	 * successful, and start output if interface not yet active.
 	 */
 output:
 	return (ifp->if_transmit(ifp, m));
 bad:
 	/* m is NULL here when it was already consumed or never allocated. */
 	if (m != NULL)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * Process a received Infiniband packet.
  */
 static void
 infiniband_input(struct ifnet *ifp, struct mbuf *m)
 {
 	struct infiniband_header *ibh;
 	struct epoch_tracker et;
 	int isr;
 
 	/* Every exit goes through "done", which restores the vnet. */
 	CURVNET_SET_QUIET(ifp->if_vnet);
 
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		goto done;
 	}
 
 	/*
 	 * NOTE(review): m_len is not compared against the header size
 	 * before ibh is dereferenced below; confirm that callers always
 	 * deliver a contiguous header.
 	 */
 	ibh = mtod(m, struct infiniband_header *);
 
 	/*
 	 * Reset layer specific mbuf flags to avoid confusing upper
 	 * layers:
 	 */
 	m->m_flags &= ~M_VLANTAG;
 	m_clrprotoflags(m);
 
 	if (INFINIBAND_IS_MULTICAST(ibh->ib_hwaddr)) {
 		if (memcmp(ibh->ib_hwaddr, ifp->if_broadcastaddr,
 		    ifp->if_addrlen) == 0)
 			m->m_flags |= M_BCAST;
 		else
 			m->m_flags |= M_MCAST;
 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 	}
 
 	/* Let BPF have it before we strip the header. */
 	INFINIBAND_BPF_MTAP(ifp, m);
 
 	/* Allow monitor mode to claim this frame, after stats are updated. */
 	if (ifp->if_flags & IFF_MONITOR) {
 		m_freem(m);
 		goto done;
 	}
 
 	/* Direct packet to correct FIB based on interface config. */
 	M_SETFIB(m, ifp->if_fib);
+
+	/* Handle input from a lagg<N> port */
+	if (ifp->if_type == IFT_INFINIBANDLAG) {
+		KASSERT(lagg_input_infiniband_p != NULL,
+		    ("%s: if_lagg not loaded!", __func__));
+		m = (*lagg_input_infiniband_p)(ifp, m);
+		if (__predict_false(m == NULL))
+			goto done;
+		ifp = m->m_pkthdr.rcvif;
+	}
 
 	/*
 	 * NOTE(review): ibh was taken from the mbuf before the lagg hook
 	 * ran and is not re-derived; this presumes the hook hands back the
 	 * same header storage -- TODO confirm.
 	 */
 	/*
 	 * Dispatch frame to upper layer.
 	 */
 	switch (ibh->ib_protocol) {
 #ifdef INET
 	case htons(ETHERTYPE_IP):
 		isr = NETISR_IP;
 		break;
 
 	case htons(ETHERTYPE_ARP):
 		if (ifp->if_flags & IFF_NOARP) {
 			/* Discard packet if ARP is disabled on interface */
 			m_freem(m);
 			goto done;
 		}
 		isr = NETISR_ARP;
 		break;
 #endif
 #ifdef INET6
 	case htons(ETHERTYPE_IPV6):
 		isr = NETISR_IPV6;
 		break;
 #endif
 	default:
 		/* Unhandled protocols are counted as input errors. */
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		goto done;
 	}
 
 	/* Strip off the Infiniband header. */
 	m_adj(m, INFINIBAND_HDR_LEN);
 
 #ifdef MAC
 	/*
 	 * Tag the mbuf with an appropriate MAC label before any other
 	 * consumers can get to it.
 	 */
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 	/* Hand the payload to the selected netisr inside the network epoch. */
 	NET_EPOCH_ENTER(et);
 	netisr_dispatch(isr, m);
 	NET_EPOCH_EXIT(et);
 done:
 	CURVNET_RESTORE();
 }
 
 static int
 infiniband_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
     struct sockaddr *sa)
 {
 	struct sockaddr_dl *sdl;
 #ifdef INET
 	struct sockaddr_in *sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 #endif
 	uint8_t *e_addr;
 
 	switch (sa->sa_family) {
 	case AF_LINK:
 		/*
 		 * No mapping needed. Just check that it's a valid MC address.
 		 */
 		sdl = (struct sockaddr_dl *)sa;
 		e_addr = LLADDR(sdl);
 		if (!INFINIBAND_IS_MULTICAST(e_addr))
 			return (EADDRNOTAVAIL);
 		*llsa = NULL;
 		return 0;
 
 #ifdef INET
 	case AF_INET:
 		sin = (struct sockaddr_in *)sa;
 		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 			return (EADDRNOTAVAIL);
 		sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
 		sdl->sdl_alen = INFINIBAND_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		infiniband_ipv4_multicast_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr,
 		    e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return (0);
 #endif
 #ifdef INET6
 	case AF_INET6:
 		sin6 = (struct sockaddr_in6 *)sa;
 		/*
 		 * An IP6 address of 0 means listen to all of the
 		 * multicast address used for IP6. This has no meaning
 		 * in infiniband.
 		 */
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
 			return (EADDRNOTAVAIL);
 		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 			return (EADDRNOTAVAIL);
 		sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
 		sdl->sdl_alen = INFINIBAND_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		infiniband_ipv6_multicast_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return (0);
 #endif
 	default:
 		return (EAFNOSUPPORT);
 	}
 }
 
 void
 infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb)
 {
 	struct sockaddr_dl *sdl;
 	struct ifaddr *ifa;
 	int i;
 
 	ifp->if_addrlen = INFINIBAND_ADDR_LEN;
 	ifp->if_hdrlen = INFINIBAND_HDR_LEN;
 	ifp->if_mtu = INFINIBAND_MTU;
 	if_attach(ifp);
 	ifp->if_output = infiniband_output;
 	ifp->if_input = infiniband_input;
 	ifp->if_resolvemulti = infiniband_resolvemulti;
 
 	if (ifp->if_baudrate == 0)
 		ifp->if_baudrate = IF_Gbps(10);	/* default value */
 	if (llb != NULL)
 		ifp->if_broadcastaddr = llb;
 
 	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_INFINIBAND;
 	sdl->sdl_alen = ifp->if_addrlen;
 
 	if (lla != NULL) {
 		memcpy(LLADDR(sdl), lla, ifp->if_addrlen);
 
 		if (ifp->if_hw_addr != NULL)
 			memcpy(ifp->if_hw_addr, lla, ifp->if_addrlen);
 	} else {
 		lla = LLADDR(sdl);
 	}
 
 	/* Attach ethernet compatible network device */
 	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 
 	/* Announce Infiniband MAC address if non-zero. */
 	for (i = 0; i < ifp->if_addrlen; i++)
 		if (lla[i] != 0)
 			break;
 	if (i != ifp->if_addrlen)
 		if_printf(ifp, "Infiniband address: %20D\n", lla, ":");
 
 	/* Add necessary bits are setup; announce it now. */
 	EVENTHANDLER_INVOKE(infiniband_ifattach_event, ifp);
 
 	if (IS_DEFAULT_VNET(curvnet))
 		devctl_notify("INFINIBAND", ifp->if_xname, "IFATTACH", NULL);
 }
 
 /*
  * Perform common duties while detaching an Infiniband interface
  */
 void
 infiniband_ifdetach(struct ifnet *ifp)
 {
 	bpfdetach(ifp);
 	if_detach(ifp);
 }
 
 static int
 infiniband_modevent(module_t mod, int type, void *data)
 {
 	switch (type) {
 	case MOD_LOAD:
 	case MOD_UNLOAD:
 		return (0);
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 static moduledata_t infiniband_mod = {
 	.name = "if_infiniband",
 	.evhand = &infiniband_modevent,
 };
 
 DECLARE_MODULE(if_infiniband, infiniband_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
 MODULE_VERSION(if_infiniband, 1);
Index: head/sys/net/if_lagg.c
===================================================================
--- head/sys/net/if_lagg.c	(revision 366932)
+++ head/sys/net/if_lagg.c	(revision 366933)
@@ -1,2425 +1,2622 @@
 /*	$OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $	*/
 
 /*
  * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
  * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
  * Copyright (c) 2014, 2016 Marcelo Araujo <araujo@FreeBSD.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *
  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_kern_tls.h"
 #include "opt_ratelimit.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 #include <sys/sx.h>
 #include <sys/taskqueue.h>
 #include <sys/eventhandler.h>
 
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_clone.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/bpf.h>
 #include <net/route.h>
 #include <net/vnet.h>
+#include <net/infiniband.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #endif
 #ifdef INET
 #include <netinet/in_systm.h>
 #include <netinet/if_ether.h>
 #endif
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_ifattach.h>
 #endif
 
 #include <net/if_vlan_var.h>
 #include <net/if_lagg.h>
 #include <net/ieee8023ad_lacp.h>
 
 #ifdef INET6
 /*
  * XXX: declare here to avoid to include many inet6 related files..
  * should be more generalized?
  */
 extern void	nd6_setmtu(struct ifnet *);
 #endif
 
 #define	LAGG_RLOCK()	struct epoch_tracker lagg_et; epoch_enter_preempt(net_epoch_preempt, &lagg_et)
 #define	LAGG_RUNLOCK()	epoch_exit_preempt(net_epoch_preempt, &lagg_et)
 #define	LAGG_RLOCK_ASSERT()	NET_EPOCH_ASSERT()
 #define	LAGG_UNLOCK_ASSERT()	MPASS(!in_epoch(net_epoch_preempt))
 
 #define	LAGG_SX_INIT(_sc)	sx_init(&(_sc)->sc_sx, "if_lagg sx")
 #define	LAGG_SX_DESTROY(_sc)	sx_destroy(&(_sc)->sc_sx)
 #define	LAGG_XLOCK(_sc)		sx_xlock(&(_sc)->sc_sx)
 #define	LAGG_XUNLOCK(_sc)	sx_xunlock(&(_sc)->sc_sx)
 #define	LAGG_SXLOCK_ASSERT(_sc)	sx_assert(&(_sc)->sc_sx, SA_LOCKED)
 #define	LAGG_XLOCK_ASSERT(_sc)	sx_assert(&(_sc)->sc_sx, SA_XLOCKED)
 
 /* Special flags we should propagate to the lagg ports. */
 static struct {
 	int flag;
 	int (*func)(struct ifnet *, int);
 } lagg_pflags[] = {
 	{IFF_PROMISC, ifpromisc},
 	{IFF_ALLMULTI, if_allmulti},
 	{0, NULL}
 };
 
 struct lagg_snd_tag {
 	struct m_snd_tag com;
 	struct m_snd_tag *tag;
 };
 
 VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
 #define	V_lagg_list	VNET(lagg_list)
 VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx);
 #define	V_lagg_list_mtx	VNET(lagg_list_mtx)
 #define	LAGG_LIST_LOCK_INIT(x)		mtx_init(&V_lagg_list_mtx, \
 					"if_lagg list", NULL, MTX_DEF)
 #define	LAGG_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_lagg_list_mtx)
 #define	LAGG_LIST_LOCK(x)		mtx_lock(&V_lagg_list_mtx)
 #define	LAGG_LIST_UNLOCK(x)		mtx_unlock(&V_lagg_list_mtx)
 eventhandler_tag	lagg_detach_cookie = NULL;
 
 static int	lagg_clone_create(struct if_clone *, int, caddr_t);
 static void	lagg_clone_destroy(struct ifnet *);
 VNET_DEFINE_STATIC(struct if_clone *, lagg_cloner);
 #define	V_lagg_cloner	VNET(lagg_cloner)
 static const char laggname[] = "lagg";
 static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link Aggregation Interface");
 
 static void	lagg_capabilities(struct lagg_softc *);
 static int	lagg_port_create(struct lagg_softc *, struct ifnet *);
 static int	lagg_port_destroy(struct lagg_port *, int);
-static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
+static struct mbuf *lagg_input_ethernet(struct ifnet *, struct mbuf *);
+static struct mbuf *lagg_input_infiniband(struct ifnet *, struct mbuf *);
 static void	lagg_linkstate(struct lagg_softc *);
 static void	lagg_port_state(struct ifnet *, int);
 static int	lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
 static int	lagg_port_output(struct ifnet *, struct mbuf *,
 		    const struct sockaddr *, struct route *);
 static void	lagg_port_ifdetach(void *arg __unused, struct ifnet *);
 #ifdef LAGG_PORT_STACKING
 static int	lagg_port_checkstacking(struct lagg_softc *);
 #endif
 static void	lagg_port2req(struct lagg_port *, struct lagg_reqport *);
 static void	lagg_init(void *);
 static void	lagg_stop(struct lagg_softc *);
 static int	lagg_ioctl(struct ifnet *, u_long, caddr_t);
 #if defined(KERN_TLS) || defined(RATELIMIT)
 static int	lagg_snd_tag_alloc(struct ifnet *,
 		    union if_snd_tag_alloc_params *,
 		    struct m_snd_tag **);
 static int	lagg_snd_tag_modify(struct m_snd_tag *,
 		    union if_snd_tag_modify_params *);
 static int	lagg_snd_tag_query(struct m_snd_tag *,
 		    union if_snd_tag_query_params *);
 static void	lagg_snd_tag_free(struct m_snd_tag *);
 static void     lagg_ratelimit_query(struct ifnet *,
 		    struct if_ratelimit_query_results *);
 #endif
 static int	lagg_setmulti(struct lagg_port *);
 static int	lagg_clrmulti(struct lagg_port *);
 static	int	lagg_setcaps(struct lagg_port *, int cap);
 static	int	lagg_setflag(struct lagg_port *, int, int,
 		    int (*func)(struct ifnet *, int));
 static	int	lagg_setflags(struct lagg_port *, int status);
 static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt);
-static int	lagg_transmit(struct ifnet *, struct mbuf *);
+static int	lagg_transmit_ethernet(struct ifnet *, struct mbuf *);
+static int	lagg_transmit_infiniband(struct ifnet *, struct mbuf *);
 static void	lagg_qflush(struct ifnet *);
 static int	lagg_media_change(struct ifnet *);
 static void	lagg_media_status(struct ifnet *, struct ifmediareq *);
 static struct lagg_port *lagg_link_active(struct lagg_softc *,
 	    struct lagg_port *);
 
 /* Simple round robin */
 static void	lagg_rr_attach(struct lagg_softc *);
 static int	lagg_rr_start(struct lagg_softc *, struct mbuf *);
 static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
 		    struct mbuf *);
 
 /* Active failover */
 static int	lagg_fail_start(struct lagg_softc *, struct mbuf *);
 static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
 		    struct mbuf *);
 
 /* Loadbalancing */
 static void	lagg_lb_attach(struct lagg_softc *);
 static void	lagg_lb_detach(struct lagg_softc *);
 static int	lagg_lb_port_create(struct lagg_port *);
 static void	lagg_lb_port_destroy(struct lagg_port *);
 static int	lagg_lb_start(struct lagg_softc *, struct mbuf *);
 static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
 		    struct mbuf *);
 static int	lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
 
 /* Broadcast */
 static int    lagg_bcast_start(struct lagg_softc *, struct mbuf *);
 static struct mbuf *lagg_bcast_input(struct lagg_softc *, struct lagg_port *,
 		    struct mbuf *);
 
 /* 802.3ad LACP */
 static void	lagg_lacp_attach(struct lagg_softc *);
 static void	lagg_lacp_detach(struct lagg_softc *);
 static int	lagg_lacp_start(struct lagg_softc *, struct mbuf *);
 static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
 		    struct mbuf *);
 static void	lagg_lacp_lladdr(struct lagg_softc *);
 
 /* lagg protocol table */
 static const struct lagg_proto {
 	lagg_proto	pr_num;
 	void		(*pr_attach)(struct lagg_softc *);
 	void		(*pr_detach)(struct lagg_softc *);
 	int		(*pr_start)(struct lagg_softc *, struct mbuf *);
 	struct mbuf *	(*pr_input)(struct lagg_softc *, struct lagg_port *,
 			    struct mbuf *);
 	int		(*pr_addport)(struct lagg_port *);
 	void		(*pr_delport)(struct lagg_port *);
 	void		(*pr_linkstate)(struct lagg_port *);
 	void 		(*pr_init)(struct lagg_softc *);
 	void 		(*pr_stop)(struct lagg_softc *);
 	void 		(*pr_lladdr)(struct lagg_softc *);
 	void		(*pr_request)(struct lagg_softc *, void *);
 	void		(*pr_portreq)(struct lagg_port *, void *);
 } lagg_protos[] = {
     {
 	.pr_num = LAGG_PROTO_NONE
     },
     {
 	.pr_num = LAGG_PROTO_ROUNDROBIN,
 	.pr_attach = lagg_rr_attach,
 	.pr_start = lagg_rr_start,
 	.pr_input = lagg_rr_input,
     },
     {
 	.pr_num = LAGG_PROTO_FAILOVER,
 	.pr_start = lagg_fail_start,
 	.pr_input = lagg_fail_input,
     },
     {
 	.pr_num = LAGG_PROTO_LOADBALANCE,
 	.pr_attach = lagg_lb_attach,
 	.pr_detach = lagg_lb_detach,
 	.pr_start = lagg_lb_start,
 	.pr_input = lagg_lb_input,
 	.pr_addport = lagg_lb_port_create,
 	.pr_delport = lagg_lb_port_destroy,
     },
     {
 	.pr_num = LAGG_PROTO_LACP,
 	.pr_attach = lagg_lacp_attach,
 	.pr_detach = lagg_lacp_detach,
 	.pr_start = lagg_lacp_start,
 	.pr_input = lagg_lacp_input,
 	.pr_addport = lacp_port_create,
 	.pr_delport = lacp_port_destroy,
 	.pr_linkstate = lacp_linkstate,
 	.pr_init = lacp_init,
 	.pr_stop = lacp_stop,
 	.pr_lladdr = lagg_lacp_lladdr,
 	.pr_request = lacp_req,
 	.pr_portreq = lacp_portreq,
     },
     {
 	.pr_num = LAGG_PROTO_BROADCAST,
 	.pr_start = lagg_bcast_start,
 	.pr_input = lagg_bcast_input,
     },
 };
 
 SYSCTL_DECL(_net_link);
 SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Link Aggregation");
 
 /* Allow input on any failover links */
 VNET_DEFINE_STATIC(int, lagg_failover_rx_all);
 #define	V_lagg_failover_rx_all	VNET(lagg_failover_rx_all)
 SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET,
     &VNET_NAME(lagg_failover_rx_all), 0,
     "Accept input from any interface in a failover lagg");
 
 /* Default value for using flowid */
 VNET_DEFINE_STATIC(int, def_use_flowid) = 0;
 #define	V_def_use_flowid	VNET(def_use_flowid)
 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
     &VNET_NAME(def_use_flowid), 0,
     "Default setting for using flow id for load sharing");
 
 /* Default value for using numa */
 VNET_DEFINE_STATIC(int, def_use_numa) = 1;
 #define	V_def_use_numa	VNET(def_use_numa)
 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_numa, CTLFLAG_RWTUN,
     &VNET_NAME(def_use_numa), 0,
     "Use numa to steer flows");
 
 /* Default value for flowid shift */
 VNET_DEFINE_STATIC(int, def_flowid_shift) = 16;
 #define	V_def_flowid_shift	VNET(def_flowid_shift)
 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN,
     &VNET_NAME(def_flowid_shift), 0,
     "Default setting for flowid shift for load sharing");
 
 static void
 vnet_lagg_init(const void *unused __unused)
 {
 
 	LAGG_LIST_LOCK_INIT();
 	SLIST_INIT(&V_lagg_list);
 	V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create,
 	    lagg_clone_destroy, 0);
 }
 VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_lagg_init, NULL);
 
 static void
 vnet_lagg_uninit(const void *unused __unused)
 {
 
 	if_clone_detach(V_lagg_cloner);
 	LAGG_LIST_LOCK_DESTROY();
 }
 VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
     vnet_lagg_uninit, NULL);
 
 static int
 lagg_modevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
-		lagg_input_p = lagg_input;
+		lagg_input_ethernet_p = lagg_input_ethernet;
+		lagg_input_infiniband_p = lagg_input_infiniband;
 		lagg_linkstate_p = lagg_port_state;
 		lagg_detach_cookie = EVENTHANDLER_REGISTER(
 		    ifnet_departure_event, lagg_port_ifdetach, NULL,
 		    EVENTHANDLER_PRI_ANY);
 		break;
 	case MOD_UNLOAD:
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 		    lagg_detach_cookie);
-		lagg_input_p = NULL;
+		lagg_input_ethernet_p = NULL;
+		lagg_input_infiniband_p = NULL;
 		lagg_linkstate_p = NULL;
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 	return (0);
 }
 
 static moduledata_t lagg_mod = {
 	"if_lagg",
 	lagg_modevent,
 	0
 };
 
 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_lagg, 1);
+MODULE_DEPEND(if_lagg, if_infiniband, 1, 1, 1);
 
 static void
 lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
 {
 
 	LAGG_XLOCK_ASSERT(sc);
 	KASSERT(sc->sc_proto == LAGG_PROTO_NONE, ("%s: sc %p has proto",
 	    __func__, sc));
 
 	if (sc->sc_ifflags & IFF_DEBUG)
 		if_printf(sc->sc_ifp, "using proto %u\n", pr);
 
 	if (lagg_protos[pr].pr_attach != NULL)
 		lagg_protos[pr].pr_attach(sc);
 	sc->sc_proto = pr;
 }
 
 static void
 lagg_proto_detach(struct lagg_softc *sc)
 {
 	lagg_proto pr;
 
 	LAGG_XLOCK_ASSERT(sc);
 	pr = sc->sc_proto;
 	sc->sc_proto = LAGG_PROTO_NONE;
 
 	if (lagg_protos[pr].pr_detach != NULL)
 		lagg_protos[pr].pr_detach(sc);
 }
 
 static int
 lagg_proto_start(struct lagg_softc *sc, struct mbuf *m)
 {
 
 	return (lagg_protos[sc->sc_proto].pr_start(sc, m));
 }
 
 static struct mbuf *
 lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 {
 
 	return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m));
 }
 
 static int
 lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp)
 {
 
 	if (lagg_protos[sc->sc_proto].pr_addport == NULL)
 		return (0);
 	else
 		return (lagg_protos[sc->sc_proto].pr_addport(lp));
 }
 
 static void
 lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp)
 {
 
 	if (lagg_protos[sc->sc_proto].pr_delport != NULL)
 		lagg_protos[sc->sc_proto].pr_delport(lp);
 }
 
 static void
 lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp)
 {
 
 	if (lagg_protos[sc->sc_proto].pr_linkstate != NULL)
 		lagg_protos[sc->sc_proto].pr_linkstate(lp);
 }
 
 static void
 lagg_proto_init(struct lagg_softc *sc)
 {
 
 	if (lagg_protos[sc->sc_proto].pr_init != NULL)
 		lagg_protos[sc->sc_proto].pr_init(sc);
 }
 
 static void
 lagg_proto_stop(struct lagg_softc *sc)
 {
 
 	if (lagg_protos[sc->sc_proto].pr_stop != NULL)
 		lagg_protos[sc->sc_proto].pr_stop(sc);
 }
 
 static void
 lagg_proto_lladdr(struct lagg_softc *sc)
 {
 
 	if (lagg_protos[sc->sc_proto].pr_lladdr != NULL)
 		lagg_protos[sc->sc_proto].pr_lladdr(sc);
 }
 
 static void
 lagg_proto_request(struct lagg_softc *sc, void *v)
 {
 
 	if (lagg_protos[sc->sc_proto].pr_request != NULL)
 		lagg_protos[sc->sc_proto].pr_request(sc, v);
 }
 
 static void
 lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v)
 {
 
 	if (lagg_protos[sc->sc_proto].pr_portreq != NULL)
 		lagg_protos[sc->sc_proto].pr_portreq(lp, v);
 }
 
 /*
  * This routine is run via an vlan
  * config EVENT
  */
 static void
 lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
 {
 	struct lagg_softc *sc = ifp->if_softc;
 	struct lagg_port *lp;
 
 	if (ifp->if_softc !=  arg)   /* Not our event */
 		return;
 
 	LAGG_RLOCK();
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 		EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
 	LAGG_RUNLOCK();
 }
 
 /*
  * This routine is run via an vlan
  * unconfig EVENT
  */
 static void
 lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
 {
 	struct lagg_softc *sc = ifp->if_softc;
 	struct lagg_port *lp;
 
 	if (ifp->if_softc !=  arg)   /* Not our event */
 		return;
 
 	LAGG_RLOCK();
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 		EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
 	LAGG_RUNLOCK();
 }
 
 static int
 lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
+	struct iflaggparam iflp;
 	struct lagg_softc *sc;
 	struct ifnet *ifp;
-	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */
+	int if_type;
+	int error;
+	static const uint8_t eaddr[LAGG_ADDR_LEN];	/* all zeros */
+	static const uint8_t ib_bcast_addr[INFINIBAND_ADDR_LEN] = {
+		0x00, 0xff, 0xff, 0xff,
+		0xff, 0x12, 0x40, 0x1b,	0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00,	0xff, 0xff, 0xff, 0xff
+	};
 
+	if (params != NULL) {
+		error = copyin(params, &iflp, sizeof(iflp));
+		if (error)
+			return (error);
+
+		switch (iflp.lagg_type) {
+		case LAGG_TYPE_ETHERNET:
+			if_type = IFT_ETHER;
+			break;
+		case LAGG_TYPE_INFINIBAND:
+			if_type = IFT_INFINIBAND;
+			break;
+		default:
+			return (EINVAL);
+		}
+	} else {
+		if_type = IFT_ETHER;	/* default when no params given */
+	}
+
 	sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK|M_ZERO);
-	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
+	ifp = sc->sc_ifp = if_alloc(if_type);
 	if (ifp == NULL) {
 		free(sc, M_LAGG);
 		return (ENOSPC);
 	}
 	LAGG_SX_INIT(sc);
 
+	mtx_init(&sc->sc_mtx, "lagg-mtx", NULL, MTX_DEF);
+	callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0);
+
 	LAGG_XLOCK(sc);
 	if (V_def_use_flowid)
 		sc->sc_opts |= LAGG_OPT_USE_FLOWID;
 	if (V_def_use_numa)
 		sc->sc_opts |= LAGG_OPT_USE_NUMA;
 	sc->flowid_shift = V_def_flowid_shift;
 
 	/* Hash all layers by default */
 	sc->sc_flags = MBUF_HASHFLAG_L2|MBUF_HASHFLAG_L3|MBUF_HASHFLAG_L4;
 
 	lagg_proto_attach(sc, LAGG_PROTO_DEFAULT);
 
 	CK_SLIST_INIT(&sc->sc_ports);
 
-	/* Initialise pseudo media types */
-	ifmedia_init(&sc->sc_media, 0, lagg_media_change,
-	    lagg_media_status);
-	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
-	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
+	switch (if_type) {
+	case IFT_ETHER:
+		/* Initialise pseudo media types */
+		ifmedia_init(&sc->sc_media, 0, lagg_media_change,
+		    lagg_media_status);
+		ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+		ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
 
-	if_initname(ifp, laggname, unit);
+		if_initname(ifp, laggname, unit);
+		ifp->if_transmit = lagg_transmit_ethernet;
+		break;
+	case IFT_INFINIBAND:
+		if_initname(ifp, laggname, unit);
+		ifp->if_transmit = lagg_transmit_infiniband;
+		break;
+	default:
+		break;
+	}
 	ifp->if_softc = sc;
-	ifp->if_transmit = lagg_transmit;
 	ifp->if_qflush = lagg_qflush;
 	ifp->if_init = lagg_init;
 	ifp->if_ioctl = lagg_ioctl;
 	ifp->if_get_counter = lagg_get_counter;
 	ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
 #if defined(KERN_TLS) || defined(RATELIMIT)
 	ifp->if_snd_tag_alloc = lagg_snd_tag_alloc;
 	ifp->if_snd_tag_modify = lagg_snd_tag_modify;
 	ifp->if_snd_tag_query = lagg_snd_tag_query;
 	ifp->if_snd_tag_free = lagg_snd_tag_free;
 	ifp->if_ratelimit_query = lagg_ratelimit_query;
 #endif
 	ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
 
 	/*
 	 * Attach as an ordinary ethernet device, children will be attached
-	 * as special device IFT_IEEE8023ADLAG.
+	 * as special device IFT_IEEE8023ADLAG or IFT_INFINIBANDLAG.
 	 */
-	ether_ifattach(ifp, eaddr);
+	switch (if_type) {
+	case IFT_ETHER:
+		ether_ifattach(ifp, eaddr);
+		break;
+	case IFT_INFINIBAND:
+		infiniband_ifattach(ifp, eaddr, ib_bcast_addr);
+		break;
+	default:
+		break;
+	}
 
 	sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 		lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
 	sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 		lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
 
 	/* Insert into the global list of laggs */
 	LAGG_LIST_LOCK();
 	SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries);
 	LAGG_LIST_UNLOCK();
 	LAGG_XUNLOCK(sc);
 
 	return (0);
 }
 
 static void
 lagg_clone_destroy(struct ifnet *ifp)
 {
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	struct lagg_port *lp;
 
 	LAGG_XLOCK(sc);
 	sc->sc_destroying = 1;
 	lagg_stop(sc);
 	ifp->if_flags &= ~IFF_UP;
 
 	EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
 	EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
 
 	/* Shutdown and remove lagg ports */
 	while ((lp = CK_SLIST_FIRST(&sc->sc_ports)) != NULL)
 		lagg_port_destroy(lp, 1);
 
 	/* Unhook the aggregation protocol */
 	lagg_proto_detach(sc);
 	LAGG_XUNLOCK(sc);
 
-	ifmedia_removeall(&sc->sc_media);
-	ether_ifdetach(ifp);
+	switch (ifp->if_type) {
+	case IFT_ETHER:
+		ifmedia_removeall(&sc->sc_media);
+		ether_ifdetach(ifp);
+		break;
+	case IFT_INFINIBAND:
+		infiniband_ifdetach(ifp);
+		break;
+	default:
+		break;
+	}
 	if_free(ifp);
 
 	LAGG_LIST_LOCK();
 	SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
 	LAGG_LIST_UNLOCK();
 
+	mtx_destroy(&sc->sc_mtx);
 	LAGG_SX_DESTROY(sc);
 	free(sc, M_LAGG);
 }
 
 static void
 lagg_capabilities(struct lagg_softc *sc)
 {
 	struct lagg_port *lp;
 	int cap, ena, pena;
 	uint64_t hwa;
 	struct ifnet_hw_tsomax hw_tsomax;
 
 	LAGG_XLOCK_ASSERT(sc);
 
 	/* Get common enabled capabilities for the lagg ports */
 	ena = ~0;
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 		ena &= lp->lp_ifp->if_capenable;
 	ena = (ena == ~0 ? 0 : ena);
 
 	/*
 	 * Apply common enabled capabilities back to the lagg ports.
 	 * May require several iterations if they are dependent.
 	 */
 	do {
 		pena = ena;
 		CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 			lagg_setcaps(lp, ena);
 			ena &= lp->lp_ifp->if_capenable;
 		}
 	} while (pena != ena);
 
 	/* Get other capabilities from the lagg ports */
 	cap = ~0;
 	hwa = ~(uint64_t)0;
 	memset(&hw_tsomax, 0, sizeof(hw_tsomax));
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 		cap &= lp->lp_ifp->if_capabilities;
 		hwa &= lp->lp_ifp->if_hwassist;
 		if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax);
 	}
 	cap = (cap == ~0 ? 0 : cap);
 	hwa = (hwa == ~(uint64_t)0 ? 0 : hwa);
 
 	if (sc->sc_ifp->if_capabilities != cap ||
 	    sc->sc_ifp->if_capenable != ena ||
 	    sc->sc_ifp->if_hwassist != hwa ||
 	    if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) {
 		sc->sc_ifp->if_capabilities = cap;
 		sc->sc_ifp->if_capenable = ena;
 		sc->sc_ifp->if_hwassist = hwa;
 		getmicrotime(&sc->sc_ifp->if_lastchange);
 
 		if (sc->sc_ifflags & IFF_DEBUG)
 			if_printf(sc->sc_ifp,
 			    "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
 	}
 }
 
 static int
 lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
 {
 	struct lagg_softc *sc_ptr;
 	struct lagg_port *lp, *tlp;
 	struct ifreq ifr;
 	int error, i, oldmtu;
+	int if_type;
 	uint64_t *pval;
 
 	LAGG_XLOCK_ASSERT(sc);
 
 	if (sc->sc_ifp == ifp) {
 		if_printf(sc->sc_ifp,
 		    "cannot add a lagg to itself as a port\n");
 		return (EINVAL);
 	}
 
 	if (sc->sc_destroying == 1)
 		return (ENXIO);
 
 	/* Limit the maximal number of lagg ports */
 	if (sc->sc_count >= LAGG_MAX_PORTS)
 		return (ENOSPC);
 
 	/* Check if port has already been associated to a lagg */
 	if (ifp->if_lagg != NULL) {
 		/* Port is already in the current lagg? */
 		lp = (struct lagg_port *)ifp->if_lagg;
 		if (lp->lp_softc == sc)
 			return (EEXIST);
 		return (EBUSY);
 	}
 
-	/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
-	if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
-		return (EPROTONOSUPPORT);
+	switch (sc->sc_ifp->if_type) {
+	case IFT_ETHER:
+		/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
+		if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
+			return (EPROTONOSUPPORT);
+		if_type = IFT_IEEE8023ADLAG;
+		break;
+	case IFT_INFINIBAND:
+		/* XXX Disallow non-infiniband interfaces */
+		if (ifp->if_type != IFT_INFINIBAND)
+			return (EPROTONOSUPPORT);
+		if_type = IFT_INFINIBANDLAG;
+		break;
+	default:
+		return (EPROTONOSUPPORT);	/* if_type would be unset */
+	}
 
 	/* Allow the first Ethernet member to define the MTU */
 	oldmtu = -1;
 	if (CK_SLIST_EMPTY(&sc->sc_ports)) {
 		sc->sc_ifp->if_mtu = ifp->if_mtu;
 	} else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
 		if (ifp->if_ioctl == NULL) {
 			if_printf(sc->sc_ifp, "cannot change MTU for %s\n",
 			    ifp->if_xname);
 			return (EINVAL);
 		}
 		oldmtu = ifp->if_mtu;
 		strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name));
 		ifr.ifr_mtu = sc->sc_ifp->if_mtu;
 		error = (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr);
 		if (error != 0) {
 			if_printf(sc->sc_ifp, "invalid MTU for %s\n",
 			    ifp->if_xname);
 			return (error);
 		}
 		ifr.ifr_mtu = oldmtu;
 	}
 
 	lp = malloc(sizeof(struct lagg_port), M_LAGG, M_WAITOK|M_ZERO);
 	lp->lp_softc = sc;
 
 	/* Check if port is a stacked lagg */
 	LAGG_LIST_LOCK();
 	SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
 		if (ifp == sc_ptr->sc_ifp) {
 			LAGG_LIST_UNLOCK();
 			free(lp, M_LAGG);
 			if (oldmtu != -1)
 				(*ifp->if_ioctl)(ifp, SIOCSIFMTU,
 				    (caddr_t)&ifr);
 			return (EINVAL);
 			/* XXX disable stacking for the moment, its untested */
 #ifdef LAGG_PORT_STACKING
 			lp->lp_flags |= LAGG_PORT_STACK;
 			if (lagg_port_checkstacking(sc_ptr) >=
 			    LAGG_MAX_STACKING) {
 				LAGG_LIST_UNLOCK();
 				free(lp, M_LAGG);
 				if (oldmtu != -1)
 					(*ifp->if_ioctl)(ifp, SIOCSIFMTU,
 					    (caddr_t)&ifr);
 				return (E2BIG);
 			}
 #endif
 		}
 	}
 	LAGG_LIST_UNLOCK();
 
 	if_ref(ifp);
 	lp->lp_ifp = ifp;
 
-	bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
+	bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ifp->if_addrlen);
 	lp->lp_ifcapenable = ifp->if_capenable;
 	if (CK_SLIST_EMPTY(&sc->sc_ports)) {
-		bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+		bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
 		lagg_proto_lladdr(sc);
 		EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
 	} else {
-		if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+		if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
 	}
 	lagg_setflags(lp, 1);
 
 	if (CK_SLIST_EMPTY(&sc->sc_ports))
 		sc->sc_primary = lp;
 
 	/* Change the interface type */
 	lp->lp_iftype = ifp->if_type;
-	ifp->if_type = IFT_IEEE8023ADLAG;
+	ifp->if_type = if_type;
 	ifp->if_lagg = lp;
 	lp->lp_ioctl = ifp->if_ioctl;
 	ifp->if_ioctl = lagg_port_ioctl;
 	lp->lp_output = ifp->if_output;
 	ifp->if_output = lagg_port_output;
 
 	/* Read port counters */
 	pval = lp->port_counters.val;
 	for (i = 0; i < IFCOUNTERS; i++, pval++)
 		*pval = ifp->if_get_counter(ifp, i);
 
 	/*
 	 * Insert into the list of ports.
 	 * Keep ports sorted by if_index. It is handy, when configuration
 	 * is predictable and `ifconfig laggN create ...` command
 	 * will lead to the same result each time.
 	 */
 	CK_SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) {
 		if (tlp->lp_ifp->if_index < ifp->if_index && (
 		    CK_SLIST_NEXT(tlp, lp_entries) == NULL ||
 		    ((struct  lagg_port*)CK_SLIST_NEXT(tlp, lp_entries))->lp_ifp->if_index >
 		    ifp->if_index))
 			break;
 	}
 	if (tlp != NULL)
 		CK_SLIST_INSERT_AFTER(tlp, lp, lp_entries);
 	else
 		CK_SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
 	sc->sc_count++;
 
 	lagg_setmulti(lp);
 
 	if ((error = lagg_proto_addport(sc, lp)) != 0) {
 		/* Remove the port, without calling pr_delport. */
 		lagg_port_destroy(lp, 0);
 		if (oldmtu != -1)
 			(*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr);
 		return (error);
 	}
 
 	/* Update lagg capabilities */
 	lagg_capabilities(sc);
 	lagg_linkstate(sc);
 
 	return (0);
 }
 
 #ifdef LAGG_PORT_STACKING
 /*
  * Return how many lagg levels are stacked at and below `sc': one for `sc'
  * itself plus the deepest chain reachable through ports that carry the
  * LAGG_PORT_STACK flag.  A lagg with no such port yields 1.
  *
  * The lagg lock on `sc' is asserted via LAGG_SXLOCK_ASSERT().
  */
 static int
 lagg_port_checkstacking(struct lagg_softc *sc)
 {
 	struct lagg_softc *sc_ptr;
 	struct lagg_port *lp;
 	int m = 0;
 
 	LAGG_SXLOCK_ASSERT(sc);
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 		if (lp->lp_flags & LAGG_PORT_STACK) {
 			/*
 			 * The port's ifnet softc is treated as another
 			 * lagg_softc; recurse into it.
 			 * NOTE(review): if MAX() is the usual macro that
 			 * evaluates its arguments twice, the recursive call
 			 * may run twice per stacked port - confirm.
 			 */
 			sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
 			m = MAX(m, lagg_port_checkstacking(sc_ptr));
 		}
 	}
 
 	return (m + 1);
 }
 #endif
 
 /*
  * Deferred destructor for a lagg port, scheduled through NET_EPOCH_CALL()
  * by lagg_port_destroy().  `ec' is the lp_epoch_ctx member embedded in the
  * port; the enclosing lagg_port is recovered from it, the reference held on
  * the port's ifnet is dropped and the port structure is freed.
  */
 static void
 lagg_port_destroy_cb(epoch_context_t ec)
 {
 	struct lagg_port *lp;
 	struct ifnet *ifp;
 
 	lp = __containerof(ec, struct lagg_port, lp_epoch_ctx);
 	ifp = lp->lp_ifp;
 
 	if_rele(ifp);
 	free(lp, M_LAGG);
 }
 
 /*
  * Detach port `lp' from its lagg and undo what attaching it changed.
  *
  * When `rundelport' is non-zero the protocol's delport hook is run first.
  * The port ifnet gets its saved if_type/if_ioctl/if_output back, the
  * counter growth observed while attached is folded into
  * sc->detached_counters, and the port is unlinked from sc_ports.  If the
  * port was the primary, the first remaining port (if any) becomes primary
  * and its saved link-level address is propagated to the lagg interface and
  * to every remaining port.  The lagg_port itself is freed later from
  * lagg_port_destroy_cb().
  *
  * The caller must hold the lagg exclusive lock (asserted below).
  * Always returns 0.
  */
 static int
 lagg_port_destroy(struct lagg_port *lp, int rundelport)
 {
 	struct lagg_softc *sc = lp->lp_softc;
 	struct lagg_port *lp_ptr, *lp0;
 	struct ifnet *ifp = lp->lp_ifp;
 	uint64_t *pval, vdiff;
 	int i;
 
 	LAGG_XLOCK_ASSERT(sc);
 
 	if (rundelport)
 		lagg_proto_delport(sc, lp);
 
 	/* lp_detaching is set by lagg_port_ifdetach() before calling us. */
 	if (lp->lp_detaching == 0)
 		lagg_clrmulti(lp);
 
 	/* Restore interface */
 	ifp->if_type = lp->lp_iftype;
 	ifp->if_ioctl = lp->lp_ioctl;
 	ifp->if_output = lp->lp_output;
 	ifp->if_lagg = NULL;
 
 	/* Update detached port counters */
 	pval = lp->port_counters.val;
 	for (i = 0; i < IFCOUNTERS; i++, pval++) {
 		vdiff = ifp->if_get_counter(ifp, i) - *pval;
 		sc->detached_counters.val[i] += vdiff;
 	}
 
 	/* Finally, remove the port from the lagg */
 	CK_SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
 	sc->sc_count--;
 
 	/* Update the primary interface */
 	if (lp == sc->sc_primary) {
-		uint8_t lladdr[ETHER_ADDR_LEN];
+		uint8_t lladdr[LAGG_ADDR_LEN];
 
 		/* No port left: the lagg falls back to an all-zero address. */
 		if ((lp0 = CK_SLIST_FIRST(&sc->sc_ports)) == NULL)
-			bzero(&lladdr, ETHER_ADDR_LEN);
+			bzero(&lladdr, LAGG_ADDR_LEN);
 		else
-			bcopy(lp0->lp_lladdr, lladdr, ETHER_ADDR_LEN);
+			bcopy(lp0->lp_lladdr, lladdr, LAGG_ADDR_LEN);
 		sc->sc_primary = lp0;
 		if (sc->sc_destroying == 0) {
-			bcopy(lladdr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+			bcopy(lladdr, IF_LLADDR(sc->sc_ifp), sc->sc_ifp->if_addrlen);
 			lagg_proto_lladdr(sc);
 			EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
 		}
 
 		/*
 		 * Update lladdr for each port (new primary needs update
 		 * as well, to switch from old lladdr to its 'real' one)
 		 */
 		CK_SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
-			if_setlladdr(lp_ptr->lp_ifp, lladdr, ETHER_ADDR_LEN);
+			if_setlladdr(lp_ptr->lp_ifp, lladdr, lp_ptr->lp_ifp->if_addrlen);
 	}
 
 	if (lp->lp_ifflags)
 		if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
 
 	/* Give the departing port its own flags, capabilities and address back. */
 	if (lp->lp_detaching == 0) {
 		lagg_setflags(lp, 0);
 		lagg_setcaps(lp, lp->lp_ifcapenable);
-		if_setlladdr(ifp, lp->lp_lladdr, ETHER_ADDR_LEN);
+		if_setlladdr(ifp, lp->lp_lladdr, ifp->if_addrlen);
 	}
 
 	/*
 	 * Free the port and release its ifnet reference after a grace
 	 * period has elapsed.
 	 */
 	NET_EPOCH_CALL(lagg_port_destroy_cb, &lp->lp_epoch_ctx);
 	/* Update lagg capabilities */
 	lagg_capabilities(sc);
 	lagg_linkstate(sc);
 
 	return (0);
 }
 
 /*
  * ioctl handler for an interface that is a member port of a lagg.
  *
  * SIOCGLAGGPORT, SIOCSIFCAP and SIOCSIFMTU are handled here; every other
  * command, and any call on an interface that does not look like a valid
  * lagg port, is passed to the port's saved original handler (lp->lp_ioctl).
  * If no port or no saved handler is available the result is EINVAL.
  *
  * Returns 0 or an errno value.
  */
 static int
 lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct lagg_reqport *rp = (struct lagg_reqport *)data;
 	struct lagg_softc *sc;
 	struct lagg_port *lp = NULL;
 	int error = 0;
 
 	/* Should be checked by the caller */
-	if (ifp->if_type != IFT_IEEE8023ADLAG ||
-	    (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
+	switch (ifp->if_type) {
+	case IFT_IEEE8023ADLAG:
+	case IFT_INFINIBANDLAG:
+		if ((lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
+			goto fallback;
+		break;
+	default:
 		goto fallback;
+	}
 
 	switch (cmd) {
 	case SIOCGLAGGPORT:
 		/* The request must name this very interface. */
 		if (rp->rp_portname[0] == '\0' ||
 		    ifunit(rp->rp_portname) != ifp) {
 			error = EINVAL;
 			break;
 		}
 
 		/* Re-read if_lagg under the read lock before using it. */
 		LAGG_RLOCK();
 		if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
 			error = ENOENT;
 			LAGG_RUNLOCK();
 			break;
 		}
 
 		lagg_port2req(lp, rp);
 		LAGG_RUNLOCK();
 		break;
 
 	case SIOCSIFCAP:
 		/* Let the underlying driver apply the change first. */
 		if (lp->lp_ioctl == NULL) {
 			error = EINVAL;
 			break;
 		}
 		error = (*lp->lp_ioctl)(ifp, cmd, data);
 		if (error)
 			break;
 
 		/* Update lagg interface capabilities */
 		LAGG_XLOCK(sc);
 		lagg_capabilities(sc);
 		LAGG_XUNLOCK(sc);
 		VLAN_CAPABILITIES(sc->sc_ifp);
 		break;
 
 	case SIOCSIFMTU:
 		/* Do not allow the MTU to be changed once joined */
 		error = EINVAL;
 		break;
 
 	default:
 		goto fallback;
 	}
 
 	return (error);
 
 	/*
 	 * lp is still NULL here when the interface type did not match or
 	 * if_lagg was unset, in which case EINVAL is returned.
 	 */
 fallback:
 	if (lp != NULL && lp->lp_ioctl != NULL)
 		return ((*lp->lp_ioctl)(ifp, cmd, data));
 
 	return (EINVAL);
 }
 
 /*
  * Requests counter @cnt data.
  *
  * Counter value is calculated the following way:
  * 1) for each port, sum the difference between the current and the
  *    "initial" measurement.
  * 2) add lagg logical interface counters.
  * 3) add data from detached_counters array.
  *
  * We also do the following things on ports attach/detach:
  * 1) On port attach we store all counters it has into the port_counters
  *    array.
  * 2) On port detach we add the difference between "initial" and
  *    current counters data to detached_counters array.
  *
  * Returns the aggregated value for counter @cnt of lagg interface @ifp.
  */
 static uint64_t
 lagg_get_counter(struct ifnet *ifp, ift_counter cnt)
 {
 	struct lagg_softc *sc;
 	struct lagg_port *lp;
 	struct ifnet *lpifp;
 	uint64_t newval, oldval, vsum;
 
 	/* Revise this when we've got non-generic counters. */
 	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
 
 	sc = (struct lagg_softc *)ifp->if_softc;
 
 	vsum = 0;
 	LAGG_RLOCK();
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 		/* Value saved when the port was attached */
 		oldval = lp->port_counters.val[cnt];
 		/* current value */
 		lpifp = lp->lp_ifp;
 		newval = lpifp->if_get_counter(lpifp, cnt);
 		/* Accumulate the growth since attach; nothing is stored back. */
 		vsum += newval - oldval;
 	}
 	LAGG_RUNLOCK();
 
 	/*
 	 * Add counter data which might be added by upper
 	 * layer protocols operating on logical interface.
 	 */
 	vsum += if_get_counter_default(ifp, cnt);
 
 	/*
 	 * Add counter data from detached ports counters
 	 */
 	vsum += sc->detached_counters.val[cnt];
 
 	return (vsum);
 }
 
 /*
  * For direct output to child ports.
  *
  * Only frames whose destination address family is pseudo_AF_HDRCMPLT or
  * AF_UNSPEC are handed to the port's saved output routine (lp_output) and
  * its result is returned.  Any other frame is freed and ENETDOWN is
  * returned.
  */
 static int
 lagg_port_output(struct ifnet *ifp, struct mbuf *m,
 	const struct sockaddr *dst, struct route *ro)
 {
 	struct lagg_port *lp = ifp->if_lagg;
 
 	switch (dst->sa_family) {
 		case pseudo_AF_HDRCMPLT:
 		case AF_UNSPEC:
 			return ((*lp->lp_output)(ifp, m, dst, ro));
 	}
 
 	/* drop any other frames */
 	m_freem(m);
 	return (ENETDOWN);
 }
 
 /*
  * Remove `ifp' from its lagg when the interface goes away.
  * Presumably registered as an ifnet departure event handler - the
  * registration is not visible here; `arg' is unused.
  *
  * Does nothing if `ifp' is not a lagg port or is merely being renamed.
  * Otherwise the port is marked as detaching and destroyed under the lagg
  * exclusive lock, after which VLAN capabilities of the lagg are refreshed.
  */
 static void
 lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
 {
 	struct lagg_port *lp;
 	struct lagg_softc *sc;
 
 	if ((lp = ifp->if_lagg) == NULL)
 		return;
 	/* If the ifnet is just being renamed, don't do anything. */
 	if (ifp->if_flags & IFF_RENAMING)
 		return;
 
 	sc = lp->lp_softc;
 
 	LAGG_XLOCK(sc);
 	/* Tells lagg_port_destroy() to skip its lp_detaching == 0 steps. */
 	lp->lp_detaching = 1;
 	lagg_port_destroy(lp, 1);
 	LAGG_XUNLOCK(sc);
 	VLAN_CAPABILITIES(sc->sc_ifp);
 }
 
 /*
  * Fill the request structure `rp' with the state of port `lp': the lagg
  * and port interface names, priority, flags, protocol specific data
  * (via lagg_proto_portreq()) and protocol dependent status flags.
  */
 static void
 lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
 {
 	struct lagg_softc *sc = lp->lp_softc;
 
 	strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
 	strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
 	rp->rp_prio = lp->lp_prio;
 	rp->rp_flags = lp->lp_flags;
 	lagg_proto_portreq(sc, lp, &rp->rp_psc);
 
 	/* Add protocol specific flags */
 	switch (sc->sc_proto) {
 		case LAGG_PROTO_FAILOVER:
 			/*
 			 * MASTER marks the primary port; ACTIVE marks the
 			 * port lagg_link_active() selects starting from the
 			 * primary.
 			 */
 			if (lp == sc->sc_primary)
 				rp->rp_flags |= LAGG_PORT_MASTER;
 			if (lp == lagg_link_active(sc, sc->sc_primary))
 				rp->rp_flags |= LAGG_PORT_ACTIVE;
 			break;
 
 		case LAGG_PROTO_ROUNDROBIN:
 		case LAGG_PROTO_LOADBALANCE:
 		case LAGG_PROTO_BROADCAST:
 			if (LAGG_PORTACTIVE(lp))
 				rp->rp_flags |= LAGG_PORT_ACTIVE;
 			break;
 
 		case LAGG_PROTO_LACP:
 			/* LACP has a different definition of active */
 			if (lacp_isactive(lp))
 				rp->rp_flags |= LAGG_PORT_ACTIVE;
 			if (lacp_iscollecting(lp))
 				rp->rp_flags |= LAGG_PORT_COLLECTING;
 			if (lacp_isdistributing(lp))
 				rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
 			break;
 	}
 
 }
 
 /*
  * Periodic callout for InfiniBand laggs.  `arg' is the lagg softc.
  *
  * Copies the link-level address of the currently active port to the lagg
  * interface whenever the two differ, announces the change through
  * iflladdr_event, and then re-arms itself to run again after `hz' ticks.
  */
 static void
+lagg_watchdog_infiniband(void *arg)
+{
+	struct lagg_softc *sc;
+	struct lagg_port *lp;
+	struct ifnet *ifp;
+	struct ifnet *lp_ifp;
+
+	sc = arg;
+
+	/*
+	 * Because infiniband nodes have a fixed MAC address, which is
+	 * generated by the so-called GID, we need to regularly update
+	 * the link level address of the parent lagg<N> device when
+	 * the active port changes. Possibly we could piggy-back on
+	 * link up/down events as well, but using a timer also provides
+	 * a guarantee against too frequent events. This operation
+	 * does not have to be atomic.
+	 */
+	LAGG_RLOCK();
+	lp = lagg_link_active(sc, sc->sc_primary);
+	if (lp != NULL) {
+		ifp = sc->sc_ifp;
+		lp_ifp = lp->lp_ifp;
+
+		if (ifp != NULL && lp_ifp != NULL &&
+		    memcmp(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen) != 0) {
+			memcpy(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen);
+			CURVNET_SET(ifp->if_vnet);
+			EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+			CURVNET_RESTORE();
+		}
+	}
+	LAGG_RUNLOCK();
+
+	callout_reset(&sc->sc_watchdog, hz, &lagg_watchdog_infiniband, arg);
+}
+
+static void
 lagg_init(void *xsc)
 {
 	/*
 	 * Bring the lagg interface up.  `xsc' is the lagg softc.
 	 *
 	 * Returns immediately if IFF_DRV_RUNNING is already set.  Otherwise
 	 * the flag is set, every port whose link-level address differs from
 	 * the lagg's is updated, the protocol's init hook is run and, for
 	 * IFT_INFINIBAND laggs, the address watchdog is started.  The lagg
 	 * exclusive lock is taken and released here.
 	 */
 	struct lagg_softc *sc = (struct lagg_softc *)xsc;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct lagg_port *lp;
 
 	LAGG_XLOCK(sc);
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 		LAGG_XUNLOCK(sc);
 		return;
 	}
 
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 
 	/*
 	 * Update the port lladdrs if needed.
 	 * This might be if_setlladdr() notification
 	 * that lladdr has been changed.
 	 */
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 		if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp),
-		    ETHER_ADDR_LEN) != 0)
-			if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+		    ifp->if_addrlen) != 0)
+			if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ifp->if_addrlen);
 	}
 
 	lagg_proto_init(sc);
 
 	/*
 	 * The first watchdog run is made directly, with sc_mtx held; it
 	 * schedules its own subsequent runs.
 	 */
+	if (ifp->if_type == IFT_INFINIBAND) {
+		mtx_lock(&sc->sc_mtx);
+		lagg_watchdog_infiniband(sc);
+		mtx_unlock(&sc->sc_mtx);
+	}
+
 	LAGG_XUNLOCK(sc);
 }
 
 /*
  * Stop the lagg interface: clear IFF_DRV_RUNNING, run the protocol's stop
  * hook and cancel the sc_watchdog callout.  Does nothing when the
  * interface is not running.  The caller must hold the lagg exclusive lock
  * (asserted below).
  */
 static void
 lagg_stop(struct lagg_softc *sc)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 
 	LAGG_XLOCK_ASSERT(sc);
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
 
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 
 	lagg_proto_stop(sc);
 	/*
 	 * Stop the callout under sc_mtx, then drain it with sc_mtx released
 	 * so a run already in progress can finish.
 	 */
+
+	mtx_lock(&sc->sc_mtx);
+	callout_stop(&sc->sc_watchdog);
+	mtx_unlock(&sc->sc_mtx);
+
+	callout_drain(&sc->sc_watchdog);
 }
 
 /*
  * ioctl handler of the lagg interface itself.
  *
  * `data' is interpreted according to `cmd' as a lagg_reqall, lagg_reqopts,
  * lagg_reqport, lagg_reqflags or ifreq.  The SIOCSLAGG* commands that
  * change configuration require the PRIV_NET_LAGG privilege.  Commands not
  * listed below are passed to ether_ioctl().
  *
  * Returns 0 or an errno value.
  */
 static int
 lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	struct lagg_reqall *ra = (struct lagg_reqall *)data;
 	struct lagg_reqopts *ro = (struct lagg_reqopts *)data;
 	struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
 	struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct lagg_port *lp;
 	struct ifnet *tpif;
 	struct thread *td = curthread;
 	char *buf, *outbuf;
 	int count, buflen, len, error = 0, oldmtu;
 
 	bzero(&rpbuf, sizeof(rpbuf));
 
 	/* XXX: This can race with lagg_clone_destroy. */
 
 	switch (cmd) {
 	case SIOCGLAGG:
 		/*
 		 * Report protocol and ports.  Port records are gathered into
 		 * a temporary buffer under the lock and copied out to
 		 * ra->ra_port after the lock is dropped; at most
 		 * min(ra->ra_size, buflen) bytes worth of whole records.
 		 */
 		LAGG_XLOCK(sc);
 		buflen = sc->sc_count * sizeof(struct lagg_reqport);
 		outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 		ra->ra_proto = sc->sc_proto;
 		lagg_proto_request(sc, &ra->ra_psc);
 		count = 0;
 		buf = outbuf;
 		len = min(ra->ra_size, buflen);
 		CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 			if (len < sizeof(rpbuf))
 				break;
 
 			lagg_port2req(lp, &rpbuf);
 			memcpy(buf, &rpbuf, sizeof(rpbuf));
 			count++;
 			buf += sizeof(rpbuf);
 			len -= sizeof(rpbuf);
 		}
 		LAGG_XUNLOCK(sc);
 		ra->ra_ports = count;
 		ra->ra_size = count * sizeof(rpbuf);
 		error = copyout(outbuf, ra->ra_port, ra->ra_size);
 		free(outbuf, M_TEMP);
 		break;
 	case SIOCSLAGG:
 		/* Switch the aggregation protocol. */
 		error = priv_check(td, PRIV_NET_LAGG);
 		if (error)
 			break;
 		if (ra->ra_proto >= LAGG_PROTO_MAX) {
 			error = EPROTONOSUPPORT;
 			break;
 		}
-
+		/* Infiniband only supports the failover protocol. */
+		if (ra->ra_proto != LAGG_PROTO_FAILOVER &&
+		    ifp->if_type == IFT_INFINIBAND) {
+			error = EPROTONOSUPPORT;
+			break;
+		}
 		LAGG_XLOCK(sc);
 		lagg_proto_detach(sc);
 		LAGG_UNLOCK_ASSERT();
 		lagg_proto_attach(sc, ra->ra_proto);
 		LAGG_XUNLOCK(sc);
 		break;
 	case SIOCGLAGGOPTS:
 		/* Report options; LACP options are read from the LACP softc. */
 		LAGG_XLOCK(sc);
 		ro->ro_opts = sc->sc_opts;
 		if (sc->sc_proto == LAGG_PROTO_LACP) {
 			struct lacp_softc *lsc;
 
 			lsc = (struct lacp_softc *)sc->sc_psc;
 			if (lsc->lsc_debug.lsc_tx_test != 0)
 				ro->ro_opts |= LAGG_OPT_LACP_TXTEST;
 			if (lsc->lsc_debug.lsc_rx_test != 0)
 				ro->ro_opts |= LAGG_OPT_LACP_RXTEST;
 			if (lsc->lsc_strict_mode != 0)
 				ro->ro_opts |= LAGG_OPT_LACP_STRICT;
 			if (lsc->lsc_fast_timeout != 0)
 				ro->ro_opts |= LAGG_OPT_LACP_FAST_TIMO;
 
 			ro->ro_active = sc->sc_active;
 		} else {
 			/* Count the ports for which LAGG_PORTACTIVE() holds. */
 			ro->ro_active = 0;
 			CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 				ro->ro_active += LAGG_PORTACTIVE(lp);
 		}
 		ro->ro_bkt = sc->sc_stride;
 		ro->ro_flapping = sc->sc_flapping;
 		ro->ro_flowid_shift = sc->flowid_shift;
 		LAGG_XUNLOCK(sc);
 		break;
 	case SIOCSLAGGOPTS:
 		error = priv_check(td, PRIV_NET_LAGG);
 		if (error)
 			break;
 
 		/*
 		 * The stride option was added without defining a corresponding
 		 * LAGG_OPT flag, so handle a non-zero value before checking
 		 * anything else to preserve compatibility.
 		 */
 		LAGG_XLOCK(sc);
 		if (ro->ro_opts == 0 && ro->ro_bkt != 0) {
 			if (sc->sc_proto != LAGG_PROTO_ROUNDROBIN) {
 				LAGG_XUNLOCK(sc);
 				error = EINVAL;
 				break;
 			}
 			sc->sc_stride = ro->ro_bkt;
 		}
 		if (ro->ro_opts == 0) {
 			LAGG_XUNLOCK(sc);
 			break;
 		}
 
 		/*
 		 * Set options.  LACP options are stored in sc->sc_psc,
 		 * not in sc_opts.
 		 */
 		int valid, lacp;
 
 		/*
 		 * Exactly one option per request is accepted; a negated
 		 * constant requests that the option be cleared.
 		 */
 		switch (ro->ro_opts) {
 		case LAGG_OPT_USE_FLOWID:
 		case -LAGG_OPT_USE_FLOWID:
 		case LAGG_OPT_USE_NUMA:
 		case -LAGG_OPT_USE_NUMA:
 		case LAGG_OPT_FLOWIDSHIFT:
 		case LAGG_OPT_RR_LIMIT:
 			valid = 1;
 			lacp = 0;
 			break;
 		case LAGG_OPT_LACP_TXTEST:
 		case -LAGG_OPT_LACP_TXTEST:
 		case LAGG_OPT_LACP_RXTEST:
 		case -LAGG_OPT_LACP_RXTEST:
 		case LAGG_OPT_LACP_STRICT:
 		case -LAGG_OPT_LACP_STRICT:
 		case LAGG_OPT_LACP_FAST_TIMO:
 		case -LAGG_OPT_LACP_FAST_TIMO:
 			valid = lacp = 1;
 			break;
 		default:
 			valid = lacp = 0;
 			break;
 		}
 
 		if (valid == 0 ||
 		    (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) {
 			/* Invalid combination of options specified. */
 			error = EINVAL;
 			LAGG_XUNLOCK(sc);
 			break;	/* Return from SIOCSLAGGOPTS. */
 		}
 
 		/*
 		 * Store new options into sc->sc_opts except for
 		 * FLOWIDSHIFT, RR and LACP options.
 		 */
 		if (lacp == 0) {
 			if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT)
 				sc->flowid_shift = ro->ro_flowid_shift;
 			else if (ro->ro_opts == LAGG_OPT_RR_LIMIT) {
 				if (sc->sc_proto != LAGG_PROTO_ROUNDROBIN ||
 				    ro->ro_bkt == 0) {
 					error = EINVAL;
 					LAGG_XUNLOCK(sc);
 					break;
 				}
 				sc->sc_stride = ro->ro_bkt;
 			} else if (ro->ro_opts > 0)
 				sc->sc_opts |= ro->ro_opts;
 			else
 				/*
 				 * NOTE(review): ro_opts is a negated option
 				 * constant here, so ~ro_opts equals
 				 * (-ro_opts) - 1.  For a single-bit option
 				 * that mask clears the requested bit but also
 				 * every higher bit of sc_opts.  Confirm
 				 * whether ~(-ro_opts) was intended.
 				 */
 				sc->sc_opts &= ~ro->ro_opts;
 		} else {
 			struct lacp_softc *lsc;
 			struct lacp_port *lp;
 
 			lsc = (struct lacp_softc *)sc->sc_psc;
 
 			switch (ro->ro_opts) {
 			case LAGG_OPT_LACP_TXTEST:
 				lsc->lsc_debug.lsc_tx_test = 1;
 				break;
 			case -LAGG_OPT_LACP_TXTEST:
 				lsc->lsc_debug.lsc_tx_test = 0;
 				break;
 			case LAGG_OPT_LACP_RXTEST:
 				lsc->lsc_debug.lsc_rx_test = 1;
 				break;
 			case -LAGG_OPT_LACP_RXTEST:
 				lsc->lsc_debug.lsc_rx_test = 0;
 				break;
 			case LAGG_OPT_LACP_STRICT:
 				lsc->lsc_strict_mode = 1;
 				break;
 			case -LAGG_OPT_LACP_STRICT:
 				lsc->lsc_strict_mode = 0;
 				break;
 			case LAGG_OPT_LACP_FAST_TIMO:
 				/* Set the timeout state bit on every LACP port. */
 				LACP_LOCK(lsc);
         			LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
                         		lp->lp_state |= LACP_STATE_TIMEOUT;
 				LACP_UNLOCK(lsc);
 				lsc->lsc_fast_timeout = 1;
 				break;
 			case -LAGG_OPT_LACP_FAST_TIMO:
 				/* Clear the timeout state bit on every LACP port. */
 				LACP_LOCK(lsc);
         			LIST_FOREACH(lp, &lsc->lsc_ports, lp_next)
                         		lp->lp_state &= ~LACP_STATE_TIMEOUT;
 				LACP_UNLOCK(lsc);
 				lsc->lsc_fast_timeout = 0;
 				break;
 			}
 		}
 		LAGG_XUNLOCK(sc);
 		break;
 	case SIOCGLAGGFLAGS:
 		/* Translate the internal MBUF_HASHFLAG_* bits to LAGG_F_HASH*. */
 		rf->rf_flags = 0;
 		LAGG_XLOCK(sc);
 		if (sc->sc_flags & MBUF_HASHFLAG_L2)
 			rf->rf_flags |= LAGG_F_HASHL2;
 		if (sc->sc_flags & MBUF_HASHFLAG_L3)
 			rf->rf_flags |= LAGG_F_HASHL3;
 		if (sc->sc_flags & MBUF_HASHFLAG_L4)
 			rf->rf_flags |= LAGG_F_HASHL4;
 		LAGG_XUNLOCK(sc);
 		break;
 	case SIOCSLAGGHASH:
 		/* Replace the hash layer selection; at least one layer is required. */
 		error = priv_check(td, PRIV_NET_LAGG);
 		if (error)
 			break;
 		if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
 			error = EINVAL;
 			break;
 		}
 		LAGG_XLOCK(sc);
 		sc->sc_flags = 0;
 		if (rf->rf_flags & LAGG_F_HASHL2)
 			sc->sc_flags |= MBUF_HASHFLAG_L2;
 		if (rf->rf_flags & LAGG_F_HASHL3)
 			sc->sc_flags |= MBUF_HASHFLAG_L3;
 		if (rf->rf_flags & LAGG_F_HASHL4)
 			sc->sc_flags |= MBUF_HASHFLAG_L4;
 		LAGG_XUNLOCK(sc);
 		break;
 	case SIOCGLAGGPORT:
 		/* Look up the named interface; a reference is held on it until done. */
 		if (rp->rp_portname[0] == '\0' ||
 		    (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 			error = EINVAL;
 			break;
 		}
 
 		LAGG_RLOCK();
 		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 		    lp->lp_softc != sc) {
 			error = ENOENT;
 			LAGG_RUNLOCK();
 			if_rele(tpif);
 			break;
 		}
 
 		lagg_port2req(lp, rp);
 		LAGG_RUNLOCK();
 		if_rele(tpif);
 		break;
 	case SIOCSLAGGPORT:
 		/* Add the named interface as a port of this lagg. */
 		error = priv_check(td, PRIV_NET_LAGG);
 		if (error)
 			break;
 		if (rp->rp_portname[0] == '\0' ||
 		    (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 			error = EINVAL;
 			break;
 		}
 #ifdef INET6
 		/*
 		 * A laggport interface should not have inet6 address
 		 * because two interfaces with a valid link-local
 		 * scope zone must not be merged in any form.  This
 		 * restriction is needed to prevent violation of
 		 * link-local scope zone.  Attempts to add a laggport
 		 * interface which has inet6 addresses triggers
 		 * removal of all inet6 addresses on the member
 		 * interface.
 		 */
 		if (in6ifa_llaonifp(tpif)) {
 			in6_ifdetach(tpif);
 				if_printf(sc->sc_ifp,
 				    "IPv6 addresses on %s have been removed "
 				    "before adding it as a member to prevent "
 				    "IPv6 address scope violation.\n",
 				    tpif->if_xname);
 		}
 #endif
 		oldmtu = ifp->if_mtu;
 		LAGG_XLOCK(sc);
 		error = lagg_port_create(sc, tpif);
 		LAGG_XUNLOCK(sc);
 		if_rele(tpif);
 
 		/*
 		 * LAGG MTU may change during addition of the first port.
 		 * If it did, do network layer specific procedure.
 		 */
 		if (ifp->if_mtu != oldmtu) {
 #ifdef INET6
 			nd6_setmtu(ifp);
 #endif
 			rt_updatemtu(ifp);
 		}
 
 		VLAN_CAPABILITIES(ifp);
 		break;
 	case SIOCSLAGGDELPORT:
 		/* Remove the named interface from this lagg. */
 		error = priv_check(td, PRIV_NET_LAGG);
 		if (error)
 			break;
 		if (rp->rp_portname[0] == '\0' ||
 		    (tpif = ifunit_ref(rp->rp_portname)) == NULL) {
 			error = EINVAL;
 			break;
 		}
 
 		LAGG_XLOCK(sc);
 		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 		    lp->lp_softc != sc) {
 			error = ENOENT;
 			LAGG_XUNLOCK(sc);
 			if_rele(tpif);
 			break;
 		}
 
 		error = lagg_port_destroy(lp, 1);
 		LAGG_XUNLOCK(sc);
 		if_rele(tpif);
 		VLAN_CAPABILITIES(ifp);
 		break;
 	case SIOCSIFFLAGS:
 		/* Set flags on ports too */
 		LAGG_XLOCK(sc);
 		CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 			lagg_setflags(lp, 1);
 		}
 
 		if (!(ifp->if_flags & IFF_UP) &&
 		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 			/*
 			 * If interface is marked down and it is running,
 			 * then stop and disable it.
 			 */
 			lagg_stop(sc);
 			LAGG_XUNLOCK(sc);
 		} else if ((ifp->if_flags & IFF_UP) &&
 		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 			/*
 			 * If interface is marked up and it is stopped, then
 			 * start it.  The lock is dropped first; presumably
 			 * if_init is lagg_init(), which takes it itself.
 			 */
 			LAGG_XUNLOCK(sc);
 			(*ifp->if_init)(sc);
 		} else
 			LAGG_XUNLOCK(sc);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		/* Rebuild every port's multicast list from the lagg's list. */
 		LAGG_XLOCK(sc);
 		CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 			lagg_clrmulti(lp);
 			lagg_setmulti(lp);
 		}
 		LAGG_XUNLOCK(sc);
 		error = 0;
 		break;
 	case SIOCSIFMEDIA:
 	case SIOCGIFMEDIA:
 		/* Media requests are rejected on InfiniBand laggs. */
-		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
+		if (ifp->if_type == IFT_INFINIBAND)
+			error = EINVAL;
+		else
+			error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
 		break;
 
 	case SIOCSIFCAP:
 		/*
 		 * Forward the request to every port's original handler
 		 * (per-port errors are ignored), then recompute the lagg's
 		 * capabilities.
 		 */
 		LAGG_XLOCK(sc);
 		CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 			if (lp->lp_ioctl != NULL)
 				(*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
 		}
 		lagg_capabilities(sc);
 		LAGG_XUNLOCK(sc);
 		VLAN_CAPABILITIES(ifp);
 		error = 0;
 		break;
 
 	case SIOCSIFMTU:
 		/*
 		 * Apply the new MTU to every port; on the first failure all
 		 * ports are set back to the lagg's current MTU and the error
 		 * is returned.
 		 */
 		LAGG_XLOCK(sc);
 		CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 			if (lp->lp_ioctl != NULL)
 				error = (*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
 			else
 				error = EINVAL;
 			if (error != 0) {
 				if_printf(ifp,
 				    "failed to change MTU to %d on port %s, "
 				    "reverting all ports to original MTU (%d)\n",
 				    ifr->ifr_mtu, lp->lp_ifp->if_xname, ifp->if_mtu);
 				break;
 			}
 		}
 		if (error == 0) {
 			ifp->if_mtu = ifr->ifr_mtu;
 		} else {
 			/* set every port back to the original MTU */
 			ifr->ifr_mtu = ifp->if_mtu;
 			CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 				if (lp->lp_ioctl != NULL)
 					(*lp->lp_ioctl)(lp->lp_ifp, cmd, data);
 			}
 		}
 		LAGG_XUNLOCK(sc);
 		break;
 
 	default:
 		/*
 		 * NOTE(review): unhandled commands go to ether_ioctl()
 		 * regardless of ifp->if_type - confirm this is appropriate
 		 * for IFT_INFINIBAND laggs.
 		 */
 		error = ether_ioctl(ifp, cmd, data);
 		break;
 	}
 	return (error);
 }
 
 #if defined(KERN_TLS) || defined(RATELIMIT)
 /*
  * Convert a pointer to the generic send tag embedded in a lagg_snd_tag
  * (its `com' member) back to the enclosing lagg_snd_tag.
  */
 static inline struct lagg_snd_tag *
 mst_to_lst(struct m_snd_tag *mst)
 {
 
 	return (__containerof(mst, struct lagg_snd_tag, com));
 }
 
 /*
  * Look up the port used by a specific flow.  This only works for lagg
  * protocols with deterministic port mappings (e.g. not roundrobin).
  * In addition protocols which use a hash to map flows to ports must
  * be configured to use the mbuf flowid rather than hashing packet
  * contents.
  *
  * Returns the selected port, or NULL when the protocol is not handled
  * here, when LAGG_OPT_USE_FLOWID is off or `flowtype' is M_HASHTYPE_NONE
  * for the hashing protocols, or when the protocol-specific lookup itself
  * yields no port.
  */
 static struct lagg_port *
 lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype,
     uint8_t numa_domain)
 {
 	struct lagg_softc *sc;
 	struct lagg_port *lp;
 	struct lagg_lb *lb;
 	uint32_t hash, p;
 
 	sc = ifp->if_softc;
 
 	switch (sc->sc_proto) {
 	case LAGG_PROTO_FAILOVER:
 		return (lagg_link_active(sc, sc->sc_primary));
 	case LAGG_PROTO_LOADBALANCE:
 		if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
 		    flowtype == M_HASHTYPE_NONE)
 			return (NULL);
 		/* Index the load-balance port table by the shifted flowid. */
 		p = flowid >> sc->flowid_shift;
 		p %= sc->sc_count;
 		lb = (struct lagg_lb *)sc->sc_psc;
 		lp = lb->lb_ports[p];
 		return (lagg_link_active(sc, lp));
 	case LAGG_PROTO_LACP:
 		if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
 		    flowtype == M_HASHTYPE_NONE)
 			return (NULL);
 		hash = flowid >> sc->flowid_shift;
 		return (lacp_select_tx_port_by_hash(sc, hash, numa_domain));
 	default:
 		return (NULL);
 	}
 }
 
 /*
  * Allocate a send tag on lagg interface `ifp'.
  *
  * The port that would carry the flow described by `params' is looked up
  * and a tag is allocated on that port's interface; it is wrapped in a
  * lagg_snd_tag whose embedded generic tag is returned through `ppmt'.
  *
  * Returns 0 on success, EOPNOTSUPP when no suitable port exists or the
  * port interface has no if_snd_tag_alloc method, ENOMEM when the wrapper
  * cannot be allocated, or the error from the port's allocator.
  */
 static int
 lagg_snd_tag_alloc(struct ifnet *ifp,
     union if_snd_tag_alloc_params *params,
     struct m_snd_tag **ppmt)
 {
 	struct lagg_snd_tag *lst;
 	struct lagg_softc *sc;
 	struct lagg_port *lp;
 	struct ifnet *lp_ifp;
 	int error;
 
 	sc = ifp->if_softc;
 
 	LAGG_RLOCK();
 	lp = lookup_snd_tag_port(ifp, params->hdr.flowid,
 	    params->hdr.flowtype, params->hdr.numa_domain);
 	if (lp == NULL) {
 		LAGG_RUNLOCK();
 		return (EOPNOTSUPP);
 	}
 	if (lp->lp_ifp == NULL || lp->lp_ifp->if_snd_tag_alloc == NULL) {
 		LAGG_RUNLOCK();
 		return (EOPNOTSUPP);
 	}
 	/* Hold a reference so the port ifnet can be used after unlocking. */
 	lp_ifp = lp->lp_ifp;
 	if_ref(lp_ifp);
 	LAGG_RUNLOCK();
 
 	lst = malloc(sizeof(*lst), M_LAGG, M_NOWAIT);
 	if (lst == NULL) {
 		if_rele(lp_ifp);
 		return (ENOMEM);
 	}
 
 	error = lp_ifp->if_snd_tag_alloc(lp_ifp, params, &lst->tag);
 	if_rele(lp_ifp);
 	if (error) {
 		free(lst, M_LAGG);
 		return (error);
 	}
 
 	/* The wrapper tag is bound to the lagg ifp, with the inner tag's type. */
 	m_snd_tag_init(&lst->com, ifp, lst->tag->type);
 
 	*ppmt = &lst->com;
 	return (0);
 }
 
 /*
  * Modify a lagg send tag by forwarding the request to the if_snd_tag_modify
  * method of the interface that owns the wrapped port tag; returns that
  * method's result.
  */
 static int
 lagg_snd_tag_modify(struct m_snd_tag *mst,
     union if_snd_tag_modify_params *params)
 {
 	struct lagg_snd_tag *lst;
 
 	lst = mst_to_lst(mst);
 	return (lst->tag->ifp->if_snd_tag_modify(lst->tag, params));
 }
 
 /*
  * Query a lagg send tag by forwarding the request to the if_snd_tag_query
  * method of the interface that owns the wrapped port tag; returns that
  * method's result.
  */
 static int
 lagg_snd_tag_query(struct m_snd_tag *mst,
     union if_snd_tag_query_params *params)
 {
 	struct lagg_snd_tag *lst;
 
 	lst = mst_to_lst(mst);
 	return (lst->tag->ifp->if_snd_tag_query(lst->tag, params));
 }
 
 /*
  * Free a lagg send tag: drop the reference on the wrapped port tag and
  * release the wrapper allocated by lagg_snd_tag_alloc().
  */
 static void
 lagg_snd_tag_free(struct m_snd_tag *mst)
 {
 	struct lagg_snd_tag *lst;
 
 	lst = mst_to_lst(mst);
 	m_snd_tag_rele(lst->tag);
 	free(lst, M_LAGG);
 }
 
 /*
  * Answer a rate limit capability query for the lagg interface.  The result
  * `q' is filled with an empty rate table, zero flows and zero rates, and
  * is flagged RT_IS_INDIRECT.  `ifp' is unused.
  */
 static void
 lagg_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q)
 {
 	/*
 	 * For lagg, we have an indirect
 	 * interface. The caller needs to
 	 * get a ratelimit tag on the actual
 	 * interface the flow will go on.
 	 */
 	q->rate_table = NULL;
 	q->flags = RT_IS_INDIRECT;
 	q->max_flows = 0;
 	q->number_of_rates = 0;
 }
 #endif
 
 /*
  * Mirror the AF_LINK multicast addresses of the lagg interface onto port
  * `lp'.  Each address is copied into a lagg_mc record queued on
  * lp->lp_mc_head, and then joined on the port interface with if_addmulti().
  *
  * Returns 0 on success, ENOMEM if a record cannot be allocated, or the
  * first error reported by if_addmulti().
  * NOTE(review): both error paths return with the records created so far
  * still queued on lp_mc_head; entries not yet joined keep mc_ifma == NULL.
  * Confirm that callers clean up via lagg_clrmulti().
  */
 static int
 lagg_setmulti(struct lagg_port *lp)
 {
 	struct lagg_softc *sc = lp->lp_softc;
 	struct ifnet *ifp = lp->lp_ifp;
 	struct ifnet *scifp = sc->sc_ifp;
 	struct lagg_mc *mc;
 	struct ifmultiaddr *ifma;
 	int error;
 
 	/* Pass 1: snapshot the lagg's link-layer groups under its address lock. */
 	IF_ADDR_WLOCK(scifp);
 	CK_STAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family != AF_LINK)
 			continue;
 		mc = malloc(sizeof(struct lagg_mc), M_LAGG, M_NOWAIT);
 		if (mc == NULL) {
 			IF_ADDR_WUNLOCK(scifp);
 			return (ENOMEM);
 		}
 		bcopy(ifma->ifma_addr, &mc->mc_addr,
 		    ifma->ifma_addr->sa_len);
 		/* Retarget the copied address at the port interface. */
 		mc->mc_addr.sdl_index = ifp->if_index;
 		mc->mc_ifma = NULL;
 		SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
 	}
 	IF_ADDR_WUNLOCK(scifp);
 	/* Pass 2: join each group on the port, with the address lock released. */
 	SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) {
 		error = if_addmulti(ifp,
 		    (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
 		if (error)
 			return (error);
 	}
 	return (0);
 }
 
 /*
  * Drop every multicast record queued on port `lp'.  A record's membership
  * is deleted from the port interface only when it was actually joined
  * (mc_ifma set) and the port is not detaching; the record itself is always
  * freed.  The lagg exclusive lock must be held (asserted).  Returns 0.
  */
 static int
 lagg_clrmulti(struct lagg_port *lp)
 {
 	struct lagg_mc *mc;
 
 	LAGG_XLOCK_ASSERT(lp->lp_softc);
 	while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
 		SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
 		if (mc->mc_ifma && lp->lp_detaching == 0)
 			if_delmulti_ifma(mc->mc_ifma);
 		free(mc, M_LAGG);
 	}
 	return (0);
 }
 
 /*
  * Set the enabled capabilities of port `lp' to `cap' by issuing SIOCSIFCAP
  * through the port's saved original ioctl handler.
  *
  * Returns 0 if the port already has exactly these capabilities enabled,
  * ENXIO if no saved handler exists, otherwise the handler's result.
  */
 static int
 lagg_setcaps(struct lagg_port *lp, int cap)
 {
 	/*
 	 * NOTE(review): only ifr_reqcap is initialised below; the rest of
 	 * this stack ifreq is left as-is.  Presumably drivers read nothing
 	 * else for SIOCSIFCAP - confirm.
 	 */
 	struct ifreq ifr;
 
 	if (lp->lp_ifp->if_capenable == cap)
 		return (0);
 	if (lp->lp_ioctl == NULL)
 		return (ENXIO);
 	ifr.ifr_reqcap = cap;
 	return ((*lp->lp_ioctl)(lp->lp_ifp, SIOCSIFCAP, (caddr_t)&ifr));
 }
 
 /*
  * Handle a ref counted flag that should be set on the lagg port as well.
  *
  * `flag' is the interface flag to propagate, `func' the routine that
  * takes or drops a reference to it on the port.  When `status' is
  * non-zero the port is made to follow the lagg interface's current value
  * of `flag'; when zero the flag is released.  The lagg exclusive lock
  * must be held (asserted).  Returns 0, or the error from `func'.
  */
 static int
 lagg_setflag(struct lagg_port *lp, int flag, int status,
     int (*func)(struct ifnet *, int))
 {
 	struct lagg_softc *sc = lp->lp_softc;
 	struct ifnet *scifp = sc->sc_ifp;
 	struct ifnet *ifp = lp->lp_ifp;
 	int error;
 
 	LAGG_XLOCK_ASSERT(sc);
 
 	status = status ? (scifp->if_flags & flag) : 0;
 	/* Now "status" contains the flag value or 0 */
 
 	/*
 	 * See if recorded ports status is different from what
 	 * we want it to be.  If it is, flip it.  We record ports
 	 * status in lp_ifflags so that we won't clear ports flag
 	 * we haven't set.  In fact, we don't clear or set ports
 	 * flags directly, but get or release references to them.
 	 * That's why we can be sure that recorded flags still are
 	 * in accord with actual ports flags.
 	 */
 	if (status != (lp->lp_ifflags & flag)) {
 		error = (*func)(ifp, status);
 		if (error)
 			return (error);
 		lp->lp_ifflags &= ~flag;
 		lp->lp_ifflags |= status;
 	}
 	return (0);
 }
 
 /*
  * Handle IFF_* flags that require certain changes on the lagg port
  * if "status" is true, update ports flags respective to the lagg
  * if "status" is false, forcedly clear the flags set on port.
  *
  * Walks the lagg_pflags table up to its zero-flag terminator and stops at
  * the first failure.  Returns 0, or the error from lagg_setflag().
  */
 static int
 lagg_setflags(struct lagg_port *lp, int status)
 {
 	int error, i;
 
 	for (i = 0; lagg_pflags[i].flag; i++) {
 		error = lagg_setflag(lp, lagg_pflags[i].flag,
 		    status, lagg_pflags[i].func);
 		if (error)
 			return (error);
 	}
 	return (0);
 }
 
 /*
  * Transmit routine for Ethernet laggs.
  *
  * If no protocol is configured or the lagg has no ports, the mbuf is
  * freed, an output error is counted and ENXIO is returned.  Otherwise the
  * packet is tapped with ETHER_BPF_MTAP() and handed to the protocol's
  * start routine, whose result is returned.
  */
 static int
-lagg_transmit(struct ifnet *ifp, struct mbuf *m)
+lagg_transmit_ethernet(struct ifnet *ifp, struct mbuf *m)
 {
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	int error;
 
 #if defined(KERN_TLS) || defined(RATELIMIT)
 	/* A packet carrying a send tag must carry one issued for this lagg. */
 	if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
 		MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
 #endif
 	LAGG_RLOCK();
 	/* We need a Tx algorithm and at least one port */
 	if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
 		LAGG_RUNLOCK();
 		m_freem(m);
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 		return (ENXIO);
 	}
 
 	ETHER_BPF_MTAP(ifp, m);
 
 	error = lagg_proto_start(sc, m);
 	LAGG_RUNLOCK();
 	return (error);
 }
 
+static int
+lagg_transmit_infiniband(struct ifnet *ifp, struct mbuf *m)
+{
 	/*
 	 * Transmit routine for InfiniBand laggs.  Same flow as
 	 * lagg_transmit_ethernet() except that the packet is tapped with
 	 * INFINIBAND_BPF_MTAP().  Returns ENXIO (after freeing the mbuf and
 	 * counting an output error) when there is no protocol or no port,
 	 * otherwise the result of the protocol's start routine.
 	 */
+	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+	int error;
+
+#if defined(KERN_TLS) || defined(RATELIMIT)
+	if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+		MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
+#endif
+	LAGG_RLOCK();
+	/* We need a Tx algorithm and at least one port */
+	if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
+		LAGG_RUNLOCK();
+		m_freem(m);
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+		return (ENXIO);
+	}
+
+	INFINIBAND_BPF_MTAP(ifp, m);
+
+	error = lagg_proto_start(sc, m);
+	LAGG_RUNLOCK();
+	return (error);
+}
+
 /*
  * The ifp->if_qflush entry point for lagg(4) is no-op.
  * The function body is intentionally empty and `ifp' is unused.
  */
 static void
 lagg_qflush(struct ifnet *ifp __unused)
 {
 }
 
 /*
  * Input routine for a packet received on Ethernet lagg port `ifp'.
  *
  * The packet is freed and NULL returned when the lagg is not running, the
  * port is detaching or no protocol is configured.  Otherwise it is tapped
  * on the lagg interface and passed to the protocol's input routine.  The
  * resulting mbuf is returned, unless the lagg is in IFF_MONITOR mode, in
  * which case it is freed and NULL is returned.
  */
 static struct mbuf *
-lagg_input(struct ifnet *ifp, struct mbuf *m)
+lagg_input_ethernet(struct ifnet *ifp, struct mbuf *m)
 {
 	struct lagg_port *lp = ifp->if_lagg;
 	struct lagg_softc *sc = lp->lp_softc;
 	struct ifnet *scifp = sc->sc_ifp;
 
 	LAGG_RLOCK();
 	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 	    lp->lp_detaching != 0 ||
 	    sc->sc_proto == LAGG_PROTO_NONE) {
 		LAGG_RUNLOCK();
 		m_freem(m);
 		return (NULL);
 	}
 
 	ETHER_BPF_MTAP(scifp, m);
+
+	m = lagg_proto_input(sc, lp, m);
+	if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) {
+		m_freem(m);
+		m = NULL;
+	}
+
+	LAGG_RUNLOCK();
+	return (m);
+}
+
+static struct mbuf *
+lagg_input_infiniband(struct ifnet *ifp, struct mbuf *m)
+{
 	/*
 	 * Input routine for a packet received on InfiniBand lagg port `ifp'.
 	 * Same flow as lagg_input_ethernet() except that the packet is
 	 * tapped with INFINIBAND_BPF_MTAP().  Returns the mbuf produced by
 	 * the protocol's input routine, or NULL when the packet was dropped
 	 * here or the lagg is in IFF_MONITOR mode.
 	 */
+	struct lagg_port *lp = ifp->if_lagg;
+	struct lagg_softc *sc = lp->lp_softc;
+	struct ifnet *scifp = sc->sc_ifp;
+
+	LAGG_RLOCK();
+	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+	    lp->lp_detaching != 0 ||
+	    sc->sc_proto == LAGG_PROTO_NONE) {
+		LAGG_RUNLOCK();
+		m_freem(m);
+		return (NULL);
+	}
+
+	INFINIBAND_BPF_MTAP(scifp, m);
 
 	m = lagg_proto_input(sc, lp, m);
 	if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) {
 		m_freem(m);
 		m = NULL;
 	}
 
 	LAGG_RUNLOCK();
 	return (m);
 }
 
 /*
  * Media change callback for the lagg interface.  Media changes are
  * ignored: the function only prints its own name when IFF_DEBUG is set in
  * sc_ifflags, and always returns 0.
  */
 static int
 lagg_media_change(struct ifnet *ifp)
 {
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 
 	if (sc->sc_ifflags & IFF_DEBUG)
 		printf("%s\n", __func__);
 
 	/* Ignore */
 	return (0);
 }
 
 /*
  * Media status callback for the lagg interface.  The active media is
  * always reported as IFM_ETHER | IFM_AUTO; the status is IFM_AVALID, plus
  * IFM_ACTIVE when LAGG_PORTACTIVE() holds for at least one port.
  */
 static void
 lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
 {
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	struct lagg_port *lp;
 
 	imr->ifm_status = IFM_AVALID;
 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
 
 	LAGG_RLOCK();
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 		if (LAGG_PORTACTIVE(lp))
 			imr->ifm_status |= IFM_ACTIVE;
 	}
 	LAGG_RUNLOCK();
 }
 
 /*
  * Recompute the link state and if_baudrate of the lagg interface from its
  * ports.  Nothing is done for LACP, which maintains both itself.  The
  * lagg exclusive lock must be held (asserted).
  */
 static void
 lagg_linkstate(struct lagg_softc *sc)
 {
 	struct lagg_port *lp;
 	int new_link = LINK_STATE_DOWN;
 	uint64_t speed;
 
 	LAGG_XLOCK_ASSERT(sc);
 
 	/* LACP handles link state itself */
 	if (sc->sc_proto == LAGG_PROTO_LACP)
 		return;
 
 	/* Our link is considered up if at least one of our ports is active */
 	LAGG_RLOCK();
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 		if (lp->lp_ifp->if_link_state == LINK_STATE_UP) {
 			new_link = LINK_STATE_UP;
 			break;
 		}
 	}
 	LAGG_RUNLOCK();
 	if_link_state_change(sc->sc_ifp, new_link);
 
 	/* Update if_baudrate to reflect the max possible speed */
 	switch (sc->sc_proto) {
 		case LAGG_PROTO_FAILOVER:
 			/* Failover: the primary port's rate, or 0 without one. */
 			sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
 			    sc->sc_primary->lp_ifp->if_baudrate : 0;
 			break;
 		case LAGG_PROTO_ROUNDROBIN:
 		case LAGG_PROTO_LOADBALANCE:
 		case LAGG_PROTO_BROADCAST:
 			/* These protocols report the sum of all port rates. */
 			speed = 0;
 			LAGG_RLOCK();
 			CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 				speed += lp->lp_ifp->if_baudrate;
 			LAGG_RUNLOCK();
 			sc->sc_ifp->if_baudrate = speed;
 			break;
 		case LAGG_PROTO_LACP:
 			/* LACP updates if_baudrate itself */
 			break;
 	}
 }
 
 static void
 lagg_port_state(struct ifnet *ifp, int state)
 {
 	struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
 	struct lagg_softc *sc = NULL;
 
 	if (lp != NULL)
 		sc = lp->lp_softc;
 	if (sc == NULL)
 		return;
 
 	LAGG_XLOCK(sc);
 	lagg_linkstate(sc);
 	lagg_proto_linkstate(sc, lp);
 	LAGG_XUNLOCK(sc);
 }
 
 struct lagg_port *
 lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
 {
 	struct lagg_port *lp_next, *rval = NULL;
 
 	/*
 	 * Search a port which reports an active link state.
 	 */
 
 #ifdef INVARIANTS
 	/*
 	 * This is called with either LAGG_RLOCK() held or
 	 * LAGG_XLOCK(sc) held.
 	 */
 	if (!in_epoch(net_epoch_preempt))
 		LAGG_XLOCK_ASSERT(sc);
 #endif
 
 	if (lp == NULL)
 		goto search;
 	if (LAGG_PORTACTIVE(lp)) {
 		rval = lp;
 		goto found;
 	}
 	if ((lp_next = CK_SLIST_NEXT(lp, lp_entries)) != NULL &&
 	    LAGG_PORTACTIVE(lp_next)) {
 		rval = lp_next;
 		goto found;
 	}
 
 search:
 	CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
 		if (LAGG_PORTACTIVE(lp_next)) {
 			return (lp_next);
 		}
 	}
 found:
 	return (rval);
 }
 
 int
 lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
 {
 
 #if defined(KERN_TLS) || defined(RATELIMIT)
 	if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
 		struct lagg_snd_tag *lst;
 		struct m_snd_tag *mst;
 
 		mst = m->m_pkthdr.snd_tag;
 		lst = mst_to_lst(mst);
 		if (lst->tag->ifp != ifp) {
 			m_freem(m);
 			return (EAGAIN);
 		}
 		m->m_pkthdr.snd_tag = m_snd_tag_ref(lst->tag);
 		m_snd_tag_rele(mst);
 	}
 #endif
 	return (ifp->if_transmit)(ifp, m);
 }
 
 /*
  * Simple round robin aggregation
  */
 static void
 lagg_rr_attach(struct lagg_softc *sc)
 {
 	sc->sc_seq = 0;
 	sc->sc_stride = 1;
 }
 
 static int
 lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
 {
 	struct lagg_port *lp;
 	uint32_t p;
 
 	p = atomic_fetchadd_32(&sc->sc_seq, 1);
 	p /= sc->sc_stride;
 	p %= sc->sc_count;
 	lp = CK_SLIST_FIRST(&sc->sc_ports);
 
 	while (p--)
 		lp = CK_SLIST_NEXT(lp, lp_entries);
 
 	/*
 	 * Check the port's link state. This will return the next active
 	 * port if the link is down or the port is NULL.
 	 */
 	if ((lp = lagg_link_active(sc, lp)) == NULL) {
 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 		m_freem(m);
 		return (ENETDOWN);
 	}
 
 	/* Send mbuf */
 	return (lagg_enqueue(lp->lp_ifp, m));
 }
 
 static struct mbuf *
 lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 
 	/* Just pass in the packet to our lagg device */
 	m->m_pkthdr.rcvif = ifp;
 
 	return (m);
 }
 
 /*
  * Broadcast mode
  */
 static int
 lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
 {
 	int active_ports = 0;
 	int errors = 0;
 	int ret;
 	struct lagg_port *lp, *last = NULL;
 	struct mbuf *m0;
 
 	LAGG_RLOCK_ASSERT();
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 		if (!LAGG_PORTACTIVE(lp))
 			continue;
 
 		active_ports++;
 
 		if (last != NULL) {
 			m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT);
 			if (m0 == NULL) {
 				ret = ENOBUFS;
 				errors++;
 				break;
 			}
 			lagg_enqueue(last->lp_ifp, m0);
 		}
 		last = lp;
 	}
 
 	if (last == NULL) {
 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 		m_freem(m);
 		return (ENOENT);
 	}
 	if ((last = lagg_link_active(sc, last)) == NULL) {
 		errors++;
 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, errors);
 		m_freem(m);
 		return (ENETDOWN);
 	}
 
 	ret = lagg_enqueue(last->lp_ifp, m);
 	if (errors != 0)
 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, errors);
 
 	return (ret);
 }
 
 static struct mbuf*
 lagg_bcast_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 
 	/* Just pass in the packet to our lagg device */
 	m->m_pkthdr.rcvif = ifp;
 	return (m);
 }
 
 /*
  * Active failover
  */
 static int
 lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
 {
 	struct lagg_port *lp;
 
 	/* Use the master port if active or the next available port */
 	if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 		m_freem(m);
 		return (ENETDOWN);
 	}
 
 	/* Send mbuf */
 	return (lagg_enqueue(lp->lp_ifp, m));
 }
 
 static struct mbuf *
 lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	struct lagg_port *tmp_tp;
 
 	if (lp == sc->sc_primary || V_lagg_failover_rx_all) {
 		m->m_pkthdr.rcvif = ifp;
 		return (m);
 	}
 
 	if (!LAGG_PORTACTIVE(sc->sc_primary)) {
 		tmp_tp = lagg_link_active(sc, sc->sc_primary);
 		/*
 		 * If tmp_tp is null, we've received a packet when all
 		 * our links are down. Weird, but process it anyways.
 		 */
 		if ((tmp_tp == NULL || tmp_tp == lp)) {
 			m->m_pkthdr.rcvif = ifp;
 			return (m);
 		}
 	}
 
 	m_freem(m);
 	return (NULL);
 }
 
 /*
  * Loadbalancing
  */
 static void
 lagg_lb_attach(struct lagg_softc *sc)
 {
 	struct lagg_port *lp;
 	struct lagg_lb *lb;
 
 	LAGG_XLOCK_ASSERT(sc);
 	lb = malloc(sizeof(struct lagg_lb), M_LAGG, M_WAITOK | M_ZERO);
 	lb->lb_key = m_ether_tcpip_hash_init();
 	sc->sc_psc = lb;
 
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 		lagg_lb_port_create(lp);
 }
 
 static void
 lagg_lb_detach(struct lagg_softc *sc)
 {
 	struct lagg_lb *lb;
 
 	lb = (struct lagg_lb *)sc->sc_psc;
 	if (lb != NULL)
 		free(lb, M_LAGG);
 }
 
 static int
 lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
 {
 	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
 	struct lagg_port *lp_next;
 	int i = 0, rv;
 
 	rv = 0;
 	bzero(&lb->lb_ports, sizeof(lb->lb_ports));
 	LAGG_XLOCK_ASSERT(sc);
 	CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
 		if (lp_next == lp)
 			continue;
 		if (i >= LAGG_MAX_PORTS) {
 			rv = EINVAL;
 			break;
 		}
 		if (sc->sc_ifflags & IFF_DEBUG)
 			printf("%s: port %s at index %d\n",
 			    sc->sc_ifname, lp_next->lp_ifp->if_xname, i);
 		lb->lb_ports[i++] = lp_next;
 	}
 
 	return (rv);
 }
 
 static int
 lagg_lb_port_create(struct lagg_port *lp)
 {
 	struct lagg_softc *sc = lp->lp_softc;
 	return (lagg_lb_porttable(sc, NULL));
 }
 
 static void
 lagg_lb_port_destroy(struct lagg_port *lp)
 {
 	struct lagg_softc *sc = lp->lp_softc;
 	lagg_lb_porttable(sc, lp);
 }
 
 static int
 lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
 {
 	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
 	struct lagg_port *lp = NULL;
 	uint32_t p = 0;
 
 	if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
 	    M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 		p = m->m_pkthdr.flowid >> sc->flowid_shift;
 	else
 		p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key);
 	p %= sc->sc_count;
 	lp = lb->lb_ports[p];
 
 	/*
 	 * Check the port's link state. This will return the next active
 	 * port if the link is down or the port is NULL.
 	 */
 	if ((lp = lagg_link_active(sc, lp)) == NULL) {
 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 		m_freem(m);
 		return (ENETDOWN);
 	}
 
 	/* Send mbuf */
 	return (lagg_enqueue(lp->lp_ifp, m));
 }
 
 static struct mbuf *
 lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 
 	/* Just pass in the packet to our lagg device */
 	m->m_pkthdr.rcvif = ifp;
 
 	return (m);
 }
 
 /*
  * 802.3ad LACP
  */
 static void
 lagg_lacp_attach(struct lagg_softc *sc)
 {
 	struct lagg_port *lp;
 
 	lacp_attach(sc);
 	LAGG_XLOCK_ASSERT(sc);
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 		lacp_port_create(lp);
 }
 
 static void
 lagg_lacp_detach(struct lagg_softc *sc)
 {
 	struct lagg_port *lp;
 	void *psc;
 
 	LAGG_XLOCK_ASSERT(sc);
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 		lacp_port_destroy(lp);
 
 	psc = sc->sc_psc;
 	sc->sc_psc = NULL;
 	lacp_detach(psc);
 }
 
 static void
 lagg_lacp_lladdr(struct lagg_softc *sc)
 {
 	struct lagg_port *lp;
 
 	LAGG_SXLOCK_ASSERT(sc);
 
 	/* purge all the lacp ports */
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 		lacp_port_destroy(lp);
 
 	/* add them back in */
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 		lacp_port_create(lp);
 }
 
 static int
 lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
 {
 	struct lagg_port *lp;
 
 	lp = lacp_select_tx_port(sc, m);
 	if (lp == NULL) {
 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 		m_freem(m);
 		return (ENETDOWN);
 	}
 
 	/* Send mbuf */
 	return (lagg_enqueue(lp->lp_ifp, m));
 }
 
 static struct mbuf *
 lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
 {
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ether_header *eh;
 	u_short etype;
 
 	eh = mtod(m, struct ether_header *);
 	etype = ntohs(eh->ether_type);
 
 	/* Tap off LACP control messages */
 	if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
 		m = lacp_input(lp, m);
 		if (m == NULL)
 			return (NULL);
 	}
 
 	/*
 	 * If the port is not collecting or not in the active aggregator then
 	 * free and return.
 	 */
 	if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) {
 		m_freem(m);
 		return (NULL);
 	}
 
 	m->m_pkthdr.rcvif = ifp;
 	return (m);
 }
Index: head/sys/net/if_lagg.h
===================================================================
--- head/sys/net/if_lagg.h	(revision 366932)
+++ head/sys/net/if_lagg.h	(revision 366933)
@@ -1,269 +1,300 @@
 /*	$OpenBSD: if_trunk.h,v 1.11 2007/01/31 06:20:19 reyk Exp $	*/
 
 /*
  * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *
  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  *
  * $FreeBSD$
  */
 
 #ifndef _NET_LAGG_H
 #define _NET_LAGG_H
 
 /*
  * Global definitions
  */
 
 #define	LAGG_MAX_PORTS		32	/* logically */
 #define	LAGG_MAX_NAMESIZE	32	/* name of a protocol */
 #define	LAGG_MAX_STACKING	4	/* maximum number of stacked laggs */
 
 /* Lagg flags */
 #define	LAGG_F_HASHL2		0x00000001	/* hash layer 2 */
 #define	LAGG_F_HASHL3		0x00000002	/* hash layer 3 */
 #define	LAGG_F_HASHL4		0x00000004	/* hash layer 4 */
 #define	LAGG_F_HASHMASK		0x00000007
 
 /* Port flags */
 #define	LAGG_PORT_SLAVE		0x00000000	/* normal enslaved port */
 #define	LAGG_PORT_MASTER	0x00000001	/* primary port */
 #define	LAGG_PORT_STACK		0x00000002	/* stacked lagg port */
 #define	LAGG_PORT_ACTIVE	0x00000004	/* port is active */
 #define	LAGG_PORT_COLLECTING	0x00000008	/* port is receiving frames */
 #define	LAGG_PORT_DISTRIBUTING	0x00000010	/* port is sending frames */
 #define	LAGG_PORT_BITS		"\20\01MASTER\02STACK\03ACTIVE\04COLLECTING" \
 				  "\05DISTRIBUTING"
 
 /* Supported lagg PROTOs */
 typedef enum {
 	LAGG_PROTO_NONE = 0,	/* no lagg protocol defined */
 	LAGG_PROTO_ROUNDROBIN,	/* simple round robin */
 	LAGG_PROTO_FAILOVER,	/* active failover */
 	LAGG_PROTO_LOADBALANCE,	/* loadbalance */
 	LAGG_PROTO_LACP,	/* 802.3ad lacp */
 	LAGG_PROTO_BROADCAST,	/* broadcast */
 	LAGG_PROTO_MAX,
 } lagg_proto;
 
 struct lagg_protos {
 	const char		*lpr_name;
 	lagg_proto		lpr_proto;
 };
 
 #define	LAGG_PROTO_DEFAULT	LAGG_PROTO_FAILOVER
 #define LAGG_PROTOS	{						\
 	{ "failover",		LAGG_PROTO_FAILOVER },			\
 	{ "lacp",		LAGG_PROTO_LACP },			\
 	{ "loadbalance",	LAGG_PROTO_LOADBALANCE },		\
 	{ "roundrobin",		LAGG_PROTO_ROUNDROBIN },		\
 	{ "broadcast",		LAGG_PROTO_BROADCAST },			\
 	{ "none",		LAGG_PROTO_NONE },			\
 	{ "default",		LAGG_PROTO_DEFAULT }			\
 }
 
+/* Supported lagg TYPEs */
+typedef enum {
+	LAGG_TYPE_ETHERNET = 0, /* ethernet (default) */
+	LAGG_TYPE_INFINIBAND,	/* infiniband */
+	LAGG_TYPE_MAX,		/* count of the types above */
+} lagg_type;
+
+struct lagg_types {			/* shape of one LAGG_TYPES entry */
+	const char		*lt_name;	/* type name string */
+	lagg_type		lt_value;	/* LAGG_TYPE_XXX */
+};
+
+#define	LAGG_TYPE_DEFAULT	LAGG_TYPE_ETHERNET
+#define LAGG_TYPES	{						\
+	{ "ethernet",		LAGG_TYPE_ETHERNET },			\
+	{ "infiniband",		LAGG_TYPE_INFINIBAND },			\
+}
+
 /*
+ * lagg create clone params
+ */
+struct iflaggparam {
+	uint8_t lagg_type;	/* see LAGG_TYPE_XXX */
+	uint8_t reserved_8[3];	/* reserved; fills out the first 4 bytes */
+	uint32_t reserved_32[3];	/* reserved */
+};
+
+/*
  * lagg ioctls.
  */
 
 /*
  * LACP current operational parameters structure.
  */
 struct lacp_opreq {
 	uint16_t		actor_prio;
 	uint8_t			actor_mac[ETHER_ADDR_LEN];
 	uint16_t		actor_key;
 	uint16_t		actor_portprio;
 	uint16_t		actor_portno;
 	uint8_t			actor_state;
 	uint16_t		partner_prio;
 	uint8_t			partner_mac[ETHER_ADDR_LEN];
 	uint16_t		partner_key;
 	uint16_t		partner_portprio;
 	uint16_t		partner_portno;
 	uint8_t			partner_state;
 };
 
 /* lagg port settings */
 struct lagg_reqport {
 	char			rp_ifname[IFNAMSIZ];	/* name of the lagg */
 	char			rp_portname[IFNAMSIZ];	/* name of the port */
 	u_int32_t		rp_prio;		/* port priority */
 	u_int32_t		rp_flags;		/* port flags */
 	union {
 		struct lacp_opreq rpsc_lacp;
 	} rp_psc;
 #define rp_lacpreq	rp_psc.rpsc_lacp
 };
 
 #define	SIOCGLAGGPORT		_IOWR('i', 140, struct lagg_reqport)
 #define	SIOCSLAGGPORT		 _IOW('i', 141, struct lagg_reqport)
 #define	SIOCSLAGGDELPORT	 _IOW('i', 142, struct lagg_reqport)
 
 /* lagg, ports and options */
 struct lagg_reqall {
 	char			ra_ifname[IFNAMSIZ];	/* name of the lagg */
 	u_int			ra_proto;		/* lagg protocol */
 
 	size_t			ra_size;		/* size of buffer */
 	struct lagg_reqport	*ra_port;		/* allocated buffer */
 	int			ra_ports;		/* total port count */
 	union {
 		struct lacp_opreq rpsc_lacp;
 	} ra_psc;
 #define ra_lacpreq	ra_psc.rpsc_lacp
 };
 
 #define	SIOCGLAGG		_IOWR('i', 143, struct lagg_reqall)
 #define	SIOCSLAGG		 _IOW('i', 144, struct lagg_reqall)
 
 struct lagg_reqflags {
 	char			rf_ifname[IFNAMSIZ];	/* name of the lagg */
 	uint32_t		rf_flags;		/* lagg protocol */
 };
 
 #define	SIOCGLAGGFLAGS		_IOWR('i', 145, struct lagg_reqflags)
 #define	SIOCSLAGGHASH		 _IOW('i', 146, struct lagg_reqflags)
 
 struct lagg_reqopts {
 	char			ro_ifname[IFNAMSIZ];	/* name of the lagg */
 
 	int			ro_opts;		/* Option bitmap */
 #define	LAGG_OPT_NONE			0x00
 #define	LAGG_OPT_USE_FLOWID		0x01		/* enable use of flowid */
 /* Pseudo flags which are used in ro_opts but not stored into sc_opts. */
 #define	LAGG_OPT_FLOWIDSHIFT		0x02		/* set flowid shift */
 #define	LAGG_OPT_USE_NUMA		0x04		/* enable use of numa */
 #define	LAGG_OPT_FLOWIDSHIFT_MASK	0x1f		/* flowid is uint32_t */
 #define	LAGG_OPT_LACP_STRICT		0x10		/* LACP strict mode */
 #define	LAGG_OPT_LACP_TXTEST		0x20		/* LACP debug: txtest */
 #define	LAGG_OPT_LACP_RXTEST		0x40		/* LACP debug: rxtest */
 #define	LAGG_OPT_LACP_FAST_TIMO		0x80		/* LACP fast timeout */
 #define	LAGG_OPT_RR_LIMIT		0x100		/* RR stride */
 	u_int			ro_count;		/* number of ports */
 	u_int			ro_active;		/* active port count */
 	u_int			ro_flapping;		/* number of flapping */
 	int			ro_flowid_shift;	/* shift the flowid */
 	uint32_t		ro_bkt;			/* stride for RR */
 };
 
 #define	SIOCGLAGGOPTS		_IOWR('i', 152, struct lagg_reqopts)
 #define	SIOCSLAGGOPTS		 _IOW('i', 153, struct lagg_reqopts)
 
 #define	LAGG_OPT_BITS		"\020\001USE_FLOWID\003USE_NUMA" \
 				"\005LACP_STRICT\006LACP_TXTEST" \
 				"\007LACP_RXTEST\010LACP_FAST_TIMO"
 
 #ifdef _KERNEL
 
 /*
  * Internal kernel part
  */
 
 #define	LAGG_PORTACTIVE(_tp)	(					\
 	((_tp)->lp_ifp->if_link_state == LINK_STATE_UP) &&		\
 	((_tp)->lp_ifp->if_flags & IFF_UP)				\
 )
 
 struct lagg_ifreq {
 	union {
 		struct ifreq ifreq;
 		struct {
 			char ifr_name[IFNAMSIZ];
 			struct sockaddr_storage ifr_ss;
 		} ifreq_storage;
 	} ifreq;
 };
 
 #define	sc_ifflags		sc_ifp->if_flags		/* flags */
 #define	sc_ifname		sc_ifp->if_xname		/* name */
 
 /* Private data used by the loadbalancing protocol */
 struct lagg_lb {
 	u_int32_t		lb_key;
 	struct lagg_port	*lb_ports[LAGG_MAX_PORTS];
 };
 
 struct lagg_mc {
 	struct sockaddr_dl	mc_addr;
 	struct ifmultiaddr      *mc_ifma;
 	SLIST_ENTRY(lagg_mc)	mc_entries;
 };
 
 struct lagg_counters {
 	uint64_t	val[IFCOUNTERS];
 };
 
 struct lagg_softc {
 	struct ifnet			*sc_ifp;	/* virtual interface */
-	struct rmlock			sc_mtx;
+	struct mtx			sc_mtx;		/* watchdog mutex */
 	struct sx			sc_sx;
 	int				sc_proto;	/* lagg protocol */
 	u_int				sc_count;	/* number of ports */
 	u_int				sc_active;	/* active port count */
 	u_int				sc_flapping;	/* number of flapping
 							 * events */
 	struct lagg_port		*sc_primary;	/* primary port */
 	struct ifmedia			sc_media;	/* media config */
 	void				*sc_psc;	/* protocol data */
 	uint32_t			sc_seq;		/* sequence counter */
 	uint32_t			sc_stride;	/* stride for RR */
 	uint32_t			sc_flags;
 	int				sc_destroying;	/* destroying lagg */
 
 	CK_SLIST_HEAD(__tplhd, lagg_port)	sc_ports;	/* list of interfaces */
 	SLIST_ENTRY(lagg_softc)	sc_entries;
 
 	eventhandler_tag vlan_attach;
 	eventhandler_tag vlan_detach;
 	struct callout			sc_callout;
 	u_int				sc_opts;
 	int				flowid_shift;	/* shift the flowid */
 	struct lagg_counters		detached_counters; /* detached ports sum */
+	struct callout			sc_watchdog;	/* watchdog timer */
 };
 
 struct lagg_port {
 	struct ifnet			*lp_ifp;	/* physical interface */
 	struct lagg_softc		*lp_softc;	/* parent lagg */
-	uint8_t				lp_lladdr[ETHER_ADDR_LEN];
+#define	LAGG_ADDR_LEN \
+	MAX(INFINIBAND_ADDR_LEN, ETHER_ADDR_LEN)	/* larger address size */
+	uint8_t				lp_lladdr[LAGG_ADDR_LEN];
 
 	u_char				lp_iftype;	/* interface type */
 	uint32_t			lp_prio;	/* port priority */
 	uint32_t			lp_flags;	/* port flags */
 	int				lp_ifflags;	/* saved ifp flags */
 	int				lp_ifcapenable;	/* saved ifp capenable */
 	void				*lh_cookie;	/* if state hook */
 	void				*lp_psc;	/* protocol data */
 	int				lp_detaching;	/* ifnet is detaching */
 	SLIST_HEAD(__mclhd, lagg_mc)	lp_mc_head;	/* multicast addresses */
 
 	/* Redirected callbacks */
 	int	(*lp_ioctl)(struct ifnet *, u_long, caddr_t);
 	int	(*lp_output)(struct ifnet *, struct mbuf *,
 		     const struct sockaddr *, struct route *);
 	struct lagg_counters		port_counters;	/* ifp counters copy */
 
 	CK_SLIST_ENTRY(lagg_port)		lp_entries;
 	struct epoch_context	lp_epoch_ctx;
 };
 
-extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
+extern struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *);
+extern struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *);
 extern void	(*lagg_linkstate_p)(struct ifnet *, int );
 
 int		lagg_enqueue(struct ifnet *, struct mbuf *);
 
 SYSCTL_DECL(_net_link_lagg);
 
 #endif /* _KERNEL */
 
 #endif /* _NET_LAGG_H */
Index: head/sys/net/if_types.h
===================================================================
--- head/sys/net/if_types.h	(revision 366932)
+++ head/sys/net/if_types.h	(revision 366933)
@@ -1,275 +1,276 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_types.h	8.3 (Berkeley) 4/28/95
  * $FreeBSD$
  * $NetBSD: if_types.h,v 1.16 2000/04/19 06:30:53 itojun Exp $
  */
 
 #ifndef _NET_IF_TYPES_H_
 #define _NET_IF_TYPES_H_
 
 /*
  * Interface types for benefit of parsing media address headers.
  * This list is derived from the SNMP list of ifTypes, originally
  * documented in RFC1573, now maintained as:
  *
  * 	http://www.iana.org/assignments/smi-numbers
  */
 
 typedef enum {
 	IFT_OTHER	= 0x1,		/* none of the following */
 	IFT_1822	= 0x2,		/* old-style arpanet imp */
 	IFT_HDH1822	= 0x3,		/* HDH arpanet imp */
 	IFT_X25DDN	= 0x4,		/* x25 to imp */
 	IFT_X25		= 0x5,		/* PDN X25 interface (RFC877) */
 	IFT_ETHER	= 0x6,		/* Ethernet CSMA/CD */
 	IFT_ISO88023	= 0x7,		/* CMSA/CD */
 	IFT_ISO88024	= 0x8,		/* Token Bus */
 	IFT_ISO88025	= 0x9,		/* Token Ring */
 	IFT_ISO88026	= 0xa,		/* MAN */
 	IFT_STARLAN	= 0xb,
 	IFT_P10		= 0xc,		/* Proteon 10MBit ring */
 	IFT_P80		= 0xd,		/* Proteon 80MBit ring */
 	IFT_HY		= 0xe,		/* Hyperchannel */
 	IFT_FDDI	= 0xf,
 	IFT_LAPB	= 0x10,
 	IFT_SDLC	= 0x11,
 	IFT_T1		= 0x12,
 	IFT_CEPT	= 0x13,		/* E1 - european T1 */
 	IFT_ISDNBASIC	= 0x14,
 	IFT_ISDNPRIMARY	= 0x15,
 	IFT_PTPSERIAL	= 0x16,		/* Proprietary PTP serial */
 	IFT_PPP		= 0x17,		/* RFC 1331 */
 	IFT_LOOP	= 0x18,		/* loopback */
 	IFT_EON		= 0x19,		/* ISO over IP */
 	IFT_XETHER	= 0x1a,		/* obsolete 3MB experimental ethernet */
 	IFT_NSIP	= 0x1b,		/* XNS over IP */
 	IFT_SLIP	= 0x1c,		/* IP over generic TTY */
 	IFT_ULTRA	= 0x1d,		/* Ultra Technologies */
 	IFT_DS3		= 0x1e,		/* Generic T3 */
 	IFT_SIP		= 0x1f,		/* SMDS */
 	IFT_FRELAY	= 0x20,		/* Frame Relay DTE only */
 	IFT_RS232	= 0x21,
 	IFT_PARA	= 0x22,		/* parallel-port */
 	IFT_ARCNET	= 0x23,
 	IFT_ARCNETPLUS	= 0x24,
 	IFT_ATM		= 0x25,		/* ATM cells */
 	IFT_MIOX25	= 0x26,
 	IFT_SONET	= 0x27,		/* SONET or SDH */
 	IFT_X25PLE	= 0x28,
 	IFT_ISO88022LLC	= 0x29,
 	IFT_LOCALTALK	= 0x2a,
 	IFT_SMDSDXI	= 0x2b,
 	IFT_FRELAYDCE	= 0x2c,		/* Frame Relay DCE */
 	IFT_V35		= 0x2d,
 	IFT_HSSI	= 0x2e,
 	IFT_HIPPI	= 0x2f,
 	IFT_MODEM	= 0x30,		/* Generic Modem */
 	IFT_AAL5	= 0x31,		/* AAL5 over ATM */
 	IFT_SONETPATH	= 0x32,
 	IFT_SONETVT	= 0x33,
 	IFT_SMDSICIP	= 0x34,		/* SMDS InterCarrier Interface */
 	IFT_PROPVIRTUAL	= 0x35,		/* Proprietary Virtual/internal */
 	IFT_PROPMUX	= 0x36,		/* Proprietary Multiplexing */
 	IFT_IEEE80212	= 0x37,		/* 100BaseVG */
 	IFT_FIBRECHANNEL = 0x38,	/* Fibre Channel */
 	IFT_HIPPIINTERFACE = 0x39,	/* HIPPI interfaces	 */
 	IFT_FRAMERELAYINTERCONNECT = 0x3a, /* Obsolete, use 0x20 either 0x2c */
 	IFT_AFLANE8023	= 0x3b,		/* ATM Emulated LAN for 802.3 */
 	IFT_AFLANE8025	= 0x3c,		/* ATM Emulated LAN for 802.5 */
 	IFT_CCTEMUL	= 0x3d,		/* ATM Emulated circuit		  */
 	IFT_FASTETHER	= 0x3e,		/* Fast Ethernet (100BaseT) */
 	IFT_ISDN	= 0x3f,		/* ISDN and X.25	    */
 	IFT_V11		= 0x40,		/* CCITT V.11/X.21		*/
 	IFT_V36		= 0x41,		/* CCITT V.36			*/
 	IFT_G703AT64K	= 0x42,		/* CCITT G703 at 64Kbps */
 	IFT_G703AT2MB	= 0x43,		/* Obsolete see DS1-MIB */
 	IFT_QLLC	= 0x44,		/* SNA QLLC			*/
 	IFT_FASTETHERFX	= 0x45,		/* Fast Ethernet (100BaseFX)	*/
 	IFT_CHANNEL	= 0x46,		/* channel			*/
 	IFT_IEEE80211	= 0x47,		/* radio spread spectrum (unused) */
 	IFT_IBM370PARCHAN = 0x48,	/* IBM System 360/370 OEMI Channel */
 	IFT_ESCON	= 0x49,		/* IBM Enterprise Systems Connection */
 	IFT_DLSW	= 0x4a,		/* Data Link Switching */
 	IFT_ISDNS	= 0x4b,		/* ISDN S/T interface */
 	IFT_ISDNU	= 0x4c,		/* ISDN U interface */
 	IFT_LAPD	= 0x4d,		/* Link Access Protocol D */
 	IFT_IPSWITCH	= 0x4e,		/* IP Switching Objects */
 	IFT_RSRB	= 0x4f,		/* Remote Source Route Bridging */
 	IFT_ATMLOGICAL	= 0x50,		/* ATM Logical Port */
 	IFT_DS0		= 0x51,		/* Digital Signal Level 0 */
 	IFT_DS0BUNDLE	= 0x52,		/* group of ds0s on the same ds1 */
 	IFT_BSC		= 0x53,		/* Bisynchronous Protocol */
 	IFT_ASYNC	= 0x54,		/* Asynchronous Protocol */
 	IFT_CNR		= 0x55,		/* Combat Net Radio */
 	IFT_ISO88025DTR	= 0x56,		/* ISO 802.5r DTR */
 	IFT_EPLRS	= 0x57,		/* Ext Pos Loc Report Sys */
 	IFT_ARAP	= 0x58,		/* Appletalk Remote Access Protocol */
 	IFT_PROPCNLS	= 0x59,		/* Proprietary Connectionless Protocol*/
 	IFT_HOSTPAD	= 0x5a,		/* CCITT-ITU X.29 PAD Protocol */
 	IFT_TERMPAD	= 0x5b,		/* CCITT-ITU X.3 PAD Facility */
 	IFT_FRAMERELAYMPI = 0x5c,	/* Multiproto Interconnect over FR */
 	IFT_X213	= 0x5d,		/* CCITT-ITU X213 */
 	IFT_ADSL	= 0x5e,		/* Asymmetric Digital Subscriber Loop */
 	IFT_RADSL	= 0x5f,		/* Rate-Adapt. Digital Subscriber Loop*/
 	IFT_SDSL	= 0x60,		/* Symmetric Digital Subscriber Loop */
 	IFT_VDSL	= 0x61,		/* Very H-Speed Digital Subscrib. Loop*/
 	IFT_ISO88025CRFPINT = 0x62,	/* ISO 802.5 CRFP */
 	IFT_MYRINET	= 0x63,		/* Myricom Myrinet */
 	IFT_VOICEEM	= 0x64,		/* voice recEive and transMit */
 	IFT_VOICEFXO	= 0x65,		/* voice Foreign Exchange Office */
 	IFT_VOICEFXS	= 0x66,		/* voice Foreign Exchange Station */
 	IFT_VOICEENCAP	= 0x67,		/* voice encapsulation */
 	IFT_VOICEOVERIP	= 0x68,		/* voice over IP encapsulation */
 	IFT_ATMDXI	= 0x69,		/* ATM DXI */
 	IFT_ATMFUNI	= 0x6a,		/* ATM FUNI */
 	IFT_ATMIMA	= 0x6b,		/* ATM IMA		      */
 	IFT_PPPMULTILINKBUNDLE = 0x6c,	/* PPP Multilink Bundle */
 	IFT_IPOVERCDLC	= 0x6d,		/* IBM ipOverCdlc */
 	IFT_IPOVERCLAW	= 0x6e,		/* IBM Common Link Access to Workstn */
 	IFT_STACKTOSTACK = 0x6f,	/* IBM stackToStack */
 	IFT_VIRTUALIPADDRESS = 0x70,	/* IBM VIPA */
 	IFT_MPC		= 0x71,		/* IBM multi-protocol channel support */
 	IFT_IPOVERATM	= 0x72,		/* IBM ipOverAtm */
 	IFT_ISO88025FIBER = 0x73,	/* ISO 802.5j Fiber Token Ring */
 	IFT_TDLC	= 0x74,		/* IBM twinaxial data link control */
 	IFT_GIGABITETHERNET = 0x75,	/* Gigabit Ethernet */
 	IFT_HDLC	= 0x76,		/* HDLC */
 	IFT_LAPF	= 0x77,		/* LAP F */
 	IFT_V37		= 0x78,		/* V.37 */
 	IFT_X25MLP	= 0x79,		/* Multi-Link Protocol */
 	IFT_X25HUNTGROUP = 0x7a,	/* X25 Hunt Group */
 	IFT_TRANSPHDLC	= 0x7b,		/* Transp HDLC */
 	IFT_INTERLEAVE	= 0x7c,		/* Interleave channel */
 	IFT_FAST	= 0x7d,		/* Fast channel */
 	IFT_IP		= 0x7e,		/* IP (for APPN HPR in IP networks) */
 	IFT_DOCSCABLEMACLAYER = 0x7f,	/* CATV Mac Layer */
 	IFT_DOCSCABLEDOWNSTREAM = 0x80,	/* CATV Downstream interface */
 	IFT_DOCSCABLEUPSTREAM = 0x81,	/* CATV Upstream interface */
 	IFT_A12MPPSWITCH = 0x82,	/* Avalon Parallel Processor */
 	IFT_TUNNEL	= 0x83,		/* Encapsulation interface */
 	IFT_COFFEE	= 0x84,		/* coffee pot */
 	IFT_CES		= 0x85,		/* Circiut Emulation Service */
 	IFT_ATMSUBINTERFACE = 0x86,	/* (x)  ATM Sub Interface */
 	IFT_L2VLAN	= 0x87,		/* Layer 2 Virtual LAN using 802.1Q */
 	IFT_L3IPVLAN	= 0x88,		/* Layer 3 Virtual LAN - IP Protocol */
 	IFT_L3IPXVLAN	= 0x89,		/* Layer 3 Virtual LAN - IPX Prot. */
 	IFT_DIGITALPOWERLINE = 0x8a,	/* IP over Power Lines */
 	IFT_MEDIAMAILOVERIP = 0x8b,	/* (xxx)  Multimedia Mail over IP */
 	IFT_DTM		= 0x8c,		/* Dynamic synchronous Transfer Mode */
 	IFT_DCN		= 0x8d,		/* Data Communications Network */
 	IFT_IPFORWARD	= 0x8e,		/* IP Forwarding Interface */
 	IFT_MSDSL	= 0x8f,		/* Multi-rate Symmetric DSL */
 	IFT_IEEE1394	= 0x90,		/* IEEE1394 High Performance SerialBus*/
 	IFT_IFGSN	= 0x91,		/* HIPPI-6400 */
 	IFT_DVBRCCMACLAYER = 0x92,	/* DVB-RCC MAC Layer */
 	IFT_DVBRCCDOWNSTREAM = 0x93,	/* DVB-RCC Downstream Channel */
 	IFT_DVBRCCUPSTREAM = 0x94,	/* DVB-RCC Upstream Channel */
 	IFT_ATMVIRTUAL	= 0x95,		/* ATM Virtual Interface */
 	IFT_MPLSTUNNEL	= 0x96,		/* MPLS Tunnel Virtual Interface */
 	IFT_SRP		= 0x97,		/* Spatial Reuse Protocol */
 	IFT_VOICEOVERATM = 0x98,	/* Voice over ATM */
 	IFT_VOICEOVERFRAMERELAY	= 0x99,	/* Voice Over Frame Relay */
 	IFT_IDSL	= 0x9a,		/* Digital Subscriber Loop over ISDN */
 	IFT_COMPOSITELINK = 0x9b,	/* Avici Composite Link Interface */
 	IFT_SS7SIGLINK	= 0x9c,		/* SS7 Signaling Link */
 	IFT_PROPWIRELESSP2P = 0x9d,	/* Prop. P2P wireless interface */
 	IFT_FRFORWARD	= 0x9e,		/* Frame forward Interface */
 	IFT_RFC1483	= 0x9f,		/* Multiprotocol over ATM AAL5 */
 	IFT_USB		= 0xa0,		/* USB Interface */
 	IFT_IEEE8023ADLAG = 0xa1,	/* IEEE 802.3ad Link Aggregate*/
 	IFT_BGPPOLICYACCOUNTING = 0xa2,	/* BGP Policy Accounting */
 	IFT_FRF16MFRBUNDLE = 0xa3,	/* FRF.16 Multilik Frame Relay*/
 	IFT_H323GATEKEEPER = 0xa4,	/* H323 Gatekeeper */
 	IFT_H323PROXY	= 0xa5,		/* H323 Voice and Video Proxy */
 	IFT_MPLS	= 0xa6,		/* MPLS */
 	IFT_MFSIGLINK	= 0xa7,		/* Multi-frequency signaling link */
 	IFT_HDSL2	= 0xa8,		/* High Bit-Rate DSL, 2nd gen. */
 	IFT_SHDSL	= 0xa9,		/* Multirate HDSL2 */
 	IFT_DS1FDL	= 0xaa,		/* Facility Data Link (4Kbps) on a DS1*/
 	IFT_POS		= 0xab,		/* Packet over SONET/SDH Interface */
 	IFT_DVBASILN	= 0xac,		/* DVB-ASI Input */
 	IFT_DVBASIOUT	= 0xad,		/* DVB-ASI Output */
 	IFT_PLC		= 0xae,		/* Power Line Communications */
 	IFT_NFAS	= 0xaf,		/* Non-Facility Associated Signaling */
 	IFT_TR008	= 0xb0,		/* TROO8 */
 	IFT_GR303RDT	= 0xb1,		/* Remote Digital Terminal */
 	IFT_GR303IDT	= 0xb2,		/* Integrated Digital Terminal */
 	IFT_ISUP	= 0xb3,		/* ISUP */
 	IFT_PROPDOCSWIRELESSMACLAYER = 0xb4,	/* prop/Wireless MAC Layer */
 	IFT_PROPDOCSWIRELESSDOWNSTREAM = 0xb5,	/* prop/Wireless Downstream */
 	IFT_PROPDOCSWIRELESSUPSTREAM = 0xb6,	/* prop/Wireless Upstream */
 	IFT_HIPERLAN2	= 0xb7,		/* HIPERLAN Type 2 Radio Interface */
 	IFT_PROPBWAP2MP	= 0xb8,		/* PropBroadbandWirelessAccess P2MP*/
 	IFT_SONETOVERHEADCHANNEL = 0xb9, /* SONET Overhead Channel */
 	IFT_DIGITALWRAPPEROVERHEADCHANNEL = 0xba, /* Digital Wrapper Overhead */
 	IFT_AAL2	= 0xbb,		/* ATM adaptation layer 2 */
 	IFT_RADIOMAC	= 0xbc,		/* MAC layer over radio links */
 	IFT_ATMRADIO	= 0xbd,		/* ATM over radio links */
 	IFT_IMT		= 0xbe,		/* Inter-Machine Trunks */
 	IFT_MVL		= 0xbf,		/* Multiple Virtual Lines DSL */
 	IFT_REACHDSL	= 0xc0,		/* Long Reach DSL */
 	IFT_FRDLCIENDPT	= 0xc1,		/* Frame Relay DLCI End Point */
 	IFT_ATMVCIENDPT	= 0xc2,		/* ATM VCI End Point */
 	IFT_OPTICALCHANNEL = 0xc3,	/* Optical Channel */
 	IFT_OPTICALTRANSPORT = 0xc4,	/* Optical Transport */
 	IFT_INFINIBAND	= 0xc7,		/* Infiniband */
+	IFT_INFINIBANDLAG = 0xc8,	/* Infiniband Link Aggregate */
 	IFT_BRIDGE	= 0xd1,		/* Transparent bridge interface */
 	IFT_STF		= 0xd7,		/* 6to4 interface */
 
 	/*
 	 * Not based on IANA assignments.  Conflicting with IANA assignments.
 	 * We should make them negative probably.
 	 * This requires changes to struct if_data.
 	 */
 	IFT_GIF		= 0xf0,		/* Generic tunnel interface */
 	IFT_PVC		= 0xf1,		/* Unused */
 	IFT_ENC		= 0xf4,		/* Encapsulating interface */
 	IFT_PFLOG	= 0xf6,		/* PF packet filter logging */
 	IFT_PFSYNC	= 0xf7,		/* PF packet filter synchronization */
 } ifType;
 
 /*
  * Some (broken) software uses #ifdef IFT_TYPE to check whether
  * an operating systems supports certain interface type.  Lack of
  * ifdef leads to a piece of functionality compiled out.
  */
 #ifndef BURN_BRIDGES
 #define	IFT_BRIDGE	IFT_BRIDGE
 #define	IFT_PPP		IFT_PPP
 #define	IFT_PROPVIRTUAL	IFT_PROPVIRTUAL
 #define	IFT_L2VLAN	IFT_L2VLAN
 #define	IFT_L3IPVLAN	IFT_L3IPVLAN
 #define	IFT_IEEE1394	IFT_IEEE1394
 #define	IFT_INFINIBAND	IFT_INFINIBAND
 #endif
 
 #endif /* !_NET_IF_TYPES_H_ */