diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 index e059c172dd5c..6cbe30748a13 100644 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -1,3252 +1,3253 @@ .\" Copyright (c) 1983, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94 .\" $FreeBSD$ .\" .Dd June 24, 2022 .Dt IFCONFIG 8 .Os .Sh NAME .Nm ifconfig .Nd configure network interface parameters .Sh SYNOPSIS .Nm .Op Fl kLmn .Op Fl f Ar type Ns Cm \&: Ns Ar format .Ar interface .Op Cm create .Oo .Ar address_family .Oo .Ar address .Op Ar dest_address .Oc .Oc .Op Ar parameters .Nm .Ar interface .Cm destroy .Nm .Fl a .Op Fl dkLmuv .Op Fl f Ar type Ns Cm \&: Ns Ar format .Op Fl G Ar groupname .Op Fl g Ar groupname .Op Ar address_family .Nm .Fl C .Nm .Fl g Ar groupname .Nm .Fl l .Op Fl du .Op Fl g Ar groupname .Op Ar address_family .Nm .Op Fl dkLmuv .Op Fl f Ar type Ns Cm \&: Ns Ar format .Sh DESCRIPTION The .Nm utility is used to assign an address to a network interface and/or configure network interface parameters. The .Nm utility must be used at boot time to define the network address of each interface present on a machine; it may also be used at a later time to redefine an interface's address or other operating parameters. .Pp The following options are available: .Bl -tag -width indent .It Fl a Display information about all interfaces in the system. .Pp The .Fl a flag may be used instead of the .Ar interface argument. .It Fl C List all the interface cloners available on the system, with no additional information. Use of this flag is mutually exclusive with all other flags and commands. .It Fl d Display only the interfaces that are down. .It Fl f Xo .Ar type Ns Cm \&: Ns Ar format Ns .Op Cm \&, Ns Ar type Ns Cm \&: Ns Ar format Ar ... .Xc Control the output format of .Nm . The format is specified as a comma-separated list of .Ar type Ns Cm \&: Ns Ar format pairs .Po see the .Sx EXAMPLES section for more information .Pc . .Pp The output format can also be specified via the .Ev IFCONFIG_FORMAT environment variable. The .Fl f flag can be supplied multiple times. .Pp The .Ar type Ns s and their associated .Ar format strings are: .Pp .Bl -tag -width ether .It Cm addr Adjust the display of inet and inet6 addresses: .Pp .Bl -tag -width default -compact .It Cm default Default format, .Cm numeric .It Cm fqdn Fully qualified domain names .Pq FQDN .It Cm host Unqualified hostnames .It Cm numeric Numeric format .El .It Cm ether Adjust the display of link-level ethernet (MAC) addresses: .Pp .Bl -tag -width default -compact .It Cm colon Separate address segments with a colon .It Cm dash Separate address segments with a dash .It Cm default Default format, .Cm colon .El .It Cm inet Adjust the display of inet address subnet masks: .Pp .Bl -tag -width default -compact .It Cm cidr CIDR notation, for example: .Ql 203.0.113.224/26 .It Cm default Default format, .Cm hex .It Cm dotted Dotted quad notation, for example: .Ql 255.255.255.192 .It Cm hex Hexadecimal format, for example: .Ql 0xffffffc0 .El .It Cm inet6 Adjust the display of inet6 address prefixes (subnet masks): .Pp .Bl -tag -width default -compact .It Cm cidr CIDR notation, for example: .Ql ::1/128 or .Ql fe80::1%lo0/64 .It Cm default Default format, .Cm numeric .It Cm numeric Integer format, for example: .Ql prefixlen 64 .El .El .It Fl G Ar groupname Exclude members of the specified .Ar groupname from the output. .Ar groupname . .Pp Only one option .Fl G should be specified as later override previous ones .Ar groupname may contain shell patterns in which case it should be quoted. .Pp Setting .Ar groupname to .Cm all selects all interfaces. .It Fl g Ar groupname Limit the output to the members of the specified .Ar groupname . .Pp If .Fl g is specified before other significant flags like, e.g., .Fl a , .Fl l , or .Fl C , then .Nm lists names of interfaces belonging to .Ar groupname . Any other flags and arguments are ignored in this case. .Pp Only one option .Fl g should be specified as later override previous ones .Ar groupname may contain shell patterns in which case it should be quoted. .Pp Setting .Ar groupname to .Cm all selects all interfaces. .It Fl k Print keying information for the .Ar interface , if available. .Pp For example, the values of 802.11 WEP keys and .Xr carp 4 passphrases will be printed, if accessible to the current user. .Pp This information is not printed by default, as it may be considered sensitive. .It Fl L Display address lifetime for IPv6 addresses as time offset string. .It Fl l List all available interfaces on the system, with no other additional information. .Pp If an .Ar address_family is specified, only interfaces of that type will be listed. .Pp If the .Ar address_family is set to .Cm ether , then .Fl l will exclude loopback interfaces from the list of Ethernet interfaces. This is a special case, because all the other synonyms of the .Cm link address family will include loopback interfaces in the list. .Pp Use of this flag is mutually exclusive with all other flags and commands, except for .Fl d , .Fl g , and .Fl u . .It Fl m Display the capability list and all of the supported media for the specified interface. .It Fl n Disable automatic loading of network interface drivers. .Pp By default if the network interface driver is not present in the kernel then .Nm will attempt to load it. .It Fl u Display only the interfaces that are up. .It Fl v Get more verbose status for an interface. .It Ar address For the inet family, the address is either a host name present in the host name data base, .Xr hosts 5 , or an IPv4 address expressed in the Internet standard .Dq dot notation . .Pp It is also possible to use the CIDR notation (also known as the slash notation) to include the netmask. That is, one can specify an address like .Li 192.168.0.1/16 . .Pp For the .Cm inet6 family, it is also possible to specify the prefix length using the slash notation, like .Li ::1/128 . See the .Cm prefixlen parameter below for more information. .Pp The link-level .Pq Cm link address is specified as a series of colon-separated hex digits. This can be used to, for example, set a new MAC address on an Ethernet interface, though the mechanism used is not Ethernet specific. .Pp Use the .Cm random keyword to set a randomly generated MAC address. A randomly-generated MAC address might be the same as one already in use in the network. Such duplications are extremely unlikely. .Pp If the interface is already up when the link-level address is modified, it will be briefly brought down and then brought back up again in order to ensure that the receive filter in the underlying Ethernet hardware is properly reprogrammed. .It Ar address_family Specify the address family which affects interpretation of the remaining parameters. Since an interface can receive transmissions in differing protocols with different naming schemes, specifying the address family is recommended. The address or protocol families currently supported are: .Bl -tag .It Cm ether Synonymous with .Cm link .Po with some exceptions, see .Fl l .Pc . .It Cm inet Default, if available. .It Cm inet6 .It Cm link Default, if .Cm inet is not available. .It Cm lladdr Synonymous with .Cm link . .El .It Ar dest_address Specify the address of the correspondent on the other end of a point to point link. .It Ar interface This parameter is a string of the form .Dq name unit , for example, .Dq Li em0 . .El .Pp The .Nm utility displays the current configuration for a network interface when no optional parameters are supplied. If a protocol family is specified, .Nm will report only the details specific to that protocol family. .Pp When no arguments are given, .Fl a is implied. .Pp Only the super-user may modify the configuration of a network interface. .Sh PARAMETERS The following .Ar parameter Ns s may be set with .Nm : .Bl -tag -width indent .It Cm add Another name for the .Cm alias parameter. Introduced for compatibility with .Bsx . .It Cm alias Establish an additional network address for this interface. This is sometimes useful when changing network numbers, and one wishes to accept packets addressed to the old interface. If the address is on the same subnet as the first network address for this interface, a non-conflicting netmask must be given. Usually .Li 0xffffffff is most appropriate. .It Fl alias Remove the network address specified. This would be used if you incorrectly specified an alias, or it was no longer needed. If you have incorrectly set an NS address having the side effect of specifying the host portion, removing all NS addresses will allow you to respecify the host portion. .It Cm anycast (Inet6 only.) Specify that the address configured is an anycast address. Based on the current specification, only routers may configure anycast addresses. Anycast address will not be used as source address of any of outgoing IPv6 packets. .It Cm arp Enable the use of the Address Resolution Protocol .Pq Xr arp 4 in mapping between network level addresses and link level addresses (default). This is currently implemented for mapping between Internet Protocol addresses and IEEE 802 48-bit MAC addresses (Ethernet addresses). .It Fl arp Disable the use of the Address Resolution Protocol .Pq Xr arp 4 . .It Cm staticarp If the Address Resolution Protocol is enabled, the host will only reply to requests for its addresses, and will never send any requests. .It Fl staticarp If the Address Resolution Protocol is enabled, the host will perform normally, sending out requests and listening for replies. .It Cm stickyarp Enable the so-called sticky ARP mode for the interface. If this option is enabled on the given interface, any resolved address is marked as a static one and never expires. This may be used to increase security of the network by preventing ARP spoofing or to reduce latency for high-performance Ethernet networks where the time needed for ARP resolution is too high. Please note that a similar feature is also provided for bridges. See the sticky option in the .Sx Bridge Interface Parameters section. Enabling this option may impact techniques which rely on ARP expiration/overwriting feature such as load-balancers or high-availabity solutions such as .Xr carp 4 . .It Fl stickyarp Disable the so-called sticky ARP mode for the interface (default). Resolved addresses will expire normally respecting the kernel ARP configuration. .It Cm broadcast (Inet only.) Specify the address to use to represent broadcasts to the network. The default broadcast address is the address with a host part of all 1's. .It Cm debug Enable driver dependent debugging code; usually, this turns on extra console error logging. .It Fl debug Disable driver dependent debugging code. .It Cm promisc Put interface into permanently promiscuous mode. .It Fl promisc Disable permanently promiscuous mode. .It Cm delete Another name for the .Fl alias parameter. .It Cm description Ar value , Cm descr Ar value Specify a description of the interface. This can be used to label interfaces in situations where they may otherwise be difficult to distinguish. .It Cm -description , Cm -descr Clear the interface description. .It Cm down Mark an interface .Dq down . When an interface is marked .Dq down , the system will not attempt to transmit messages through that interface. If possible, the interface will be reset to disable reception as well. This action does not automatically disable routes using the interface. .It Cm group Ar groupname Assign the interface to a .Dq group . Any interface can be in multiple groups. .Pp Cloned interfaces are members of their interface family group by default. For example, a PPP interface such as .Em ppp0 is a member of the PPP interface family group, .Em ppp . .\" The interface(s) the default route(s) point to are members of the .\" .Em egress .\" interface group. .It Cm -group Ar groupname Remove the interface from the given .Dq group . .It Cm eui64 (Inet6 only.) Fill interface index (lowermost 64bit of an IPv6 address) automatically. .It Cm fib Ar fib_number Specify interface FIB. A FIB .Ar fib_number is assigned to all frames or packets received on that interface. The FIB is not inherited, e.g., vlans or other sub-interfaces will use the default FIB (0) irrespective of the parent interface's FIB. The kernel needs to be tuned to support more than the default FIB using the .Va ROUTETABLES kernel configuration option, or the .Va net.fibs tunable. .It Cm tunnelfib Ar fib_number Specify tunnel FIB. A FIB .Ar fib_number is assigned to all packets encapsulated by tunnel interface, e.g., -.Xr gif 4 +.Xr gif 4 , +.Xr gre 4 and -.Xr gre 4 . +.Xr vxlan 4 . .It Cm maclabel Ar label If Mandatory Access Control support is enabled in the kernel, set the MAC label to .Ar label . .\" (see .\" .Xr maclabel 7 ) . .It Cm media Ar type If the driver supports the media selection system, set the media type of the interface to .Ar type . Some interfaces support the mutually exclusive use of one of several different physical media connectors. For example, a 10Mbit/s Ethernet interface might support the use of either AUI or twisted pair connectors. Setting the media type to .Cm 10base5/AUI would change the currently active connector to the AUI port. Setting it to .Cm 10baseT/UTP would activate twisted pair. Refer to the interfaces' driver specific documentation or man page for a complete list of the available types. .It Cm mediaopt Ar opts If the driver supports the media selection system, set the specified media options on the interface. The .Ar opts argument is a comma delimited list of options to apply to the interface. Refer to the interfaces' driver specific man page for a complete list of available options. .It Fl mediaopt Ar opts If the driver supports the media selection system, disable the specified media options on the interface. .It Cm mode Ar mode If the driver supports the media selection system, set the specified operating mode on the interface to .Ar mode . For IEEE 802.11 wireless interfaces that support multiple operating modes this directive is used to select between 802.11a .Pq Cm 11a , 802.11b .Pq Cm 11b , and 802.11g .Pq Cm 11g operating modes. .It Cm txrtlmt Set if the driver supports TX rate limiting. .It Cm inst Ar minst , Cm instance Ar minst Set the media instance to .Ar minst . This is useful for devices which have multiple physical layer interfaces .Pq PHYs . .It Cm name Ar name Set the interface name to .Ar name . .It Cm rxcsum , txcsum , rxcsum6 , txcsum6 If the driver supports user-configurable checksum offloading, enable receive (or transmit) checksum offloading on the interface. The feature can be turned on selectively per protocol family. Use .Cm rxcsum6 , txcsum6 for .Xr ip6 4 or .Cm rxcsum , txcsum otherwise. Some drivers may not be able to enable these flags independently of each other, so setting one may also set the other. The driver will offload as much checksum work as it can reliably support, the exact level of offloading varies between drivers. .It Fl rxcsum , txcsum , rxcsum6 , txcsum6 If the driver supports user-configurable checksum offloading, disable receive (or transmit) checksum offloading on the interface. The feature can be turned off selectively per protocol family. Use .Fl rxcsum6 , txcsum6 for .Xr ip6 4 or .Fl rxcsum , txcsum otherwise. These settings may not always be independent of each other. .It Cm tso If the driver supports .Xr tcp 4 segmentation offloading, enable TSO on the interface. Some drivers may not be able to support TSO for .Xr ip 4 and .Xr ip6 4 packets, so they may enable only one of them. .It Fl tso If the driver supports .Xr tcp 4 segmentation offloading, disable TSO on the interface. It will always disable TSO for .Xr ip 4 and .Xr ip6 4 . .It Cm tso6 , tso4 If the driver supports .Xr tcp 4 segmentation offloading for .Xr ip6 4 or .Xr ip 4 use one of these to selectively enabled it only for one protocol family. .It Fl tso6 , tso4 If the driver supports .Xr tcp 4 segmentation offloading for .Xr ip6 4 or .Xr ip 4 use one of these to selectively disable it only for one protocol family. .It Cm lro If the driver supports .Xr tcp 4 large receive offloading, enable LRO on the interface. .It Fl lro If the driver supports .Xr tcp 4 large receive offloading, disable LRO on the interface. .It Cm txtls Transmit TLS offload encrypts Transport Layer Security (TLS) records and segments the encrypted record into one or more .Xr tcp 4 segments over either .Xr ip 4 or .Xr ip6 4 . If the driver supports transmit TLS offload, enable transmit TLS offload on the interface. Some drivers may not be able to support transmit TLS offload for .Xr ip 4 and .Xr ip6 4 packets, so they may enable only one of them. .It Fl txtls If the driver supports transmit TLS offload, disable transmit TLS offload on the interface. It will always disable TLS for .Xr ip 4 and .Xr ip6 4 . .It Cm txtlsrtlmt Enable use of rate limiting (packet pacing) for TLS offload. .It Fl txtlsrtlmt Disable use of rate limiting for TLS offload. .It Cm mextpg If the driver supports extended multi-page .Xr mbuf 9 buffers, enable them on the interface. .It Fl mextpg If the driver supports extended multi-page .Xr mbuf 9 biffers, disable them on the interface. .It Cm wol , wol_ucast , wol_mcast , wol_magic Enable Wake On Lan (WOL) support, if available. WOL is a facility whereby a machine in a low power state may be woken in response to a received packet. There are three types of packets that may wake a system: ucast (directed solely to the machine's mac address), mcast (directed to a broadcast or multicast address), or magic .Po unicast or multicast frames with a .Dq magic contents .Pc . Not all devices support WOL, those that do indicate the mechanisms they support in their capabilities. .Cm wol is a synonym for enabling all available WOL mechanisms. To disable WOL use .Fl wol . .It Cm vlanmtu , vlanhwtag , vlanhwfilter , vlanhwcsum , vlanhwtso If the driver offers user-configurable VLAN support, enable reception of extended frames, tag processing in hardware, frame filtering in hardware, checksum offloading, or TSO on VLAN, respectively. Note that this must be configured on a physical interface associated with .Xr vlan 4 , not on a .Xr vlan 4 interface itself. .It Fl vlanmtu , vlanhwtag , vlanhwfilter , vlanhwcsum , vlanhwtso If the driver offers user-configurable VLAN support, disable reception of extended frames, tag processing in hardware, frame filtering in hardware, checksum offloading, or TSO on VLAN, respectively. .It Cm vxlanhwcsum , vxlanhwtso If the driver offers user-configurable VXLAN support, enable inner checksum offloading (receive and transmit) or TSO on VXLAN, respectively. Note that this must be configured on a physical interface associated with .Xr vxlan 4 , not on a .Xr vxlan 4 interface itself. The physical interface is either the interface specified as the vxlandev or the interface hosting the vxlanlocal address. The driver will offload as much checksum work and TSO as it can reliably support, the exact level of offloading may vary between drivers. .It Fl vxlanhwcsum , vxlanhwtso If the driver offers user-configurable VXLAN support, disable checksum offloading (receive and transmit) or TSO on VXLAN, respectively. .It Cm vnet Ar jail Move the interface to the .Xr jail 8 , specified by name or JID. If the jail has a virtual network stack, the interface will disappear from the current environment and become visible to the jail. .It Fl vnet Ar jail Reclaim the interface from the .Xr jail 8 , specified by name or JID. If the jail has a virtual network stack, the interface will disappear from the jail, and become visible to the current network environment. .It Cm polling Turn on .Xr polling 4 feature and disable interrupts on the interface, if driver supports this mode. .It Fl polling Turn off .Xr polling 4 feature and enable interrupt mode on the interface. .It Cm create Create the specified network pseudo-device. If the interface is given without a unit number, try to create a new device with an arbitrary unit number. If creation of an arbitrary device is successful, the new device name is printed to standard output unless the interface is renamed or destroyed in the same .Nm invocation. .It Cm destroy Destroy the specified network pseudo-device. .It Cm plumb Another name for the .Cm create parameter. Included for Solaris compatibility. .It Cm unplumb Another name for the .Cm destroy parameter. Included for Solaris compatibility. .It Cm metric Ar n Set the routing metric of the interface to .Ar n , default 0. The routing metric is used by the routing protocol .Pq Xr routed 8 . Higher metrics have the effect of making a route less favorable; metrics are counted as additional hops to the destination network or host. .It Cm mtu Ar n Set the maximum transmission unit of the interface to .Ar n , default is interface specific. The MTU is used to limit the size of packets that are transmitted on an interface. Not all interfaces support setting the MTU, and some interfaces have range restrictions. .It Cm netmask Ar mask .\" (Inet and ISO.) (Inet only.) Specify how much of the address to reserve for subdividing networks into sub-networks. The mask includes the network part of the local address and the subnet part, which is taken from the host field of the address. The mask can be specified as a single hexadecimal number with a leading .Ql 0x , with a dot-notation Internet address, or with a pseudo-network name listed in the network table .Xr networks 5 . The mask contains 1's for the bit positions in the 32-bit address which are to be used for the network and subnet parts, and 0's for the host part. The mask should contain at least the standard network portion, and the subnet field should be contiguous with the network portion. .Pp The netmask can also be specified in CIDR notation after the address. See the .Ar address option above for more information. .It Cm prefixlen Ar len (Inet6 only.) Specify that .Ar len bits are reserved for subdividing networks into sub-networks. The .Ar len must be integer, and for syntactical reason it must be between 0 to 128. It is almost always 64 under the current IPv6 assignment rule. If the parameter is omitted, 64 is used. .Pp The prefix can also be specified using the slash notation after the address. See the .Ar address option above for more information. .It Cm remove Another name for the .Fl alias parameter. Introduced for compatibility with .Bsx . .Sm off .It Cm link Op Cm 0 No - Cm 2 .Sm on Enable special processing of the link level of the interface. These three options are interface specific in actual effect, however, they are in general used to select special modes of operation. An example of this is to enable SLIP compression, or to select the connector type for some Ethernet cards. Refer to the man page for the specific driver for more information. .Sm off .It Fl link Op Cm 0 No - Cm 2 .Sm on Disable special processing at the link level with the specified interface. .It Cm monitor Put the interface in monitor mode. No packets are transmitted, and received packets are discarded after .Xr bpf 4 processing. .It Fl monitor Take the interface out of monitor mode. .It Cm pcp Ar priority_code_point Priority code point .Pq Dv PCP is an 3-bit field which refers to the IEEE 802.1p class of service and maps to the frame priority level. .It Fl pcp Stop tagging packets on the interface w/ the priority code point. .It Cm up Mark an interface .Dq up . This may be used to enable an interface after an .Dq Nm Cm down . It happens automatically when setting the first address on an interface. If the interface was reset when previously marked down, the hardware will be re-initialized. .El .Ss ICMPv6 Neighbor Discovery Protocol Parameters The following parameters are for ICMPv6 Neighbor Discovery Protocol. Note that the address family keyword .Dq Li inet6 is needed for them: .Bl -tag -width indent .It Cm accept_rtadv Set a flag to enable accepting ICMPv6 Router Advertisement messages. The .Xr sysctl 8 variable .Va net.inet6.ip6.accept_rtadv controls whether this flag is set by default or not. .It Cm -accept_rtadv Clear a flag .Cm accept_rtadv . .It Cm no_radr Set a flag to control whether routers from which the system accepts Router Advertisement messages will be added to the Default Router List or not. When the .Cm accept_rtadv flag is disabled, this flag has no effect. The .Xr sysctl 8 variable .Va net.inet6.ip6.no_radr controls whether this flag is set by default or not. .It Cm -no_radr Clear a flag .Cm no_radr . .It Cm auto_linklocal Set a flag to perform automatic link-local address configuration when the interface becomes available. The .Xr sysctl 8 variable .Va net.inet6.ip6.auto_linklocal controls whether this flag is set by default or not. .It Cm -auto_linklocal Clear a flag .Cm auto_linklocal . .It Cm defaultif Set the specified interface as the default route when there is no default router. .It Cm -defaultif Clear a flag .Cm defaultif . .It Cm ifdisabled Set a flag to disable all of IPv6 network communications on the specified interface. Note that if there are already configured IPv6 addresses on that interface, all of them are marked as .Dq tentative and DAD will be performed when this flag is cleared. .It Cm -ifdisabled Clear a flag .Cm ifdisabled . When this flag is cleared and .Cm auto_linklocal flag is enabled, automatic configuration of a link-local address is performed. .It Cm nud Set a flag to enable Neighbor Unreachability Detection. .It Cm -nud Clear a flag .Cm nud . .It Cm no_prefer_iface Set a flag to not honor rule 5 of source address selection in RFC 3484. In practice this means the address on the outgoing interface will not be preferred, effectively yielding the decision to the address selection policy table, configurable with .Xr ip6addrctl 8 . .It Cm -no_prefer_iface Clear a flag .Cm no_prefer_iface . .It Cm no_dad Set a flag to disable Duplicate Address Detection. .It Cm -no_dad Clear a flag .Cm no_dad . .El .Ss IPv6 Parameters The following parameters are specific for IPv6 addresses. Note that the address family keyword .Dq Li inet6 is needed for them: .Bl -tag -width indent .It Cm autoconf Set the IPv6 autoconfigured address bit. .It Fl autoconf Clear the IPv6 autoconfigured address bit. .It Cm deprecated Set the IPv6 deprecated address bit. .It Fl deprecated Clear the IPv6 deprecated address bit. .It Cm pltime Ar n Set preferred lifetime for the address. .It Cm prefer_source Set a flag to prefer address as a candidate of the source address for outgoing packets. .It Cm -prefer_source Clear a flag .Cm prefer_source . .It Cm vltime Ar n Set valid lifetime for the address. .El .Ss IEEE 802.11 Wireless Interfaces Cloning Parameters The following parameters are specific to cloning IEEE 802.11 wireless interfaces with the .Cm create request: .Bl -tag -width indent .It Cm wlandev Ar device Use .Ar device as the parent for the cloned device. .It Cm wlanmode Ar mode Specify the operating mode for this cloned device. .Ar mode is one of .Cm sta , .Cm ahdemo (or .Cm adhoc-demo ) , .Cm ibss (or .Cm adhoc ) , .Cm ap (or .Cm hostap ) , .Cm wds , .Cm tdma , .Cm mesh , and .Cm monitor . The operating mode of a cloned interface cannot be changed. The .Cm tdma mode is actually implemented as an .Cm adhoc-demo interface with special properties. .It Cm wlanbssid Ar bssid The 802.11 mac address to use for the bssid. This must be specified at create time for a legacy .Cm wds device. .It Cm wlanaddr Ar address The local mac address. If this is not specified then a mac address will automatically be assigned to the cloned device. Typically this address is the same as the address of the parent device but if the .Cm bssid parameter is specified then the driver will craft a unique address for the device (if supported). .It Cm wdslegacy Mark a .Cm wds device as operating in .Dq legacy mode . Legacy .Cm wds devices have a fixed peer relationship and do not, for example, roam if their peer stops communicating. For completeness a Dynamic WDS (DWDS) interface may marked as .Fl wdslegacy . .It Cm bssid Request a unique local mac address for the cloned device. This is only possible if the device supports multiple mac addresses. To force use of the parent's mac address use .Fl bssid . .It Cm beacons Mark the cloned interface as depending on hardware support to track received beacons. To have beacons tracked in software use .Fl beacons . For .Cm hostap mode .Fl beacons can also be used to indicate no beacons should be transmitted; this can be useful when creating a WDS configuration but .Cm wds interfaces can only be created as companions to an access point. .El .Ss Cloned IEEE 802.11 Wireless Interface Parameters The following parameters are specific to IEEE 802.11 wireless interfaces cloned with a .Cm create operation: .Bl -tag -width indent .It Cm ampdu Enable sending and receiving AMPDU frames when using 802.11n (default). The 802.11n specification states a compliant station must be capable of receiving AMPDU frames but transmission is optional. Use .Fl ampdu to disable all use of AMPDU with 802.11n. For testing and/or to work around interoperability problems one can use .Cm ampdutx and .Cm ampdurx to control use of AMPDU in one direction. .It Cm ampdudensity Ar density Set the AMPDU density parameter used when operating with 802.11n. This parameter controls the inter-packet gap for AMPDU frames. The sending device normally controls this setting but a receiving station may request wider gaps. Legal values for .Ar density are 0, .25, .5, 1, 2, 4, 8, and 16 (microseconds). A value of .Cm - is treated the same as 0. .It Cm ampdulimit Ar limit Set the limit on packet size for receiving AMPDU frames when operating with 802.11n. Legal values for .Ar limit are 8192, 16384, 32768, and 65536 but one can also specify just the unique prefix: 8, 16, 32, 64. Note the sender may limit the size of AMPDU frames to be less than the maximum specified by the receiving station. .It Cm amsdu Enable sending and receiving AMSDU frames when using 802.11n. By default AMSDU is received but not transmitted. Use .Fl amsdu to disable all use of AMSDU with 802.11n. For testing and/or to work around interoperability problems one can use .Cm amsdutx and .Cm amsdurx to control use of AMSDU in one direction. .It Cm amsdulimit Ar limit Set the limit on packet size for sending and receiving AMSDU frames when operating with 802.11n. Legal values for .Ar limit are 7935 and 3839 (bytes). Note the sender may limit the size of AMSDU frames to be less than the maximum specified by the receiving station. Note also that devices are not required to support the 7935 limit, only 3839 is required by the specification and the larger value may require more memory to be dedicated to support functionality that is rarely used. .It Cm apbridge When operating as an access point, pass packets between wireless clients directly (default). To instead let them pass up through the system and be forwarded using some other mechanism, use .Fl apbridge . Disabling the internal bridging is useful when traffic is to be processed with packet filtering. .It Cm authmode Ar mode Set the desired authentication mode in infrastructure mode. Not all adapters support all modes. The set of valid modes is .Cm none , open , shared (shared key), .Cm 8021x (IEEE 802.1x), and .Cm wpa (IEEE WPA/WPA2/802.11i). The .Cm 8021x and .Cm wpa modes are only useful when using an authentication service (a supplicant for client operation or an authenticator when operating as an access point). Modes are case insensitive. .It Cm bgscan Enable background scanning when operating as a station. Background scanning is a technique whereby a station associated to an access point will temporarily leave the channel to scan for neighboring stations. This allows a station to maintain a cache of nearby access points so that roaming between access points can be done without a lengthy scan operation. Background scanning is done only when a station is not busy and any outbound traffic will cancel a scan operation. Background scanning should never cause packets to be lost though there may be some small latency if outbound traffic interrupts a scan operation. By default background scanning is enabled if the device is capable. To disable background scanning, use .Fl bgscan . Background scanning is controlled by the .Cm bgscanidle and .Cm bgscanintvl parameters. Background scanning must be enabled for roaming; this is an artifact of the current implementation and may not be required in the future. .It Cm bgscanidle Ar idletime Set the minimum time a station must be idle (not transmitting or receiving frames) before a background scan is initiated. The .Ar idletime parameter is specified in milliseconds. By default a station must be idle at least 250 milliseconds before a background scan is initiated. The idle time may not be set to less than 100 milliseconds. .It Cm bgscanintvl Ar interval Set the interval at which background scanning is attempted. The .Ar interval parameter is specified in seconds. By default a background scan is considered every 300 seconds (5 minutes). The .Ar interval may not be set to less than 15 seconds. .It Cm bintval Ar interval Set the interval at which beacon frames are sent when operating in ad-hoc or ap mode. The .Ar interval parameter is specified in TU's (1024 usecs). By default beacon frames are transmitted every 100 TU's. .It Cm bmissthreshold Ar count Set the number of consecutive missed beacons at which the station will attempt to roam (i.e., search for a new access point). The .Ar count parameter must be in the range 1 to 255; though the upper bound may be reduced according to device capabilities. The default threshold is 7 consecutive missed beacons; but this may be overridden by the device driver. Another name for the .Cm bmissthreshold parameter is .Cm bmiss . .It Cm bssid Ar address Specify the MAC address of the access point to use when operating as a station in a BSS network. This overrides any automatic selection done by the system. To disable a previously selected access point, supply .Cm any , none , or .Cm - for the address. This option is useful when more than one access point uses the same SSID. Another name for the .Cm bssid parameter is .Cm ap . .It Cm burst Enable packet bursting. Packet bursting is a transmission technique whereby the wireless medium is acquired once to send multiple frames and the interframe spacing is reduced. This technique can significantly increase throughput by reducing transmission overhead. Packet bursting is supported by the 802.11e QoS specification and some devices that do not support QoS may still be capable. By default packet bursting is enabled if a device is capable of doing it. To disable packet bursting, use .Fl burst . .It Cm chanlist Ar channels Set the desired channels to use when scanning for access points, neighbors in an IBSS network, or looking for unoccupied channels when operating as an access point. The set of channels is specified as a comma-separated list with each element in the list representing either a single channel number or a range of the form .Dq Li a-b . Channel numbers must be in the range 1 to 255 and be permissible according to the operating characteristics of the device. .It Cm channel Ar number Set a single desired channel. Channels range from 1 to 255, but the exact selection available depends on the region your adaptor was manufactured for. Setting the channel to .Cm any , or .Dq Cm - will clear any desired channel and, if the device is marked up, force a scan for a channel to operate on. Alternatively the frequency, in megahertz, may be specified instead of the channel number. .Pp When there are several ways to use a channel the channel number/frequency may be appended with attributes to clarify. For example, if a device is capable of operating on channel 6 with 802.11n and 802.11g then one can specify that g-only use should be used by specifying .Cm 6:g . Similarly the channel width can be specified by appending it with .Dq Cm \&/ ; e.g., .Cm 6/40 specifies a 40MHz wide channel. These attributes can be combined as in: .Cm 6:ht/40 . .Pp The full set of flags specified following a .Dq Cm \&: are: .Pp .Bl -tag -compact .It Cm a 802.11a .It Cm b 802.11b .It Cm d Atheros Dynamic Turbo mode .It Cm g 802.11g .It Cm h Same as .Cm n .It Cm n 802.11n aka HT .It Cm s Atheros Static Turbo mode .It Cm t Atheros Dynamic Turbo mode, or appended to .Cm st and .Cm dt .El .Pp The full set of channel widths following a .Cm \&/ are: .Pp .Bl -tag -compact .It Cm 5 5MHz aka quarter-rate channel .It Cm 10 10MHz aka half-rate channel .It Cm 20 20MHz mostly for use in specifying .Cm ht20 .It Cm 40 40MHz mostly for use in specifying .Cm ht40 .El .Pp In addition, a 40MHz HT channel specification may include the location of the extension channel by appending .Dq Cm \&+ or .Dq Cm \&- for above and below, respectively; e.g., .Cm 2437:ht/40+ specifies 40MHz wide HT operation with the center channel at frequency 2437 and the extension channel above. .It Cm country Ar name Set the country code to use in calculating the regulatory constraints for operation. In particular the set of available channels, how the wireless device will operation on the channels, and the maximum transmit power that can be used on a channel are defined by this setting. Country/Region codes are specified as a 2-character abbreviation defined by ISO 3166 or using a longer, but possibly ambiguous, spelling; e.g., "ES" and "Spain". The set of country codes are taken from .Pa /etc/regdomain.xml and can also be viewed with the .Cm list countries request. Note that not all devices support changing the country code from a default setting; typically stored in EEPROM. See also .Cm regdomain , .Cm indoor , .Cm outdoor , and .Cm anywhere . .It Cm dfs Enable Dynamic Frequency Selection (DFS) as specified in 802.11h. DFS embodies several facilities including detection of overlapping radar signals, dynamic transmit power control, and channel selection according to a least-congested criteria. DFS support is mandatory for some 5GHz frequencies in certain locales (e.g., ETSI). By default DFS is enabled according to the regulatory definitions specified in .Pa /etc/regdomain.xml and the current country code, regdomain, and channel. Note the underlying device (and driver) must support radar detection for full DFS support to work. To be fully compliant with the local regulatory agency frequencies that require DFS should not be used unless it is fully supported. Use .Fl dfs to disable this functionality for testing. .It Cm dotd Enable support for the 802.11d specification (default). When this support is enabled in station mode, beacon frames that advertise a country code different than the currently configured country code will cause an event to be dispatched to user applications. This event can be used by the station to adopt that country code and operate according to the associated regulatory constraints. When operating as an access point with 802.11d enabled the beacon and probe response frames transmitted will advertise the current regulatory domain settings. To disable 802.11d use .Fl dotd . .It Cm doth Enable 802.11h support including spectrum management. When 802.11h is enabled beacon and probe response frames will have the SpectrumMgt bit set in the capabilities field and country and power constraint information elements will be present. 802.11h support also includes handling Channel Switch Announcements (CSA) which are a mechanism to coordinate channel changes by an access point. By default 802.11h is enabled if the device is capable. To disable 802.11h use .Fl doth . .It Cm deftxkey Ar index Set the default key to use for transmission. Typically this is only set when using WEP encryption. Note that you must set a default transmit key for the system to know which key to use in encrypting outbound traffic. The .Cm weptxkey is an alias for this request; it is provided for backwards compatibility. .It Cm dtimperiod Ar period Set the DTIM period for transmitting buffered multicast data frames when operating in ap mode. The .Ar period specifies the number of beacon intervals between DTIM and must be in the range 1 to 15. By default DTIM is 1 (i.e., DTIM occurs at each beacon). .It Cm quiet Enable the use of quiet IE. Hostap will use this to silence other stations to reduce interference for radar detection when operating on 5GHz frequency and doth support is enabled. Use .Fl quiet to disable this functionality. .It Cm quiet_period Ar period Set the QUIET .Ar period to the number of beacon intervals between the start of regularly scheduled quiet intervals defined by Quiet element. .It Cm quiet_count Ar count Set the QUIET .Ar count to the number of TBTTs until the beacon interval during which the next quiet interval shall start. A value of 1 indicates the quiet interval will start during the beacon interval starting at the next TBTT. A value 0 is reserved. .It Cm quiet_offset Ar offset Set the QUIET .Ar offset to the offset of the start of the quiet interval from the TBTT specified by the Quiet count, expressed in TUs. The value of the .Ar offset shall be less than one beacon interval. .It Cm quiet_duration Ar dur Set the QUIET .Ar dur to the duration of the Quiet interval, expressed in TUs. The value should be less than beacon interval. .It Cm dturbo Enable the use of Atheros Dynamic Turbo mode when communicating with another Dynamic Turbo-capable station. Dynamic Turbo mode is an Atheros-specific mechanism by which stations switch between normal 802.11 operation and a .Dq boosted mode in which a 40MHz wide channel is used for communication. Stations using Dynamic Turbo mode operate boosted only when the channel is free of non-dturbo stations; when a non-dturbo station is identified on the channel all stations will automatically drop back to normal operation. By default, Dynamic Turbo mode is not enabled, even if the device is capable. Note that turbo mode (dynamic or static) is only allowed on some channels depending on the regulatory constraints; use the .Cm list chan command to identify the channels where turbo mode may be used. To disable Dynamic Turbo mode use .Fl dturbo . .It Cm dwds Enable Dynamic WDS (DWDS) support. DWDS is a facility by which 4-address traffic can be carried between stations operating in infrastructure mode. A station first associates to an access point and authenticates using normal procedures (e.g., WPA). Then 4-address frames are passed to carry traffic for stations operating on either side of the wireless link. DWDS extends the normal WDS mechanism by leveraging existing security protocols and eliminating static binding. .Pp When DWDS is enabled on an access point 4-address frames received from an authorized station will generate a .Dq DWDS discovery event to user applications. This event should be used to create a WDS interface that is bound to the remote station (and usually plumbed into a bridge). Once the WDS interface is up and running 4-address traffic then logically flows through that interface. .Pp When DWDS is enabled on a station, traffic with a destination address different from the peer station are encapsulated in a 4-address frame and transmitted to the peer. All 4-address traffic uses the security information of the stations (e.g., cryptographic keys). A station is associated using 802.11n facilities may transport 4-address traffic using these same mechanisms; this depends on available resources and capabilities of the device. The DWDS implementation guards against layer 2 routing loops of multicast traffic. .It Cm ff Enable the use of Atheros Fast Frames when communicating with another Fast Frames-capable station. Fast Frames are an encapsulation technique by which two 802.3 frames are transmitted in a single 802.11 frame. This can noticeably improve throughput but requires that the receiving station understand how to decapsulate the frame. Fast frame use is negotiated using the Atheros 802.11 vendor-specific protocol extension so enabling use is safe when communicating with non-Atheros devices. By default, use of fast frames is enabled if the device is capable. To explicitly disable fast frames, use .Fl ff . .It Cm fragthreshold Ar length Set the threshold for which transmitted frames are broken into fragments. The .Ar length argument is the frame size in bytes and must be in the range 256 to 2346. Setting .Ar length to .Li 2346 , .Cm any , or .Cm - disables transmit fragmentation. Not all adapters honor the fragmentation threshold. .It Cm hidessid When operating as an access point, do not broadcast the SSID in beacon frames or respond to probe request frames unless they are directed to the ap (i.e., they include the ap's SSID). By default, the SSID is included in beacon frames and undirected probe request frames are answered. To re-enable the broadcast of the SSID etc., use .Fl hidessid . .It Cm ht Enable use of High Throughput (HT) when using 802.11n (default). The 802.11n specification includes mechanisms for operation on 20MHz and 40MHz wide channels using different signalling mechanisms than specified in 802.11b, 802.11g, and 802.11a. Stations negotiate use of these facilities, termed HT20 and HT40, when they associate. To disable all use of 802.11n use .Fl ht . To disable use of HT20 (e.g., to force only HT40 use) use .Fl ht20 . To disable use of HT40 use .Fl ht40 . .Pp HT configuration is used to .Dq auto promote operation when several choices are available. For example, if a station associates to an 11n-capable access point it controls whether the station uses legacy operation, HT20, or HT40. When an 11n-capable device is setup as an access point and Auto Channel Selection is used to locate a channel to operate on, HT configuration controls whether legacy, HT20, or HT40 operation is setup on the selected channel. If a fixed channel is specified for a station then HT configuration can be given as part of the channel specification; e.g., 6:ht/20 to setup HT20 operation on channel 6. .It Cm htcompat Enable use of compatibility support for pre-802.11n devices (default). The 802.11n protocol specification went through several incompatible iterations. Some vendors implemented 11n support to older specifications that will not interoperate with a purely 11n-compliant station. In particular the information elements included in management frames for old devices are different. When compatibility support is enabled both standard and compatible data will be provided. Stations that associate using the compatibility mechanisms are flagged in .Cm list sta . To disable compatibility support use .Fl htcompat . .It Cm htprotmode Ar technique For interfaces operating in 802.11n, use the specified .Ar technique for protecting HT frames in a mixed legacy/HT network. The set of valid techniques is .Cm off , and .Cm rts (RTS/CTS, default). Technique names are case insensitive. .It Cm inact Enable inactivity processing for stations associated to an access point (default). When operating as an access point the 802.11 layer monitors the activity of each associated station. When a station is inactive for 5 minutes it will send several .Dq probe frames to see if the station is still present. If no response is received then the station is deauthenticated. Applications that prefer to handle this work can disable this facility by using .Fl inact . .It Cm indoor Set the location to use in calculating regulatory constraints. The location is also advertised in beacon and probe response frames when 802.11d is enabled with .Cm dotd . See also .Cm outdoor , .Cm anywhere , .Cm country , and .Cm regdomain . .It Cm list active Display the list of channels available for use taking into account any restrictions set with the .Cm chanlist directive. See the description of .Cm list chan for more information. .It Cm list caps Display the adaptor's capabilities, including the operating modes supported. .It Cm list chan Display the list of channels available for use. Channels are shown with their IEEE channel number, equivalent frequency, and usage modes. Channels identified as .Ql 11g are also usable in .Ql 11b mode. Channels identified as .Ql 11a Turbo may be used only for Atheros' Static Turbo mode (specified with . Cm mediaopt turbo ) . Channels marked with a .Ql * have a regulatory constraint that they be passively scanned. This means a station is not permitted to transmit on the channel until it identifies the channel is being used for 802.11 communication; typically by hearing a beacon frame from an access point operating on the channel. .Cm list freq is another way of requesting this information. By default a compacted list of channels is displayed; if the .Fl v option is specified then all channels are shown. .It Cm list countries Display the set of country codes and regulatory domains that can be used in regulatory configuration. .It Cm list mac Display the current MAC Access Control List state. Each address is prefixed with a character that indicates the current policy applied to it: .Ql + indicates the address is allowed access, .Ql - indicates the address is denied access, .Ql * indicates the address is present but the current policy open (so the ACL is not consulted). .It Cm list mesh Displays the mesh routing table, used for forwarding packets on a mesh network. .It Cm list regdomain Display the current regulatory settings including the available channels and transmit power caps. .It Cm list roam Display the parameters that govern roaming operation. .It Cm list txparam Display the parameters that govern transmit operation. .It Cm list txpower Display the transmit power caps for each channel. .It Cm list scan Display the access points and/or ad-hoc neighbors located in the vicinity. This information may be updated automatically by the adapter with a .Cm scan request or through background scanning. Depending on the capabilities of the stations the following flags (capability codes) can be included in the output: .Bl -tag -width 3n .It Li A Channel agility. .It Li B PBCC modulation. .It Li C Poll request capability. .It Li D DSSS/OFDM capability. .It Li E Extended Service Set (ESS). Indicates that the station is part of an infrastructure network rather than an IBSS/ad-hoc network. .It Li I Independent Basic Service Set (IBSS). Indicates that the station is part of an ad-hoc network rather than an ESS network. .It Li P Privacy capability. The station requires authentication and encryption for all data frames exchanged within the BSS using cryptographic means such as WEP, TKIP, or AES-CCMP. .It Li R Robust Secure Network (RSN). .It Li S Short Preamble. Indicates that the network is using short preambles, defined in 802.11b High Rate/DSSS PHY, and utilizes a 56 bit sync field rather than the 128 bit field used in long preamble mode. Short preambles are used to optionally improve throughput performance with 802.11g and 802.11b. .It Li c Pollable capability. .It Li s Short slot time capability. Indicates that the 802.11g network is using a short slot time because there are no legacy (802.11b) stations present. .El .Pp By default interesting information elements captured from the neighboring stations are displayed at the end of each row. Possible elements include: .Cm WME (station supports WME), .Cm WPA (station supports WPA), .Cm WPS (station supports WPS), .Cm RSN (station supports 802.11i/RSN), .Cm HTCAP (station supports 802.11n/HT communication), .Cm ATH (station supports Atheros protocol extensions), .Cm VEN (station supports unknown vendor-specific extensions). If the .Fl v flag is used all the information elements and their contents will be shown. Specifying the .Fl v flag also enables display of long SSIDs. The .Cm list ap command is another way of requesting this information. .It Cm list sta When operating as an access point display the stations that are currently associated. When operating in ad-hoc mode display stations identified as neighbors in the IBSS. When operating in mesh mode display stations identified as neighbors in the MBSS. When operating in station mode display the access point. Capabilities advertised by the stations are described under the .Cm scan request. The following flags can be included in the output: .Bl -tag -width 3n .It Li A Authorized. Indicates that the station is permitted to send/receive data frames. .It Li E Extended Rate Phy (ERP). Indicates that the station is operating in an 802.11g network using extended transmit rates. .It Li H High Throughput (HT). Indicates that the station is using HT transmit rates. If a .Sq Li + follows immediately after then the station associated using deprecated mechanisms supported only when .Cm htcompat is enabled. .It Li P Power Save. Indicates that the station is operating in power save mode. .It Li Q Quality of Service (QoS). Indicates that the station is using QoS encapsulation for data frame. QoS encapsulation is enabled only when WME mode is enabled. .It Li S Short GI in HT 40MHz mode enabled. If a .Sq Li + follows immediately after then short GI in HT 20MHz mode is enabled as well. .It Li T Transitional Security Network (TSN). Indicates that the station associated using TSN; see also .Cm tsn below. .It Li W Wi-Fi Protected Setup (WPS). Indicates that the station associated using WPS. .It Li s Short GI in HT 20MHz mode enabled. .El .Pp By default information elements received from associated stations are displayed in a short form; the .Fl v flag causes this information to be displayed symbolically. .It Cm list wme Display the current channel parameters to use when operating in WME mode. If the .Fl v option is specified then both channel and BSS parameters are displayed for each AC (first channel, then BSS). When WME mode is enabled for an adaptor this information will be displayed with the regular status; this command is mostly useful for examining parameters when WME mode is disabled. See the description of the .Cm wme directive for information on the various parameters. .It Cm maxretry Ar count Set the maximum number of tries to use in sending unicast frames. The default setting is 6 but drivers may override this with a value they choose. .It Cm mcastrate Ar rate Set the rate for transmitting multicast/broadcast frames. Rates are specified as megabits/second in decimal; e.g.,\& 5.5 for 5.5 Mb/s. This rate should be valid for the current operating conditions; if an invalid rate is specified drivers are free to chose an appropriate rate. .It Cm mgtrate Ar rate Set the rate for transmitting management and/or control frames. Rates are specified as megabits/second in decimal; e.g.,\& 5.5 for 5.5 Mb/s. .It Cm outdoor Set the location to use in calculating regulatory constraints. The location is also advertised in beacon and probe response frames when 802.11d is enabled with .Cm dotd . See also .Cm anywhere , .Cm country , .Cm indoor , and .Cm regdomain . .It Cm powersave Enable powersave operation. When operating as a client, the station will conserve power by periodically turning off the radio and listening for messages from the access point telling it there are packets waiting. The station must then retrieve the packets. Not all devices support power save operation as a client. The 802.11 specification requires that all access points support power save but some drivers do not. Use .Fl powersave to disable powersave operation when operating as a client. .It Cm powersavesleep Ar sleep Set the desired max powersave sleep time in TU's (1024 usecs). By default the max powersave sleep time is 100 TU's. .It Cm protmode Ar technique For interfaces operating in 802.11g, use the specified .Ar technique for protecting OFDM frames in a mixed 11b/11g network. The set of valid techniques is .Cm off , cts (CTS to self), and .Cm rtscts (RTS/CTS). Technique names are case insensitive. Not all devices support .Cm cts as a protection technique. .It Cm pureg When operating as an access point in 802.11g mode allow only 11g-capable stations to associate (11b-only stations are not permitted to associate). To allow both 11g and 11b-only stations to associate, use .Fl pureg . .It Cm puren When operating as an access point in 802.11n mode allow only HT-capable stations to associate (legacy stations are not permitted to associate). To allow both HT and legacy stations to associate, use .Fl puren . .It Cm regdomain Ar sku Set the regulatory domain to use in calculating the regulatory constraints for operation. In particular the set of available channels, how the wireless device will operation on the channels, and the maximum transmit power that can be used on a channel are defined by this setting. Regdomain codes (SKU's) are taken from .Pa /etc/regdomain.xml and can also be viewed with the .Cm list countries request. Note that not all devices support changing the regdomain from a default setting; typically stored in EEPROM. See also .Cm country , .Cm indoor , .Cm outdoor , and .Cm anywhere . .It Cm rifs Enable use of Reduced InterFrame Spacing (RIFS) when operating in 802.11n on an HT channel. Note that RIFS must be supported by both the station and access point for it to be used. To disable RIFS use .Fl rifs . .It Cm roam:rate Ar rate Set the threshold for controlling roaming when operating in a BSS. The .Ar rate parameter specifies the transmit rate in megabits at which roaming should be considered. If the current transmit rate drops below this setting and background scanning is enabled, then the system will check if a more desirable access point is available and switch over to it. The current scan cache contents are used if they are considered valid according to the .Cm scanvalid parameter; otherwise a background scan operation is triggered before any selection occurs. Each channel type has a separate rate threshold; the default values are: 12 Mb/s (11a), 2 Mb/s (11b), 2 Mb/s (11g), MCS 1 (11na, 11ng). .It Cm roam:rssi Ar rssi Set the threshold for controlling roaming when operating in a BSS. The .Ar rssi parameter specifies the receive signal strength in dBm units at which roaming should be considered. If the current rssi drops below this setting and background scanning is enabled, then the system will check if a more desirable access point is available and switch over to it. The current scan cache contents are used if they are considered valid according to the .Cm scanvalid parameter; otherwise a background scan operation is triggered before any selection occurs. Each channel type has a separate rssi threshold; the default values are all 7 dBm. .It Cm roaming Ar mode When operating as a station, control how the system will behave when communication with the current access point is broken. The .Ar mode argument may be one of .Cm device (leave it to the hardware device to decide), .Cm auto (handle either in the device or the operating system\[em]as appropriate), .Cm manual (do nothing until explicitly instructed). By default, the device is left to handle this if it is capable; otherwise, the operating system will automatically attempt to reestablish communication. Manual mode is used by applications such as .Xr wpa_supplicant 8 that want to control the selection of an access point. .It Cm rtsthreshold Ar length Set the threshold for which transmitted frames are preceded by transmission of an RTS control frame. The .Ar length argument is the frame size in bytes and must be in the range 1 to 2346. Setting .Ar length to .Li 2346 , .Cm any , or .Cm - disables transmission of RTS frames. Not all adapters support setting the RTS threshold. .It Cm scan Initiate a scan of neighboring stations, wait for it to complete, and display all stations found. Only the super-user can initiate a scan. See .Cm list scan for information on the display. By default a background scan is done; otherwise a foreground scan is done and the station may roam to a different access point. The .Cm list scan request can be used to show recent scan results without initiating a new scan. .It Cm scanvalid Ar threshold Set the maximum time the scan cache contents are considered valid; i.e., will be used without first triggering a scan operation to refresh the data. The .Ar threshold parameter is specified in seconds and defaults to 60 seconds. The minimum setting for .Ar threshold is 10 seconds. One should take care setting this threshold; if it is set too low then attempts to roam to another access point may trigger unnecessary background scan operations. .It Cm shortgi Enable use of Short Guard Interval when operating in 802.11n on an HT channel. NB: this currently enables Short GI on both HT40 and HT20 channels. To disable Short GI use .Fl shortgi . .It Cm smps Enable use of Static Spatial Multiplexing Power Save (SMPS) when operating in 802.11n. A station operating with Static SMPS maintains only a single receive chain active (this can significantly reduce power consumption). To disable SMPS use .Fl smps . .It Cm smpsdyn Enable use of Dynamic Spatial Multiplexing Power Save (SMPS) when operating in 802.11n. A station operating with Dynamic SMPS maintains only a single receive chain active but switches to multiple receive chains when it receives an RTS frame (this can significantly reduce power consumption). Note that stations cannot distinguish between RTS/CTS intended to enable multiple receive chains and those used for other purposes. To disable SMPS use .Fl smps . .It Cm ssid Ar ssid Set the desired Service Set Identifier (aka network name). The SSID is a string up to 32 characters in length and may be specified as either a normal string or in hexadecimal when preceded by .Ql 0x . Additionally, the SSID may be cleared by setting it to .Ql - . .It Cm tdmaslot Ar slot When operating with TDMA, use the specified .Ar slot configuration. The .Ar slot is a number between 0 and the maximum number of slots in the BSS. Note that a station configured as slot 0 is a master and will broadcast beacon frames advertising the BSS; stations configured to use other slots will always scan to locate a master before they ever transmit. By default .Cm tdmaslot is set to 1. .It Cm tdmaslotcnt Ar cnt When operating with TDMA, setup a BSS with .Ar cnt slots. The slot count may be at most 8. The current implementation is only tested with two stations (i.e., point to point applications). This setting is only meaningful when a station is configured as slot 0; other stations adopt this setting from the BSS they join. By default .Cm tdmaslotcnt is set to 2. .It Cm tdmaslotlen Ar len When operating with TDMA, setup a BSS such that each station has a slot .Ar len microseconds long. The slot length must be at least 150 microseconds (1/8 TU) and no more than 65 milliseconds. Note that setting too small a slot length may result in poor channel bandwidth utilization due to factors such as timer granularity and guard time. This setting is only meaningful when a station is configured as slot 0; other stations adopt this setting from the BSS they join. By default .Cm tdmaslotlen is set to 10 milliseconds. .It Cm tdmabintval Ar intval When operating with TDMA, setup a BSS such that beacons are transmitted every .Ar intval superframes to synchronize the TDMA slot timing. A superframe is defined as the number of slots times the slot length; e.g., a BSS with two slots of 10 milliseconds has a 20 millisecond superframe. The beacon interval may not be zero. A lower setting of .Cm tdmabintval causes the timers to be resynchronized more often; this can be help if significant timer drift is observed. By default .Cm tdmabintval is set to 5. .It Cm tsn When operating as an access point with WPA/802.11i allow legacy stations to associate using static key WEP and open authentication. To disallow legacy station use of WEP, use .Fl tsn . .It Cm txpower Ar power Set the power used to transmit frames. The .Ar power argument is specified in .5 dBm units. Out of range values are truncated. Typically only a few discrete power settings are available and the driver will use the setting closest to the specified value. Not all adapters support changing the transmit power. .It Cm ucastrate Ar rate Set a fixed rate for transmitting unicast frames. Rates are specified as megabits/second in decimal; e.g.,\& 5.5 for 5.5 Mb/s. This rate should be valid for the current operating conditions; if an invalid rate is specified drivers are free to chose an appropriate rate. .It Cm wepmode Ar mode Set the desired WEP mode. Not all adapters support all modes. The set of valid modes is .Cm off , on , and .Cm mixed . The .Cm mixed mode explicitly tells the adaptor to allow association with access points which allow both encrypted and unencrypted traffic. On these adapters, .Cm on means that the access point must only allow encrypted connections. On other adapters, .Cm on is generally another name for .Cm mixed . Modes are case insensitive. .It Cm weptxkey Ar index Set the WEP key to be used for transmission. This is the same as setting the default transmission key with .Cm deftxkey . .It Cm wepkey Ar key Ns | Ns Ar index : Ns Ar key Set the selected WEP key. If an .Ar index is not given, key 1 is set. A WEP key will be either 5 or 13 characters (40 or 104 bits) depending on the local network and the capabilities of the adaptor. It may be specified either as a plain string or as a string of hexadecimal digits preceded by .Ql 0x . For maximum portability, hex keys are recommended; the mapping of text keys to WEP encryption is usually driver-specific. In particular, the Windows drivers do this mapping differently to .Fx . A key may be cleared by setting it to .Ql - . If WEP is supported then there are at least four keys. Some adapters support more than four keys. If that is the case, then the first four keys (1-4) will be the standard temporary keys and any others will be adaptor specific keys such as permanent keys stored in NVRAM. .Pp Note that you must set a default transmit key with .Cm deftxkey for the system to know which key to use in encrypting outbound traffic. .It Cm wme Enable Wireless Multimedia Extensions (WME) support, if available, for the specified interface. WME is a subset of the IEEE 802.11e standard to support the efficient communication of realtime and multimedia data. To disable WME support, use .Fl wme . Another name for this parameter is .Cm wmm . .Pp The following parameters are meaningful only when WME support is in use. Parameters are specified per-AC (Access Category) and split into those that are used by a station when acting as an access point and those for client stations in the BSS. The latter are received from the access point and may not be changed (at the station). The following Access Categories are recognized: .Pp .Bl -tag -width ".Cm AC_BK" -compact .It Cm AC_BE (or .Cm BE ) best effort delivery, .It Cm AC_BK (or .Cm BK ) background traffic, .It Cm AC_VI (or .Cm VI ) video traffic, .It Cm AC_VO (or .Cm VO ) voice traffic. .El .Pp AC parameters are case-insensitive. Traffic classification is done in the operating system using the vlan priority associated with data frames or the ToS (Type of Service) indication in IP-encapsulated frames. If neither information is present, traffic is assigned to the Best Effort (BE) category. .Bl -tag -width indent .It Cm ack Ar ac Set the ACK policy for QoS transmissions by the local station; this controls whether or not data frames transmitted by a station require an ACK response from the receiving station. To disable waiting for an ACK use .Fl ack . This parameter is applied only to the local station. .It Cm acm Ar ac Enable the Admission Control Mandatory (ACM) mechanism for transmissions by the local station. To disable the ACM use .Fl acm . On stations in a BSS this parameter is read-only and indicates the setting received from the access point. NB: ACM is not supported right now. .It Cm aifs Ar ac Ar count Set the Arbitration Inter Frame Spacing (AIFS) channel access parameter to use for transmissions by the local station. On stations in a BSS this parameter is read-only and indicates the setting received from the access point. .It Cm cwmin Ar ac Ar count Set the CWmin channel access parameter to use for transmissions by the local station. On stations in a BSS this parameter is read-only and indicates the setting received from the access point. .It Cm cwmax Ar ac Ar count Set the CWmax channel access parameter to use for transmissions by the local station. On stations in a BSS this parameter is read-only and indicates the setting received from the access point. .It Cm txoplimit Ar ac Ar limit Set the Transmission Opportunity Limit channel access parameter to use for transmissions by the local station. This parameter defines an interval of time when a WME station has the right to initiate transmissions onto the wireless medium. On stations in a BSS this parameter is read-only and indicates the setting received from the access point. .It Cm bss:aifs Ar ac Ar count Set the AIFS channel access parameter to send to stations in a BSS. This parameter is meaningful only when operating in ap mode. .It Cm bss:cwmin Ar ac Ar count Set the CWmin channel access parameter to send to stations in a BSS. This parameter is meaningful only when operating in ap mode. .It Cm bss:cwmax Ar ac Ar count Set the CWmax channel access parameter to send to stations in a BSS. This parameter is meaningful only when operating in ap mode. .It Cm bss:txoplimit Ar ac Ar limit Set the TxOpLimit channel access parameter to send to stations in a BSS. This parameter is meaningful only when operating in ap mode. .El .It Cm wps Enable Wireless Privacy Subscriber support. Note that WPS support requires a WPS-capable supplicant. To disable this function use .Fl wps . .El .Ss MAC-Based Access Control List Parameters The following parameters support an optional access control list feature available with some adapters when operating in ap mode; see .Xr wlan_acl 4 . This facility allows an access point to accept/deny association requests based on the MAC address of the station. Note that this feature does not significantly enhance security as MAC address spoofing is easy to do. .Bl -tag -width indent .It Cm mac:add Ar address Add the specified MAC address to the database. Depending on the policy setting association requests from the specified station will be allowed or denied. .It Cm mac:allow Set the ACL policy to permit association only by stations registered in the database. .It Cm mac:del Ar address Delete the specified MAC address from the database. .It Cm mac:deny Set the ACL policy to deny association only by stations registered in the database. .It Cm mac:kick Ar address Force the specified station to be deauthenticated. This typically is done to block a station after updating the address database. .It Cm mac:open Set the ACL policy to allow all stations to associate. .It Cm mac:flush Delete all entries in the database. .It Cm mac:radius Set the ACL policy to permit association only by stations approved by a RADIUS server. Note that this feature requires the .Xr hostapd 8 program be configured to do the right thing as it handles the RADIUS processing (and marks stations as authorized). .El .Ss Mesh Mode Wireless Interface Parameters The following parameters are related to a wireless interface operating in mesh mode: .Bl -tag -width indent .It Cm meshid Ar meshid Set the desired Mesh Identifier. The Mesh ID is a string up to 32 characters in length. A mesh interface must have a Mesh Identifier specified to reach an operational state. .It Cm meshttl Ar ttl Set the desired .Dq time to live for mesh forwarded packets; this is the number of hops a packet may be forwarded before it is discarded. The default setting for .Cm meshttl is 31. .It Cm meshpeering Enable or disable peering with neighbor mesh stations. Stations must peer before any data packets can be exchanged. By default .Cm meshpeering is enabled. .It Cm meshforward Enable or disable forwarding packets by a mesh interface. By default .Cm meshforward is enabled. .It Cm meshgate This attribute specifies whether or not the mesh STA activates mesh gate announcements. By default .Cm meshgate is disabled. .It Cm meshmetric Ar protocol Set the specified .Ar protocol as the link metric protocol used on a mesh network. The default protocol is called .Ar AIRTIME . The mesh interface will restart after changing this setting. .It Cm meshpath Ar protocol Set the specified .Ar protocol as the path selection protocol used on a mesh network. The only available protocol at the moment is called .Ar HWMP (Hybrid Wireless Mesh Protocol). The mesh interface will restart after changing this setting. .It Cm hwmprootmode Ar mode Stations on a mesh network can operate as .Dq root nodes . Root nodes try to find paths to all mesh nodes and advertise themselves regularly. When there is a root mesh node on a network, other mesh nodes can setup paths between themselves faster because they can use the root node to find the destination. This path may not be the best, but on-demand routing will eventually find the best path. The following modes are recognized: .Pp .Bl -tag -width ".Cm PROACTIVE" -compact .It Cm DISABLED Disable root mode. .It Cm NORMAL Send broadcast path requests every two seconds. Nodes on the mesh without a path to this root mesh station with try to discover a path to us. .It Cm PROACTIVE Send broadcast path requests every two seconds and every node must reply with a path reply even if it already has a path to this root mesh station. .It Cm RANN Send broadcast root announcement (RANN) frames. Nodes on the mesh without a path to this root mesh station with try to discover a path to us. .El By default .Cm hwmprootmode is set to .Ar DISABLED . .It Cm hwmpmaxhops Ar cnt Set the maximum number of hops allowed in an HMWP path to .Ar cnt . The default setting for .Cm hwmpmaxhops is 31. .El .Ss Compatibility Parameters The following parameters are for compatibility with other systems: .Bl -tag -width indent .It Cm nwid Ar ssid Another name for the .Cm ssid parameter. Included for .Nx compatibility. .It Cm stationname Ar name Set the name of this station. The station name is not part of the IEEE 802.11 protocol though some interfaces support it. As such it only seems to be meaningful to identical or virtually identical equipment. Setting the station name is identical in syntax to setting the SSID. One can also use .Cm station for .Bsx compatibility. .It Cm wep Another way of saying .Cm wepmode on . Included for .Bsx compatibility. .It Fl wep Another way of saying .Cm wepmode off . Included for .Bsx compatibility. .It Cm nwkey key Another way of saying: .Dq Li "wepmode on weptxkey 1 wepkey 1:key wepkey 2:- wepkey 3:- wepkey 4:-" . Included for .Nx compatibility. .It Cm nwkey Xo .Sm off .Ar n : k1 , k2 , k3 , k4 .Sm on .Xc Another way of saying .Dq Li "wepmode on weptxkey n wepkey 1:k1 wepkey 2:k2 wepkey 3:k3 wepkey 4:k4" . Included for .Nx compatibility. .It Fl nwkey Another way of saying .Cm wepmode off . Included for .Nx compatibility. .El .Ss Bridge Interface Parameters The following parameters are specific to bridge interfaces: .Bl -tag -width indent .It Cm addm Ar interface Add the interface named by .Ar interface as a member of the bridge. The interface is put into promiscuous mode so that it can receive every packet sent on the network. .It Cm deletem Ar interface Remove the interface named by .Ar interface from the bridge. Promiscuous mode is disabled on the interface when it is removed from the bridge. .It Cm maxaddr Ar size Set the size of the bridge address cache to .Ar size . The default is 2000 entries. .It Cm timeout Ar seconds Set the timeout of address cache entries to .Ar seconds seconds. If .Ar seconds is zero, then address cache entries will not be expired. The default is 1200 seconds. .It Cm addr Display the addresses that have been learned by the bridge. .It Cm static Ar interface-name Ar address Add a static entry into the address cache pointing to .Ar interface-name . Static entries are never aged out of the cache or re-placed, even if the address is seen on a different interface. .It Cm deladdr Ar address Delete .Ar address from the address cache. .It Cm flush Delete all dynamically-learned addresses from the address cache. .It Cm flushall Delete all addresses, including static addresses, from the address cache. .It Cm discover Ar interface Mark an interface as a .Dq discovering interface. When the bridge has no address cache entry (either dynamic or static) for the destination address of a packet, the bridge will forward the packet to all member interfaces marked as .Dq discovering . This is the default for all interfaces added to a bridge. .It Cm -discover Ar interface Clear the .Dq discovering attribute on a member interface. For packets without the .Dq discovering attribute, the only packets forwarded on the interface are broadcast or multicast packets and packets for which the destination address is known to be on the interface's segment. .It Cm learn Ar interface Mark an interface as a .Dq learning interface. When a packet arrives on such an interface, the source address of the packet is entered into the address cache as being a destination address on the interface's segment. This is the default for all interfaces added to a bridge. .It Cm -learn Ar interface Clear the .Dq learning attribute on a member interface. .It Cm sticky Ar interface Mark an interface as a .Dq sticky interface. Dynamically learned address entries are treated at static once entered into the cache. Sticky entries are never aged out of the cache or replaced, even if the address is seen on a different interface. .It Cm -sticky Ar interface Clear the .Dq sticky attribute on a member interface. .It Cm private Ar interface Mark an interface as a .Dq private interface. A private interface does not forward any traffic to any other port that is also a private interface. .It Cm -private Ar interface Clear the .Dq private attribute on a member interface. .It Cm span Ar interface Add the interface named by .Ar interface as a span port on the bridge. Span ports transmit a copy of every frame received by the bridge. This is most useful for snooping a bridged network passively on another host connected to one of the span ports of the bridge. .It Cm -span Ar interface Delete the interface named by .Ar interface from the list of span ports of the bridge. .It Cm stp Ar interface Enable Spanning Tree protocol on .Ar interface . The .Xr if_bridge 4 driver has support for the IEEE 802.1D Spanning Tree protocol (STP). Spanning Tree is used to detect and remove loops in a network topology. .It Cm -stp Ar interface Disable Spanning Tree protocol on .Ar interface . This is the default for all interfaces added to a bridge. .It Cm edge Ar interface Set .Ar interface as an edge port. An edge port connects directly to end stations cannot create bridging loops in the network, this allows it to transition straight to forwarding. .It Cm -edge Ar interface Disable edge status on .Ar interface . .It Cm autoedge Ar interface Allow .Ar interface to automatically detect edge status. This is the default for all interfaces added to a bridge. .It Cm -autoedge Ar interface Disable automatic edge status on .Ar interface . .It Cm ptp Ar interface Set the .Ar interface as a point to point link. This is required for straight transitions to forwarding and should be enabled on a direct link to another RSTP capable switch. .It Cm -ptp Ar interface Disable point to point link status on .Ar interface . This should be disabled for a half duplex link and for an interface connected to a shared network segment, like a hub or a wireless network. .It Cm autoptp Ar interface Automatically detect the point to point status on .Ar interface by checking the full duplex link status. This is the default for interfaces added to the bridge. .It Cm -autoptp Ar interface Disable automatic point to point link detection on .Ar interface . .It Cm maxage Ar seconds Set the time that a Spanning Tree protocol configuration is valid. The default is 20 seconds. The minimum is 6 seconds and the maximum is 40 seconds. .It Cm fwddelay Ar seconds Set the time that must pass before an interface begins forwarding packets when Spanning Tree is enabled. The default is 15 seconds. The minimum is 4 seconds and the maximum is 30 seconds. .It Cm hellotime Ar seconds Set the time between broadcasting of Spanning Tree protocol configuration messages. The hello time may only be changed when operating in legacy stp mode. The default is 2 seconds. The minimum is 1 second and the maximum is 2 seconds. .It Cm priority Ar value Set the bridge priority for Spanning Tree. The default is 32768. The minimum is 0 and the maximum is 61440. .It Cm proto Ar value Set the Spanning Tree protocol. The default is rstp. The available options are stp and rstp. .It Cm holdcnt Ar value Set the transmit hold count for Spanning Tree. This is the number of packets transmitted before being rate limited. The default is 6. The minimum is 1 and the maximum is 10. .It Cm ifpriority Ar interface Ar value Set the Spanning Tree priority of .Ar interface to .Ar value . The default is 128. The minimum is 0 and the maximum is 240. .It Cm ifpathcost Ar interface Ar value Set the Spanning Tree path cost of .Ar interface to .Ar value . The default is calculated from the link speed. To change a previously selected path cost back to automatic, set the cost to 0. The minimum is 1 and the maximum is 200000000. .It Cm ifmaxaddr Ar interface Ar size Set the maximum number of hosts allowed from an interface, packets with unknown source addresses are dropped until an existing host cache entry expires or is removed. Set to 0 to disable. .El .Ss Link Aggregation and Link Failover Parameters The following parameters are specific to lagg interfaces: .Bl -tag -width indent .It Cm laggtype Ar type When creating a lagg interface the type can be specified as either .Cm ethernet or .Cm infiniband . If not specified ethernet is the default lagg type. .It Cm laggport Ar interface Add the interface named by .Ar interface as a port of the aggregation interface. .It Cm -laggport Ar interface Remove the interface named by .Ar interface from the aggregation interface. .It Cm laggproto Ar proto Set the aggregation protocol. The default is .Li failover . The available options are .Li failover , .Li lacp , .Li loadbalance , .Li roundrobin , .Li broadcast and .Li none . .It Cm lagghash Ar option Ns Oo , Ns Ar option Oc Set the packet layers to hash for aggregation protocols which load balance. The default is .Dq l2,l3,l4 . The options can be combined using commas. .Pp .Bl -tag -width ".Cm l2" -compact .It Cm l2 src/dst mac address and optional vlan number. .It Cm l3 src/dst address for IPv4 or IPv6. .It Cm l4 src/dst port for TCP/UDP/SCTP. .El .It Cm -use_flowid Enable local hash computation for RSS hash on the interface. The .Li loadbalance and .Li lacp modes will use the RSS hash from the network card if available to avoid computing one, this may give poor traffic distribution if the hash is invalid or uses less of the protocol header information. .Cm -use_flowid disables use of RSS hash from the network card. The default value can be set via the .Va net.link.lagg.default_use_flowid .Xr sysctl 8 variable. .Li 0 means .Dq disabled and .Li 1 means .Dq enabled . .It Cm use_flowid Use the RSS hash from the network card if available. .It Cm flowid_shift Ar number Set a shift parameter for RSS local hash computation. Hash is calculated by using flowid bits in a packet header mbuf which are shifted by the number of this parameter. .It Cm use_numa Enable selection of egress ports based on the native .Xr NUMA 4 domain for the packets being transmitted. This is currently only implemented for lacp mode. This works only on .Xr NUMA 4 hardware, running a kernel compiled with the .Xr NUMA 4 option, and when interfaces from multiple .Xr NUMA 4 domains are ports of the aggregation interface. .It Cm -use_numa Disable selection of egress ports based on the native .Xr NUMA 4 domain for the packets being transmitted. .It Cm lacp_fast_timeout Enable lacp fast-timeout on the interface. .It Cm -lacp_fast_timeout Disable lacp fast-timeout on the interface. .It Cm lacp_strict Enable lacp strict compliance on the interface. The default value can be set via the .Va net.link.lagg.lacp.default_strict_mode .Xr sysctl 8 variable. .Li 0 means .Dq disabled and .Li 1 means .Dq enabled . .It Cm -lacp_strict Disable lacp strict compliance on the interface. .It Cm rr_limit Ar number Configure a stride for an interface in round-robin mode. The default stride is 1. .El .Ss Generic IP Tunnel Parameters The following parameters apply to IP tunnel interfaces, .Xr gif 4 : .Bl -tag -width indent .It Cm tunnel Ar src_addr dest_addr Configure the physical source and destination address for IP tunnel interfaces. The arguments .Ar src_addr and .Ar dest_addr are interpreted as the outer source/destination for the encapsulating IPv4/IPv6 header. .It Fl tunnel Unconfigure the physical source and destination address for IP tunnel interfaces previously configured with .Cm tunnel . .It Cm deletetunnel Another name for the .Fl tunnel parameter. .It Cm accept_rev_ethip_ver Set a flag to accept both correct EtherIP packets and ones with reversed version field. Enabled by default. This is for backward compatibility with .Fx 6.1 , 6.2, 6.3, 7.0, and 7.1. .It Cm -accept_rev_ethip_ver Clear a flag .Cm accept_rev_ethip_ver . .It Cm ignore_source Set a flag to accept encapsulated packets destined to this host independently from source address. This may be useful for hosts, that receive encapsulated packets from the load balancers. .It Cm -ignore_source Clear a flag .Cm ignore_source . .It Cm send_rev_ethip_ver Set a flag to send EtherIP packets with reversed version field intentionally. Disabled by default. This is for backward compatibility with .Fx 6.1 , 6.2, 6.3, 7.0, and 7.1. .It Cm -send_rev_ethip_ver Clear a flag .Cm send_rev_ethip_ver . .El .Ss GRE Tunnel Parameters The following parameters apply to GRE tunnel interfaces, .Xr gre 4 : .Bl -tag -width indent .It Cm tunnel Ar src_addr dest_addr Configure the physical source and destination address for GRE tunnel interfaces. The arguments .Ar src_addr and .Ar dest_addr are interpreted as the outer source/destination for the encapsulating IPv4/IPv6 header. .It Fl tunnel Unconfigure the physical source and destination address for GRE tunnel interfaces previously configured with .Cm tunnel . .It Cm deletetunnel Another name for the .Fl tunnel parameter. .It Cm grekey Ar key Configure the GRE key to be used for outgoing packets. Note that .Xr gre 4 will always accept GRE packets with invalid or absent keys. This command will result in a four byte MTU reduction on the interface. .El .Ss Packet Filter State Table Sychronisation Parameters The following parameters are specific to .Xr pfsync 4 interfaces: .Bl -tag -width indent .It Cm syncdev Ar iface Use the specified interface to send and receive pfsync state synchronisation messages. .It Fl syncdev Stop sending pfsync state synchronisation messages over the network. .It Cm syncpeer Ar peer_address Make the pfsync link point-to-point rather than using multicast to broadcast the state synchronisation messages. The peer_address is the IP address of the other host taking part in the pfsync cluster. .It Fl syncpeer Broadcast the packets using multicast. .It Cm maxupd Ar n Set the maximum number of updates for a single state which can be collapsed into one. This is an 8-bit number; the default value is 128. .It Cm defer Defer transmission of the first packet in a state until a peer has acknowledged that the associated state has been inserted. .It Fl defer Do not defer the first packet in a state. This is the default. .El .Ss VLAN Parameters The following parameters are specific to .Xr vlan 4 interfaces: .Bl -tag -width indent .It Cm vlan Ar vlan_tag Set the VLAN tag value to .Ar vlan_tag . This value is a 12-bit VLAN Identifier (VID) which is used to create an 802.1Q or 802.1ad VLAN header for packets sent from the .Xr vlan 4 interface. Note that .Cm vlan and .Cm vlandev must both be set at the same time. .It Cm vlanproto Ar vlan_proto Set the VLAN encapsulation protocol to .Ar vlan_proto . Supported encapsulation protocols are currently .Dq 802.1Q and .Dq 802.1ad . The default encapsulation protocol is .Dq 802.1Q . The .Dq 802.1ad protocol is also commonly known as .Dq QinQ ; either name can be used. .It Cm vlanpcp Ar priority_code_point Priority code point .Pq Dv PCP is an 3-bit field which refers to the IEEE 802.1p class of service and maps to the frame priority level. .Pp Values in order of priority are: .Cm 1 .Pq Dv Background (lowest) , .Cm 0 .Pq Dv Best effort (default) , .Cm 2 .Pq Dv Excellent effort , .Cm 3 .Pq Dv Critical applications , .Cm 4 .Pq Dv Video, < 100ms latency and jitter , .Cm 5 .Pq Dv Voice, < 10ms latency and jitter , .Cm 6 .Pq Dv Internetwork control , .Cm 7 .Pq Dv Network control (highest) . .It Cm vlandev Ar iface Associate the physical interface .Ar iface with a .Xr vlan 4 interface. Packets transmitted through the .Xr vlan 4 interface will be diverted to the specified physical interface .Ar iface with 802.1Q VLAN encapsulation. Packets with 802.1Q encapsulation received by the parent interface with the correct VLAN Identifier will be diverted to the associated .Xr vlan 4 pseudo-interface. The .Xr vlan 4 interface is assigned a copy of the parent interface's flags and the parent's Ethernet address. The .Cm vlandev and .Cm vlan must both be set at the same time. If the .Xr vlan 4 interface already has a physical interface associated with it, this command will fail. To change the association to another physical interface, the existing association must be cleared first. .Pp Note: if the hardware tagging capability is set on the parent interface, the .Xr vlan 4 pseudo interface's behavior changes: the .Xr vlan 4 interface recognizes that the parent interface supports insertion and extraction of VLAN tags on its own (usually in firmware) and that it should pass packets to and from the parent unaltered. .It Fl vlandev Op Ar iface If the driver is a .Xr vlan 4 pseudo device, disassociate the parent interface from it. This breaks the link between the .Xr vlan 4 interface and its parent, clears its VLAN Identifier, flags and its link address and shuts the interface down. The .Ar iface argument is useless and hence deprecated. .El .Ss Virtual eXtensible LAN Parameters The following parameters are used to configure .Xr vxlan 4 interfaces. .Bl -tag -width indent .It Cm vxlanid Ar identifier This value is a 24-bit VXLAN Network Identifier (VNI) that identifies the virtual network segment membership of the interface. .It Cm vxlanlocal Ar address The source address used in the encapsulating IPv4/IPv6 header. The address should already be assigned to an existing interface. When the interface is configured in unicast mode, the listening socket is bound to this address. .It Cm vxlanremote Ar address The interface can be configured in a unicast, or point-to-point, mode to create a tunnel between two hosts. This is the IP address of the remote end of the tunnel. .It Cm vxlangroup Ar address The interface can be configured in a multicast mode to create a virtual network of hosts. This is the IP multicast group address the interface will join. .It Cm vxlanlocalport Ar port The port number the interface will listen on. The default port number is 4789. .It Cm vxlanremoteport Ar port The destination port number used in the encapsulating IPv4/IPv6 header. The remote host should be listening on this port. The default port number is 4789. Note some other implementations, such as Linux, do not default to the IANA assigned port, but instead listen on port 8472. .It Cm vxlanportrange Ar low high The range of source ports used in the encapsulating IPv4/IPv6 header. The port selected within the range is based on a hash of the inner frame. A range is useful to provide entropy within the outer IP header for more effective load balancing. The default range is between the .Xr sysctl 8 variables .Va net.inet.ip.portrange.first and .Va net.inet.ip.portrange.last .It Cm vxlantimeout Ar timeout The maximum time, in seconds, before an entry in the forwarding table is pruned. The default is 1200 seconds (20 minutes). .It Cm vxlanmaxaddr Ar max The maximum number of entries in the forwarding table. The default is 2000. .It Cm vxlandev Ar dev When the interface is configured in multicast mode, the .Cm dev interface is used to transmit IP multicast packets. .It Cm vxlanttl Ar ttl The TTL used in the encapsulating IPv4/IPv6 header. The default is 64. .It Cm vxlanlearn The source IP address and inner source Ethernet MAC address of received packets are used to dynamically populate the forwarding table. When in multicast mode, an entry in the forwarding table allows the interface to send the frame directly to the remote host instead of broadcasting the frame to the multicast group. This is the default. .It Fl vxlanlearn The forwarding table is not populated by received packets. .It Cm vxlanflush Delete all dynamically-learned addresses from the forwarding table. .It Cm vxlanflushall Delete all addresses, including static addresses, from the forwarding table. .El .Ss CARP Parameters The following parameters are used to configure .Xr carp 4 protocol on an interface: .Bl -tag -width indent .It Cm vhid Ar n Set the virtual host ID. This is a required setting to initiate .Xr carp 4 . If the virtual host ID does not exist yet, it is created and attached to the interface, otherwise configuration of an existing vhid is adjusted. If the .Cm vhid keyword is supplied along with an .Dq inet6 or .Dq inet address, then this address is configured to be run under control of the specified vhid. Whenever a last address that refers to a particular vhid is removed from an interface, the vhid is automatically removed from interface and destroyed. Any other configuration parameters for the .Xr carp 4 protocol should be supplied along with the .Cm vhid keyword. Acceptable values for vhid are 1 to 255. .It Cm advbase Ar seconds Specifies the base of the advertisement interval in seconds. The acceptable values are 1 to 255. The default value is 1. .It Cm advskew Ar interval Specifies the skew to add to the base advertisement interval to make one host advertise slower than another host. It is specified in 1/256 of seconds. The acceptable values are 1 to 254. The default value is 0. .It Cm pass Ar phrase Set the authentication key to .Ar phrase . .It Cm state Ar state Forcibly change state of a given vhid. The following states are recognized: .Cm MASTER and .Cm BACKUP . .El .Sh ENVIRONMENT The following environment variables affect the execution of .Nm : .Bl -tag -width IFCONFIG_FORMAT .It Ev IFCONFIG_FORMAT This variable can contain a specification of the output format. See the description of the .Fl f flag for more details. .El .Sh EXAMPLES Assign the IPv4 address .Li 192.0.2.10 , with a network mask of .Li 255.255.255.0 , to the interface .Li em0 : .Dl # ifconfig em0 inet 192.0.2.10 netmask 255.255.255.0 .Pp Add the IPv4 address .Li 192.0.2.45 , with the CIDR network prefix .Li /28 , to the interface .Li em0 : .Dl # ifconfig em0 inet 192.0.2.45/28 alias .Pp Remove the IPv4 address .Li 192.0.2.45 from the interface .Li em0 : .Dl # ifconfig em0 inet 192.0.2.45 -alias .Pp Enable IPv6 functionality of the interface: .Dl # ifconfig em0 inet6 -ifdisabled .Pp Add the IPv6 address .Li 2001:DB8:DBDB::123/48 to the interface .Li em0 : .Dl # ifconfig em0 inet6 2001:db8:bdbd::123 prefixlen 48 alias Note that lower case hexadecimal IPv6 addresses are acceptable. .Pp Remove the IPv6 address added in the above example, using the .Li / character as shorthand for the network prefix: .Dl # ifconfig em0 inet6 2001:db8:bdbd::123/48 -alias .Pp Configure a single CARP redundant address on igb0, and then switch it to be master: .Bd -literal -offset indent -compact # ifconfig igb0 vhid 1 10.0.0.1/24 pass foobar up # ifconfig igb0 vhid 1 state master .Ed .Pp Configure the interface .Li xl0 , to use 100baseTX, full duplex Ethernet media options: .Dl # ifconfig xl0 media 100baseTX mediaopt full-duplex .Pp Label the em0 interface as an uplink: .Dl # ifconfig em0 description \&"Uplink to Gigabit Switch 2\&" .Pp Create the software network interface .Li gif1 : .Dl # ifconfig gif1 create .Pp Destroy the software network interface .Li gif1 : .Dl # ifconfig gif1 destroy .Pp Display available wireless networks using .Li wlan0 : .Dl # ifconfig wlan0 list scan .Pp Display inet and inet6 address subnet masks in CIDR notation .Dl # ifconfig -f inet:cidr,inet6:cidr .Pp Display interfaces that are up with the exception of loopback .Dl # ifconfig -a -u -G lo .Pp Display a list of interface names beloning to the wlan group: .Bd -literal -offset indent -compact # ifconfig -g wlan wlan0 wlan1 .Ed .Pp Display details about the interfaces belonging to the wlan group: .Bd -literal -offset indent -compact # ifconfig -a -g wlan wlan0: flags=8843 metric 0 mtu 1500 ether 75:4c:61:6b:7a:73 inet6 fe80::4c75:636a:616e:ffd8%wlan0 prefixlen 64 scopeid 0x3 inet6 2001:5761:6e64:6152:6f6d:616e:fea4:ffe2 prefixlen 64 autoconf inet 192.168.10.5 netmask 0xffffff00 broadcast 192.168.10.255 groups: wlan ssid "Hotspot" channel 11 (2462 MHz 11g) bssid 12:34:ff:ff:43:21 regdomain ETSI country DE authmode WPA2/802.11i privacy ON deftxkey UNDEF AES-CCM 2:128-bit AES-CCM 3:128-bit txpower 30 bmiss 10 scanvalid 60 protmode CTS wme roaming MANUAL parent interface: iwm0 media: IEEE 802.11 Wireless Ethernet DS/2Mbps mode 11g status: associated nd6 options=23 wlan1: flags=8843 metric 0 mtu 1500 ether 00:50:69:6f:74:72 groups: wlan ssid "" channel 2 (2417 MHz 11g) regdomain FCC country US authmode OPEN privacy OFF txpower 30 bmiss 7 scanvalid 60 bgscan bgscanintvl 300 bgscanidle 250 roam:rssi 7 roam:rate 5 protmode CTS wme bintval 0 parent interface: rum0 media: IEEE 802.11 Wireless Ethernet autoselect (autoselect) status: no carrier nd6 options=29 .Ed .Pp Set a randomly-generated MAC address on tap0: .Dl # ifconfig tap0 ether random .Sh DIAGNOSTICS Messages indicating the specified interface does not exist, the requested address is unknown, or the user is not privileged and tried to alter an interface's configuration. .Sh SEE ALSO .Xr netstat 1 , .Xr carp 4 , .Xr gif 4 , .Xr netintro 4 , .Xr pfsync 4 , .Xr polling 4 , .Xr vlan 4 , .Xr vxlan 4 , .Xr devd.conf 5 , .Xr devd 8 , .Xr jail 8 , .Xr rc 8 , .Xr routed 8 , .Xr sysctl 8 .Sh HISTORY The .Nm utility appeared in .Bx 4.2 . .Sh BUGS Basic IPv6 node operation requires a link-local address on each interface configured for IPv6. Normally, such an address is automatically configured by the kernel on each interface added to the system or enabled; this behavior may be disabled by setting per-interface flag .Cm -auto_linklocal . The default value of this flag is 1 and can be disabled by using the sysctl MIB variable .Va net.inet6.ip6.auto_linklocal . .Pp Do not configure IPv6 addresses with no link-local address by using .Nm . It can result in unexpected behaviors of the kernel. diff --git a/sys/net/if_vxlan.c b/sys/net/if_vxlan.c index 99efbe255695..a1e925195d74 100644 --- a/sys/net/if_vxlan.c +++ b/sys/net/if_vxlan.c @@ -1,3663 +1,3686 @@ /*- * Copyright (c) 2014, Bryan Venteicher * All rights reserved. * Copyright (c) 2020, Chelsio Communications. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "opt_inet.h" #include "opt_inet6.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct vxlan_softc; LIST_HEAD(vxlan_softc_head, vxlan_softc); struct sx vxlan_sx; SX_SYSINIT(vxlan, &vxlan_sx, "VXLAN global start/stop lock"); struct vxlan_socket_mc_info { union vxlan_sockaddr vxlsomc_saddr; union vxlan_sockaddr vxlsomc_gaddr; int vxlsomc_ifidx; int vxlsomc_users; }; /* * The maximum MTU of encapsulated ethernet frame within IPv4/UDP packet. */ #define VXLAN_MAX_MTU (IP_MAXPACKET - \ 60 /* Maximum IPv4 header len */ - \ sizeof(struct udphdr) - \ sizeof(struct vxlan_header) - \ ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) #define VXLAN_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU) #define VXLAN_SO_MC_MAX_GROUPS 32 #define VXLAN_SO_VNI_HASH_SHIFT 6 #define VXLAN_SO_VNI_HASH_SIZE (1 << VXLAN_SO_VNI_HASH_SHIFT) #define VXLAN_SO_VNI_HASH(_vni) ((_vni) % VXLAN_SO_VNI_HASH_SIZE) struct vxlan_socket { struct socket *vxlso_sock; struct rmlock vxlso_lock; u_int vxlso_refcnt; union vxlan_sockaddr vxlso_laddr; LIST_ENTRY(vxlan_socket) vxlso_entry; struct vxlan_softc_head vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE]; struct vxlan_socket_mc_info vxlso_mc[VXLAN_SO_MC_MAX_GROUPS]; }; #define VXLAN_SO_RLOCK(_vso, _p) rm_rlock(&(_vso)->vxlso_lock, (_p)) #define VXLAN_SO_RUNLOCK(_vso, _p) rm_runlock(&(_vso)->vxlso_lock, (_p)) #define VXLAN_SO_WLOCK(_vso) rm_wlock(&(_vso)->vxlso_lock) #define VXLAN_SO_WUNLOCK(_vso) rm_wunlock(&(_vso)->vxlso_lock) #define VXLAN_SO_LOCK_ASSERT(_vso) \ rm_assert(&(_vso)->vxlso_lock, RA_LOCKED) #define VXLAN_SO_LOCK_WASSERT(_vso) \ rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED) #define VXLAN_SO_ACQUIRE(_vso) refcount_acquire(&(_vso)->vxlso_refcnt) #define VXLAN_SO_RELEASE(_vso) refcount_release(&(_vso)->vxlso_refcnt) struct vxlan_ftable_entry { LIST_ENTRY(vxlan_ftable_entry) vxlfe_hash; uint16_t vxlfe_flags; uint8_t vxlfe_mac[ETHER_ADDR_LEN]; union vxlan_sockaddr vxlfe_raddr; time_t vxlfe_expire; }; #define VXLAN_FE_FLAG_DYNAMIC 0x01 #define VXLAN_FE_FLAG_STATIC 0x02 #define VXLAN_FE_IS_DYNAMIC(_fe) \ ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC) #define VXLAN_SC_FTABLE_SHIFT 9 #define VXLAN_SC_FTABLE_SIZE (1 << VXLAN_SC_FTABLE_SHIFT) #define VXLAN_SC_FTABLE_MASK (VXLAN_SC_FTABLE_SIZE - 1) #define VXLAN_SC_FTABLE_HASH(_sc, _mac) \ (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE) LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry); struct vxlan_statistics { uint32_t ftable_nospace; uint32_t ftable_lock_upgrade_failed; counter_u64_t txcsum; counter_u64_t tso; counter_u64_t rxcsum; }; struct vxlan_softc { struct ifnet *vxl_ifp; int vxl_reqcap; + u_int vxl_fibnum; struct vxlan_socket *vxl_sock; uint32_t vxl_vni; union vxlan_sockaddr vxl_src_addr; union vxlan_sockaddr vxl_dst_addr; uint32_t vxl_flags; #define VXLAN_FLAG_INIT 0x0001 #define VXLAN_FLAG_TEARDOWN 0x0002 #define VXLAN_FLAG_LEARN 0x0004 #define VXLAN_FLAG_USER_MTU 0x0008 uint32_t vxl_port_hash_key; uint16_t vxl_min_port; uint16_t vxl_max_port; uint8_t vxl_ttl; /* Lookup table from MAC address to forwarding entry. */ uint32_t vxl_ftable_cnt; uint32_t vxl_ftable_max; uint32_t vxl_ftable_timeout; uint32_t vxl_ftable_hash_key; struct vxlan_ftable_head *vxl_ftable; /* Derived from vxl_dst_addr. */ struct vxlan_ftable_entry vxl_default_fe; struct ip_moptions *vxl_im4o; struct ip6_moptions *vxl_im6o; struct rmlock vxl_lock; volatile u_int vxl_refcnt; int vxl_unit; int vxl_vso_mc_index; struct vxlan_statistics vxl_stats; struct sysctl_oid *vxl_sysctl_node; struct sysctl_ctx_list vxl_sysctl_ctx; struct callout vxl_callout; struct ether_addr vxl_hwaddr; int vxl_mc_ifindex; struct ifnet *vxl_mc_ifp; struct ifmedia vxl_media; char vxl_mc_ifname[IFNAMSIZ]; LIST_ENTRY(vxlan_softc) vxl_entry; LIST_ENTRY(vxlan_softc) vxl_ifdetach_list; /* For rate limiting errors on the tx fast path. */ struct timeval err_time; int err_pps; }; #define VXLAN_RLOCK(_sc, _p) rm_rlock(&(_sc)->vxl_lock, (_p)) #define VXLAN_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->vxl_lock, (_p)) #define VXLAN_WLOCK(_sc) rm_wlock(&(_sc)->vxl_lock) #define VXLAN_WUNLOCK(_sc) rm_wunlock(&(_sc)->vxl_lock) #define VXLAN_LOCK_WOWNED(_sc) rm_wowned(&(_sc)->vxl_lock) #define VXLAN_LOCK_ASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_LOCKED) #define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED) #define VXLAN_UNLOCK(_sc, _p) do { \ if (VXLAN_LOCK_WOWNED(_sc)) \ VXLAN_WUNLOCK(_sc); \ else \ VXLAN_RUNLOCK(_sc, _p); \ } while (0) #define VXLAN_ACQUIRE(_sc) refcount_acquire(&(_sc)->vxl_refcnt) #define VXLAN_RELEASE(_sc) refcount_release(&(_sc)->vxl_refcnt) #define satoconstsin(sa) ((const struct sockaddr_in *)(sa)) #define satoconstsin6(sa) ((const struct sockaddr_in6 *)(sa)) struct vxlanudphdr { struct udphdr vxlh_udp; struct vxlan_header vxlh_hdr; } __packed; static int vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *); static void vxlan_ftable_init(struct vxlan_softc *); static void vxlan_ftable_fini(struct vxlan_softc *); static void vxlan_ftable_flush(struct vxlan_softc *, int); static void vxlan_ftable_expire(struct vxlan_softc *); static int vxlan_ftable_update_locked(struct vxlan_softc *, const union vxlan_sockaddr *, const uint8_t *, struct rm_priotracker *); static int vxlan_ftable_learn(struct vxlan_softc *, const struct sockaddr *, const uint8_t *); static int vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS); static struct vxlan_ftable_entry * vxlan_ftable_entry_alloc(void); static void vxlan_ftable_entry_free(struct vxlan_ftable_entry *); static void vxlan_ftable_entry_init(struct vxlan_softc *, struct vxlan_ftable_entry *, const uint8_t *, const struct sockaddr *, uint32_t); static void vxlan_ftable_entry_destroy(struct vxlan_softc *, struct vxlan_ftable_entry *); static int vxlan_ftable_entry_insert(struct vxlan_softc *, struct vxlan_ftable_entry *); static struct vxlan_ftable_entry * vxlan_ftable_entry_lookup(struct vxlan_softc *, const uint8_t *); static void vxlan_ftable_entry_dump(struct vxlan_ftable_entry *, struct sbuf *); static struct vxlan_socket * vxlan_socket_alloc(const union vxlan_sockaddr *); static void vxlan_socket_destroy(struct vxlan_socket *); static void vxlan_socket_release(struct vxlan_socket *); static struct vxlan_socket * vxlan_socket_lookup(union vxlan_sockaddr *vxlsa); static void vxlan_socket_insert(struct vxlan_socket *); static int vxlan_socket_init(struct vxlan_socket *, struct ifnet *); static int vxlan_socket_bind(struct vxlan_socket *, struct ifnet *); static int vxlan_socket_create(struct ifnet *, int, const union vxlan_sockaddr *, struct vxlan_socket **); static void vxlan_socket_ifdetach(struct vxlan_socket *, struct ifnet *, struct vxlan_softc_head *); static struct vxlan_socket * vxlan_socket_mc_lookup(const union vxlan_sockaddr *); static int vxlan_sockaddr_mc_info_match( const struct vxlan_socket_mc_info *, const union vxlan_sockaddr *, const union vxlan_sockaddr *, int); static int vxlan_socket_mc_join_group(struct vxlan_socket *, const union vxlan_sockaddr *, const union vxlan_sockaddr *, int *, union vxlan_sockaddr *); static int vxlan_socket_mc_leave_group(struct vxlan_socket *, const union vxlan_sockaddr *, const union vxlan_sockaddr *, int); static int vxlan_socket_mc_add_group(struct vxlan_socket *, const union vxlan_sockaddr *, const union vxlan_sockaddr *, int, int *); static void vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *, int); static struct vxlan_softc * vxlan_socket_lookup_softc_locked(struct vxlan_socket *, uint32_t); static struct vxlan_softc * vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t); static int vxlan_socket_insert_softc(struct vxlan_socket *, struct vxlan_softc *); static void vxlan_socket_remove_softc(struct vxlan_socket *, struct vxlan_softc *); static struct ifnet * vxlan_multicast_if_ref(struct vxlan_softc *, int); static void vxlan_free_multicast(struct vxlan_softc *); static int vxlan_setup_multicast_interface(struct vxlan_softc *); static int vxlan_setup_multicast(struct vxlan_softc *); static int vxlan_setup_socket(struct vxlan_softc *); #ifdef INET6 static void vxlan_setup_zero_checksum_port(struct vxlan_softc *); #endif static void vxlan_setup_interface_hdrlen(struct vxlan_softc *); static int vxlan_valid_init_config(struct vxlan_softc *); static void vxlan_init_wait(struct vxlan_softc *); static void vxlan_init_complete(struct vxlan_softc *); static void vxlan_init(void *); static void vxlan_release(struct vxlan_softc *); static void vxlan_teardown_wait(struct vxlan_softc *); static void vxlan_teardown_complete(struct vxlan_softc *); static void vxlan_teardown_locked(struct vxlan_softc *); static void vxlan_teardown(struct vxlan_softc *); static void vxlan_ifdetach(struct vxlan_softc *, struct ifnet *, struct vxlan_softc_head *); static void vxlan_timer(void *); static int vxlan_ctrl_get_config(struct vxlan_softc *, void *); static int vxlan_ctrl_set_vni(struct vxlan_softc *, void *); static int vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *); static int vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *); static int vxlan_ctrl_set_local_port(struct vxlan_softc *, void *); static int vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *); static int vxlan_ctrl_set_port_range(struct vxlan_softc *, void *); static int vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *); static int vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *); static int vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *); static int vxlan_ctrl_set_ttl(struct vxlan_softc *, void *); static int vxlan_ctrl_set_learn(struct vxlan_softc *, void *); static int vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *); static int vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *); static int vxlan_ctrl_flush(struct vxlan_softc *, void *); static int vxlan_ioctl_drvspec(struct vxlan_softc *, struct ifdrv *, int); static int vxlan_ioctl_ifflags(struct vxlan_softc *); static int vxlan_ioctl(struct ifnet *, u_long, caddr_t); #if defined(INET) || defined(INET6) static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *); static void vxlan_encap_header(struct vxlan_softc *, struct mbuf *, int, uint16_t, uint16_t); #endif static int vxlan_encap4(struct vxlan_softc *, const union vxlan_sockaddr *, struct mbuf *); static int vxlan_encap6(struct vxlan_softc *, const union vxlan_sockaddr *, struct mbuf *); static int vxlan_transmit(struct ifnet *, struct mbuf *); static void vxlan_qflush(struct ifnet *); static bool vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *, const struct sockaddr *, void *); static int vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **, const struct sockaddr *); static int vxlan_stats_alloc(struct vxlan_softc *); static void vxlan_stats_free(struct vxlan_softc *); static void vxlan_set_default_config(struct vxlan_softc *); static int vxlan_set_user_config(struct vxlan_softc *, struct ifvxlanparam *); static int vxlan_set_reqcap(struct vxlan_softc *, struct ifnet *, int); static void vxlan_set_hwcaps(struct vxlan_softc *); static int vxlan_clone_create(struct if_clone *, int, caddr_t); static void vxlan_clone_destroy(struct ifnet *); static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *); static int vxlan_media_change(struct ifnet *); static void vxlan_media_status(struct ifnet *, struct ifmediareq *); static int vxlan_sockaddr_cmp(const union vxlan_sockaddr *, const struct sockaddr *); static void vxlan_sockaddr_copy(union vxlan_sockaddr *, const struct sockaddr *); static int vxlan_sockaddr_in_equal(const union vxlan_sockaddr *, const struct sockaddr *); static void vxlan_sockaddr_in_copy(union vxlan_sockaddr *, const struct sockaddr *); static int vxlan_sockaddr_supported(const union vxlan_sockaddr *, int); static int vxlan_sockaddr_in_any(const union vxlan_sockaddr *); static int vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *); static int vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *); static int vxlan_can_change_config(struct vxlan_softc *); static int vxlan_check_vni(uint32_t); static int vxlan_check_ttl(int); static int vxlan_check_ftable_timeout(uint32_t); static int vxlan_check_ftable_max(uint32_t); static void vxlan_sysctl_setup(struct vxlan_softc *); static void vxlan_sysctl_destroy(struct vxlan_softc *); static int vxlan_tunable_int(struct vxlan_softc *, const char *, int); static void vxlan_ifdetach_event(void *, struct ifnet *); static void vxlan_load(void); static void vxlan_unload(void); static int vxlan_modevent(module_t, int, void *); static const char vxlan_name[] = "vxlan"; static MALLOC_DEFINE(M_VXLAN, vxlan_name, "Virtual eXtensible LAN Interface"); static struct if_clone *vxlan_cloner; static struct mtx vxlan_list_mtx; #define VXLAN_LIST_LOCK() mtx_lock(&vxlan_list_mtx) #define VXLAN_LIST_UNLOCK() mtx_unlock(&vxlan_list_mtx) static LIST_HEAD(, vxlan_socket) vxlan_socket_list; static eventhandler_tag vxlan_ifdetach_event_tag; SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Virtual eXtensible Local Area Network"); static int vxlan_legacy_port = 0; TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port); static int vxlan_reuse_port = 0; TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port); /* Default maximum number of addresses in the forwarding table. */ #ifndef VXLAN_FTABLE_MAX #define VXLAN_FTABLE_MAX 2000 #endif /* Timeout (in seconds) of addresses learned in the forwarding table. */ #ifndef VXLAN_FTABLE_TIMEOUT #define VXLAN_FTABLE_TIMEOUT (20 * 60) #endif /* * Maximum timeout (in seconds) of addresses learned in the forwarding * table. */ #ifndef VXLAN_FTABLE_MAX_TIMEOUT #define VXLAN_FTABLE_MAX_TIMEOUT (60 * 60 * 24) #endif /* Number of seconds between pruning attempts of the forwarding table. */ #ifndef VXLAN_FTABLE_PRUNE #define VXLAN_FTABLE_PRUNE (5 * 60) #endif static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE; struct vxlan_control { int (*vxlc_func)(struct vxlan_softc *, void *); int vxlc_argsize; int vxlc_flags; #define VXLAN_CTRL_FLAG_COPYIN 0x01 #define VXLAN_CTRL_FLAG_COPYOUT 0x02 #define VXLAN_CTRL_FLAG_SUSER 0x04 }; static const struct vxlan_control vxlan_control_table[] = { [VXLAN_CMD_GET_CONFIG] = { vxlan_ctrl_get_config, sizeof(struct ifvxlancfg), VXLAN_CTRL_FLAG_COPYOUT }, [VXLAN_CMD_SET_VNI] = { vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_LOCAL_ADDR] = { vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_REMOTE_ADDR] = { vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_LOCAL_PORT] = { vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_REMOTE_PORT] = { vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_PORT_RANGE] = { vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_FTABLE_TIMEOUT] = { vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_FTABLE_MAX] = { vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_MULTICAST_IF] = { vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_TTL] = { vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_SET_LEARN] = { vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_FTABLE_ENTRY_ADD] = { vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_FTABLE_ENTRY_REM] = { vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, [VXLAN_CMD_FLUSH] = { vxlan_ctrl_flush, sizeof(struct ifvxlancmd), VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, }, }; static const int vxlan_control_table_size = nitems(vxlan_control_table); static int vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b) { int i, d; for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) d = ((int)a[i]) - ((int)b[i]); return (d); } static void vxlan_ftable_init(struct vxlan_softc *sc) { int i; sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) * VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK); for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) LIST_INIT(&sc->vxl_ftable[i]); sc->vxl_ftable_hash_key = arc4random(); } static void vxlan_ftable_fini(struct vxlan_softc *sc) { int i; for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]), ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i)); } MPASS(sc->vxl_ftable_cnt == 0); free(sc->vxl_ftable, M_VXLAN); sc->vxl_ftable = NULL; } static void vxlan_ftable_flush(struct vxlan_softc *sc, int all) { struct vxlan_ftable_entry *fe, *tfe; int i; for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) { if (all || VXLAN_FE_IS_DYNAMIC(fe)) vxlan_ftable_entry_destroy(sc, fe); } } } static void vxlan_ftable_expire(struct vxlan_softc *sc) { struct vxlan_ftable_entry *fe, *tfe; int i; VXLAN_LOCK_WASSERT(sc); for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) { if (VXLAN_FE_IS_DYNAMIC(fe) && time_uptime >= fe->vxlfe_expire) vxlan_ftable_entry_destroy(sc, fe); } } } static int vxlan_ftable_update_locked(struct vxlan_softc *sc, const union vxlan_sockaddr *vxlsa, const uint8_t *mac, struct rm_priotracker *tracker) { struct vxlan_ftable_entry *fe; int error __unused; VXLAN_LOCK_ASSERT(sc); again: /* * A forwarding entry for this MAC address might already exist. If * so, update it, otherwise create a new one. We may have to upgrade * the lock if we have to change or create an entry. */ fe = vxlan_ftable_entry_lookup(sc, mac); if (fe != NULL) { fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout; if (!VXLAN_FE_IS_DYNAMIC(fe) || vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, &vxlsa->sa)) return (0); if (!VXLAN_LOCK_WOWNED(sc)) { VXLAN_RUNLOCK(sc, tracker); VXLAN_WLOCK(sc); sc->vxl_stats.ftable_lock_upgrade_failed++; goto again; } vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, &vxlsa->sa); return (0); } if (!VXLAN_LOCK_WOWNED(sc)) { VXLAN_RUNLOCK(sc, tracker); VXLAN_WLOCK(sc); sc->vxl_stats.ftable_lock_upgrade_failed++; goto again; } if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) { sc->vxl_stats.ftable_nospace++; return (ENOSPC); } fe = vxlan_ftable_entry_alloc(); if (fe == NULL) return (ENOMEM); vxlan_ftable_entry_init(sc, fe, mac, &vxlsa->sa, VXLAN_FE_FLAG_DYNAMIC); /* The prior lookup failed, so the insert should not. */ error = vxlan_ftable_entry_insert(sc, fe); MPASS(error == 0); return (0); } static int vxlan_ftable_learn(struct vxlan_softc *sc, const struct sockaddr *sa, const uint8_t *mac) { struct rm_priotracker tracker; union vxlan_sockaddr vxlsa; int error; /* * The source port may be randomly selected by the remote host, so * use the port of the default destination address. */ vxlan_sockaddr_copy(&vxlsa, sa); vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port; if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) { error = vxlan_sockaddr_in6_embedscope(&vxlsa); if (error) return (error); } VXLAN_RLOCK(sc, &tracker); error = vxlan_ftable_update_locked(sc, &vxlsa, mac, &tracker); VXLAN_UNLOCK(sc, &tracker); return (error); } static int vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS) { struct rm_priotracker tracker; struct sbuf sb; struct vxlan_softc *sc; struct vxlan_ftable_entry *fe; size_t size; int i, error; /* * This is mostly intended for debugging during development. It is * not practical to dump an entire large table this way. */ sc = arg1; size = PAGE_SIZE; /* Calculate later. */ sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN); sbuf_putc(&sb, '\n'); VXLAN_RLOCK(sc, &tracker); for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) { if (sbuf_error(&sb) != 0) break; vxlan_ftable_entry_dump(fe, &sb); } } VXLAN_RUNLOCK(sc, &tracker); if (sbuf_len(&sb) == 1) sbuf_setpos(&sb, 0); sbuf_finish(&sb); error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); sbuf_delete(&sb); return (error); } static struct vxlan_ftable_entry * vxlan_ftable_entry_alloc(void) { struct vxlan_ftable_entry *fe; fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT); return (fe); } static void vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe) { free(fe, M_VXLAN); } static void vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe, const uint8_t *mac, const struct sockaddr *sa, uint32_t flags) { fe->vxlfe_flags = flags; fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout; memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN); vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa); } static void vxlan_ftable_entry_destroy(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe) { sc->vxl_ftable_cnt--; LIST_REMOVE(fe, vxlfe_hash); vxlan_ftable_entry_free(fe); } static int vxlan_ftable_entry_insert(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe) { struct vxlan_ftable_entry *lfe; uint32_t hash; int dir; VXLAN_LOCK_WASSERT(sc); hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac); lfe = LIST_FIRST(&sc->vxl_ftable[hash]); if (lfe == NULL) { LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash); goto out; } do { dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac); if (dir == 0) return (EEXIST); if (dir > 0) { LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash); goto out; } else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) { LIST_INSERT_AFTER(lfe, fe, vxlfe_hash); goto out; } else lfe = LIST_NEXT(lfe, vxlfe_hash); } while (lfe != NULL); out: sc->vxl_ftable_cnt++; return (0); } static struct vxlan_ftable_entry * vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac) { struct vxlan_ftable_entry *fe; uint32_t hash; int dir; VXLAN_LOCK_ASSERT(sc); hash = VXLAN_SC_FTABLE_HASH(sc, mac); LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) { dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac); if (dir == 0) return (fe); if (dir > 0) break; } return (NULL); } static void vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb) { char buf[64]; const union vxlan_sockaddr *sa; const void *addr; int i, len, af, width; sa = &fe->vxlfe_raddr; af = sa->sa.sa_family; len = sbuf_len(sb); sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S', fe->vxlfe_flags); for (i = 0; i < ETHER_ADDR_LEN - 1; i++) sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]); sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]); if (af == AF_INET) { addr = &sa->in4.sin_addr; width = INET_ADDRSTRLEN - 1; } else { addr = &sa->in6.sin6_addr; width = INET6_ADDRSTRLEN - 1; } inet_ntop(af, addr, buf, sizeof(buf)); sbuf_printf(sb, "%*s ", width, buf); sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire); sbuf_putc(sb, '\n'); /* Truncate a partial line. */ if (sbuf_error(sb) != 0) sbuf_setpos(sb, len); } static struct vxlan_socket * vxlan_socket_alloc(const union vxlan_sockaddr *sa) { struct vxlan_socket *vso; int i; vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO); rm_init(&vso->vxlso_lock, "vxlansorm"); refcount_init(&vso->vxlso_refcnt, 0); for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) LIST_INIT(&vso->vxlso_vni_hash[i]); vso->vxlso_laddr = *sa; return (vso); } static void vxlan_socket_destroy(struct vxlan_socket *vso) { struct socket *so; #ifdef INVARIANTS int i; struct vxlan_socket_mc_info *mc; for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) { mc = &vso->vxlso_mc[i]; KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC, ("%s: socket %p mc[%d] still has address", __func__, vso, i)); } for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) { KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]), ("%s: socket %p vni_hash[%d] not empty", __func__, vso, i)); } #endif so = vso->vxlso_sock; if (so != NULL) { vso->vxlso_sock = NULL; soclose(so); } rm_destroy(&vso->vxlso_lock); free(vso, M_VXLAN); } static void vxlan_socket_release(struct vxlan_socket *vso) { int destroy; VXLAN_LIST_LOCK(); destroy = VXLAN_SO_RELEASE(vso); if (destroy != 0) LIST_REMOVE(vso, vxlso_entry); VXLAN_LIST_UNLOCK(); if (destroy != 0) vxlan_socket_destroy(vso); } static struct vxlan_socket * vxlan_socket_lookup(union vxlan_sockaddr *vxlsa) { struct vxlan_socket *vso; VXLAN_LIST_LOCK(); LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) { if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) { VXLAN_SO_ACQUIRE(vso); break; } } VXLAN_LIST_UNLOCK(); return (vso); } static void vxlan_socket_insert(struct vxlan_socket *vso) { VXLAN_LIST_LOCK(); VXLAN_SO_ACQUIRE(vso); LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry); VXLAN_LIST_UNLOCK(); } static int vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp) { struct thread *td; int error; td = curthread; error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock, SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td); if (error) { if_printf(ifp, "cannot create socket: %d\n", error); return (error); } error = udp_set_kernel_tunneling(vso->vxlso_sock, vxlan_rcv_udp_packet, NULL, vso); if (error) { if_printf(ifp, "cannot set tunneling function: %d\n", error); return (error); } if (vxlan_reuse_port != 0) { struct sockopt sopt; int val = 1; bzero(&sopt, sizeof(sopt)); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_IP; sopt.sopt_name = SO_REUSEPORT; sopt.sopt_val = &val; sopt.sopt_valsize = sizeof(val); error = sosetopt(vso->vxlso_sock, &sopt); if (error) { if_printf(ifp, "cannot set REUSEADDR socket opt: %d\n", error); return (error); } } return (0); } static int vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp) { union vxlan_sockaddr laddr; struct thread *td; int error; td = curthread; laddr = vso->vxlso_laddr; error = sobind(vso->vxlso_sock, &laddr.sa, td); if (error) { if (error != EADDRINUSE) if_printf(ifp, "cannot bind socket: %d\n", error); return (error); } return (0); } static int vxlan_socket_create(struct ifnet *ifp, int multicast, const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop) { union vxlan_sockaddr laddr; struct vxlan_socket *vso; int error; laddr = *saddr; /* * If this socket will be multicast, then only the local port * must be specified when binding. */ if (multicast != 0) { if (VXLAN_SOCKADDR_IS_IPV4(&laddr)) laddr.in4.sin_addr.s_addr = INADDR_ANY; #ifdef INET6 else laddr.in6.sin6_addr = in6addr_any; #endif } vso = vxlan_socket_alloc(&laddr); if (vso == NULL) return (ENOMEM); error = vxlan_socket_init(vso, ifp); if (error) goto fail; error = vxlan_socket_bind(vso, ifp); if (error) goto fail; /* * There is a small window between the bind completing and * inserting the socket, so that a concurrent create may fail. * Let's not worry about that for now. */ vxlan_socket_insert(vso); *vsop = vso; return (0); fail: vxlan_socket_destroy(vso); return (error); } static void vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp, struct vxlan_softc_head *list) { struct rm_priotracker tracker; struct vxlan_softc *sc; int i; VXLAN_SO_RLOCK(vso, &tracker); for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) { LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry) vxlan_ifdetach(sc, ifp, list); } VXLAN_SO_RUNLOCK(vso, &tracker); } static struct vxlan_socket * vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa) { union vxlan_sockaddr laddr; struct vxlan_socket *vso; laddr = *vxlsa; if (VXLAN_SOCKADDR_IS_IPV4(&laddr)) laddr.in4.sin_addr.s_addr = INADDR_ANY; #ifdef INET6 else laddr.in6.sin6_addr = in6addr_any; #endif vso = vxlan_socket_lookup(&laddr); return (vso); } static int vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc, const union vxlan_sockaddr *group, const union vxlan_sockaddr *local, int ifidx) { if (!vxlan_sockaddr_in_any(local) && !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa)) return (0); if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa)) return (0); if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx) return (0); return (1); } static int vxlan_socket_mc_join_group(struct vxlan_socket *vso, const union vxlan_sockaddr *group, const union vxlan_sockaddr *local, int *ifidx, union vxlan_sockaddr *source) { struct sockopt sopt; int error; *source = *local; if (VXLAN_SOCKADDR_IS_IPV4(group)) { struct ip_mreq mreq; mreq.imr_multiaddr = group->in4.sin_addr; mreq.imr_interface = local->in4.sin_addr; bzero(&sopt, sizeof(sopt)); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_IP; sopt.sopt_name = IP_ADD_MEMBERSHIP; sopt.sopt_val = &mreq; sopt.sopt_valsize = sizeof(mreq); error = sosetopt(vso->vxlso_sock, &sopt); if (error) return (error); /* * BMV: Ideally, there would be a formal way for us to get * the local interface that was selected based on the * imr_interface address. We could then update *ifidx so * vxlan_sockaddr_mc_info_match() would return a match for * later creates that explicitly set the multicast interface. * * If we really need to, we can of course look in the INP's * membership list: * sotoinpcb(vso->vxlso_sock)->inp_moptions-> * imo_head[]->imf_inm->inm_ifp * similarly to imo_match_group(). */ source->in4.sin_addr = local->in4.sin_addr; } else if (VXLAN_SOCKADDR_IS_IPV6(group)) { struct ipv6_mreq mreq; mreq.ipv6mr_multiaddr = group->in6.sin6_addr; mreq.ipv6mr_interface = *ifidx; bzero(&sopt, sizeof(sopt)); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_IPV6; sopt.sopt_name = IPV6_JOIN_GROUP; sopt.sopt_val = &mreq; sopt.sopt_valsize = sizeof(mreq); error = sosetopt(vso->vxlso_sock, &sopt); if (error) return (error); /* * BMV: As with IPv4, we would really like to know what * interface in6p_lookup_mcast_ifp() selected. */ } else error = EAFNOSUPPORT; return (error); } static int vxlan_socket_mc_leave_group(struct vxlan_socket *vso, const union vxlan_sockaddr *group, const union vxlan_sockaddr *source, int ifidx) { struct sockopt sopt; int error; bzero(&sopt, sizeof(sopt)); sopt.sopt_dir = SOPT_SET; if (VXLAN_SOCKADDR_IS_IPV4(group)) { struct ip_mreq mreq; mreq.imr_multiaddr = group->in4.sin_addr; mreq.imr_interface = source->in4.sin_addr; sopt.sopt_level = IPPROTO_IP; sopt.sopt_name = IP_DROP_MEMBERSHIP; sopt.sopt_val = &mreq; sopt.sopt_valsize = sizeof(mreq); error = sosetopt(vso->vxlso_sock, &sopt); } else if (VXLAN_SOCKADDR_IS_IPV6(group)) { struct ipv6_mreq mreq; mreq.ipv6mr_multiaddr = group->in6.sin6_addr; mreq.ipv6mr_interface = ifidx; sopt.sopt_level = IPPROTO_IPV6; sopt.sopt_name = IPV6_LEAVE_GROUP; sopt.sopt_val = &mreq; sopt.sopt_valsize = sizeof(mreq); error = sosetopt(vso->vxlso_sock, &sopt); } else error = EAFNOSUPPORT; return (error); } static int vxlan_socket_mc_add_group(struct vxlan_socket *vso, const union vxlan_sockaddr *group, const union vxlan_sockaddr *local, int ifidx, int *idx) { union vxlan_sockaddr source; struct vxlan_socket_mc_info *mc; int i, empty, error; /* * Within a socket, the same multicast group may be used by multiple * interfaces, each with a different network identifier. But a socket * may only join a multicast group once, so keep track of the users * here. */ VXLAN_SO_WLOCK(vso); for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) { mc = &vso->vxlso_mc[i]; if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) { empty++; continue; } if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx)) goto out; } VXLAN_SO_WUNLOCK(vso); if (empty == 0) return (ENOSPC); error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source); if (error) return (error); VXLAN_SO_WLOCK(vso); for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) { mc = &vso->vxlso_mc[i]; if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) { vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa); vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa); mc->vxlsomc_ifidx = ifidx; goto out; } } VXLAN_SO_WUNLOCK(vso); error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx); MPASS(error == 0); return (ENOSPC); out: mc->vxlsomc_users++; VXLAN_SO_WUNLOCK(vso); *idx = i; return (0); } static void vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx) { union vxlan_sockaddr group, source; struct vxlan_socket_mc_info *mc; int ifidx, leave; KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS, ("%s: vso %p idx %d out of bounds", __func__, vso, idx)); leave = 0; mc = &vso->vxlso_mc[idx]; VXLAN_SO_WLOCK(vso); mc->vxlsomc_users--; if (mc->vxlsomc_users == 0) { group = mc->vxlsomc_gaddr; source = mc->vxlsomc_saddr; ifidx = mc->vxlsomc_ifidx; bzero(mc, sizeof(*mc)); leave = 1; } VXLAN_SO_WUNLOCK(vso); if (leave != 0) { /* * Our socket's membership in this group may have already * been removed if we joined through an interface that's * been detached. */ vxlan_socket_mc_leave_group(vso, &group, &source, ifidx); } } static struct vxlan_softc * vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni) { struct vxlan_softc *sc; uint32_t hash; VXLAN_SO_LOCK_ASSERT(vso); hash = VXLAN_SO_VNI_HASH(vni); LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) { if (sc->vxl_vni == vni) { VXLAN_ACQUIRE(sc); break; } } return (sc); } static struct vxlan_softc * vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni) { struct rm_priotracker tracker; struct vxlan_softc *sc; VXLAN_SO_RLOCK(vso, &tracker); sc = vxlan_socket_lookup_softc_locked(vso, vni); VXLAN_SO_RUNLOCK(vso, &tracker); return (sc); } static int vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc) { struct vxlan_softc *tsc; uint32_t vni, hash; vni = sc->vxl_vni; hash = VXLAN_SO_VNI_HASH(vni); VXLAN_SO_WLOCK(vso); tsc = vxlan_socket_lookup_softc_locked(vso, vni); if (tsc != NULL) { VXLAN_SO_WUNLOCK(vso); vxlan_release(tsc); return (EEXIST); } VXLAN_ACQUIRE(sc); LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry); VXLAN_SO_WUNLOCK(vso); return (0); } static void vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc) { VXLAN_SO_WLOCK(vso); LIST_REMOVE(sc, vxl_entry); VXLAN_SO_WUNLOCK(vso); vxlan_release(sc); } static struct ifnet * vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4) { struct ifnet *ifp; VXLAN_LOCK_ASSERT(sc); if (ipv4 && sc->vxl_im4o != NULL) ifp = sc->vxl_im4o->imo_multicast_ifp; else if (!ipv4 && sc->vxl_im6o != NULL) ifp = sc->vxl_im6o->im6o_multicast_ifp; else ifp = NULL; if (ifp != NULL) if_ref(ifp); return (ifp); } static void vxlan_free_multicast(struct vxlan_softc *sc) { if (sc->vxl_mc_ifp != NULL) { if_rele(sc->vxl_mc_ifp); sc->vxl_mc_ifp = NULL; sc->vxl_mc_ifindex = 0; } if (sc->vxl_im4o != NULL) { free(sc->vxl_im4o, M_VXLAN); sc->vxl_im4o = NULL; } if (sc->vxl_im6o != NULL) { free(sc->vxl_im6o, M_VXLAN); sc->vxl_im6o = NULL; } } static int vxlan_setup_multicast_interface(struct vxlan_softc *sc) { struct ifnet *ifp; ifp = ifunit_ref(sc->vxl_mc_ifname); if (ifp == NULL) { if_printf(sc->vxl_ifp, "multicast interface %s does " "not exist\n", sc->vxl_mc_ifname); return (ENOENT); } if ((ifp->if_flags & IFF_MULTICAST) == 0) { if_printf(sc->vxl_ifp, "interface %s does not support " "multicast\n", sc->vxl_mc_ifname); if_rele(ifp); return (ENOTSUP); } sc->vxl_mc_ifp = ifp; sc->vxl_mc_ifindex = ifp->if_index; return (0); } static int vxlan_setup_multicast(struct vxlan_softc *sc) { const union vxlan_sockaddr *group; int error; group = &sc->vxl_dst_addr; error = 0; if (sc->vxl_mc_ifname[0] != '\0') { error = vxlan_setup_multicast_interface(sc); if (error) return (error); } /* * Initialize an multicast options structure that is sufficiently * populated for use in the respective IP output routine. This * structure is typically stored in the socket, but our sockets * may be shared among multiple interfaces. */ if (VXLAN_SOCKADDR_IS_IPV4(group)) { sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN, M_ZERO | M_WAITOK); sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp; sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl; sc->vxl_im4o->imo_multicast_vif = -1; } else if (VXLAN_SOCKADDR_IS_IPV6(group)) { sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN, M_ZERO | M_WAITOK); sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp; sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl; } return (error); } static int vxlan_setup_socket(struct vxlan_softc *sc) { struct vxlan_socket *vso; struct ifnet *ifp; union vxlan_sockaddr *saddr, *daddr; int multicast, error; vso = NULL; ifp = sc->vxl_ifp; saddr = &sc->vxl_src_addr; daddr = &sc->vxl_dst_addr; multicast = vxlan_sockaddr_in_multicast(daddr); MPASS(multicast != -1); sc->vxl_vso_mc_index = -1; /* * Try to create the socket. If that fails, attempt to use an * existing socket. */ error = vxlan_socket_create(ifp, multicast, saddr, &vso); if (error) { if (multicast != 0) vso = vxlan_socket_mc_lookup(saddr); else vso = vxlan_socket_lookup(saddr); if (vso == NULL) { if_printf(ifp, "cannot create socket (error: %d), " "and no existing socket found\n", error); goto out; } } if (multicast != 0) { error = vxlan_setup_multicast(sc); if (error) goto out; error = vxlan_socket_mc_add_group(vso, daddr, saddr, sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index); if (error) goto out; } sc->vxl_sock = vso; error = vxlan_socket_insert_softc(vso, sc); if (error) { sc->vxl_sock = NULL; if_printf(ifp, "network identifier %d already exists in " "this socket\n", sc->vxl_vni); goto out; } return (0); out: if (vso != NULL) { if (sc->vxl_vso_mc_index != -1) { vxlan_socket_mc_release_group_by_idx(vso, sc->vxl_vso_mc_index); sc->vxl_vso_mc_index = -1; } if (multicast != 0) vxlan_free_multicast(sc); vxlan_socket_release(vso); } return (error); } #ifdef INET6 static void vxlan_setup_zero_checksum_port(struct vxlan_softc *sc) { if (!VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_src_addr)) return; MPASS(sc->vxl_src_addr.in6.sin6_port != 0); MPASS(sc->vxl_dst_addr.in6.sin6_port != 0); if (sc->vxl_src_addr.in6.sin6_port != sc->vxl_dst_addr.in6.sin6_port) { if_printf(sc->vxl_ifp, "port %d in src address does not match " "port %d in dst address, rfc6935_port (%d) not updated.\n", ntohs(sc->vxl_src_addr.in6.sin6_port), ntohs(sc->vxl_dst_addr.in6.sin6_port), V_zero_checksum_port); return; } if (V_zero_checksum_port != 0) { if (V_zero_checksum_port != ntohs(sc->vxl_src_addr.in6.sin6_port)) { if_printf(sc->vxl_ifp, "rfc6935_port is already set to " "%d, cannot set it to %d.\n", V_zero_checksum_port, ntohs(sc->vxl_src_addr.in6.sin6_port)); } return; } V_zero_checksum_port = ntohs(sc->vxl_src_addr.in6.sin6_port); if_printf(sc->vxl_ifp, "rfc6935_port set to %d\n", V_zero_checksum_port); } #endif static void vxlan_setup_interface_hdrlen(struct vxlan_softc *sc) { struct ifnet *ifp; VXLAN_LOCK_WASSERT(sc); ifp = sc->vxl_ifp; ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr); if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0) ifp->if_hdrlen += sizeof(struct ip); else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0) ifp->if_hdrlen += sizeof(struct ip6_hdr); if ((sc->vxl_flags & VXLAN_FLAG_USER_MTU) == 0) ifp->if_mtu = ETHERMTU - ifp->if_hdrlen; } static int vxlan_valid_init_config(struct vxlan_softc *sc) { const char *reason; if (vxlan_check_vni(sc->vxl_vni) != 0) { reason = "invalid virtual network identifier specified"; goto fail; } if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) { reason = "source address type is not supported"; goto fail; } if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) { reason = "destination address type is not supported"; goto fail; } if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) { reason = "no valid destination address specified"; goto fail; } if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 && sc->vxl_mc_ifname[0] != '\0') { reason = "can only specify interface with a group address"; goto fail; } if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) { if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^ VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) { reason = "source and destination address must both " "be either IPv4 or IPv6"; goto fail; } } if (sc->vxl_src_addr.in4.sin_port == 0) { reason = "local port not specified"; goto fail; } if (sc->vxl_dst_addr.in4.sin_port == 0) { reason = "remote port not specified"; goto fail; } return (0); fail: if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason); return (EINVAL); } static void vxlan_init_wait(struct vxlan_softc *sc) { VXLAN_LOCK_WASSERT(sc); while (sc->vxl_flags & VXLAN_FLAG_INIT) rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz); } static void vxlan_init_complete(struct vxlan_softc *sc) { VXLAN_WLOCK(sc); sc->vxl_flags &= ~VXLAN_FLAG_INIT; wakeup(sc); VXLAN_WUNLOCK(sc); } static void vxlan_init(void *xsc) { static const uint8_t empty_mac[ETHER_ADDR_LEN]; struct vxlan_softc *sc; struct ifnet *ifp; sc = xsc; ifp = sc->vxl_ifp; sx_xlock(&vxlan_sx); VXLAN_WLOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { VXLAN_WUNLOCK(sc); sx_xunlock(&vxlan_sx); return; } sc->vxl_flags |= VXLAN_FLAG_INIT; VXLAN_WUNLOCK(sc); if (vxlan_valid_init_config(sc) != 0) goto out; if (vxlan_setup_socket(sc) != 0) goto out; #ifdef INET6 vxlan_setup_zero_checksum_port(sc); #endif /* Initialize the default forwarding entry. */ vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac, &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC); VXLAN_WLOCK(sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz, vxlan_timer, sc); VXLAN_WUNLOCK(sc); if_link_state_change(ifp, LINK_STATE_UP); EVENTHANDLER_INVOKE(vxlan_start, ifp, sc->vxl_src_addr.in4.sin_family, ntohs(sc->vxl_src_addr.in4.sin_port)); out: vxlan_init_complete(sc); sx_xunlock(&vxlan_sx); } static void vxlan_release(struct vxlan_softc *sc) { /* * The softc may be destroyed as soon as we release our reference, * so we cannot serialize the wakeup with the softc lock. We use a * timeout in our sleeps so a missed wakeup is unfortunate but not * fatal. */ if (VXLAN_RELEASE(sc) != 0) wakeup(sc); } static void vxlan_teardown_wait(struct vxlan_softc *sc) { VXLAN_LOCK_WASSERT(sc); while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz); } static void vxlan_teardown_complete(struct vxlan_softc *sc) { VXLAN_WLOCK(sc); sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN; wakeup(sc); VXLAN_WUNLOCK(sc); } static void vxlan_teardown_locked(struct vxlan_softc *sc) { struct ifnet *ifp; struct vxlan_socket *vso; sx_assert(&vxlan_sx, SA_XLOCKED); VXLAN_LOCK_WASSERT(sc); MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN); ifp = sc->vxl_ifp; ifp->if_flags &= ~IFF_UP; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; callout_stop(&sc->vxl_callout); vso = sc->vxl_sock; sc->vxl_sock = NULL; VXLAN_WUNLOCK(sc); if_link_state_change(ifp, LINK_STATE_DOWN); EVENTHANDLER_INVOKE(vxlan_stop, ifp, sc->vxl_src_addr.in4.sin_family, ntohs(sc->vxl_src_addr.in4.sin_port)); if (vso != NULL) { vxlan_socket_remove_softc(vso, sc); if (sc->vxl_vso_mc_index != -1) { vxlan_socket_mc_release_group_by_idx(vso, sc->vxl_vso_mc_index); sc->vxl_vso_mc_index = -1; } } VXLAN_WLOCK(sc); while (sc->vxl_refcnt != 0) rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz); VXLAN_WUNLOCK(sc); callout_drain(&sc->vxl_callout); vxlan_free_multicast(sc); if (vso != NULL) vxlan_socket_release(vso); vxlan_teardown_complete(sc); } static void vxlan_teardown(struct vxlan_softc *sc) { sx_xlock(&vxlan_sx); VXLAN_WLOCK(sc); if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) { vxlan_teardown_wait(sc); VXLAN_WUNLOCK(sc); sx_xunlock(&vxlan_sx); return; } sc->vxl_flags |= VXLAN_FLAG_TEARDOWN; vxlan_teardown_locked(sc); sx_xunlock(&vxlan_sx); } static void vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp, struct vxlan_softc_head *list) { VXLAN_WLOCK(sc); if (sc->vxl_mc_ifp != ifp) goto out; if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) goto out; sc->vxl_flags |= VXLAN_FLAG_TEARDOWN; LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list); out: VXLAN_WUNLOCK(sc); } static void vxlan_timer(void *xsc) { struct vxlan_softc *sc; sc = xsc; VXLAN_LOCK_WASSERT(sc); vxlan_ftable_expire(sc); callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz); } static int vxlan_ioctl_ifflags(struct vxlan_softc *sc) { struct ifnet *ifp; ifp = sc->vxl_ifp; if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) vxlan_init(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vxlan_teardown(sc); } return (0); } static int vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg) { struct rm_priotracker tracker; struct ifvxlancfg *cfg; cfg = arg; bzero(cfg, sizeof(*cfg)); VXLAN_RLOCK(sc, &tracker); cfg->vxlc_vni = sc->vxl_vni; memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr, sizeof(union vxlan_sockaddr)); memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr, sizeof(union vxlan_sockaddr)); cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex; cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt; cfg->vxlc_ftable_max = sc->vxl_ftable_max; cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout; cfg->vxlc_port_min = sc->vxl_min_port; cfg->vxlc_port_max = sc->vxl_max_port; cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0; cfg->vxlc_ttl = sc->vxl_ttl; VXLAN_RUNLOCK(sc, &tracker); #ifdef INET6 if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_local_sa)) sa6_recoverscope(&cfg->vxlc_local_sa.in6); if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_remote_sa)) sa6_recoverscope(&cfg->vxlc_remote_sa.in6); #endif return (0); } static int vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; int error; cmd = arg; if (vxlan_check_vni(cmd->vxlcmd_vni) != 0) return (EINVAL); VXLAN_WLOCK(sc); if (vxlan_can_change_config(sc)) { sc->vxl_vni = cmd->vxlcmd_vni; error = 0; } else error = EBUSY; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; union vxlan_sockaddr *vxlsa; int error; cmd = arg; vxlsa = &cmd->vxlcmd_sa; if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa)) return (EINVAL); if (vxlan_sockaddr_in_multicast(vxlsa) != 0) return (EINVAL); if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) { error = vxlan_sockaddr_in6_embedscope(vxlsa); if (error) return (error); } VXLAN_WLOCK(sc); if (vxlan_can_change_config(sc)) { vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa); vxlan_set_hwcaps(sc); error = 0; } else error = EBUSY; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; union vxlan_sockaddr *vxlsa; int error; cmd = arg; vxlsa = &cmd->vxlcmd_sa; if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa)) return (EINVAL); if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) { error = vxlan_sockaddr_in6_embedscope(vxlsa); if (error) return (error); } VXLAN_WLOCK(sc); if (vxlan_can_change_config(sc)) { vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa); vxlan_setup_interface_hdrlen(sc); error = 0; } else error = EBUSY; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; int error; cmd = arg; if (cmd->vxlcmd_port == 0) return (EINVAL); VXLAN_WLOCK(sc); if (vxlan_can_change_config(sc)) { sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port); error = 0; } else error = EBUSY; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; int error; cmd = arg; if (cmd->vxlcmd_port == 0) return (EINVAL); VXLAN_WLOCK(sc); if (vxlan_can_change_config(sc)) { sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port); error = 0; } else error = EBUSY; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; uint16_t min, max; int error; cmd = arg; min = cmd->vxlcmd_port_min; max = cmd->vxlcmd_port_max; if (max < min) return (EINVAL); VXLAN_WLOCK(sc); if (vxlan_can_change_config(sc)) { sc->vxl_min_port = min; sc->vxl_max_port = max; error = 0; } else error = EBUSY; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; int error; cmd = arg; VXLAN_WLOCK(sc); if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) { sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout; error = 0; } else error = EINVAL; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; int error; cmd = arg; VXLAN_WLOCK(sc); if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) { sc->vxl_ftable_max = cmd->vxlcmd_ftable_max; error = 0; } else error = EINVAL; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg) { struct ifvxlancmd *cmd; int error; cmd = arg; VXLAN_WLOCK(sc); if (vxlan_can_change_config(sc)) { strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ); vxlan_set_hwcaps(sc); error = 0; } else error = EBUSY; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; int error; cmd = arg; VXLAN_WLOCK(sc); if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) { sc->vxl_ttl = cmd->vxlcmd_ttl; if (sc->vxl_im4o != NULL) sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl; if (sc->vxl_im6o != NULL) sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl; error = 0; } else error = EINVAL; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; cmd = arg; VXLAN_WLOCK(sc); if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN) sc->vxl_flags |= VXLAN_FLAG_LEARN; else sc->vxl_flags &= ~VXLAN_FLAG_LEARN; VXLAN_WUNLOCK(sc); return (0); } static int vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg) { union vxlan_sockaddr vxlsa; struct ifvxlancmd *cmd; struct vxlan_ftable_entry *fe; int error; cmd = arg; vxlsa = cmd->vxlcmd_sa; if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa)) return (EINVAL); if (vxlan_sockaddr_in_any(&vxlsa) != 0) return (EINVAL); if (vxlan_sockaddr_in_multicast(&vxlsa) != 0) return (EINVAL); /* BMV: We could support both IPv4 and IPv6 later. */ if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family) return (EAFNOSUPPORT); if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) { error = vxlan_sockaddr_in6_embedscope(&vxlsa); if (error) return (error); } fe = vxlan_ftable_entry_alloc(); if (fe == NULL) return (ENOMEM); if (vxlsa.in4.sin_port == 0) vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port; vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa, VXLAN_FE_FLAG_STATIC); VXLAN_WLOCK(sc); error = vxlan_ftable_entry_insert(sc, fe); VXLAN_WUNLOCK(sc); if (error) vxlan_ftable_entry_free(fe); return (error); } static int vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; struct vxlan_ftable_entry *fe; int error; cmd = arg; VXLAN_WLOCK(sc); fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac); if (fe != NULL) { vxlan_ftable_entry_destroy(sc, fe); error = 0; } else error = ENOENT; VXLAN_WUNLOCK(sc); return (error); } static int vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg) { struct ifvxlancmd *cmd; int all; cmd = arg; all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL; VXLAN_WLOCK(sc); vxlan_ftable_flush(sc, all); VXLAN_WUNLOCK(sc); return (0); } static int vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get) { const struct vxlan_control *vc; union { struct ifvxlancfg cfg; struct ifvxlancmd cmd; } args; int out, error; if (ifd->ifd_cmd >= vxlan_control_table_size) return (EINVAL); bzero(&args, sizeof(args)); vc = &vxlan_control_table[ifd->ifd_cmd]; out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0; if ((get != 0 && out == 0) || (get == 0 && out != 0)) return (EINVAL); if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) { error = priv_check(curthread, PRIV_NET_VXLAN); if (error) return (error); } if (ifd->ifd_len != vc->vxlc_argsize || ifd->ifd_len > sizeof(args)) return (EINVAL); if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) { error = copyin(ifd->ifd_data, &args, ifd->ifd_len); if (error) return (error); } error = vc->vxlc_func(sc, &args); if (error) return (error); if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) { error = copyout(&args, ifd->ifd_data, ifd->ifd_len); if (error) return (error); } return (0); } static int vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { + struct rm_priotracker tracker; struct vxlan_softc *sc; struct ifreq *ifr; struct ifdrv *ifd; int error; sc = ifp->if_softc; ifr = (struct ifreq *) data; ifd = (struct ifdrv *) data; error = 0; switch (cmd) { case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCGDRVSPEC: case SIOCSDRVSPEC: error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC); break; case SIOCSIFFLAGS: error = vxlan_ioctl_ifflags(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd); break; case SIOCSIFMTU: if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VXLAN_MAX_MTU) { error = EINVAL; } else { VXLAN_WLOCK(sc); ifp->if_mtu = ifr->ifr_mtu; sc->vxl_flags |= VXLAN_FLAG_USER_MTU; VXLAN_WUNLOCK(sc); } break; case SIOCSIFCAP: VXLAN_WLOCK(sc); error = vxlan_set_reqcap(sc, ifp, ifr->ifr_reqcap); if (error == 0) vxlan_set_hwcaps(sc); VXLAN_WUNLOCK(sc); break; + case SIOCGTUNFIB: + VXLAN_RLOCK(sc, &tracker); + ifr->ifr_fib = sc->vxl_fibnum; + VXLAN_RUNLOCK(sc, &tracker); + break; + + case SIOCSTUNFIB: + if ((error = priv_check(curthread, PRIV_NET_VXLAN)) != 0) + break; + + if (ifr->ifr_fib >= rt_numfibs) + error = EINVAL; + else { + VXLAN_WLOCK(sc); + sc->vxl_fibnum = ifr->ifr_fib; + VXLAN_WUNLOCK(sc); + } + break; + default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } #if defined(INET) || defined(INET6) static uint16_t vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m) { int range; uint32_t hash; range = sc->vxl_max_port - sc->vxl_min_port + 1; if (M_HASHTYPE_ISHASH(m)) hash = m->m_pkthdr.flowid; else hash = jenkins_hash(m->m_data, ETHER_HDR_LEN, sc->vxl_port_hash_key); return (sc->vxl_min_port + (hash % range)); } static void vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff, uint16_t srcport, uint16_t dstport) { struct vxlanudphdr *hdr; struct udphdr *udph; struct vxlan_header *vxh; int len; len = m->m_pkthdr.len - ipoff; MPASS(len >= sizeof(struct vxlanudphdr)); hdr = mtodo(m, ipoff); udph = &hdr->vxlh_udp; udph->uh_sport = srcport; udph->uh_dport = dstport; udph->uh_ulen = htons(len); udph->uh_sum = 0; vxh = &hdr->vxlh_hdr; vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI); vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT); } #endif #if defined(INET6) || defined(INET) /* * Return the CSUM_INNER_* equivalent of CSUM_* caps. */ static uint32_t csum_flags_to_inner_flags(uint32_t csum_flags_in, const uint32_t encap) { uint32_t csum_flags = encap; const uint32_t v4 = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP; /* * csum_flags can request either v4 or v6 offload but not both. * tcp_output always sets CSUM_TSO (both CSUM_IP_TSO and CSUM_IP6_TSO) * so those bits are no good to detect the IP version. Other bits are * always set with CSUM_TSO and we use those to figure out the IP * version. */ if (csum_flags_in & v4) { if (csum_flags_in & CSUM_IP) csum_flags |= CSUM_INNER_IP; if (csum_flags_in & CSUM_IP_UDP) csum_flags |= CSUM_INNER_IP_UDP; if (csum_flags_in & CSUM_IP_TCP) csum_flags |= CSUM_INNER_IP_TCP; if (csum_flags_in & CSUM_IP_TSO) csum_flags |= CSUM_INNER_IP_TSO; } else { #ifdef INVARIANTS const uint32_t v6 = CSUM_IP6_UDP | CSUM_IP6_TCP; MPASS((csum_flags_in & v6) != 0); #endif if (csum_flags_in & CSUM_IP6_UDP) csum_flags |= CSUM_INNER_IP6_UDP; if (csum_flags_in & CSUM_IP6_TCP) csum_flags |= CSUM_INNER_IP6_TCP; if (csum_flags_in & CSUM_IP6_TSO) csum_flags |= CSUM_INNER_IP6_TSO; } return (csum_flags); } #endif static int vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, struct mbuf *m) { #ifdef INET struct ifnet *ifp; struct ip *ip; struct in_addr srcaddr, dstaddr; uint16_t srcport, dstport; int len, mcast, error; struct route route, *ro; struct sockaddr_in *sin; uint32_t csum_flags; NET_EPOCH_ASSERT(); ifp = sc->vxl_ifp; srcaddr = sc->vxl_src_addr.in4.sin_addr; srcport = vxlan_pick_source_port(sc, m); dstaddr = fvxlsa->in4.sin_addr; dstport = fvxlsa->in4.sin_port; M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr), M_NOWAIT); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENOBUFS); } len = m->m_pkthdr.len; ip = mtod(m, struct ip *); ip->ip_tos = 0; ip->ip_len = htons(len); ip->ip_off = 0; ip->ip_ttl = sc->vxl_ttl; ip->ip_p = IPPROTO_UDP; ip->ip_sum = 0; ip->ip_src = srcaddr; ip->ip_dst = dstaddr; vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport); mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; m->m_flags &= ~(M_MCAST | M_BCAST); m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX; if (m->m_pkthdr.csum_flags != 0) { /* * HW checksum (L3 and/or L4) or TSO has been requested. Look * up the ifnet for the outbound route and verify that the * outbound ifnet can perform the requested operation on the * inner frame. */ bzero(&route, sizeof(route)); ro = &route; sin = (struct sockaddr_in *)&ro->ro_dst; sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = ip->ip_dst; - ro->ro_nh = fib4_lookup(RT_DEFAULT_FIB, ip->ip_dst, 0, NHR_NONE, + ro->ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE, 0); if (ro->ro_nh == NULL) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (EHOSTUNREACH); } csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags, CSUM_ENCAP_VXLAN); if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != csum_flags) { if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) { const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp; if_printf(ifp, "interface %s is missing hwcaps " "0x%08x, csum_flags 0x%08x -> 0x%08x, " "hwassist 0x%08x\n", nh_ifp->if_xname, csum_flags & ~(uint32_t)nh_ifp->if_hwassist, m->m_pkthdr.csum_flags, csum_flags, (uint32_t)nh_ifp->if_hwassist); } m_freem(m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENXIO); } m->m_pkthdr.csum_flags = csum_flags; if (csum_flags & (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) { counter_u64_add(sc->vxl_stats.txcsum, 1); if (csum_flags & CSUM_INNER_TSO) counter_u64_add(sc->vxl_stats.tso, 1); } } else ro = NULL; error = ip_output(m, NULL, ro, 0, sc->vxl_im4o, NULL); if (error == 0) { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if (mcast != 0) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); #else m_freem(m); return (ENOTSUP); #endif } static int vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, struct mbuf *m) { #ifdef INET6 struct ifnet *ifp; struct ip6_hdr *ip6; const struct in6_addr *srcaddr, *dstaddr; uint16_t srcport, dstport; int len, mcast, error; struct route_in6 route, *ro; struct sockaddr_in6 *sin6; uint32_t csum_flags; NET_EPOCH_ASSERT(); ifp = sc->vxl_ifp; srcaddr = &sc->vxl_src_addr.in6.sin6_addr; srcport = vxlan_pick_source_port(sc, m); dstaddr = &fvxlsa->in6.sin6_addr; dstport = fvxlsa->in6.sin6_port; M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr), M_NOWAIT); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENOBUFS); } len = m->m_pkthdr.len; ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; /* BMV: Keep in forwarding entry? */ ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_plen = 0; ip6->ip6_nxt = IPPROTO_UDP; ip6->ip6_hlim = sc->vxl_ttl; ip6->ip6_src = *srcaddr; ip6->ip6_dst = *dstaddr; vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport); mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; m->m_flags &= ~(M_MCAST | M_BCAST); ro = NULL; m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX; if (m->m_pkthdr.csum_flags != 0) { /* * HW checksum (L3 and/or L4) or TSO has been requested. Look * up the ifnet for the outbound route and verify that the * outbound ifnet can perform the requested operation on the * inner frame. */ bzero(&route, sizeof(route)); ro = &route; sin6 = (struct sockaddr_in6 *)&ro->ro_dst; sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); sin6->sin6_addr = ip6->ip6_dst; - ro->ro_nh = fib6_lookup(RT_DEFAULT_FIB, &ip6->ip6_dst, 0, + ro->ro_nh = fib6_lookup(M_GETFIB(m), &ip6->ip6_dst, 0, NHR_NONE, 0); if (ro->ro_nh == NULL) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (EHOSTUNREACH); } csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags, CSUM_ENCAP_VXLAN); if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != csum_flags) { if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) { const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp; if_printf(ifp, "interface %s is missing hwcaps " "0x%08x, csum_flags 0x%08x -> 0x%08x, " "hwassist 0x%08x\n", nh_ifp->if_xname, csum_flags & ~(uint32_t)nh_ifp->if_hwassist, m->m_pkthdr.csum_flags, csum_flags, (uint32_t)nh_ifp->if_hwassist); } m_freem(m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENXIO); } m->m_pkthdr.csum_flags = csum_flags; if (csum_flags & (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) { counter_u64_add(sc->vxl_stats.txcsum, 1); if (csum_flags & CSUM_INNER_TSO) counter_u64_add(sc->vxl_stats.tso, 1); } } else if (ntohs(dstport) != V_zero_checksum_port) { struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr)); hdr->uh_sum = in6_cksum_pseudo(ip6, m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0); m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } error = ip6_output(m, NULL, ro, 0, sc->vxl_im6o, NULL, NULL); if (error == 0) { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if (mcast != 0) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); #else m_freem(m); return (ENOTSUP); #endif } static int vxlan_transmit(struct ifnet *ifp, struct mbuf *m) { struct rm_priotracker tracker; union vxlan_sockaddr vxlsa; struct vxlan_softc *sc; struct vxlan_ftable_entry *fe; struct ifnet *mcifp; struct ether_header *eh; int ipv4, error; sc = ifp->if_softc; eh = mtod(m, struct ether_header *); fe = NULL; mcifp = NULL; ETHER_BPF_MTAP(ifp, m); VXLAN_RLOCK(sc, &tracker); + M_SETFIB(m, sc->vxl_fibnum); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VXLAN_RUNLOCK(sc, &tracker); m_freem(m); return (ENETDOWN); } if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost); if (fe == NULL) fe = &sc->vxl_default_fe; vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa); ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0; if (vxlan_sockaddr_in_multicast(&vxlsa) != 0) mcifp = vxlan_multicast_if_ref(sc, ipv4); VXLAN_ACQUIRE(sc); VXLAN_RUNLOCK(sc, &tracker); if (ipv4 != 0) error = vxlan_encap4(sc, &vxlsa, m); else error = vxlan_encap6(sc, &vxlsa, m); vxlan_release(sc); if (mcifp != NULL) if_rele(mcifp); return (error); } static void vxlan_qflush(struct ifnet *ifp __unused) { } static bool vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb, const struct sockaddr *srcsa, void *xvso) { struct vxlan_socket *vso; struct vxlan_header *vxh, vxlanhdr; uint32_t vni; int error __unused; M_ASSERTPKTHDR(m); vso = xvso; offset += sizeof(struct udphdr); if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header)) goto out; if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) { m_copydata(m, offset, sizeof(struct vxlan_header), (caddr_t) &vxlanhdr); vxh = &vxlanhdr; } else vxh = mtodo(m, offset); /* * Drop if there is a reserved bit set in either the flags or VNI * fields of the header. This goes against the specification, but * a bit set may indicate an unsupported new feature. This matches * the behavior of the Linux implementation. */ if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) || vxh->vxlh_vni & ~VXLAN_VNI_MASK) goto out; vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT; /* Adjust to the start of the inner Ethernet frame. */ m_adj_decap(m, offset + sizeof(struct vxlan_header)); error = vxlan_input(vso, vni, &m, srcsa); MPASS(error != 0 || m == NULL); out: if (m != NULL) m_freem(m); return (true); } static int vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0, const struct sockaddr *sa) { struct vxlan_softc *sc; struct ifnet *ifp; struct mbuf *m; struct ether_header *eh; int error; m = *m0; if (m->m_pkthdr.len < ETHER_HDR_LEN) return (EINVAL); sc = vxlan_socket_lookup_softc(vso, vni); if (sc == NULL) return (ENOENT); ifp = sc->vxl_ifp; eh = mtod(m, struct ether_header *); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { error = ENETDOWN; goto out; } else if (ifp == m->m_pkthdr.rcvif) { /* XXX Does not catch more complex loops. */ error = EDEADLK; goto out; } if (sc->vxl_flags & VXLAN_FLAG_LEARN) vxlan_ftable_learn(sc, sa, eh->ether_shost); m_clrprotoflags(m); m->m_pkthdr.rcvif = ifp; M_SETFIB(m, ifp->if_fib); if (((ifp->if_capenable & IFCAP_RXCSUM && m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) || (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && !(m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)))) { uint32_t csum_flags = 0; if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) csum_flags |= CSUM_L3_CALC; if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_VALID) csum_flags |= CSUM_L3_VALID; if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_CALC) csum_flags |= CSUM_L4_CALC; if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_VALID) csum_flags |= CSUM_L4_VALID; m->m_pkthdr.csum_flags = csum_flags; counter_u64_add(sc->vxl_stats.rxcsum, 1); } else { /* clear everything */ m->m_pkthdr.csum_flags = 0; m->m_pkthdr.csum_data = 0; } (*ifp->if_input)(ifp, m); *m0 = NULL; error = 0; out: vxlan_release(sc); return (error); } static int vxlan_stats_alloc(struct vxlan_softc *sc) { struct vxlan_statistics *stats = &sc->vxl_stats; stats->txcsum = counter_u64_alloc(M_WAITOK); if (stats->txcsum == NULL) goto failed; stats->tso = counter_u64_alloc(M_WAITOK); if (stats->tso == NULL) goto failed; stats->rxcsum = counter_u64_alloc(M_WAITOK); if (stats->rxcsum == NULL) goto failed; return (0); failed: vxlan_stats_free(sc); return (ENOMEM); } static void vxlan_stats_free(struct vxlan_softc *sc) { struct vxlan_statistics *stats = &sc->vxl_stats; if (stats->txcsum != NULL) { counter_u64_free(stats->txcsum); stats->txcsum = NULL; } if (stats->tso != NULL) { counter_u64_free(stats->tso); stats->tso = NULL; } if (stats->rxcsum != NULL) { counter_u64_free(stats->rxcsum); stats->rxcsum = NULL; } } static void vxlan_set_default_config(struct vxlan_softc *sc) { sc->vxl_flags |= VXLAN_FLAG_LEARN; sc->vxl_vni = VXLAN_VNI_MAX; sc->vxl_ttl = IPDEFTTL; if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) { sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT); sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT); } else { sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT); sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT); } sc->vxl_min_port = V_ipport_firstauto; sc->vxl_max_port = V_ipport_lastauto; sc->vxl_ftable_max = VXLAN_FTABLE_MAX; sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT; } static int vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp) { #ifndef INET if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 | VXLAN_PARAM_WITH_REMOTE_ADDR4)) return (EAFNOSUPPORT); #endif #ifndef INET6 if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 | VXLAN_PARAM_WITH_REMOTE_ADDR6)) return (EAFNOSUPPORT); #else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) { int error = vxlan_sockaddr_in6_embedscope(&vxlp->vxlp_local_sa); if (error) return (error); } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) { int error = vxlan_sockaddr_in6_embedscope( &vxlp->vxlp_remote_sa); if (error) return (error); } #endif if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) { if (vxlan_check_vni(vxlp->vxlp_vni) == 0) sc->vxl_vni = vxlp->vxlp_vni; } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) { sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in); sc->vxl_src_addr.in4.sin_family = AF_INET; sc->vxl_src_addr.in4.sin_addr = vxlp->vxlp_local_sa.in4.sin_addr; } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) { sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6); sc->vxl_src_addr.in6.sin6_family = AF_INET6; sc->vxl_src_addr.in6.sin6_addr = vxlp->vxlp_local_sa.in6.sin6_addr; } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) { sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in); sc->vxl_dst_addr.in4.sin_family = AF_INET; sc->vxl_dst_addr.in4.sin_addr = vxlp->vxlp_remote_sa.in4.sin_addr; } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) { sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6); sc->vxl_dst_addr.in6.sin6_family = AF_INET6; sc->vxl_dst_addr.in6.sin6_addr = vxlp->vxlp_remote_sa.in6.sin6_addr; } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT) sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port); if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT) sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port); if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) { if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) { sc->vxl_min_port = vxlp->vxlp_min_port; sc->vxl_max_port = vxlp->vxlp_max_port; } } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF) strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ); if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) { if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0) sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout; } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) { if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0) sc->vxl_ftable_max = vxlp->vxlp_ftable_max; } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) { if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0) sc->vxl_ttl = vxlp->vxlp_ttl; } if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) { if (vxlp->vxlp_learn == 0) sc->vxl_flags &= ~VXLAN_FLAG_LEARN; } return (0); } static int vxlan_set_reqcap(struct vxlan_softc *sc, struct ifnet *ifp, int reqcap) { int mask = reqcap ^ ifp->if_capenable; /* Disable TSO if tx checksums are disabled. */ if (mask & IFCAP_TXCSUM && !(reqcap & IFCAP_TXCSUM) && reqcap & IFCAP_TSO4) { reqcap &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum.\n"); } if (mask & IFCAP_TXCSUM_IPV6 && !(reqcap & IFCAP_TXCSUM_IPV6) && reqcap & IFCAP_TSO6) { reqcap &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); } /* Do not enable TSO if tx checksums are disabled. */ if (mask & IFCAP_TSO4 && reqcap & IFCAP_TSO4 && !(reqcap & IFCAP_TXCSUM)) { if_printf(ifp, "enable txcsum first.\n"); return (EAGAIN); } if (mask & IFCAP_TSO6 && reqcap & IFCAP_TSO6 && !(reqcap & IFCAP_TXCSUM_IPV6)) { if_printf(ifp, "enable txcsum6 first.\n"); return (EAGAIN); } sc->vxl_reqcap = reqcap; return (0); } /* * A VXLAN interface inherits the capabilities of the vxlandev or the interface * hosting the vxlanlocal address. */ static void vxlan_set_hwcaps(struct vxlan_softc *sc) { struct epoch_tracker et; struct ifnet *p; struct ifaddr *ifa; u_long hwa; int cap, ena; bool rel; struct ifnet *ifp = sc->vxl_ifp; /* reset caps */ ifp->if_capabilities &= VXLAN_BASIC_IFCAPS; ifp->if_capenable &= VXLAN_BASIC_IFCAPS; ifp->if_hwassist = 0; NET_EPOCH_ENTER(et); CURVNET_SET(ifp->if_vnet); rel = false; p = NULL; if (sc->vxl_mc_ifname[0] != '\0') { rel = true; p = ifunit_ref(sc->vxl_mc_ifname); } else if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) { if (sc->vxl_src_addr.sa.sa_family == AF_INET) { struct sockaddr_in in4 = sc->vxl_src_addr.in4; in4.sin_port = 0; ifa = ifa_ifwithaddr((struct sockaddr *)&in4); if (ifa != NULL) p = ifa->ifa_ifp; } else if (sc->vxl_src_addr.sa.sa_family == AF_INET6) { struct sockaddr_in6 in6 = sc->vxl_src_addr.in6; in6.sin6_port = 0; ifa = ifa_ifwithaddr((struct sockaddr *)&in6); if (ifa != NULL) p = ifa->ifa_ifp; } } if (p == NULL) goto done; cap = ena = hwa = 0; /* checksum offload */ if (p->if_capabilities & IFCAP_VXLAN_HWCSUM) cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); if (p->if_capenable & IFCAP_VXLAN_HWCSUM) { ena |= sc->vxl_reqcap & p->if_capenable & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); if (ena & IFCAP_TXCSUM) { if (p->if_hwassist & CSUM_INNER_IP) hwa |= CSUM_IP; if (p->if_hwassist & CSUM_INNER_IP_UDP) hwa |= CSUM_IP_UDP; if (p->if_hwassist & CSUM_INNER_IP_TCP) hwa |= CSUM_IP_TCP; } if (ena & IFCAP_TXCSUM_IPV6) { if (p->if_hwassist & CSUM_INNER_IP6_UDP) hwa |= CSUM_IP6_UDP; if (p->if_hwassist & CSUM_INNER_IP6_TCP) hwa |= CSUM_IP6_TCP; } } /* hardware TSO */ if (p->if_capabilities & IFCAP_VXLAN_HWTSO) { cap |= p->if_capabilities & IFCAP_TSO; if (p->if_hw_tsomax > IP_MAXPACKET - ifp->if_hdrlen) ifp->if_hw_tsomax = IP_MAXPACKET - ifp->if_hdrlen; else ifp->if_hw_tsomax = p->if_hw_tsomax; /* XXX: tsomaxsegcount decrement is cxgbe specific */ ifp->if_hw_tsomaxsegcount = p->if_hw_tsomaxsegcount - 1; ifp->if_hw_tsomaxsegsize = p->if_hw_tsomaxsegsize; } if (p->if_capenable & IFCAP_VXLAN_HWTSO) { ena |= sc->vxl_reqcap & p->if_capenable & IFCAP_TSO; if (ena & IFCAP_TSO) { if (p->if_hwassist & CSUM_INNER_IP_TSO) hwa |= CSUM_IP_TSO; if (p->if_hwassist & CSUM_INNER_IP6_TSO) hwa |= CSUM_IP6_TSO; } } ifp->if_capabilities |= cap; ifp->if_capenable |= ena; ifp->if_hwassist |= hwa; if (rel) if_rele(p); done: CURVNET_RESTORE(); NET_EPOCH_EXIT(et); } static int vxlan_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct vxlan_softc *sc; struct ifnet *ifp; struct ifvxlanparam vxlp; int error; sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO); sc->vxl_unit = unit; + sc->vxl_fibnum = curthread->td_proc->p_fibnum; vxlan_set_default_config(sc); error = vxlan_stats_alloc(sc); if (error != 0) goto fail; if (params != 0) { error = copyin(params, &vxlp, sizeof(vxlp)); if (error) goto fail; error = vxlan_set_user_config(sc, &vxlp); if (error) goto fail; } ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { error = ENOSPC; goto fail; } sc->vxl_ifp = ifp; rm_init(&sc->vxl_lock, "vxlanrm"); callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0); sc->vxl_port_hash_key = arc4random(); vxlan_ftable_init(sc); vxlan_sysctl_setup(sc); ifp->if_softc = sc; if_initname(ifp, vxlan_name, unit); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = vxlan_init; ifp->if_ioctl = vxlan_ioctl; ifp->if_transmit = vxlan_transmit; ifp->if_qflush = vxlan_qflush; ifp->if_capabilities = VXLAN_BASIC_IFCAPS; ifp->if_capenable = VXLAN_BASIC_IFCAPS; sc->vxl_reqcap = -1; vxlan_set_hwcaps(sc); ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status); ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO); ether_gen_addr(ifp, &sc->vxl_hwaddr); ether_ifattach(ifp, sc->vxl_hwaddr.octet); ifp->if_baudrate = 0; VXLAN_WLOCK(sc); vxlan_setup_interface_hdrlen(sc); VXLAN_WUNLOCK(sc); return (0); fail: free(sc, M_VXLAN); return (error); } static void vxlan_clone_destroy(struct ifnet *ifp) { struct vxlan_softc *sc; sc = ifp->if_softc; vxlan_teardown(sc); vxlan_ftable_flush(sc, 1); ether_ifdetach(ifp); if_free(ifp); ifmedia_removeall(&sc->vxl_media); vxlan_ftable_fini(sc); vxlan_sysctl_destroy(sc); rm_destroy(&sc->vxl_lock); vxlan_stats_free(sc); free(sc, M_VXLAN); } /* BMV: Taken from if_bridge. */ static uint32_t vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr) { uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key; b += addr[5] << 8; b += addr[4]; a += addr[3] << 24; a += addr[2] << 16; a += addr[1] << 8; a += addr[0]; /* * The following hash function is adapted from "Hash Functions" by Bob Jenkins * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). */ #define mix(a, b, c) \ do { \ a -= b; a -= c; a ^= (c >> 13); \ b -= c; b -= a; b ^= (a << 8); \ c -= a; c -= b; c ^= (b >> 13); \ a -= b; a -= c; a ^= (c >> 12); \ b -= c; b -= a; b ^= (a << 16); \ c -= a; c -= b; c ^= (b >> 5); \ a -= b; a -= c; a ^= (c >> 3); \ b -= c; b -= a; b ^= (a << 10); \ c -= a; c -= b; c ^= (b >> 15); \ } while (0) mix(a, b, c); #undef mix return (c); } static int vxlan_media_change(struct ifnet *ifp) { /* Ignore. */ return (0); } static void vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID; ifmr->ifm_active = IFM_ETHER | IFM_FDX; } static int vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr, const struct sockaddr *sa) { return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len)); } static void vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr, const struct sockaddr *sa) { MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); bzero(vxladdr, sizeof(*vxladdr)); if (sa->sa_family == AF_INET) { vxladdr->in4 = *satoconstsin(sa); vxladdr->in4.sin_len = sizeof(struct sockaddr_in); } else if (sa->sa_family == AF_INET6) { vxladdr->in6 = *satoconstsin6(sa); vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6); } } static int vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr, const struct sockaddr *sa) { int equal; if (sa->sa_family == AF_INET) { const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr; } else if (sa->sa_family == AF_INET6) { const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr); } else equal = 0; return (equal); } static void vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr, const struct sockaddr *sa) { MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); if (sa->sa_family == AF_INET) { const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; vxladdr->in4.sin_family = AF_INET; vxladdr->in4.sin_len = sizeof(struct sockaddr_in); vxladdr->in4.sin_addr = *in4; } else if (sa->sa_family == AF_INET6) { const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; vxladdr->in6.sin6_family = AF_INET6; vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6); vxladdr->in6.sin6_addr = *in6; } } static int vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec) { const struct sockaddr *sa; int supported; sa = &vxladdr->sa; supported = 0; if (sa->sa_family == AF_UNSPEC && unspec != 0) { supported = 1; } else if (sa->sa_family == AF_INET) { #ifdef INET supported = 1; #endif } else if (sa->sa_family == AF_INET6) { #ifdef INET6 supported = 1; #endif } return (supported); } static int vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr) { const struct sockaddr *sa; int any; sa = &vxladdr->sa; if (sa->sa_family == AF_INET) { const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; any = in4->s_addr == INADDR_ANY; } else if (sa->sa_family == AF_INET6) { const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; any = IN6_IS_ADDR_UNSPECIFIED(in6); } else any = -1; return (any); } static int vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr) { const struct sockaddr *sa; int mc; sa = &vxladdr->sa; if (sa->sa_family == AF_INET) { const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; mc = IN_MULTICAST(ntohl(in4->s_addr)); } else if (sa->sa_family == AF_INET6) { const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; mc = IN6_IS_ADDR_MULTICAST(in6); } else mc = -1; return (mc); } static int vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *vxladdr) { int error; MPASS(VXLAN_SOCKADDR_IS_IPV6(vxladdr)); #ifdef INET6 error = sa6_embedscope(&vxladdr->in6, V_ip6_use_defzone); #else error = EAFNOSUPPORT; #endif return (error); } static int vxlan_can_change_config(struct vxlan_softc *sc) { struct ifnet *ifp; ifp = sc->vxl_ifp; VXLAN_LOCK_ASSERT(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return (0); if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN)) return (0); return (1); } static int vxlan_check_vni(uint32_t vni) { return (vni >= VXLAN_VNI_MAX); } static int vxlan_check_ttl(int ttl) { return (ttl > MAXTTL); } static int vxlan_check_ftable_timeout(uint32_t timeout) { return (timeout > VXLAN_FTABLE_MAX_TIMEOUT); } static int vxlan_check_ftable_max(uint32_t max) { return (max > VXLAN_FTABLE_MAX); } static void vxlan_sysctl_setup(struct vxlan_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid *node; struct vxlan_statistics *stats; char namebuf[8]; ctx = &sc->vxl_sysctl_ctx; stats = &sc->vxl_stats; snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit); sysctl_ctx_init(ctx); sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node), OID_AUTO, "ftable", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count", CTLFLAG_RD, &sc->vxl_ftable_cnt, 0, "Number of entries in forwarding table"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max", CTLFLAG_RD, &sc->vxl_ftable_max, 0, "Maximum number of entries allowed in forwarding table"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout", CTLFLAG_RD, &sc->vxl_ftable_timeout, 0, "Number of seconds between prunes of the forwarding table"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP, sc, 0, vxlan_ftable_sysctl_dump, "A", "Dump the forwarding table entries"); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node), OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0, "Fowarding table reached maximum entries"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "ftable_lock_upgrade_failed", CTLFLAG_RD, &stats->ftable_lock_upgrade_failed, 0, "Forwarding table update required lock upgrade"); SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "txcsum", CTLFLAG_RD, &stats->txcsum, "# of times hardware assisted with tx checksum"); SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tso", CTLFLAG_RD, &stats->tso, "# of times hardware assisted with TSO"); SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "rxcsum", CTLFLAG_RD, &stats->rxcsum, "# of times hardware assisted with rx checksum"); } static void vxlan_sysctl_destroy(struct vxlan_softc *sc) { sysctl_ctx_free(&sc->vxl_sysctl_ctx); sc->vxl_sysctl_node = NULL; } static int vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def) { char path[64]; snprintf(path, sizeof(path), "net.link.vxlan.%d.%s", sc->vxl_unit, knob); TUNABLE_INT_FETCH(path, &def); return (def); } static void vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp) { struct vxlan_softc_head list; struct vxlan_socket *vso; struct vxlan_softc *sc, *tsc; LIST_INIT(&list); if (ifp->if_flags & IFF_RENAMING) return; if ((ifp->if_flags & IFF_MULTICAST) == 0) return; VXLAN_LIST_LOCK(); LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) vxlan_socket_ifdetach(vso, ifp, &list); VXLAN_LIST_UNLOCK(); LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) { LIST_REMOVE(sc, vxl_ifdetach_list); sx_xlock(&vxlan_sx); VXLAN_WLOCK(sc); if (sc->vxl_flags & VXLAN_FLAG_INIT) vxlan_init_wait(sc); vxlan_teardown_locked(sc); sx_xunlock(&vxlan_sx); } } static void vxlan_load(void) { mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF); LIST_INIT(&vxlan_socket_list); vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY); vxlan_cloner = if_clone_simple(vxlan_name, vxlan_clone_create, vxlan_clone_destroy, 0); } static void vxlan_unload(void) { EVENTHANDLER_DEREGISTER(ifnet_departure_event, vxlan_ifdetach_event_tag); if_clone_detach(vxlan_cloner); mtx_destroy(&vxlan_list_mtx); MPASS(LIST_EMPTY(&vxlan_socket_list)); } static int vxlan_modevent(module_t mod, int type, void *unused) { int error; error = 0; switch (type) { case MOD_LOAD: vxlan_load(); break; case MOD_UNLOAD: vxlan_unload(); break; default: error = ENOTSUP; break; } return (error); } static moduledata_t vxlan_mod = { "if_vxlan", vxlan_modevent, 0 }; DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_vxlan, 1);