diff --git a/share/man/man9/ifnet.9 b/share/man/man9/ifnet.9 index ac059b59eb81..3c45e4f29e2d 100644 --- a/share/man/man9/ifnet.9 +++ b/share/man/man9/ifnet.9 @@ -1,1703 +1,1695 @@ .\" -*- Nroff -*- .\" Copyright 1996, 1997 Massachusetts Institute of Technology .\" .\" Permission to use, copy, modify, and distribute this software and .\" its documentation for any purpose and without fee is hereby .\" granted, provided that both the above copyright notice and this .\" permission notice appear in all copies, that both the above .\" copyright notice and this permission notice appear in all .\" supporting documentation, and that the name of M.I.T. not be used .\" in advertising or publicity pertaining to distribution of the .\" software without specific, written prior permission. M.I.T. makes .\" no representations about the suitability of this software for any .\" purpose. It is provided "as is" without express or implied .\" warranty. .\" .\" THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS .\" ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, .\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF .\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT .\" SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, .\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT .\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF .\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND .\" ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, .\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT .\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" -.Dd September 29, 2025 +.Dd December 10, 2024 .Dt IFNET 9 .Os .Sh NAME .Nm if_t , .Nm ifnet , .Nm ifaddr , .Nm ifqueue , .Nm if_data .Nd kernel interfaces for manipulating network interfaces .Sh SYNOPSIS .In sys/param.h .In sys/time.h .In sys/socket.h .In net/if.h .In net/if_var.h .In net/if_types.h .\" .Ss "Interface Manipulation Functions" .Ft "if_t" .Fn if_alloc "u_char type" .Ft "if_t" .Fn if_alloc_dev "u_char type" "device_t dev" .Ft "if_t" .Fn if_alloc_domain "u_char type" "int numa_domain" .Ft void .Fn if_attach "if_t ifp" .Ft void .Fn if_detach "if_t ifp" .Ft void .Fn if_free "if_t ifp" .Ft void .Fn if_free_type "if_t ifp" "u_char type" .Ft void .Fn if_down "if_t ifp" .Ft int .Fn ifioctl "struct socket *so" "u_long cmd" "caddr_t data" "struct thread *td" .Ft int .Fn ifpromisc "if_t ifp" "int pswitch" .Ft int .Fn if_allmulti "if_t ifp" "int amswitch" .Ft "if_t" .Fn ifunit "const char *name" .Ft "if_t" .Fn ifunit_ref "const char *name" .Ft void .Fn if_up "if_t ifp" .\" .Ss "Interface Address Functions" .Ft "struct ifaddr *" .Fn ifa_ifwithaddr "struct sockaddr *addr" .Ft "struct ifaddr *" .Fn ifa_ifwithdstaddr "struct sockaddr *addr" "int fib" .Ft "struct ifaddr *" .Fn ifa_ifwithnet "struct sockaddr *addr" "int ignore_ptp" "int fib" .Ft "struct ifaddr *" .Fn ifaof_ifpforaddr "struct sockaddr *addr" "if_t ifp" .Ft void .Fn ifa_ref "struct ifaddr *ifa" .Ft void .Fn ifa_free "struct ifaddr *ifa" .\" .Ss "Interface Multicast Address Functions" .Ft int .Fn if_addmulti "if_t ifp" "struct sockaddr *sa" "struct ifmultiaddr **ifmap" .Ft int .Fn if_delmulti "if_t ifp" "struct sockaddr *sa" .Ft "struct ifmultiaddr *" .Fn if_findmulti "if_t ifp" "struct sockaddr *sa" .Ss "Output queue accessors" .Fn if_dequeue "if_t ifp" "struct mbuf *m" .Ss "Output queue macros" .Fn IF_DEQUEUE "struct ifqueue *ifq" "struct mbuf *m" .\" .Ss "if_t accesors" .Ft uint64_t .Fn if_setbaudrate "if_t ifp" "uint64_t baudrate" .Ft uint64_t .Fn if_getbaudrate "const if_t ifp" .Ft int .Fn 
if_setcapabilities "if_t ifp" "int capabilities" .Ft int .Fn if_setcapabilitiesbit "if_t ifp" "int setbit" "int clearbit" .Ft int .Fn if_getcapabilities "const if_t ifp" .Ft int .Fn if_togglecapenable "if_t ifp" "int togglecap" .Ft int .Fn if_setcapenable "if_t ifp" "int capenable" .Ft int .Fn if_setcapenablebit "if_t ifp" "int setcap" "int clearcap" .Ft int .Fn if_getcapenable "const if_t ifp" .Ft int .Fn if_setcapabilities2 "if_t ifp" "int capabilities" .Ft int .Fn if_setcapabilities2bit "if_t ifp" "int setbit" "int clearbit" .Ft int .Fn if_getcapabilities2 "const if_t ifp" .Ft int .Fn if_togglecapenable2 "if_t ifp" "int togglecap" .Ft int .Fn if_setcapenable2 "if_t ifp" "int capenable" .Ft int .Fn if_setcapenable2bit "if_t ifp" "int setcap" "int clearcap" .Ft int .Fn if_getcapenable2 "const if_t ifp" .Ft int .Fn if_getdunit "const if_t ifp" .Ft int .Fn if_getindex "const if_t ifp" .Ft int .Fn if_getidxgen "const if_t ifp" .Ft const char * .Fn if_getdname "const if_t ifp" .Ft void .Fn if_setdname "if_t ifp" "const char *name" .Ft const char * .Fn if_name "if_t ifp" .Ft int .Fn if_setname "if_t ifp" "const char *name" .Ft void .Fn if_setdescr "if_t ifp" "char *descrbuf" .Ft char * .Fn if_allocdescr "size_t sz" "int malloc_flag" .Ft void .Fn if_freedescr "char *descrbuf" .Ft int .Fn if_getalloctype "const if_t ifp" .Ft int .Fn if_gettype "const if_t ifp" .Ft int .Fn if_setdev "if_t ifp" "void *dev" .Ft int .Fn if_setdrvflagbits "if_t ifp" "int if_setflags" "int clear_flags" .Ft int .Fn if_getdrvflags "const if_t ifp" .Ft int .Fn if_setdrvflags "if_t ifp" "int flags" .Ft int .Fn if_getlinkstate "if_t ifp" .Ft int .Fn if_clearhwassist "if_t ifp" .Ft int .Fn if_sethwassistbits "if_t ifp" "int toset" "int toclear" .Ft int .Fn if_sethwassist "if_t ifp" "int hwassist_bit" .Ft int .Fn if_gethwassist "const if_t ifp" .Ft int .Fn if_togglehwassist "if_t ifp" "int toggle_bits" .Ft int .Fn if_setsoftc "if_t ifp" "void *softc" .Ft void * .Fn if_getsoftc "if_t ifp" .Ft void .Fn 
if_setllsoftc "if_t ifp" "void *softc" .Ft void * .Fn if_getllsoftc "if_t ifp" .Ft u_int .Fn if_getfib "if_t ifp" .Ft uint8_t .Fn if_getaddrlen "if_t ifp" .Ft int .Fn if_gethwaddr "const if_t ifp" "struct ifreq *" .Ft const uint8_t * .Fn if_getbroadcastaddr "const if_t ifp" .Ft void .Fn if_setbroadcastaddr "if_t ifp" "const uint8_t *" .Ft int .Fn if_setmtu "if_t ifp" "int mtu" .Ft int .Fn if_getmtu "const if_t ifp" .Ft int .Fn if_getmtu_family "const if_t ifp" "int family" .Ft void .Fn if_notifymtu "if_t ifp" .Ft int .Fn if_setflagbits "if_t ifp" "int set" "int clear" .Ft int .Fn if_setflags "if_t ifp" "int flags" .Ft int .Fn if_getflags "const if_t ifp" .Ft int .Fn if_getnumadomain "if_t ifp" .Ft int .Fn if_sendq_empty "if_t ifp" .Ft int .Fn if_setsendqready "if_t ifp" .Ft int .Fn if_setsendqlen "if_t ifp" "int tx_desc_count" .Ft int .Fn if_sethwtsomax "if_t ifp" "u_int if_hw_tsomax" .Ft int .Fn if_sethwtsomaxsegcount "if_t ifp" "u_int if_hw_tsomaxsegcount" .Ft int .Fn if_sethwtsomaxsegsize "if_t ifp" "u_int if_hw_tsomaxsegsize" .Ft u_int .Fn if_gethwtsomax "const if_t ifp" .Ft u_int .Fn if_gethwtsomaxsegcount "const if_t ifp" .Ft u_int .Fn if_gethwtsomaxsegsize "const if_t ifp" .Ft void .Fn if_setnetmapadapter "if_t ifp" "struct netmap_adapter *na" .Ft struct netmap_adapter * .Fn if_getnetmapadapter "if_t ifp" .Ft void .Fn if_input "if_t ifp" "struct mbuf* sendmp" .Ft int .Fn if_sendq_prepend "if_t ifp" "struct mbuf *m" .Ft struct mbuf * .Fn if_dequeue "if_t ifp" .Ft int .Fn if_setifheaderlen "if_t ifp" "int len" .Ft void .Fn if_setrcvif "struct mbuf *m" "if_t ifp" .Ft void .Fn if_setvtag "struct mbuf *m" "u_int16_t tag" .Ft u_int16_t .Fn if_getvtag "struct mbuf *m" .Ft int .Fn if_vlantrunkinuse "if_t ifp" .Ft caddr_t .Fn if_getlladdr "const if_t ifp" .Ft struct vnet * .Fn if_getvnet "const if_t ifp" -.Ft struct vnet * -.Fn if_gethomevnet "const if_t ifp" .Ft void * .Fn if_gethandle "u_char" .Ft void .Fn if_bpfmtap "if_t ifp" "struct mbuf *m" .Ft void .Fn 
if_etherbpfmtap "if_t ifp" "struct mbuf *m" .Ft void .Fn if_vlancap "if_t ifp" .Ft int .Fn if_transmit "if_t ifp" "struct mbuf *m" .Ft void .Fn if_init "if_t ifp" "void *ctx" .Ft int .Fn if_resolvemulti "if_t ifp" "struct sockaddr **" "struct sockaddr *" .Ft uint64_t .Fn if_getcounter "if_t ifp" "ift_counter counter" .Ft struct label * .Fn if_getmaclabel "if_t ifp" .Ft void .Fn if_setmaclabel "if_t ifp" "struct label *label" .Ft struct bpf_if * .Fn if_getbpf "if_t ifp" .Ft uint8_t .Fn if_getpcp "if_t ifp" .Ft void * .Fn if_getl2com "if_t ifp" .Ft struct ifvlantrunk * .Fn if_getvlantrunk "if_t ifp" .Ft bool .Fn if_altq_is_enabled "if_t ifp" .\" .Ss "struct ifnet Member Functions" .Ft void .Fn \*(lp*if_input\*(rp "if_t ifp" "struct mbuf *m" .Ft int .Fo \*(lp*if_output\*(rp .Fa "if_t ifp" "struct mbuf *m" .Fa "const struct sockaddr *dst" "struct route *ro" .Fc .Ft void .Fn \*(lp*if_start\*(rp "if_t ifp" .Ft int .Fn \*(lp*if_transmit\*(rp "if_t ifp" "struct mbuf *m" .Ft void .Fn \*(lp*if_qflush\*(rp "if_t ifp" .Ft int .Fn \*(lp*if_ioctl\*(rp "if_t ifp" "u_long cmd" "caddr_t data" .Ft void .Fn \*(lp*if_init\*(rp "void *if_softc" .Ft int .Fo \*(lp*if_resolvemulti\*(rp .Fa "if_t ifp" "struct sockaddr **retsa" "struct sockaddr *addr" .Fc .Ss "struct ifaddr member function" .Ft void .Fo \*(lp*ifa_rtrequest\*(rp .Fa "int cmd" "struct rtentry *rt" "struct rt_addrinfo *info" .Fc .\" .Ss "Global Variables" .Vt extern struct ifnethead ifnet ; .\" extern struct ifindex_entry *ifindex_table ; .Vt extern int if_index ; .Vt extern int ifqmaxlen ; .Sh DATA STRUCTURES The kernel mechanisms for handling network interfaces reside primarily in the .Vt ifnet , if_data , ifaddr , and .Vt ifmultiaddr structures in .In net/if.h and .In net/if_var.h and the functions named above and defined in .Pa /sys/net/if.c . 
Those interfaces which are intended to be used by user programs are defined in .In net/if.h ; these include the interface flags, the .Vt if_data structure, and the structures defining the appearance of interface-related messages on the .Xr route 4 routing socket and in .Xr sysctl 3 . The header file .In net/if_var.h defines the kernel-internal interfaces, including the .Vt ifnet , ifaddr , and .Vt ifmultiaddr structures and the functions which manipulate them. (A few user programs will need .In net/if_var.h because it is the prerequisite of some other header file like .In netinet/if_ether.h . Most references to those two files in particular can be replaced by .In net/ethernet.h . ) .Pp The system keeps a linked list of interfaces using the .Li TAILQ macros defined in .Xr queue 3 ; this list is headed by a .Vt "struct ifnethead" called .Va ifnet . The elements of this list are of type .Vt "struct ifnet" , and most kernel routines which manipulate interfaces as such accept or return pointers to these structures. Each interface structure contains an .Vt if_data structure used for statistics and information. Each interface also has a .Li TAILQ of interface addresses, described by .Vt ifaddr structures. An .Dv AF_LINK address (see .Xr link_addr 3 ) describing the link layer implemented by the interface (if any) is accessed by the .Va if_addr structure. (Some trivial interfaces do not provide any link layer addresses; this structure, while still present, serves only to identify the interface name and index.) .Pp Finally, those interfaces supporting reception of multicast datagrams have a .Li TAILQ of multicast group memberships, described by .Vt ifmultiaddr structures. These memberships are reference-counted. .Pp Interfaces are also associated with an output queue, defined as a .Vt "struct ifqueue" ; this structure is used to hold packets while the interface is in the process of sending another. 
.Ss The ifnet accessors The accessors for .Vt "if_t" are as follows: .Bl -tag -width indent -offset indent .It Fn if_getbaudrate Fn if_setbaudrate .Pq Vt u_long The line rate of the interface, in bits per second. .It Fn if_setcapabilities Fn if_setcapabilitiesbit Fn if_getcapabilities .Pq Vt int Flags describing the capabilities the interface supports (see below). .It Fn if_getcapenable Fn if_setcapenable Fn if_setcapenablebit Fn if_togglecapenable .Pq Vt int Flags describing the enabled capabilities of the interface (see below). .It Fn if_getcapabilities2 Fn if_setcapabilities2 Fn if_setcapabilities2bit .It Fn if_getcapenable2 Fn if_setcapenable2 Fn if_setcapenable2bit Fn if_togglecapenable2 .It Fn if_getdunit .Pq Vt int A unique number assigned to each interface managed by a particular driver. Drivers may choose to set this to .Dv IF_DUNIT_NONE if a unit number is not associated with the device. (Initialized by driver, usually via .Fn if_initname . ) .It Fn if_getindex .Pq Vt u_short Return the unique number assigned to the device when attached. This number can be used in a .Vt "struct sockaddr_dl" to refer to a particular interface by index (see .Xr link_addr 3 ) . This is initialized by .Fn if_alloc . .It Fn if_getidxgen .It Fn if_getdname Fn if_setdname .Pq Ft "const char *" The name of the driver. This is initialized by driver (usually via .Fn if_initname ) . .It Fn if_name Fn if_setname .Pq Vt "char *" The name of the interface, (e.g., .Ql fxp0 or .Dq Li lo0 ) . This is initialized by driver, usually via .Fn if_initname . .It Fn if_getalloctype .Pq Ft u_char The type of the interface as it was at the time of its allocation. It is used to cache the type passed to .Fn if_alloc , but unlike .Va if_type , it would not be changed by drivers. 
.It Fn if_gettype .It Fn if_setdev .It Fn if_getdrvflags Fn if_setdrvflags Fn if_setdrvflagbits .It Fn if_getlinkstate .It Fn if_clearhwassist Fn if_sethwassistbits .Fn if_gethwassist Fn if_sethwassist Fn if_togglehwassist .Pq Vt u_long A detailed interpretation of the capabilities to offload computational tasks for .Em outgoing packets. The interface driver must keep this field in accord with the current value of .Va if_capenable . .It Fn if_getsoftc Fn if_setsoftc .Pq Ft "void *" A pointer to the driver's private state block. This is initialized by driver at attach. .It Fn if_setllsoftc .It Fn if_getllsoftc .It Fn if_getfib .It Fn if_getaddrlen .It Fn if_gethwaddr .It Fn if_getbroadcastaddr Fn if_setbroadcastaddr Access the interface broadcast address. .It Fn if_setmtu .It Fn if_getmtu Access the interface MTU. .It Fn if_setflags Fn if_getflags Fn if_setflagbits .Pq Vt int Flags describing operational parameters of this interface (see below). These flags are manipulated by generic code. .It Fn if_getnumadomain .Pq Vt uint8_t The NUMA domain of the hardware device associated with the interface. This is filled in with a wildcard value unless the kernel is NUMA aware, the system is a NUMA system, and the ifnet is allocated using .Fn if_alloc_dev or .Fn if_alloc_domain . .It Fn if_sendq_empty .It Fn if_setsendqready .It Fn if_setsendqlen .It Fn if_sethwtsomax Fn if_gethwtsomax .It Fn if_sethwtsomaxsegcount Fn if_gethwtsomaxsegcount .It Fn if_sethwtsomaxsegsize Fn if_gethwtsomaxsegsize .It Fn if_setnetmapadapter Fn if_getnetmapadapter .It Fn if_setifheaderlen .It Fn if_setrcvif .It Fn if_setvtag Fn if_getvtag .It Fn if_vlantrunkinuse .It Fn if_getlladdr .It Fn if_getvnet .Pq Vt "struct vnet *" A pointer to the virtual network stack instance. This is initialized by .Fn if_attach . -.It Fn if_gethomevnet -.Pq Vt "struct vnet *" -A pointer to the parent virtual network stack, -where this struct ifnet originates from. -This is initialized by -.Fn if_attach . 
.It Fn if_gethandle .It Fn if_vlancap .It Fn if_getcounter .It Fn if_getmaclabel Fn if_setmaclabel .It Fn if_getbpf .Pq Ft "struct bpf_if *" Opaque per-interface data for the packet filter, .Xr bpf 4 . This is initialized by .Fn bpf_attach . .It Fn if_getpcp .It Fn if_getl2com A pointer to the common data for the interface's layer 2 protocol. This is initialized by .Fn if_alloc . .It Fn if_getvlantrunk "if_t ifp" .Pq Ft struct ifvlantrunk * A pointer to the 802.1Q trunk structure, .Xr vlan 4 . This is initialized by the driver-specific .Fn if_ioctl routine. .It Fn if_getdrvflags Fn if_setdrvflags Fn if_setdrvflagbits .Pq Ft int Flags describing operational status of this interface (see below). These flags are manipulated by driver. .It Fn if_addmulti Fn if_delmulti Fn if_findmulti Add, remove, and find multicast addresses assigned to this interface. .It Fn if_getifaddr .Pq Vt "struct ifaddr *" Get a pointer to the interface's link-level address. .It Fn if_getbroadcastaddr Fn if_setbroadcastaddr .Pq Ft "const u_int8_t *" A link-level broadcast bytestring for protocols with variable address length. .It Fn if_getafdata .Pq Ft "void *" An address family dependent data region. .It Fn if_addgroup Fn if_delgroup Add and delete groups from the interface. .El .Pp References to .Vt ifnet structures are gained by calling the .Fn if_ref function and released by calling the .Fn if_rele function. They are used to allow kernel code walking global interface lists to release the .Vt ifnet lock yet keep the .Vt ifnet structure stable. .Pp There are in addition a number of function pointers which the driver must initialize to complete its interface with the generic interface layer: .Bl -ohang -offset indent .It Fn if_input Pass a packet to an appropriate upper layer as determined from the link-layer header of the packet. This routine is to be called from an interrupt handler or used to emulate reception of a packet on this interface. 
A single function implementing .Fn if_input can be shared among multiple drivers utilizing the same link-layer framing, e.g., Ethernet. .It Fn if_output Output a packet on interface .Fa ifp , or queue it on the output queue if the interface is already active. .It Fn if_transmit Transmit a packet on an interface or queue it if the interface is in use. This function will return .Dv ENOBUFS if the device's software and hardware queues are both full. This function must be installed after .Fn if_attach to override the default implementation. This function is exposed in order to allow drivers to manage their own queues and to reduce the latency caused by a frequently gratuitous enqueue / dequeue pair to ifq. The suggested internal software queuing mechanism is buf_ring. .It Fn if_qflush Free mbufs in internally managed queues when the interface is marked down. This function must be installed after .Fn if_attach to override the default implementation. This function is exposed in order to allow drivers to manage their own queues and to reduce the latency caused by a frequently gratuitous enqueue / dequeue pair to ifq. The suggested internal software queuing mechanism is buf_ring. .It Fn if_start Start queued output on an interface. This function is exposed in order to provide for some interface classes to share a .Fn if_output among all drivers. .Fn if_start may only be called when the .Dv IFF_DRV_OACTIVE flag is not set. (Thus, .Dv IFF_DRV_OACTIVE does not literally mean that output is active, but rather that the device's internal output queue is full.) Please note that this function will soon be deprecated. .It Fn if_ioctl Process interface-related .Xr ioctl 2 requests (defined in .In sys/sockio.h ) . Preliminary processing is done by the generic routine .Fn ifioctl to check for appropriate privileges, locate the interface being manipulated, and perform certain generic operations like twiddling flags and flushing queues. 
See the description of .Fn ifioctl below for more information. .It Fn if_init Initialize and bring up the hardware, e.g., reset the chip and enable the receiver unit. Should mark the interface running, but not active .Dv ( IFF_DRV_RUNNING , ~IFF_DRV_OACTIVE ) . .It Fn if_resolvemulti Check the requested multicast group membership, .Fa addr , for validity, and if necessary compute a link-layer group which corresponds to that address which is returned in .Fa *retsa . Returns zero on success, or an error code on failure. .El .Ss "Interface Flags" Interface flags are used for a number of different purposes. Some flags simply indicate information about the type of interface and its capabilities; others are dynamically manipulated to reflect the current state of the interface. Flags of the former kind are marked .Aq S in this table; the latter are marked .Aq D . Flags which begin with .Dq IFF_DRV_ are stored in .Va if_drv_flags ; all other flags are stored in .Va if_flags . .Pp The macro .Dv IFF_CANTCHANGE defines the bits which cannot be set by a user program using the .Dv SIOCSIFFLAGS command to .Xr ioctl 2 ; these are indicated by an asterisk .Pq Ql * in the following listing. .Pp .Bl -tag -width ".Dv IFF_POINTOPOINT" -offset indent -compact .It Dv IFF_UP .Aq D The interface has been configured up by the user-level code. .It Dv IFF_BROADCAST .Aq S* The interface supports broadcast. .It Dv IFF_DEBUG .Aq D Used to enable/disable driver debugging code. .It Dv IFF_LOOPBACK .Aq S The interface is a loopback device. .It Dv IFF_POINTOPOINT .Aq S* The interface is point-to-point; .Dq broadcast address is actually the address of the other end. .It Dv IFF_DRV_RUNNING .Aq D* The interface has been configured and dynamic resources were successfully allocated. Probably only useful internal to the interface. .It Dv IFF_NOARP .Aq D Disable network address resolution on this interface. .It Dv IFF_PROMISC .Aq D* This interface is in promiscuous mode. 
.It Dv IFF_PPROMISC .Aq D This interface is in the permanently promiscuous mode (implies .Dv IFF_PROMISC ) . .It Dv IFF_ALLMULTI .Aq D* This interface is in all-multicasts mode (used by multicast routers). .It Dv IFF_PALLMULTI .Aq D This interface is in the permanently all-multicasts mode (implies .Dv IFF_ALLMULTI ) . .It Dv IFF_DRV_OACTIVE .Aq D* The interface's hardware output queue (if any) is full; output packets are to be queued. .It Dv IFF_SIMPLEX .Aq S* The interface cannot hear its own transmissions. .It Dv IFF_LINK0 .It Dv IFF_LINK1 .It Dv IFF_LINK2 .Aq D Control flags for the link layer. (Currently abused to select among multiple physical layers on some devices.) .It Dv IFF_MULTICAST .Aq S* This interface supports multicast. .It Dv IFF_CANTCONFIG .Aq S* The interface is not configurable in a meaningful way. Primarily useful for .Dv IFT_USB interfaces registered at the interface list. .It Dv IFF_MONITOR .Aq D This interface blocks transmission of packets and discards incoming packets after BPF processing. Used to monitor network traffic but not interact with the network in question. .It Dv IFF_STATICARP .Aq D Used to enable/disable ARP requests on this interface. .It Dv IFF_DYING .Aq D* Set when the .Vt ifnet structure of this interface is being released and still has .Va if_refcount references. .It Dv IFF_RENAMING .Aq D Set when this interface is being renamed. .El .Ss "Interface Capabilities Flags" Interface capabilities are specialized features an interface may or may not support. These capabilities are very hardware-specific and allow, when enabled, to offload specific network processing to the interface or to offer a particular feature for use by other kernel parts. .Pp It should be stressed that a capability can be completely uncontrolled (i.e., stay always enabled with no way to disable it) or allow limited control over itself (e.g., depend on another capability's state.) 
Such peculiarities are determined solely by the hardware and driver of a particular interface. Only the driver possesses the knowledge on whether and how the interface capabilities can be controlled. Consequently, capabilities flags in .Va if_capenable should never be modified directly by kernel code other than the interface driver. The command .Dv SIOCSIFCAP to .Fn ifioctl is the dedicated means to attempt altering .Va if_capenable on an interface. Userland code shall use .Xr ioctl 2 . .Pp The following capabilities are currently supported by the system: .Bl -tag -width ".Dv IFCAP_VLAN_HWTAGGING" -offset indent .It Dv IFCAP_RXCSUM This interface can do checksum validation on receiving data. Some interfaces do not have sufficient buffer storage to store frames above a certain MTU-size completely. The driver for the interface might disable hardware checksum validation if the MTU is set above the hardcoded limit. .It Dv IFCAP_TXCSUM This interface can do checksum calculation on transmitting data. .It Dv IFCAP_HWCSUM A shorthand for .Pq Dv IFCAP_RXCSUM | IFCAP_TXCSUM . .It Dv IFCAP_NETCONS This interface can be a network console. .It Dv IFCAP_VLAN_MTU The .Xr vlan 4 driver can operate over this interface in software tagging mode without having to decrease MTU on .Xr vlan 4 interfaces below 1500 bytes. This implies the ability of this interface to cope with frames somewhat longer than permitted by the Ethernet specification. .It Dv IFCAP_VLAN_HWTAGGING This interface can do VLAN tagging on output and demultiplex frames by their VLAN tag on input. .It Dv IFCAP_JUMBO_MTU This Ethernet interface can transmit and receive frames up to 9000 bytes long. .It Dv IFCAP_POLLING This interface supports .Xr polling 4 . See below for details. .It Dv IFCAP_VLAN_HWCSUM This interface can do checksum calculation on both transmitting and receiving data on .Xr vlan 4 interfaces (implies .Dv IFCAP_HWCSUM ) . .It Dv IFCAP_TSO4 This Ethernet interface supports TCP4 Segmentation offloading. 
.It Dv IFCAP_TSO6 This Ethernet interface supports TCP6 Segmentation offloading. .It Dv IFCAP_TSO A shorthand for .Pq Dv IFCAP_TSO4 | IFCAP_TSO6 . .It Dv IFCAP_TOE4 This Ethernet interface supports TCP4 Offload Engine. .It Dv IFCAP_TOE6 This Ethernet interface supports TCP6 Offload Engine. .It Dv IFCAP_TOE A shorthand for .Pq Dv IFCAP_TOE4 | IFCAP_TOE6 . .It Dv IFCAP_WOL_UCAST This Ethernet interface supports waking up on any Unicast packet. .It Dv IFCAP_WOL_MCAST This Ethernet interface supports waking up on any Multicast packet. .It Dv IFCAP_WOL_MAGIC This Ethernet interface supports waking up on any Magic packet such as those sent by .Xr wake 8 . .It Dv IFCAP_WOL A shorthand for .Pq Dv IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC . .It Dv IFCAP_VLAN_HWFILTER This interface supports frame filtering in hardware on .Xr vlan 4 interfaces. .It Dv IFCAP_VLAN_HWTSO This interface supports TCP Segmentation offloading on .Xr vlan 4 interfaces (implies .Dv IFCAP_TSO ) . .It Dv IFCAP_LINKSTATE This Ethernet interface supports dynamic link state changes. .It Dv IFCAP_NETMAP This Ethernet interface supports .Xr netmap 4 . .El .Pp The ability of advanced network interfaces to offload certain computational tasks from the host CPU to the board is limited mostly to TCP/IP. Therefore a separate field associated with an interface (see .Va ifnet.if_data.ifi_hwassist below) keeps a detailed description of its enabled capabilities specific to TCP/IP processing. The TCP/IP module consults the field to see which tasks can be done on an .Em outgoing packet by the interface. The flags defined for that field are a superset of those for .Va mbuf.m_pkthdr.csum_flags , namely: .Bl -tag -width ".Dv CSUM_FRAGMENT" -offset indent .It Dv CSUM_IP The interface will compute IP checksums. .It Dv CSUM_TCP The interface will compute TCP checksums. .It Dv CSUM_UDP The interface will compute UDP checksums. 
.El .Pp An interface notifies the TCP/IP module about the tasks the former has performed on an .Em incoming packet by setting the corresponding flags in the field .Va mbuf.m_pkthdr.csum_flags of the .Vt mbuf chain containing the packet. See .Xr mbuf 9 for details. .Pp The capability of a network interface to operate in .Xr polling 4 mode involves several flags in different global variables and per-interface fields. The capability flag .Dv IFCAP_POLLING set in interface's .Va if_capabilities indicates support for .Xr polling 4 on the particular interface. If set in .Va if_capabilities , the same flag can be marked or cleared in the interface's .Va if_capenable within .Fn ifioctl , thus initiating switch of the interface to .Xr polling 4 mode or interrupt mode, respectively. The actual mode change is managed by the driver-specific .Fn if_ioctl routine. The .Xr polling 4 handler returns the number of packets processed. .Ss The if_data Structure The .Vt if_data structure contains statistics and identifying information used by management programs, and which is exported to user programs by way of the .Xr ifmib 4 branch of the .Xr sysctl 3 MIB. The following elements of the .Vt if_data structure are initialized by the interface and are not expected to change significantly over the course of normal operation: .Bl -tag -width ".Va ifi_lastchange" -offset indent .It Va ifi_type .Pq Vt u_char The type of the interface, as defined in .In net/if_types.h and described below in the .Sx "Interface Types" section. .It Va ifi_physical .Pq Vt u_char Intended to represent a selection of physical layers on devices which support more than one; never implemented. .It Va ifi_addrlen .Pq Vt u_char Length of a link-layer address on this device, or zero if there are none. Used to initialize the address length field in .Vt sockaddr_dl structures referring to this interface. 
.It Va ifi_hdrlen .Pq Vt u_char Maximum length of any link-layer header which might be prepended by the driver to a packet before transmission. The generic code computes the maximum over all interfaces and uses that value to influence the placement of data in .Vt mbuf Ns s to attempt to ensure that there is always sufficient space to prepend a link-layer header without allocating an additional .Vt mbuf . .It Va ifi_datalen .Pq Vt u_char Length of the .Vt if_data structure. Allows some stabilization of the routing socket ABI in the face of increases in the length of .Vt struct ifdata . .It Va ifi_mtu .Pq Vt u_long The maximum transmission unit of the medium, exclusive of any link-layer overhead. .It Va ifi_metric .Pq Vt u_long A dimensionless metric interpreted by a user-mode routing process. .It Va ifi_epoch .Pq Vt time_t The system uptime when interface was attached or the statistics below were reset. This is intended to be used to set the SNMP variable .Va ifCounterDiscontinuityTime . It may also be used to determine if two successive queries for an interface of the same index have returned results for the same interface. .El .Pp The structure additionally contains generic statistics applicable to a variety of different interface types (except as noted, all members are of type .Vt u_long ) : .Bl -tag -width ".Va ifi_lastchange" -offset indent .It Va ifi_link_state .Pq Vt u_char The current link state of Ethernet interfaces. See the .Sx Interface Link States section for possible values. .It Va ifi_ipackets Number of packets received. .It Va ifi_ierrors Number of receive errors detected (e.g., FCS errors, DMA overruns, etc.). More detailed breakdowns can often be had by way of a link-specific MIB. .It Va ifi_opackets Number of packets transmitted. .It Va ifi_oerrors Number of output errors detected (e.g., late collisions, DMA overruns, etc.). More detailed breakdowns can often be had by way of a link-specific MIB. 
.It Va ifi_collisions Total number of collisions detected on output for CSMA interfaces. (This member is sometimes [ab]used by other types of interfaces for other output error counts.) .It Va ifi_ibytes Total traffic received, in bytes. .It Va ifi_obytes Total traffic transmitted, in bytes. .It Va ifi_imcasts Number of packets received which were sent by link-layer multicast. .It Va ifi_omcasts Number of packets sent by link-layer multicast. .It Va ifi_iqdrops Number of packets dropped on input. Rarely implemented. .It Va ifi_oqdrops Number of packets dropped on output. .It Va ifi_noproto Number of packets received for unknown network-layer protocol. .It Va ifi_lastchange .Pq Vt "struct timeval" The time of the last administrative change to the interface (as required for SNMP ) . .El .Ss Interface Types The header file .In net/if_types.h defines symbolic constants for a number of different types of interfaces. The most common are: .Pp .Bl -tag -offset indent -width ".Dv IFT_PROPVIRTUAL" -compact .It Dv IFT_OTHER none of the following .It Dv IFT_ETHER Ethernet .It Dv IFT_ISO88023 ISO 8802-3 CSMA/CD .It Dv IFT_ISO88024 ISO 8802-4 Token Bus .It Dv IFT_ISO88025 ISO 8802-5 Token Ring .It Dv IFT_ISO88026 ISO 8802-6 DQDB MAN .It Dv IFT_FDDI FDDI .It Dv IFT_PPP Internet Point-to-Point Protocol .Pq Xr ppp 8 .It Dv IFT_LOOP The loopback .Pq Xr lo 4 interface .It Dv IFT_SLIP Serial Line IP .It Dv IFT_PARA Parallel-port IP .Pq Dq PLIP .It Dv IFT_ATM Asynchronous Transfer Mode .It Dv IFT_USB USB Interface .El .Ss Interface Link States The following link states are currently defined: .Pp .Bl -tag -offset indent -width ".Dv LINK_STATE_UNKNOWN" -compact .It Dv LINK_STATE_UNKNOWN The link is in an invalid or unknown state. .It Dv LINK_STATE_DOWN The link is down. .It Dv LINK_STATE_UP The link is up. .El .Ss The ifaddr Structure Every interface is associated with a list (or, rather, a .Li TAILQ ) of addresses, rooted at the interface structure's .Va if_addrhead member. 
The first element in this list is always an .Dv AF_LINK address representing the interface itself; multi-access network drivers should complete this structure by filling in their link-layer addresses after calling .Fn if_attach . Other members of the structure represent network-layer addresses which have been configured by means of the .Dv SIOCAIFADDR command to .Xr ioctl 2 , called on a socket of the appropriate protocol family. The elements of this list consist of .Vt ifaddr structures. Most protocols will declare their own protocol-specific interface address structures, but all begin with a .Vt "struct ifaddr" which provides the most-commonly-needed functionality across all protocols. Interface addresses are reference-counted. .Pp The members of .Vt "struct ifaddr" are as follows: .Bl -tag -width ".Va ifa_rtrequest" -offset indent .It Va ifa_addr .Pq Vt "struct sockaddr *" The local address of the interface. .It Va ifa_dstaddr .Pq Vt "struct sockaddr *" The remote address of point-to-point interfaces, and the broadcast address of broadcast interfaces. .Va ( ifa_broadaddr is a macro for .Va ifa_dstaddr . ) .It Va ifa_netmask .Pq Vt "struct sockaddr *" The network mask for multi-access interfaces, and the confusion generator for point-to-point interfaces. .It Va ifa_ifp .Pq Vt "if_t" A link back to the interface structure. .It Va ifa_link .Pq Fn TAILQ_ENTRY ifaddr .Xr queue 3 glue for list of addresses on each interface. .It Va ifa_rtrequest See below. .It Va ifa_flags .Pq Vt u_short Some of the flags which would be used for a route representing this address in the route table. .It Va ifa_refcnt .Pq Vt short The reference count. .El .Pp References to .Vt ifaddr structures are gained by calling the .Fn ifa_ref function and released by calling the .Fn ifa_free function. .Pp .Fn ifa_rtrequest is a pointer to a function which receives callouts from the routing code .Pq Fn rtrequest to perform link-layer-specific actions upon requests to add, or delete routes. 
The .Fa cmd argument indicates the request in question: .Dv RTM_ADD , or .Dv RTM_DELETE . The .Fa rt argument is the route in question; the .Fa info argument contains the specific destination being manipulated. .Sh FUNCTIONS The functions provided by the generic interface code can be divided into two groups: those which manipulate interfaces, and those which manipulate interface addresses. In addition to these functions, there may also be link-layer support routines which are used by a number of drivers implementing a specific link layer over different hardware; see the documentation for that link layer for more details. .Ss The ifmultiaddr Structure Every multicast-capable interface is associated with a list of multicast group memberships, which indicate at a low level which link-layer multicast addresses (if any) should be accepted, and at a high level, in which network-layer multicast groups a user process has expressed interest. .Pp The elements of the structure are as follows: .Bl -tag -width ".Va ifma_refcount" -offset indent .It Va ifma_link .Pq Fn LIST_ENTRY ifmultiaddr .Xr queue 3 macro glue. .It Va ifma_addr .Pq Vt "struct sockaddr *" A pointer to the address which this record represents. The memberships for various address families are stored in arbitrary order. .It Va ifma_lladdr .Pq Vt "struct sockaddr *" A pointer to the link-layer multicast address, if any, to which the network-layer multicast address in .Va ifma_addr is mapped, else a null pointer. If this element is non-nil, this membership also holds an invisible reference to another membership for that link-layer address. .It Va ifma_refcount .Pq Vt u_int A reference count of requests for this particular membership. .El .Ss Interface Manipulation Functions .Bl -ohang -offset indent .It Fn if_alloc Allocate and initialize .Vt "struct ifnet" . Initialization includes the allocation of an interface index and may include the allocation of a .Fa type specific structure in .Va if_l2com . 
.It Fn if_alloc_dev Allocate and initialize .Vt "struct ifnet" as .Fn if_alloc does, with the addition that the ifnet can be tagged with the appropriate NUMA domain derived from the .Fa dev argument passed by the caller. .It Fn if_alloc_domain Allocate and initialize .Vt "struct ifnet" as .Fn if_alloc does, with the addition that the ifnet will be tagged with the NUMA domain via the .Fa numa_domain argument passed by the caller. .It Fn if_attach Link the specified interface .Fa ifp into the list of network interfaces. Also initialize the list of addresses on that interface, and create a link-layer .Vt ifaddr structure to be the first element in that list. (A pointer to this address structure is saved in the .Vt ifnet structure.) The .Fa ifp must have been allocated by .Fn if_alloc , .Fn if_alloc_dev or .Fn if_alloc_domain . .It Fn if_detach Shut down and unlink the specified .Fa ifp from the interface list. .It Fn if_free Free the given .Fa ifp back to the system. The interface must have been previously detached if it was ever attached. .It Fn if_free_type Identical to .Fn if_free except that the given .Fa type is used to free .Va if_l2com instead of the type in .Va if_type . This is intended for use with drivers that change their interface type. .It Fn if_down Mark the interface .Fa ifp as down (i.e., .Dv IFF_UP is not set), flush its output queue, notify protocols of the transition, and generate a message from the .Xr route 4 routing socket. .It Fn if_up Mark the interface .Fa ifp as up, notify protocols of the transition, and generate a message from the .Xr route 4 routing socket. .It Fn ifpromisc Add or remove a promiscuous reference to .Fa ifp . If .Fa pswitch is true, add a reference; if it is false, remove a reference. On reference count transitions from zero to one and one to zero, set the .Dv IFF_PROMISC flag appropriately and call .Fn if_ioctl to set up the interface in the desired mode. 
.It Fn if_allmulti As .Fn ifpromisc , but for the all-multicasts .Pq Dv IFF_ALLMULTI flag instead of the promiscuous flag. .It Fn ifunit Return an .Vt ifnet pointer for the interface named .Fa name . .It Fn ifunit_ref Return a reference-counted (via .Fn if_ref ) .Vt ifnet pointer for the interface named .Fa name . This is the preferred function over .Fn ifunit . The caller is responsible for releasing the reference with .Fn if_rele when it is finished with the ifnet. .It Fn ifioctl Process the ioctl request .Fa cmd , issued on socket .Fa so by thread .Fa td , with data parameter .Fa data . This is the main routine for handling all interface configuration requests from user mode. It is ordinarily only called from the socket-layer .Xr ioctl 2 handler, and only for commands with class .Sq Li i . Any unrecognized commands will be passed down to socket .Fa so Ns 's protocol for further interpretation. The following commands are handled by .Fn ifioctl : .Pp .Bl -tag -width ".Dv SIOCGIFNETMASK" -offset indent -compact .It Dv SIOCGIFCONF Get interface configuration. (No call-down to driver.) .Pp .It Dv SIOCSIFNAME Set the interface name. .Dv RTM_IFANNOUNCE departure and arrival messages are sent so that routing code that relies on the interface name will update its interface list. Caller must have appropriate privilege. (No call-down to driver.) .It Dv SIOCGIFCAP .It Dv SIOCGIFDATA .It Dv SIOCGIFFIB .It Dv SIOCGIFFLAGS .It Dv SIOCGIFMETRIC .It Dv SIOCGIFMTU .It Dv SIOCGIFPHYS Get interface capabilities, data, FIB, flags, metric, MTU, medium selection. (No call-down to driver.) .Pp .It Dv SIOCSIFCAP Enable or disable interface capabilities. Caller must have appropriate privilege. Before a call to the driver-specific .Fn if_ioctl routine, the requested mask for enabled capabilities is checked against the mask of capabilities supported by the interface, .Va if_capabilities . Requesting to enable an unsupported capability is invalid.
The rest is supposed to be done by the driver, which includes updating .Va if_capenable and .Va if_data.ifi_hwassist appropriately. .Pp .It Dv SIOCGIFCAPNV .Xr nv 9 version of the .Dv SIOCGIFCAP ioctl. Caller must provide a pointer to .Vt struct ifreq_cap_nv as .Fa data , where the member .Dv buffer points to some buffer containing .Dv buf_length bytes. The serialized nvlist with description of the device capabilities is written to the buffer. If the buffer is too short, the structure is updated with .Dv buffer member set to .Dv NULL , .Dv length set to the minimal required length, and error .Er EFBIG is returned. .Pp Elements of the returned nvlist for simple capabilities are boolean, identified by names. Presence of the boolean element means that corresponding capability is supported by the interface. Element's value describes the current configured state: .Dv true means that the capability is enabled, and .Dv false that it is disabled. .Pp Driver indicates support for both .Dv SIOCGIFCAPNV and .Dv SIOCSIFCAPNV requests by setting .Dv IFCAP_NV non-modifiable capability bit in .Dv if_capabilities . .Pp .It Dv SIOCSIFCAPNV .Xr nv 9 version of the .Dv SIOCSIFCAP ioctl. Caller must provide the pointer to .Vt struct ifreq_cap_nv as .Fa data , where the member .Dv buffer points to serialized nvlist of .Dv length bytes. Each element of nvlist describes a requested update of one capability, identified by the element name. For simple capabilities, the element must be boolean. Its .Dv true value means that the caller asks to enable the capability, and .Dv false value to disable. Only capabilities listed in the nvlist are affected by the call. .Pp .It Dv SIOCSIFFIB Sets interface FIB. Caller must have appropriate privilege. FIB values start at 0 and values greater than or equal to .Va net.fibs are considered invalid. .It Dv SIOCSIFFLAGS Change interface flags. Caller must have appropriate privilege.
If a change to the .Dv IFF_UP flag is requested, .Fn if_up or .Fn if_down is called as appropriate. Flags listed in .Dv IFF_CANTCHANGE are masked off, and the field .Va if_flags in the interface structure is updated. Finally, the driver .Fn if_ioctl routine is called to perform any setup requested. .Pp .It Dv SIOCSIFMETRIC .It Dv SIOCSIFPHYS Change interface metric or medium. Caller must have appropriate privilege. .Pp .It Dv SIOCSIFMTU Change interface MTU. Caller must have appropriate privilege. MTU values less than 72 or greater than 65535 are considered invalid. The driver .Fn if_ioctl routine is called to implement the change; it is responsible for any additional sanity checking and for actually modifying the MTU in the interface structure. .Pp .It Dv SIOCADDMULTI .It Dv SIOCDELMULTI Add or delete permanent multicast group memberships on the interface. Caller must have appropriate privilege. The .Fn if_addmulti or .Fn if_delmulti function is called to perform the operation; qq.v. .Pp .It Dv SIOCAIFADDR .It Dv SIOCDIFADDR The socket's protocol control routine is called to implement the requested action. .El .El .Ss "Interface Address Functions" Several functions exist to look up an interface address structure given an address. .Fn ifa_ifwithaddr returns an interface address with either a local address or a broadcast address precisely matching the parameter .Fa addr . .Fn ifa_ifwithdstaddr returns an interface address for a point-to-point interface whose remote .Pq Dq destination address is .Fa addr and a fib is .Fa fib . If .Fa fib is .Dv RT_ALL_FIBS , then the first interface address matching .Fa addr will be returned. .Pp .Fn ifa_ifwithnet returns the most specific interface address which matches the specified address, .Fa addr , subject to its configured netmask, or a point-to-point interface address whose remote address is .Fa addr if one is found. If .Fa ignore_ptp is true, skip point-to-point interface addresses. 
The .Fa fib parameter is handled the same way as by .Fn ifa_ifwithdstaddr . .Pp .Fn ifaof_ifpforaddr returns the most specific address configured on interface .Fa ifp which matches address .Fa addr , subject to its configured netmask. If the interface is point-to-point, only an interface address whose remote address is precisely .Fa addr will be returned. .Pp All of these functions return a null pointer if no such address can be found. .Ss "Interface Multicast Address Functions" The .Fn if_addmulti , .Fn if_delmulti , and .Fn if_findmulti functions provide support for requesting and relinquishing multicast group memberships, and for querying an interface's membership list, respectively. The .Fn if_addmulti function takes a pointer to an interface, .Fa ifp , and a generic address, .Fa sa . It also takes a pointer to a .Vt "struct ifmultiaddr *" which is filled in on successful return with the address of the group membership control block. The .Fn if_addmulti function performs the following four-step process: .Bl -enum -offset indent .It Call the interface's .Fn if_resolvemulti entry point to determine the link-layer address, if any, corresponding to this membership request, and also to give the link layer an opportunity to veto this membership request should it so desire. .It Check the interface's group membership list for a pre-existing membership for this group. If one is not found, allocate a new one; if one is, increment its reference count. .It If the .Fn if_resolvemulti routine returned a link-layer address corresponding to the group, repeat the previous step for that address as well. .It If the interface's multicast address filter needs to be changed because a new membership was added, call the interface's .Fn if_ioctl routine (with a .Fa cmd argument of .Dv SIOCADDMULTI ) to request that it do so. .El .Pp The .Fn if_delmulti function, given an interface .Fa ifp and an address, .Fa sa , reverses this process. 
Both functions return zero on success, or a standard error number on failure. .Pp The .Fn if_findmulti function examines the membership list of interface .Fa ifp for an address matching .Fa sa , and returns a pointer to that .Vt "struct ifmultiaddr" if one is found, else it returns a null pointer. .Sh SEE ALSO .Xr ioctl 2 , .Xr link_addr 3 , .Xr queue 3 , .Xr sysctl 3 , .Xr bpf 4 , .Xr ifmib 4 , .Xr lo 4 , .Xr netintro 4 , .Xr polling 4 , .Xr config 8 , .Xr ppp 8 , .Xr mbuf 9 , .Xr rtentry 9 .Rs .%A Gary R. Wright .%A W. Richard Stevens .%B TCP/IP Illustrated .%V Vol. 2 .%O Addison-Wesley, ISBN 0-201-63354-X .Re .Sh AUTHORS This manual page was written by .An Garrett A. Wollman . diff --git a/sys/net/if.c b/sys/net/if.c index 6a68d627c07f..b6a798aa0fab 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1,5248 +1,5242 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2010 Bjoern A. Zeeb * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_bpf.h" #include "opt_inet6.h" #include "opt_inet.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #ifdef INET6 #include #include #endif /* INET6 */ #endif /* INET || INET6 */ #include /* * Consumers of struct ifreq such as tcpdump assume no pad between ifr_name * and ifr_ifru when it is used in SIOCGIFCONF. */ _Static_assert(sizeof(((struct ifreq *)0)->ifr_name) == offsetof(struct ifreq, ifr_ifru), "gap between ifr_name and ifr_ifru"); __read_mostly epoch_t net_epoch_preempt; #ifdef COMPAT_FREEBSD32 #include #include struct ifreq_buffer32 { uint32_t length; /* (size_t) */ uint32_t buffer; /* (void *) */ }; /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq32 { char ifr_name[IFNAMSIZ]; /* if name, e.g. 
"en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer32 ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; uint32_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; } ifr_ifru; }; CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32)); CTASSERT(__offsetof(struct ifreq, ifr_ifru) == __offsetof(struct ifreq32, ifr_ifru)); struct ifconf32 { int32_t ifc_len; union { uint32_t ifcu_buf; uint32_t ifcu_req; } ifc_ifcu; }; #define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32) struct ifdrv32 { char ifd_name[IFNAMSIZ]; uint32_t ifd_cmd; uint32_t ifd_len; uint32_t ifd_data; }; #define SIOCSDRVSPEC32 _IOC_NEWTYPE(SIOCSDRVSPEC, struct ifdrv32) #define SIOCGDRVSPEC32 _IOC_NEWTYPE(SIOCGDRVSPEC, struct ifdrv32) struct ifgroupreq32 { char ifgr_name[IFNAMSIZ]; u_int ifgr_len; union { char ifgru_group[IFNAMSIZ]; uint32_t ifgru_groups; } ifgr_ifgru; }; #define SIOCAIFGROUP32 _IOC_NEWTYPE(SIOCAIFGROUP, struct ifgroupreq32) #define SIOCGIFGROUP32 _IOC_NEWTYPE(SIOCGIFGROUP, struct ifgroupreq32) #define SIOCDIFGROUP32 _IOC_NEWTYPE(SIOCDIFGROUP, struct ifgroupreq32) #define SIOCGIFGMEMB32 _IOC_NEWTYPE(SIOCGIFGMEMB, struct ifgroupreq32) struct ifmediareq32 { char ifm_name[IFNAMSIZ]; int ifm_current; int ifm_mask; int ifm_status; int ifm_active; int ifm_count; uint32_t ifm_ulist; /* (int *) */ }; #define SIOCGIFMEDIA32 _IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32) #define SIOCGIFXMEDIA32 _IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32) #endif /* COMPAT_FREEBSD32 */ union ifreq_union { struct ifreq ifr; #ifdef COMPAT_FREEBSD32 struct ifreq32 ifr32; #endif }; SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Generic link-management"); SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN, &ifqmaxlen, 0, "max send queue size"); 
/*
 * Log link state change events.
 * Exported read-write under the _net_link sysctl node.
 */
static int log_link_state_change = 1;

SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
	&log_link_state_change, 0,
	"log interface link state change events");

/*
 * Log promiscuous mode change events.
 * Declared with CTLFLAG_RDTUN, unlike the read-write knob above.
 */
static int log_promisc_mode_change = 1;

SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN,
	&log_promisc_mode_change, 1,
	"log promiscuous mode change events");

/*
 * Interface description.
 * ifdescr_maxlen is the administrative cap on description length and is
 * exported read-write directly under the _net sysctl node.
 */
static unsigned int ifdescr_maxlen = 1024;
SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
	&ifdescr_maxlen, 0,
	"administrative maximum length for interface description");

/* Malloc type used for interface description strings. */
static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");

/* global sx for non-critical path ifdescr */
static struct sx ifdescr_sx;
SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");

/*
 * Link-state hook pointers.  They have no initializer here, so they start
 * out NULL; whoever installs them is outside this part of the file.
 */
void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
/* These are external hooks for CARP. */
void	(*carp_linkstate_p)(struct ifnet *ifp);
void	(*carp_demote_adj_p)(int, char *);
int	(*carp_master_p)(struct ifaddr *);
#if defined(INET) || defined(INET6)
/* CARP hooks that only exist when at least one IP family is compiled in. */
int	(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
int	(*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *sa);
int	(*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
int	(*carp_attach_p)(struct ifaddr *, int);
void	(*carp_detach_p)(struct ifaddr *, bool);
#endif
#ifdef INET
/* IPv4-only CARP hook. */
int	(*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#endif
#ifdef INET6
/* IPv6-only CARP hooks. */
struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
caddr_t	(*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
    const struct in6_addr *taddr);
#endif

/* Dequeue hook taking a struct ifaltq; explicitly NULL until installed. */
struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;

/*
 * Forward declarations for file-local helpers.
 *
 * XXX: Style; these should be sorted alphabetically, and unprototyped
 * static functions should be prototyped.  Currently they are sorted by
 * declaration order.
 */
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t);
static void	if_input_default(struct ifnet *, struct mbuf *);
static int	if_requestencap_default(struct ifnet *, struct if_encap_req *);
static int	if_setflag(struct ifnet *, int, int, int *, int);
static int	if_transmit_default(struct ifnet *ifp, struct mbuf *m);
static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
static void	do_link_state_change(void *, int);
static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
static int	if_getgroupmembers(struct ifgroupreq *);
static void	if_delgroups(struct ifnet *);
static void	if_attach_internal(struct ifnet *, bool);
static void	if_detach_internal(struct ifnet *, bool);
static void	if_siocaddmulti(void *, int);
static void	if_link_ifnet(struct ifnet *);
static bool	if_unlink_ifnet(struct ifnet *, bool);
#ifdef VIMAGE
/* Only built with VIMAGE: moves an ifnet to another vnet. */
static void	if_vmove(struct ifnet *, struct vnet *);
#endif

#ifdef INET6
/*
 * XXX: declared here to avoid including many inet6-related files;
 * should this be more generalized?
 */
extern void	nd6_setmtu(struct ifnet *);
#endif

/* ipsec helper hooks */
VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);

/* Default send-queue length; ifq_init() applies it when ifq_maxlen is 0. */
int	ifqmaxlen = IFQ_MAXLEN;
VNET_DEFINE(struct ifnethead, ifnet);	/* depend on static init XXX */
VNET_DEFINE(struct ifgrouphead, ifg_head);

/* Table of ifnet by index.
*/ static int if_index; static int if_indexlim = 8; static struct ifindex_entry { struct ifnet *ife_ifnet; uint16_t ife_gencnt; } *ifindex_table; SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Variables global to all interfaces"); static int sysctl_ifcount(SYSCTL_HANDLER_ARGS) { int rv = 0; IFNET_RLOCK(); for (int i = 1; i <= if_index; i++) if (ifindex_table[i].ife_ifnet != NULL && ifindex_table[i].ife_ifnet->if_vnet == curvnet) rv = i; IFNET_RUNLOCK(); return (sysctl_handle_int(oidp, &rv, 0, req)); } SYSCTL_PROC(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RD, NULL, 0, sysctl_ifcount, "I", "Maximum known interface index"); /* * The global network interface list (V_ifnet) and related state (such as * if_index, if_indexlim, and ifindex_table) are protected by an sxlock. * This may be acquired to stabilise the list, or we may rely on NET_EPOCH. */ struct sx ifnet_sxlock; SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE); struct sx ifnet_detach_sxlock; SX_SYSINIT_FLAGS(ifnet_detach, &ifnet_detach_sxlock, "ifnet_detach_sx", SX_RECURSE); #ifdef VIMAGE #define VNET_IS_SHUTTING_DOWN(_vnet) \ ((_vnet)->vnet_shutdown && (_vnet)->vnet_state < SI_SUB_VNET_DONE) #endif static if_com_alloc_t *if_com_alloc[256]; static if_com_free_t *if_com_free[256]; static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals"); MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); struct ifnet * ifnet_byindex(u_int idx) { struct ifnet *ifp; NET_EPOCH_ASSERT(); if (__predict_false(idx > if_index)) return (NULL); ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet); if (curvnet != NULL && ifp != NULL && ifp->if_vnet != curvnet) ifp = NULL; return (ifp); } struct ifnet * ifnet_byindex_ref(u_int idx) { struct ifnet *ifp; ifp = ifnet_byindex(idx); if (ifp == NULL || (ifp->if_flags & IFF_DYING)) return (NULL); if (!if_try_ref(ifp)) 
return (NULL); return (ifp); } struct ifnet * ifnet_byindexgen(uint16_t idx, uint16_t gen) { struct ifnet *ifp; NET_EPOCH_ASSERT(); if (__predict_false(idx > if_index)) return (NULL); ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet); if (ifindex_table[idx].ife_gencnt == gen) return (ifp); else return (NULL); } /* * Network interface utility routines. * * Routines with ifa_ifwith* names take sockaddr *'s as * parameters. */ static void if_init_idxtable(void *arg __unused) { ifindex_table = malloc(if_indexlim * sizeof(*ifindex_table), M_IFNET, M_WAITOK | M_ZERO); } SYSINIT(if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init_idxtable, NULL); static void vnet_if_init(const void *unused __unused) { CK_STAILQ_INIT(&V_ifnet); CK_STAILQ_INIT(&V_ifg_head); } VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, NULL); static void if_link_ifnet(struct ifnet *ifp) { IFNET_WLOCK(); CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link); #ifdef VIMAGE curvnet->vnet_ifcnt++; #endif IFNET_WUNLOCK(); } static bool if_unlink_ifnet(struct ifnet *ifp, bool vmove) { struct ifnet *iter; int found = 0; IFNET_WLOCK(); CK_STAILQ_FOREACH(iter, &V_ifnet, if_link) if (iter == ifp) { CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link); if (!vmove) ifp->if_flags |= IFF_DYING; found = 1; break; } #ifdef VIMAGE curvnet->vnet_ifcnt--; #endif IFNET_WUNLOCK(); return (found); } #ifdef VIMAGE static void vnet_if_return(const void *unused __unused) { struct ifnet *ifp, *nifp; struct ifnet **pending; int found __diagused; int i; i = 0; /* * We need to protect our access to the V_ifnet tailq. Ordinarily we'd * enter NET_EPOCH, but that's not possible, because if_vmove() calls * if_detach_internal(), which waits for NET_EPOCH callbacks to * complete. We can't do that from within NET_EPOCH. * * However, we can also use the IFNET_xLOCK, which is the V_ifnet * read/write lock. 
We cannot hold the lock as we call if_vmove() * though, as that presents LOR w.r.t ifnet_sx, in_multi_sx and iflib * ctx lock. */ IFNET_WLOCK(); pending = malloc(sizeof(struct ifnet *) * curvnet->vnet_ifcnt, M_IFNET, M_WAITOK | M_ZERO); /* Return all inherited interfaces to their parent vnets. */ CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { if (ifp->if_home_vnet != ifp->if_vnet) { found = if_unlink_ifnet(ifp, true); MPASS(found); pending[i++] = ifp; } } IFNET_WUNLOCK(); for (int j = 0; j < i; j++) { sx_xlock(&ifnet_detach_sxlock); if_vmove(pending[j], pending[j]->if_home_vnet); sx_xunlock(&ifnet_detach_sxlock); } free(pending, M_IFNET); } VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY, vnet_if_return, NULL); #endif /* * Allocate a struct ifnet and an index for an interface. A layer 2 * common structure will also be allocated if an allocation routine is * registered for the passed type. */ static struct ifnet * if_alloc_domain(u_char type, int numa_domain) { struct ifnet *ifp; u_short idx; KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large")); if (numa_domain == IF_NODOM) ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK | M_ZERO); else ifp = malloc_domainset(sizeof(struct ifnet), M_IFNET, DOMAINSET_PREF(numa_domain), M_WAITOK | M_ZERO); ifp->if_type = type; ifp->if_alloctype = type; ifp->if_numa_domain = numa_domain; #ifdef VIMAGE ifp->if_vnet = curvnet; #endif if (if_com_alloc[type] != NULL) { ifp->if_l2com = if_com_alloc[type](type, ifp); KASSERT(ifp->if_l2com, ("%s: if_com_alloc[%u] failed", __func__, type)); } IF_ADDR_LOCK_INIT(ifp); TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp); TASK_INIT(&ifp->if_addmultitask, 0, if_siocaddmulti, ifp); ifp->if_afdata_initialized = 0; IF_AFDATA_LOCK_INIT(ifp); CK_STAILQ_INIT(&ifp->if_addrhead); CK_STAILQ_INIT(&ifp->if_multiaddrs); CK_STAILQ_INIT(&ifp->if_groups); #ifdef MAC mac_ifnet_init(ifp); #endif ifq_init(&ifp->if_snd, ifp); refcount_init(&ifp->if_refcount, 1); /* Index 
reference. */ for (int i = 0; i < IFCOUNTERS; i++) ifp->if_counters[i] = counter_u64_alloc(M_WAITOK); ifp->if_get_counter = if_get_counter_default; ifp->if_pcp = IFNET_PCP_NONE; /* Allocate an ifindex array entry. */ IFNET_WLOCK(); /* * Try to find an empty slot below if_index. If we fail, take the * next slot. */ for (idx = 1; idx <= if_index; idx++) { if (ifindex_table[idx].ife_ifnet == NULL) break; } /* Catch if_index overflow. */ if (idx >= if_indexlim) { struct ifindex_entry *new, *old; int newlim; newlim = if_indexlim * 2; new = malloc(newlim * sizeof(*new), M_IFNET, M_WAITOK | M_ZERO); memcpy(new, ifindex_table, if_indexlim * sizeof(*new)); old = ifindex_table; ck_pr_store_ptr(&ifindex_table, new); if_indexlim = newlim; NET_EPOCH_WAIT(); free(old, M_IFNET); } if (idx > if_index) if_index = idx; ifp->if_index = idx; ifp->if_idxgen = ifindex_table[idx].ife_gencnt; ck_pr_store_ptr(&ifindex_table[idx].ife_ifnet, ifp); IFNET_WUNLOCK(); return (ifp); } struct ifnet * if_alloc_dev(u_char type, device_t dev) { int numa_domain; if (dev == NULL || bus_get_domain(dev, &numa_domain) != 0) return (if_alloc_domain(type, IF_NODOM)); return (if_alloc_domain(type, numa_domain)); } struct ifnet * if_alloc(u_char type) { return (if_alloc_domain(type, IF_NODOM)); } /* * Do the actual work of freeing a struct ifnet, and layer 2 common * structure. This call is made when the network epoch guarantees * us that nobody holds a pointer to the interface. 
 */
/*
 * Runs as a network-epoch callback scheduled by if_free() or if_rele()
 * once the last reference is dropped.  ctx is the epoch context embedded
 * in the ifnet; the ifnet itself is recovered from it.
 */
static void
if_free_deferred(epoch_context_t ctx)
{
	struct ifnet *ifp = __containerof(ctx, struct ifnet, if_epoch_ctx);

	KASSERT((ifp->if_flags & IFF_DYING),
	    ("%s: interface not dying", __func__));

	/* Release the layer 2 structure using the type recorded at alloc. */
	if (if_com_free[ifp->if_alloctype] != NULL)
		if_com_free[ifp->if_alloctype](ifp->if_l2com,
		    ifp->if_alloctype);

#ifdef MAC
	mac_ifnet_destroy(ifp);
#endif /* MAC */
	IF_AFDATA_DESTROY(ifp);
	IF_ADDR_LOCK_DESTROY(ifp);
	ifq_delete(&ifp->if_snd);

	for (int i = 0; i < IFCOUNTERS; i++)
		counter_u64_free(ifp->if_counters[i]);

	if_freedescr(ifp->if_description);
	free(ifp->if_hw_addr, M_IFADDR);
	free(ifp, M_IFNET);
}

/*
 * Deregister an interface and free the associated storage.
 *
 * Marks the interface IFF_DYING, clears its ifindex_table slot (bumping
 * the slot's generation count and trimming if_index), and drops the index
 * reference.  The memory is released from an epoch callback, and only if
 * that was the last reference.
 */
void
if_free(struct ifnet *ifp)
{

	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */

	/*
	 * XXXGL: An interface index is really an alias to ifp pointer.
	 * Why would we clear the alias now, and not in the deferred
	 * context?  Indeed there is nothing wrong with some network
	 * thread obtaining ifp via ifnet_byindex() inside the network
	 * epoch and then dereferencing ifp while we perform if_free(),
	 * and after if_free() finished, too.
	 *
	 * This early index freeing was important back when ifindex was
	 * virtualized and interface would outlive the vnet.
	 */
	IFNET_WLOCK();
	MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);
	ck_pr_store_ptr(&ifindex_table[ifp->if_index].ife_ifnet, NULL);
	/* Invalidate (index, generation) pairs handed out for this slot. */
	ifindex_table[ifp->if_index].ife_gencnt++;
	/* Shrink if_index past any now-empty slots at the top. */
	while (if_index > 0 && ifindex_table[if_index].ife_ifnet == NULL)
		if_index--;
	IFNET_WUNLOCK();

	if (refcount_release(&ifp->if_refcount))
		NET_EPOCH_CALL(if_free_deferred, &ifp->if_epoch_ctx);
}

/*
 * Interfaces to keep an ifnet type-stable despite the possibility of the
 * driver calling if_free().  If there are additional references, we defer
 * freeing the underlying data structure.
 */

/*
 * Take an additional reference on ifp.  The caller must already hold one:
 * with assertions enabled, a prior count of zero trips the KASSERT.
 */
void
if_ref(struct ifnet *ifp)
{
	u_int old __diagused;

	/* We don't assert the ifnet list lock here, but arguably should. */
	old = refcount_acquire(&ifp->if_refcount);
	KASSERT(old > 0, ("%s: ifp %p has 0 refs", __func__, ifp));
}

/*
 * Try to take a reference on ifp from inside the network epoch.
 * Returns false, taking no reference, if the count was already zero.
 */
bool
if_try_ref(struct ifnet *ifp)
{
	NET_EPOCH_ASSERT();
	return (refcount_acquire_if_not_zero(&ifp->if_refcount));
}

/*
 * Drop a reference on ifp.  When the last reference goes away, the free
 * is scheduled as a network-epoch callback.
 */
void
if_rele(struct ifnet *ifp)
{

	if (!refcount_release(&ifp->if_refcount))
		return;
	NET_EPOCH_CALL(if_free_deferred, &ifp->if_epoch_ctx);
}

/*
 * Initialise the send queue ifq for interface ifp: set up its mutex
 * (named after if_xname), apply the default ifqmaxlen when no maximum
 * length has been set, and reset the ALTQ fields, keeping only the
 * ALTQF_CANTCHANGE bits of altq_flags.
 */
void
ifq_init(struct ifaltq *ifq, struct ifnet *ifp)
{

	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);

	if (ifq->ifq_maxlen == 0)
		ifq->ifq_maxlen = ifqmaxlen;

	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr  = NULL;
	ifq->altq_ifp  = ifp;
}

/* Tear down the send queue's mutex; the counterpart of ifq_init(). */
void
ifq_delete(struct ifaltq *ifq)
{
	mtx_destroy(&ifq->ifq_mtx);
}

/*
 * Perform generic interface initialization tasks and attach the interface
 * to the list of "active" interfaces.  If vmove flag is set on entry
 * to if_attach_internal(), perform only a limited subset of initialization
 * tasks, given that we are moving from one vnet to another an ifnet which
 * has already been fully initialized.
 *
 * Note that if_detach_internal() removes group membership unconditionally
 * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL.
 * Thus, when if_vmove() is applied to a cloned interface, group membership
 * is lost while a cloned one always joins a group whose name is
 * ifc->ifc_name.  To recover this after if_detach_internal() and
 * if_attach_internal(), the cloner should be specified to
 * if_attach_internal() via ifc.  If it is non-NULL, if_attach_internal()
 * attempts to join a group whose name is ifc->ifc_name.
 *
 * XXX:
 *  - The decision to return void and thus require this function to
 *    succeed is questionable.
 *  - We should probably do more sanity checking.  For instance we don't
 *    do anything to ensure if_xname is unique or non-empty.
 */
void
if_attach(struct ifnet *ifp)
{

	/* A plain attach is never a vnet move. */
	if_attach_internal(ifp, false);
}

/*
 * Compute the least common TSO limit.
*/
void
if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax)
{
	/*
	 * 1) If there is no limit currently, take the limit from
	 * the network adapter.
	 *
	 * 2) If the network adapter has a limit below the current
	 * limit, apply it.
	 *
	 * A value of zero means "no limit" on either side; the same rule
	 * is applied independently to each of the three fields.
	 */
	if (pmax->tsomaxbytes == 0 ||
	    (ifp->if_hw_tsomax != 0 &&
	    ifp->if_hw_tsomax < pmax->tsomaxbytes)) {
		pmax->tsomaxbytes = ifp->if_hw_tsomax;
	}
	if (pmax->tsomaxsegcount == 0 ||
	    (ifp->if_hw_tsomaxsegcount != 0 &&
	    ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) {
		pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
	}
	if (pmax->tsomaxsegsize == 0 ||
	    (ifp->if_hw_tsomaxsegsize != 0 &&
	    ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) {
		pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
	}
}

/*
 * Update TSO limit of a network adapter.
 *
 * Returns zero if no change.  Else non-zero (the number of fields that
 * were changed).
 */
int
if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax)
{
	int retval = 0;
	if (ifp->if_hw_tsomax != pmax->tsomaxbytes) {
		ifp->if_hw_tsomax = pmax->tsomaxbytes;
		retval++;
	}
	if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) {
		ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize;
		retval++;
	}
	if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) {
		ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount;
		retval++;
	}
	return (retval);
}

/*
 * Common attach path used by if_attach() (vmove == false) and by
 * if_vmove() (vmove == true).  With vmove set, the one-time setup in the
 * "!vmove" branch (MAC label, link-level address, TSO defaults) is
 * skipped because the ifnet has already been through it.
 */
static void
if_attach_internal(struct ifnet *ifp, bool vmove)
{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;

	MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);

#ifdef VIMAGE
	CURVNET_ASSERT_SET();
	ifp->if_vnet = curvnet;
	/* The first vnet an interface is attached in becomes its home. */
	if (ifp->if_home_vnet == NULL)
		ifp->if_home_vnet = curvnet;
#endif

	if_addgroup(ifp, IFG_ALL);

#ifdef VIMAGE
	/* Restore group membership for cloned interface. */
	if (vmove)
		if_clone_restoregroup(ifp);
#endif

	getmicrotime(&ifp->if_lastchange);
	ifp->if_epoch = time_uptime;

	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
	    (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
	    ("transmit and qflush must both either be set or both be NULL"));
	/* Fill in default methods for any the driver left unset. */
	if (ifp->if_transmit == NULL) {
		ifp->if_transmit = if_transmit_default;
		ifp->if_qflush = if_qflush;
	}
	if (ifp->if_input == NULL)
		ifp->if_input = if_input_default;

	if (ifp->if_requestencap == NULL)
		ifp->if_requestencap = if_requestencap_default;

	if (!vmove) {
#ifdef MAC
		mac_ifnet_create(ifp);
#endif

		/*
		 * Create a Link Level name for this device.
		 */
		namelen = strlen(ifp->if_xname);
		/*
		 * Always save enough space for any possible name so we
		 * can do a rename in place later.
		 */
		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
		socksize = masklen + ifp->if_addrlen;
		if (socksize < sizeof(*sdl))
			socksize = sizeof(*sdl);
		socksize = roundup2(socksize, sizeof(long));
		/* One allocation: the ifaddr, then address, then netmask. */
		ifasize = sizeof(*ifa) + 2 * socksize;
		ifa = ifa_alloc(ifasize, M_WAITOK);
		sdl = (struct sockaddr_dl *)(ifa + 1);
		sdl->sdl_len = socksize;
		sdl->sdl_family = AF_LINK;
		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		sdl->sdl_index = ifp->if_index;
		sdl->sdl_type = ifp->if_type;
		ifp->if_addr = ifa;
		ifa->ifa_ifp = ifp;
		ifa->ifa_addr = (struct sockaddr *)sdl;
		/* The netmask sockaddr_dl follows the address sockaddr_dl. */
		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
		ifa->ifa_netmask = (struct sockaddr *)sdl;
		sdl->sdl_len = masklen;
		/* Mask covers exactly the bytes of the interface name. */
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		CK_STAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
		/* Reliably crash if used uninitialized. */
		ifp->if_broadcastaddr = NULL;

		if (ifp->if_type == IFT_ETHER) {
			ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
			    M_WAITOK | M_ZERO);
		}

#if defined(INET) || defined(INET6)
		/* Use defaults for TSO, if nothing is set */
		if (ifp->if_hw_tsomax == 0 &&
		    ifp->if_hw_tsomaxsegcount == 0 &&
		    ifp->if_hw_tsomaxsegsize == 0) {
			/*
			 * The TSO defaults need to be such that an
			 * NFS mbuf list of 35 mbufs totalling just
			 * below 64K works and that a chain of mbufs
			 * can be defragged into at most 32 segments:
			 */
			ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
			ifp->if_hw_tsomaxsegcount = 35;
			ifp->if_hw_tsomaxsegsize = 2048;	/* 2K */

			/* XXX some drivers set IFCAP_TSO after ethernet attach */
			if (ifp->if_capabilities & IFCAP_TSO) {
				if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n",
				    ifp->if_hw_tsomax,
				    ifp->if_hw_tsomaxsegcount,
				    ifp->if_hw_tsomaxsegsize);
			}
		}
#endif
	}

	if (domain_init_status >= 2)
		if_attachdomain1(ifp);

	if_link_ifnet(ifp);

	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
}

/*
 * SYSINIT hook: allocate the preemptible network epoch.
 */
static void
if_epochalloc(void *dummy __unused)
{

	net_epoch_preempt = epoch_alloc("Net preemptible", EPOCH_PREEMPT);
}
SYSINIT(ifepochalloc, SI_SUB_EPOCH, SI_ORDER_ANY, if_epochalloc, NULL);

/*
 * SYSINIT hook: run if_attachdomain1() on every interface on V_ifnet.
 */
static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link)
		if_attachdomain1(ifp);
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
    if_attachdomain, NULL);

/*
 * Call each domain's dom_ifattach hook for ifp and store the result in
 * ifp->if_afdata[], indexed by the domain's family.  A second call at
 * the same domain_init_status is logged and ignored.
 */
static void
if_attachdomain1(struct ifnet *ifp)
{
	struct domain *dp;

	/*
	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
	 * cannot lock ifp->if_afdata initialization, entirely.
	 */
	IF_AFDATA_LOCK(ifp);
	if (ifp->if_afdata_initialized >= domain_init_status) {
		IF_AFDATA_UNLOCK(ifp);
		log(LOG_WARNING, "%s called more than once on %s\n",
		    __func__, ifp->if_xname);
		return;
	}
	ifp->if_afdata_initialized = domain_init_status;
	IF_AFDATA_UNLOCK(ifp);

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	SLIST_FOREACH(dp, &domains, dom_next) {
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	}
}

/*
 * Remove any unicast or broadcast network addresses from an interface.
 *
 * Each pass looks up the first non-AF_LINK address inside the network
 * epoch and removes it through the family-specific path where one
 * exists (INET, INET6), otherwise directly; the loop ends when only
 * AF_LINK addresses remain.
 */
void
if_purgeaddrs(struct ifnet *ifp)
{
	struct ifaddr *ifa;

#ifdef INET6
	/*
	 * Need to leave multicast addresses of proxy NDP llentries
	 * before in6_purgeifaddr() because the llentries are keys
	 * for in6_multi objects of proxy NDP entries.
	 * in6_purgeifaddr()s clean up llentries including proxy NDPs
	 * then we would lose the keys if they are called earlier.
	 */
	in6_purge_proxy_ndp(ifp);
#endif
	while (1) {
		struct epoch_tracker et;

		NET_EPOCH_ENTER(et);
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family != AF_LINK)
				break;
		}
		NET_EPOCH_EXIT(et);

		/* ifa is NULL when the list walk found no such address. */
		if (ifa == NULL)
			break;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct ifreq ifr;

			bzero(&ifr, sizeof(ifr));
			ifr.ifr_addr = *ifa->ifa_addr;
			/* On failure fall through to the generic removal. */
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
			    NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			in6_purgeifaddr((struct in6_ifaddr *)ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		IF_ADDR_WLOCK(ifp);
		CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
		IF_ADDR_WUNLOCK(ifp);
		ifa_free(ifa);
	}
}

/*
 * Remove any multicast network addresses from an interface when an ifnet
 * is going away.
*/
static void
if_purgemaddrs(struct ifnet *ifp)
{
	struct ifmultiaddr *ifma;

	IF_ADDR_WLOCK(ifp);
	/* Pop entries off the head until the list is empty. */
	while (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) {
		ifma = CK_STAILQ_FIRST(&ifp->if_multiaddrs);
		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
		if_delmulti_locked(ifp, ifma, 1);
	}
	IF_ADDR_WUNLOCK(ifp);
}

/*
 * Detach an interface, removing it from the list of "active" interfaces.
 * If vmove flag is set on entry to if_detach_internal(), perform only a
 * limited subset of cleanup tasks, given that we are moving an ifnet from
 * one vnet to another, where it must be fully operational.
 *
 * XXXRW: There are some significant questions about event ordering, and
 * how to prevent things from starting to use the interface during detach.
 */
void
if_detach(struct ifnet *ifp)
{
	bool found;

	CURVNET_SET_QUIET(ifp->if_vnet);
	found = if_unlink_ifnet(ifp, false);
	/* Only the caller that actually unlinked the ifnet tears it down. */
	if (found) {
		sx_xlock(&ifnet_detach_sxlock);
		if_detach_internal(ifp, false);
		sx_xunlock(&ifnet_detach_sxlock);
	}
	CURVNET_RESTORE();
}

/*
 * The vmove flag, if set, indicates that we are called from a callpath
 * that is moving an interface to a different vnet instance.
 *
 * The shutdown flag, if set, indicates that we are called in the
 * process of shutting down a vnet instance.  Currently only the
 * vnet_if_return SYSUNINIT function sets it.  Note: we can be called
 * on a vnet instance shutdown without this flag being set, e.g., when
 * the cloned interfaces are destroyed as first thing of teardown.
 *
 * NOTE(review): "shutdown" is not a parameter of this function; it is a
 * local computed below from VNET_IS_SHUTTING_DOWN(ifp->if_vnet).
 *
 * The caller must hold ifnet_detach_sxlock exclusively.
 */
static void
if_detach_internal(struct ifnet *ifp, bool vmove)
{
	struct ifaddr *ifa;
	int i;
	struct domain *dp;
	void *if_afdata[AF_MAX];
#ifdef VIMAGE
	bool shutdown;

	shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
#endif

	sx_assert(&ifnet_detach_sxlock, SX_XLOCKED);

	/*
	 * At this point we know the interface still was on the ifnet list
	 * and we removed it so we are in a stable state.
	 */
	NET_EPOCH_WAIT();

	/*
	 * Ensure all pending EPOCH(9) callbacks have been executed. This
	 * fixes issues about late destruction of multicast options
	 * which lead to leave group calls, which in turn access the
	 * belonging ifnet structure:
	 */
	NET_EPOCH_DRAIN_CALLBACKS();

	/*
	 * In any case (destroy or vmove) detach us from the groups
	 * and remove/wait for pending events on the taskq.
	 * XXX-BZ in theory an interface could still enqueue a taskq change?
	 */
	if_delgroups(ifp);

	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
	taskqueue_drain(taskqueue_swi, &ifp->if_addmultitask);

	if_down(ifp);

#ifdef VIMAGE
	/*
	 * On VNET shutdown abort here as the stack teardown will do all
	 * the work top-down for us.
	 */
	if (shutdown) {
		/* Give interface users the chance to clean up. */
		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);

		/*
		 * In case of a vmove we are done here without error.
		 * If we would signal an error it would lead to the same
		 * abort as if we did not find the ifnet anymore.
		 * if_detach() calls us in void context and does not care
		 * about an early abort notification, so life is splendid :)
		 */
		goto finish_vnet_shutdown;
	}
#endif

	/*
	 * At this point we are not tearing down a VNET and are either
	 * going to destroy or vmove the interface and have to cleanup
	 * accordingly.
	 */

	/*
	 * Remove routes and flush queues.
	 */
#ifdef ALTQ
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	if_purgeaddrs(ifp);

#ifdef INET
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif
	if_purgemaddrs(ifp);

	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	if (!vmove) {
		/*
		 * Prevent further calls into the device driver via ifnet.
		 */
		if_dead(ifp);

		/*
		 * Clean up all addresses.
		 */
		IF_ADDR_WLOCK(ifp);
		/* Only the first entry on the list is removed here. */
		if (!CK_STAILQ_EMPTY(&ifp->if_addrhead)) {
			ifa = CK_STAILQ_FIRST(&ifp->if_addrhead);
			CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
			IF_ADDR_WUNLOCK(ifp);
			ifa_free(ifa);
		} else
			IF_ADDR_WUNLOCK(ifp);
	}

	rt_flushifroutes(ifp);

#ifdef VIMAGE
finish_vnet_shutdown:
#endif
	/*
	 * We cannot hold the lock over dom_ifdetach calls as they might
	 * sleep, for example trying to drain a callout, thus open up the
	 * theoretical race with re-attaching.
	 */
	IF_AFDATA_LOCK(ifp);
	i = ifp->if_afdata_initialized;
	ifp->if_afdata_initialized = 0;
	if (i != 0) {
		/*
		 * Defer the dom_ifdetach call.
		 */
		_Static_assert(sizeof(if_afdata) == sizeof(ifp->if_afdata),
		    "array size mismatch");
		/* Snapshot the pointers locally, then clear the ifnet's copy. */
		memcpy(if_afdata, ifp->if_afdata, sizeof(if_afdata));
		memset(ifp->if_afdata, 0, sizeof(ifp->if_afdata));
	}
	IF_AFDATA_UNLOCK(ifp);
	if (i == 0)
		return;
	/*
	 * XXXZL: This net epoch wait is not necessary if we have done right.
	 * But if we do not, at least we can make a guarantee that threads
	 * that enter the net epoch will see NULL address family dependent
	 * data, e.g. if_afdata[AF_INET6].  A clear NULL pointer dereference
	 * is much better than writing to freed memory.
	 */
	NET_EPOCH_WAIT();
	SLIST_FOREACH(dp, &domains, dom_next) {
		if (dp->dom_ifdetach != NULL &&
		    if_afdata[dp->dom_family] != NULL)
			(*dp->dom_ifdetach)(ifp, if_afdata[dp->dom_family]);
	}
}

#ifdef VIMAGE
/*
 * if_vmove() performs a limited version of if_detach() in current
 * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
 */
static void
if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
{
#ifdef DEV_BPF
	/*
	 * Detach BPF file descriptors from its interface.
	 */
	bpf_ifdetach(ifp);
#endif

	/*
	 * Detach from current vnet, but preserve LLADDR info, do not
	 * mark as dead etc. so that the ifnet can be reattached later.
	 */
	if_detach_internal(ifp, true);

	/*
	 * Perform interface-specific reassignment tasks, if provided by
	 * the driver.
	 */
	if (ifp->if_reassign != NULL)
		ifp->if_reassign(ifp, new_vnet, NULL);

	/*
	 * Switch to the context of the target vnet.
	 */
	CURVNET_SET_QUIET(new_vnet);
	if_attach_internal(ifp, true);
	CURVNET_RESTORE();
}

/*
 * Move an ifnet to or from another child prison/vnet, specified by the jail id.
 *
 * Returns 0 on success and writes ifp->if_xname back into ifname;
 * otherwise ENXIO (no such child prison), EEXIST (same vnet, or the name
 * already exists in the destination), EBUSY (source vnet is shutting
 * down) or ENODEV (ifp could not be unlinked).
 */
static int
if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
{
	struct prison *pr;
	struct ifnet *difp;
	bool found;
	bool shutdown;

	MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp);

	/* Try to find the prison within our visibility. */
	sx_slock(&allprison_lock);
	pr = prison_find_child(td->td_ucred->cr_prison, jid);
	sx_sunlock(&allprison_lock);
	if (pr == NULL)
		return (ENXIO);
	/* Hold the prison across the move, then release its mutex. */
	prison_hold_locked(pr);
	mtx_unlock(&pr->pr_mtx);

	/* Do not try to move the iface from and to the same prison. */
	if (pr->pr_vnet == ifp->if_vnet) {
		prison_free(pr);
		return (EEXIST);
	}

	/* Make sure the named iface does not exist in the dst. prison/vnet. */
	/* XXX Lock interfaces to avoid races. */
	CURVNET_SET_QUIET(pr->pr_vnet);
	difp = ifunit(ifname);
	CURVNET_RESTORE();
	if (difp != NULL) {
		prison_free(pr);
		return (EEXIST);
	}
	sx_xlock(&ifnet_detach_sxlock);

	/* Make sure the VNET is stable. */
	shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
	if (shutdown) {
		sx_xunlock(&ifnet_detach_sxlock);
		prison_free(pr);
		return (EBUSY);
	}

	found = if_unlink_ifnet(ifp, true);
	if (! found) {
		sx_xunlock(&ifnet_detach_sxlock);
		prison_free(pr);
		return (ENODEV);
	}

	/* Move the interface into the child jail/vnet. */
	if_vmove(ifp, pr->pr_vnet);

	/* Report the new if_xname back to the userland. */
	sprintf(ifname, "%s", ifp->if_xname);

	sx_xunlock(&ifnet_detach_sxlock);

	prison_free(pr);
	return (0);
}

/*
 * Move the interface named ifname from the child prison identified by
 * jid back into the calling thread's vnet.
 *
 * Returns 0 on success and writes ifp->if_xname back into ifname;
 * otherwise ENXIO (no such child prison or interface), EEXIST (already
 * in the caller's vnet) or EBUSY (source vnet is shutting down).
 */
static int
if_vmove_reclaim(struct thread *td, char *ifname, int jid)
{
	struct prison *pr;
	struct vnet *vnet_dst;
	struct ifnet *ifp;
	int found __diagused;
	bool shutdown;

	/* Try to find the prison within our visibility. */
	sx_slock(&allprison_lock);
	pr = prison_find_child(td->td_ucred->cr_prison, jid);
	sx_sunlock(&allprison_lock);
	if (pr == NULL)
		return (ENXIO);
	prison_hold_locked(pr);
	mtx_unlock(&pr->pr_mtx);

	/* Make sure the named iface exists in the source prison/vnet. */
	CURVNET_SET(pr->pr_vnet);
	ifp = ifunit(ifname);		/* XXX Lock to avoid races. */
	if (ifp == NULL) {
		CURVNET_RESTORE();
		prison_free(pr);
		return (ENXIO);
	}

	/* Do not try to move the iface from and to the same prison. */
	vnet_dst = TD_TO_VNET(td);
	if (vnet_dst == ifp->if_vnet) {
		CURVNET_RESTORE();
		prison_free(pr);
		return (EEXIST);
	}

	/* Make sure the VNET is stable. */
	shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
	if (shutdown) {
		CURVNET_RESTORE();
		prison_free(pr);
		return (EBUSY);
	}

	/* Get interface back from child jail/vnet. */
	found = if_unlink_ifnet(ifp, true);
	/* Unlike if_vmove_loan(), a failed unlink is only asserted here. */
	MPASS(found);
	sx_xlock(&ifnet_detach_sxlock);
	if_vmove(ifp, vnet_dst);
	sx_xunlock(&ifnet_detach_sxlock);
	CURVNET_RESTORE();

	/* Report the new if_xname back to the userland. */
	sprintf(ifname, "%s", ifp->if_xname);

	prison_free(pr);
	return (0);
}
#endif /* VIMAGE */

/*
 * Add a group to an interface
 *
 * Returns 0 on success, EINVAL when a non-empty group name ends in a
 * digit, EEXIST when the interface is already a member, and ENOMEM when
 * an M_NOWAIT allocation fails.
 */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_group	*ifg = NULL;
	struct ifg_member	*ifgm;
	int 			 new = 0;

	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
	    groupname[strlen(groupname) - 1] <= '9')
		return (EINVAL);

	IFNET_WLOCK();
	CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
			IFNET_WUNLOCK();
			return (EEXIST);
		}

	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL) {
		IFNET_WUNLOCK();
		return (ENOMEM);
	}

	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
		free(ifgl, M_TEMP);
		IFNET_WUNLOCK();
		return (ENOMEM);
	}

	/* Look for an existing group with this name. */
	CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	/* None found: create the group and remember to announce it. */
	if (ifg == NULL) {
		if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL) {
			free(ifgl, M_TEMP);
			free(ifgm, M_TEMP);
			IFNET_WUNLOCK();
			return (ENOMEM);
		}
		strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
		ifg->ifg_refcnt = 0;
		CK_STAILQ_INIT(&ifg->ifg_members);
		CK_STAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
		new = 1;
	}

	ifg->ifg_refcnt++;
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	IF_ADDR_WLOCK(ifp);
	CK_STAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	CK_STAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
	IF_ADDR_WUNLOCK(ifp);

	IFNET_WUNLOCK();

	if (new)
		EVENTHANDLER_INVOKE(group_attach_event, ifg);
	EVENTHANDLER_INVOKE(group_change_event, groupname);

	return (0);
}

/*
 * Helper function to remove a group out of an interface.  Expects the global
 * ifnet lock to be write-locked, and drops it before returning.
 */
static void
_if_delgroup_locked(struct ifnet *ifp, struct ifg_list *ifgl,
    const char *groupname)
{
	struct ifg_member *ifgm;
	bool freeifgl;

	IFNET_WLOCK_ASSERT();

	IF_ADDR_WLOCK(ifp);
	CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next);
	IF_ADDR_WUNLOCK(ifp);

	/* Unlink this interface's member record from the group. */
	CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) {
		if (ifgm->ifgm_ifp == ifp) {
			CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
			    ifg_member, ifgm_next);
			break;
		}
	}

	/* Last member gone: unlink the group itself and free it below. */
	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group,
		    ifg_next);
		freeifgl = true;
	} else {
		freeifgl = false;
	}
	IFNET_WUNLOCK();

	/* Wait for the network epoch before freeing the unlinked entries. */
	NET_EPOCH_WAIT();
	EVENTHANDLER_INVOKE(group_change_event, groupname);
	if (freeifgl) {
		EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
		free(ifgl->ifgl_group, M_TEMP);
	}
	/* ifgm is NULL here if the member loop found no match. */
	free(ifgm, M_TEMP);
	free(ifgl, M_TEMP);
}

/*
 * Remove a group from an interface
 *
 * Returns 0 on success or ENOENT when the interface is not a member of
 * the named group.
 */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;

	IFNET_WLOCK();
	CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0)
			break;
	if (ifgl == NULL) {
		IFNET_WUNLOCK();
		return (ENOENT);
	}

	/* The helper drops the ifnet write lock. */
	_if_delgroup_locked(ifp, ifgl, groupname);

	return (0);
}

/*
 * Remove an interface from all groups
 */
static void
if_delgroups(struct ifnet *ifp)
{
	struct ifg_list *ifgl;
	char groupname[IFNAMSIZ];

	IFNET_WLOCK();
	while ((ifgl = CK_STAILQ_FIRST(&ifp->if_groups)) != NULL) {
		/* Copy the name: the helper may free the group. */
		strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
		_if_delgroup_locked(ifp, ifgl, groupname);
		/* The helper dropped the lock; retake it for the next pass. */
		IFNET_WLOCK();
	}
	IFNET_WUNLOCK();
}

/*
 * Stores all groups from an interface in memory pointed to by ifgr.
 *
 * When ifgr->ifgr_len is zero, only the required buffer length is
 * accumulated into ifgr->ifgr_len.  Returns EINVAL when the supplied
 * length is too small, or the error from copyout().
 */
static int
if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp)
{
	int			 len, error;
	struct ifg_list		*ifgl;
	struct ifg_req		 ifgrq, *ifgp;

	NET_EPOCH_ASSERT();

	if (ifgr->ifgr_len == 0) {
		CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	/* XXX: wire */
	CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}

/*
 * Stores all members of a group in memory pointed to by ifgr
 *
 * When ifgr->ifgr_len is zero, only the required buffer length is
 * accumulated into ifgr->ifgr_len.  Returns ENOENT when no group is
 * named ifgr->ifgr_name, EINVAL when the supplied length is too small,
 * or the error from copyout().
 */
static int
if_getgroupmembers(struct ifgroupreq *ifgr)
{
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	IFNET_RLOCK();
	CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
		if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0)
			break;
	if (ifg == NULL) {
		IFNET_RUNLOCK();
		return (ENOENT);
	}

	if (ifgr->ifgr_len == 0) {
		CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		IFNET_RUNLOCK();
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq)) {
			IFNET_RUNLOCK();
			return (EINVAL);
		}
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) {
			IFNET_RUNLOCK();
			return (error);
		}
		len -= sizeof(ifgrq);
		ifgp++;
	}
	IFNET_RUNLOCK();

	return (0);
}

/*
 * Return counter values from counter(9)s stored in ifnet.
 */
uint64_t
if_get_counter_default(struct ifnet *ifp, ift_counter cnt)
{

	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
	return (counter_u64_fetch(ifp->if_counters[cnt]));
}

/*
 * Increase an ifnet counter.  Usually used for counters shared
 * between the stack and a driver, but function supports them all.
 */
void
if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc)
{

	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
	counter_u64_add(ifp->if_counters[cnt], inc);
}

/*
 * Copy data from ifnet to userland API structure if_data.
 */
void
if_data_copy(struct ifnet *ifp, struct if_data *ifd)
{

	ifd->ifi_type = ifp->if_type;
	ifd->ifi_physical = 0;
	ifd->ifi_addrlen = ifp->if_addrlen;
	ifd->ifi_hdrlen = ifp->if_hdrlen;
	ifd->ifi_link_state = ifp->if_link_state;
	ifd->ifi_vhid = 0;
	ifd->ifi_datalen = sizeof(struct if_data);
	ifd->ifi_mtu = ifp->if_mtu;
	ifd->ifi_metric = ifp->if_metric;
	ifd->ifi_baudrate = ifp->if_baudrate;
	ifd->ifi_hwassist = ifp->if_hwassist;
	ifd->ifi_epoch = ifp->if_epoch;
	ifd->ifi_lastchange = ifp->if_lastchange;

	/* Statistics go through the interface's if_get_counter method. */
	ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
	ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
	ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
	ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
	ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
	ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
	ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
	ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
	ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS);
	ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
	ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
	ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
}

/*
 * Initialization, destruction and refcounting
functions for ifaddrs. */ struct ifaddr * ifa_alloc(size_t size, int flags) { struct ifaddr *ifa; KASSERT(size >= sizeof(struct ifaddr), ("%s: invalid size %zu", __func__, size)); ifa = malloc(size, M_IFADDR, M_ZERO | flags); if (ifa == NULL) return (NULL); if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL) goto fail; refcount_init(&ifa->ifa_refcnt, 1); return (ifa); fail: /* free(NULL) is okay */ counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); return (NULL); } void ifa_ref(struct ifaddr *ifa) { u_int old __diagused; old = refcount_acquire(&ifa->ifa_refcnt); KASSERT(old > 0, ("%s: ifa %p has 0 refs", __func__, ifa)); } int ifa_try_ref(struct ifaddr *ifa) { NET_EPOCH_ASSERT(); return (refcount_acquire_if_not_zero(&ifa->ifa_refcnt)); } static void ifa_destroy(epoch_context_t ctx) { struct ifaddr *ifa; ifa = __containerof(ctx, struct ifaddr, ifa_epoch_ctx); counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); } void ifa_free(struct ifaddr *ifa) { if (refcount_release(&ifa->ifa_refcnt)) NET_EPOCH_CALL(ifa_destroy, &ifa->ifa_epoch_ctx); } /* * XXX: Because sockaddr_dl has deeper structure than the sockaddr * structs used to represent other address families, it is necessary * to perform a different comparison. 
*/ static bool sa_dl_equal(const struct sockaddr *a, const struct sockaddr *b) { const struct sockaddr_dl *sdl1 = (const struct sockaddr_dl *)a; const struct sockaddr_dl *sdl2 = (const struct sockaddr_dl *)b; return (sdl1->sdl_len == sdl2->sdl_len && bcmp(sdl1->sdl_data + sdl1->sdl_nlen, sdl2->sdl_data + sdl2->sdl_nlen, sdl1->sdl_alen) == 0); } /* * Locate an interface based on a complete address. */ /*ARGSUSED*/ struct ifaddr * ifa_ifwithaddr(const struct sockaddr *addr) { struct ifnet *ifp; struct ifaddr *ifa; NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (sa_equal(addr, ifa->ifa_addr)) { goto done; } /* IP6 doesn't have broadcast */ if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { goto done; } } } ifa = NULL; done: return (ifa); } int ifa_ifwithaddr_check(const struct sockaddr *addr) { struct epoch_tracker et; int rc; NET_EPOCH_ENTER(et); rc = (ifa_ifwithaddr(addr) != NULL); NET_EPOCH_EXIT(et); return (rc); } /* * Locate an interface based on the broadcast address. */ /* ARGSUSED */ struct ifaddr * ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { goto done; } } } ifa = NULL; done: return (ifa); } /* * Locate the point to point interface with a given destination address. 
*/ /*ARGSUSED*/ struct ifaddr * ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0) continue; if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { goto done; } } } ifa = NULL; done: return (ifa); } /* * Find an interface on a specific network. If many, choice * is most specific found. */ struct ifaddr * ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; const char *addr_data = addr->sa_data, *cplim; NET_EPOCH_ASSERT(); /* * AF_LINK addresses can be looked up directly by their index number, * so do that if we can. */ if (af == AF_LINK) { ifp = ifnet_byindex( ((const struct sockaddr_dl *)addr)->sdl_index); return (ifp ? ifp->if_addr : NULL); } /* * Scan though each interface, looking for ones that have addresses * in this address family and the requested fib. */ CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { const char *cp, *cp2, *cp3; if (ifa->ifa_addr->sa_family != af) next: continue; if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) { /* * This is a bit broken as it doesn't * take into account that the remote end may * be a single node in the network we are * looking for. * The trouble is that we don't know the * netmask for the remote end. */ if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { goto done; } } else { /* * Scan all the bits in the ifa's address. 
* If a bit dissagrees with what we are * looking for, mask it with the netmask * to see if it really matters. * (A byte at a time) */ if (ifa->ifa_netmask == 0) continue; cp = addr_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; while (cp3 < cplim) if ((*cp++ ^ *cp2++) & *cp3++) goto next; /* next address! */ /* * If the netmask of what we just found * is more specific than what we had before * (if we had one), or if the virtual status * of new prefix is better than of the old one, * then remember the new one before continuing * to search for an even better one. */ if (ifa_maybe == NULL || ifa_preferred(ifa_maybe, ifa) || rn_refines((caddr_t)ifa->ifa_netmask, (caddr_t)ifa_maybe->ifa_netmask)) { ifa_maybe = ifa; } } } } ifa = ifa_maybe; ifa_maybe = NULL; done: return (ifa); } /* * Find an interface address specific to an interface best matching * a given address. */ struct ifaddr * ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) { struct ifaddr *ifa; const char *cp, *cp2, *cp3; char *cplim; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; if (af >= AF_MAX) return (NULL); NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != af) continue; if (ifa_maybe == NULL) ifa_maybe = ifa; if (ifa->ifa_netmask == 0) { if (sa_equal(addr, ifa->ifa_addr) || (ifa->ifa_dstaddr && sa_equal(addr, ifa->ifa_dstaddr))) goto done; continue; } if (ifp->if_flags & IFF_POINTOPOINT) { if (ifa->ifa_dstaddr && sa_equal(addr, ifa->ifa_dstaddr)) goto done; } else { cp = addr->sa_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; for (; cp3 < cplim; cp3++) if ((*cp++ ^ *cp2++) & *cp3) break; if (cp3 == cplim) goto done; } } ifa = ifa_maybe; done: return (ifa); } /* * See whether new ifa is better than current one: * 1) A non-virtual one is preferred over 
virtual. * 2) A virtual in master state preferred over any other state. * * Used in several address selecting functions. */ int ifa_preferred(struct ifaddr *cur, struct ifaddr *next) { return (cur->ifa_carp && (!next->ifa_carp || ((*carp_master_p)(next) && !(*carp_master_p)(cur)))); } struct sockaddr_dl * link_alloc_sdl(size_t size, int flags) { return (malloc(size, M_TEMP, flags)); } void link_free_sdl(struct sockaddr *sa) { free(sa, M_TEMP); } /* * Fills in given sdl with interface basic info. * Returns pointer to filled sdl. */ struct sockaddr_dl * link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)paddr; memset(sdl, 0, sizeof(struct sockaddr_dl)); sdl->sdl_len = sizeof(struct sockaddr_dl); sdl->sdl_family = AF_LINK; sdl->sdl_index = ifp->if_index; sdl->sdl_type = iftype; return (sdl); } void (*vlan_link_state_p)(struct ifnet *); /* XXX: private from if_vlan */ void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */ struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t); int (*vlan_tag_p)(struct ifnet *, uint16_t *); int (*vlan_pcp_p)(struct ifnet *, uint16_t *); int (*vlan_setcookie_p)(struct ifnet *, void *); void *(*vlan_cookie_p)(struct ifnet *); void (*vlan_input_p)(struct ifnet *, struct mbuf *); /* * Handle a change in the interface link state. To avoid LORs * between driver lock and upper layer locks, as well as possible * recursions, we post event to taskqueue, and all job * is done in static do_link_state_change(). */ void if_link_state_change(struct ifnet *ifp, int link_state) { /* Return if state hasn't changed. */ if (ifp->if_link_state == link_state) return; ifp->if_link_state = link_state; /* XXXGL: reference ifp? 
*/
	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
}

/*
 * Deferred part of a link state change.
 *
 * 'arg' is the ifnet; 'pending' is reported in a "coalesced" message when
 * it is greater than one.  Runs with the interface's vnet set as current
 * and notifies, in order: the routing socket, vlan, netgraph ethernet,
 * carp, bridge and lagg (each only if attached to the interface), devctl
 * (default vnet only), the optional log line, and the ifnet_link_event
 * handlers.
 */
static void
do_link_state_change(void *arg, int pending)
{
	struct ifnet *ifp;
	int link_state;

	ifp = arg;
	/* Sample the state once so every consumer sees the same value. */
	link_state = ifp->if_link_state;

	CURVNET_SET(ifp->if_vnet);
	rt_ifmsg(ifp, 0);
	if (ifp->if_vlantrunk != NULL)
		(*vlan_link_state_p)(ifp);

	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
	    ifp->if_l2com != NULL)
		(*ng_ether_link_state_p)(ifp, link_state);
	if (ifp->if_carp)
		(*carp_linkstate_p)(ifp);
	if (ifp->if_bridge)
		ifp->if_bridge_linkstate(ifp);
	if (ifp->if_lagg)
		(*lagg_linkstate_p)(ifp, link_state);

	/* Anything other than LINK_STATE_UP is reported as LINK_DOWN. */
	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname,
		    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
		    NULL);
	if (pending > 1)
		if_printf(ifp, "%d link states coalesced\n", pending);
	if (log_link_state_change)
		if_printf(ifp, "link state changed to %s\n",
		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
	CURVNET_RESTORE();
}

/*
 * Mark an interface down and notify protocols of
 * the transition.
 *
 * The IFNET_EVENT_DOWN handlers run before IFF_UP is cleared; the send
 * queue is then flushed through the interface's if_qflush method.
 */
void
if_down(struct ifnet *ifp)
{

	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	ifp->if_qflush(ifp);

	if (ifp->if_carp)
		(*carp_linkstate_p)(ifp);
	rt_ifmsg(ifp, IFF_UP);
}

/*
 * Mark an interface up and notify protocols of
 * the transition.
 *
 * The IFNET_EVENT_UP handlers run last, after IFF_UP is already set.
 */
void
if_up(struct ifnet *ifp)
{

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	if (ifp->if_carp)
		(*carp_linkstate_p)(ifp);
	rt_ifmsg(ifp, IFF_UP);
	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
}

/*
 * Flush an interface queue.
*/ void if_qflush(struct ifnet *ifp) { struct mbuf *m, *n; struct ifaltq *ifq; ifq = &ifp->if_snd; IFQ_LOCK(ifq); #ifdef ALTQ if (ALTQ_IS_ENABLED(ifq)) ALTQ_PURGE(ifq); #endif n = ifq->ifq_head; while ((m = n) != NULL) { n = m->m_nextpkt; m_freem(m); } ifq->ifq_head = 0; ifq->ifq_tail = 0; ifq->ifq_len = 0; IFQ_UNLOCK(ifq); } /* * Map interface name to interface structure pointer, with or without * returning a reference. */ struct ifnet * ifunit_ref(const char *name) { struct epoch_tracker et; struct ifnet *ifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 && !(ifp->if_flags & IFF_DYING)) break; } if (ifp != NULL) { if_ref(ifp); MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp); } NET_EPOCH_EXIT(et); return (ifp); } struct ifnet * ifunit(const char *name) { struct epoch_tracker et; struct ifnet *ifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0) break; } NET_EPOCH_EXIT(et); return (ifp); } void * ifr_buffer_get_buffer(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((void *)(uintptr_t) ifrup->ifr32.ifr_ifru.ifru_buffer.buffer); #endif return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer); } static void ifr_buffer_set_buffer_null(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0; else #endif ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL; } size_t ifr_buffer_get_length(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return (ifrup->ifr32.ifr_ifru.ifru_buffer.length); #endif return (ifrup->ifr.ifr_ifru.ifru_buffer.length); } static void ifr_buffer_set_length(void *data, size_t len) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) 
ifrup->ifr32.ifr_ifru.ifru_buffer.length = len; else #endif ifrup->ifr.ifr_ifru.ifru_buffer.length = len; } void * ifr_data_get_ptr(void *ifrp) { union ifreq_union *ifrup; ifrup = ifrp; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((void *)(uintptr_t) ifrup->ifr32.ifr_ifru.ifru_data); #endif return (ifrup->ifr.ifr_ifru.ifru_data); } struct ifcap_nv_bit_name { uint64_t cap_bit; const char *cap_name; }; #define CAPNV(x) {.cap_bit = IFCAP_##x, \ .cap_name = __CONCAT(IFCAP_, __CONCAT(x, _NAME)) } const struct ifcap_nv_bit_name ifcap_nv_bit_names[] = { CAPNV(RXCSUM), CAPNV(TXCSUM), CAPNV(NETCONS), CAPNV(VLAN_MTU), CAPNV(VLAN_HWTAGGING), CAPNV(JUMBO_MTU), CAPNV(POLLING), CAPNV(VLAN_HWCSUM), CAPNV(TSO4), CAPNV(TSO6), CAPNV(LRO), CAPNV(WOL_UCAST), CAPNV(WOL_MCAST), CAPNV(WOL_MAGIC), CAPNV(TOE4), CAPNV(TOE6), CAPNV(VLAN_HWFILTER), CAPNV(VLAN_HWTSO), CAPNV(LINKSTATE), CAPNV(NETMAP), CAPNV(RXCSUM_IPV6), CAPNV(TXCSUM_IPV6), CAPNV(HWSTATS), CAPNV(TXRTLMT), CAPNV(HWRXTSTMP), CAPNV(MEXTPG), CAPNV(TXTLS4), CAPNV(TXTLS6), CAPNV(VXLAN_HWCSUM), CAPNV(VXLAN_HWTSO), CAPNV(TXTLS_RTLMT), {0, NULL} }; #define CAP2NV(x) {.cap_bit = IFCAP2_BIT(IFCAP2_##x), \ .cap_name = __CONCAT(IFCAP2_, __CONCAT(x, _NAME)) } const struct ifcap_nv_bit_name ifcap2_nv_bit_names[] = { CAP2NV(RXTLS4), CAP2NV(RXTLS6), CAP2NV(IPSEC_OFFLOAD), {0, NULL} }; #undef CAPNV #undef CAP2NV int if_capnv_to_capint(const nvlist_t *nv, int *old_cap, const struct ifcap_nv_bit_name *nn, bool all) { int i, res; res = 0; for (i = 0; nn[i].cap_name != NULL; i++) { if (nvlist_exists_bool(nv, nn[i].cap_name)) { if (all || nvlist_get_bool(nv, nn[i].cap_name)) res |= nn[i].cap_bit; } else { res |= *old_cap & nn[i].cap_bit; } } return (res); } void if_capint_to_capnv(nvlist_t *nv, const struct ifcap_nv_bit_name *nn, int ifr_cap, int ifr_req) { int i; for (i = 0; nn[i].cap_name != NULL; i++) { if ((nn[i].cap_bit & ifr_cap) != 0) { nvlist_add_bool(nv, nn[i].cap_name, (nn[i].cap_bit & ifr_req) != 0); } } } /* * Hardware 
specific interface ioctls.
 */
/*
 * Dispatches on 'cmd' for an interface that has already been looked up.
 * 'data' is the ioctl argument (used as a struct ifreq for most commands,
 * and as a struct ifgroupreq for the group commands); 'td' is the calling
 * thread, used for privilege and MAC checks.
 *
 * Returns 0 or an errno value.  Commands not listed in the switch yield
 * ENOIOCTL.  Most "set" commands check a privilege first and update
 * if_lastchange when they succeed.
 */
int
ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
{
	struct ifreq *ifr;
	int error = 0, do_ifup = 0;
	int new_flags, temp_flags;
	size_t descrlen, nvbuflen;
	char *descrbuf;
	char new_name[IFNAMSIZ];
	void *buf;
	nvlist_t *nvcap;
	struct siocsifcapnv_driver_data drv_ioctl_data;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCGIFINDEX:
		ifr->ifr_index = ifp->if_index;
		break;

	case SIOCGIFFLAGS:
		/* Stack and driver flags are merged, then split in halves. */
		temp_flags = ifp->if_flags | ifp->if_drv_flags;
		ifr->ifr_flags = temp_flags & 0xffff;
		ifr->ifr_flagshigh = temp_flags >> 16;
		break;

	case SIOCGIFCAP:
		ifr->ifr_reqcap = ifp->if_capabilities;
		ifr->ifr_curcap = ifp->if_capenable;
		break;

	case SIOCGIFCAPNV:
		if ((ifp->if_capabilities & IFCAP_NV) == 0) {
			error = EINVAL;
			break;
		}
		buf = NULL;
		nvcap = nvlist_create(0);
		/*
		 * Single-pass loop: "break" is used as a jump to the common
		 * cleanup below.
		 */
		for (;;) {
			if_capint_to_capnv(nvcap, ifcap_nv_bit_names,
			    ifp->if_capabilities, ifp->if_capenable);
			if_capint_to_capnv(nvcap, ifcap2_nv_bit_names,
			    ifp->if_capabilities2, ifp->if_capenable2);
			error = (*ifp->if_ioctl)(ifp, SIOCGIFCAPNV,
			    __DECONST(caddr_t, nvcap));
			if (error != 0) {
				if_printf(ifp,
			    "SIOCGIFCAPNV driver mistake: nvlist error %d\n",
				    error);
				break;
			}
			buf = nvlist_pack(nvcap, &nvbuflen);
			if (buf == NULL) {
				error = nvlist_error(nvcap);
				if (error == 0)
					error = EDOOFUS;
				break;
			}
			/*
			 * User buffer too small: report the needed length
			 * and clear the buffer pointer.
			 */
			if (nvbuflen > ifr->ifr_cap_nv.buf_length) {
				ifr->ifr_cap_nv.length = nvbuflen;
				ifr->ifr_cap_nv.buffer = NULL;
				error = EFBIG;
				break;
			}
			ifr->ifr_cap_nv.length = nvbuflen;
			error = copyout(buf, ifr->ifr_cap_nv.buffer, nvbuflen);
			break;
		}
		free(buf, M_NVLIST);
		nvlist_destroy(nvcap);
		break;

	case SIOCGIFDATA:
	{
		struct if_data ifd;

		/* Ensure uninitialised padding is not leaked. */
		memset(&ifd, 0, sizeof(ifd));

		if_data_copy(ifp, &ifd);
		error = copyout(&ifd, ifr_data_get_ptr(ifr), sizeof(ifd));
		break;
	}

#ifdef MAC
	case SIOCGIFMAC:
		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
		break;
#endif

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFPHYS:
		/* XXXGL: did this ever work? */
		ifr->ifr_phys = 0;
		break;

	case SIOCGIFDESCR:
		error = 0;
		sx_slock(&ifdescr_sx);
		if (ifp->if_description == NULL)
			error = ENOMSG;
		else {
			/* space for terminating nul */
			descrlen = strlen(ifp->if_description) + 1;
			/*
			 * If the user buffer is too short its pointer is
			 * cleared; the needed length is stored either way.
			 */
			if (ifr_buffer_get_length(ifr) < descrlen)
				ifr_buffer_set_buffer_null(ifr);
			else
				error = copyout(ifp->if_description,
				    ifr_buffer_get_buffer(ifr), descrlen);
			ifr_buffer_set_length(ifr, descrlen);
		}
		sx_sunlock(&ifdescr_sx);
		break;

	case SIOCSIFDESCR:
		error = priv_check(td, PRIV_NET_SETIFDESCR);
		if (error)
			return (error);

		/*
		 * Copy only (length-1) bytes to make sure that
		 * if_description is always nul terminated.  The
		 * length parameter is supposed to count the
		 * terminating nul in.
		 */
		if (ifr_buffer_get_length(ifr) > ifdescr_maxlen)
			return (ENAMETOOLONG);
		else if (ifr_buffer_get_length(ifr) == 0)
			descrbuf = NULL;
		else {
			descrbuf = if_allocdescr(ifr_buffer_get_length(ifr),
			    M_WAITOK);
			error = copyin(ifr_buffer_get_buffer(ifr), descrbuf,
			    ifr_buffer_get_length(ifr) - 1);
			if (error) {
				if_freedescr(descrbuf);
				break;
			}
		}

		if_setdescr(ifp, descrbuf);
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCGIFFIB:
		ifr->ifr_fib = ifp->if_fib;
		break;

	case SIOCSIFFIB:
		error = priv_check(td, PRIV_NET_SETIFFIB);
		if (error)
			return (error);
		if (ifr->ifr_fib >= rt_numfibs)
			return (EINVAL);

		ifp->if_fib = ifr->ifr_fib;
		break;

	case SIOCSIFFLAGS:
		error = priv_check(td, PRIV_NET_SETIFFLAGS);
		if (error)
			return (error);
		/*
		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
		 * check, so we don't need special handling here yet.
		 */
		new_flags = (ifr->ifr_flags & 0xffff) |
		    (ifr->ifr_flagshigh << 16);
		/*
		 * A down transition is applied immediately; an up transition
		 * is deferred until after the driver has been called.
		 */
		if (ifp->if_flags & IFF_UP &&
		    (new_flags & IFF_UP) == 0) {
			if_down(ifp);
		} else if (new_flags & IFF_UP &&
		    (ifp->if_flags & IFF_UP) == 0) {
			do_ifup = 1;
		}

		/*
		 * See if the promiscuous mode or allmulti bits are about to
		 * flip.  They require special handling because in-kernel
		 * consumers may independently toggle them.
		 */
		if_setppromisc(ifp, new_flags & IFF_PPROMISC);
		if ((ifp->if_flags ^ new_flags) & IFF_PALLMULTI) {
			if (new_flags & IFF_PALLMULTI)
				ifp->if_flags |= IFF_ALLMULTI;
			else if (ifp->if_amcount == 0)
				ifp->if_flags &= ~IFF_ALLMULTI;
		}
		/* Keep the IFF_CANTCHANGE bits, take the rest from the request. */
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(new_flags &~ IFF_CANTCHANGE);
		/* The driver's return value is deliberately ignored here. */
		if (ifp->if_ioctl) {
			(void) (*ifp->if_ioctl)(ifp, cmd, data);
		}
		if (do_ifup)
			if_up(ifp);
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFCAP:
		error = priv_check(td, PRIV_NET_SETIFCAP);
		if (error != 0)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		/* Reject requests for bits the interface does not advertise. */
		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
			return (EINVAL);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFCAPNV:
		error = priv_check(td, PRIV_NET_SETIFCAP);
		if (error != 0)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		if ((ifp->if_capabilities & IFCAP_NV) == 0)
			return (EINVAL);
		if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
			return (EINVAL);
		nvcap = NULL;
		buf = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK);
		/* Single-pass loop; "break" jumps to the cleanup below. */
		for (;;) {
			error = copyin(ifr->ifr_cap_nv.buffer, buf,
			    ifr->ifr_cap_nv.length);
			if (error != 0)
				break;
			nvcap = nvlist_unpack(buf, ifr->ifr_cap_nv.length, 0);
			if (nvcap == NULL) {
				error = EINVAL;
				break;
			}
			/*
			 * Convert both capability words and refuse any bit
			 * the interface does not advertise.
			 */
			drv_ioctl_data.reqcap = if_capnv_to_capint(nvcap,
			    &ifp->if_capenable, ifcap_nv_bit_names, false);
			if ((drv_ioctl_data.reqcap &
			    ~ifp->if_capabilities) != 0) {
				error = EINVAL;
				break;
			}
			drv_ioctl_data.reqcap2 = if_capnv_to_capint(nvcap,
			    &ifp->if_capenable2, ifcap2_nv_bit_names, false);
			if ((drv_ioctl_data.reqcap2 &
			    ~ifp->if_capabilities2) != 0) {
				error = EINVAL;
				break;
			}
			drv_ioctl_data.nvcap = nvcap;
			error = (*ifp->if_ioctl)(ifp, SIOCSIFCAPNV,
			    (caddr_t)&drv_ioctl_data);
			break;
		}
		nvlist_destroy(nvcap);
		free(buf, M_TEMP);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

#ifdef MAC
	case SIOCSIFMAC:
		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
		break;
#endif

	case SIOCSIFNAME:
		error = priv_check(td, PRIV_NET_SETIFNAME);
		if (error)
			return (error);
		error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ,
		    NULL);
		if (error != 0)
			return (error);
		error = if_rename(ifp, new_name);
		break;

#ifdef VIMAGE
	case SIOCSIFVNET:
		error = priv_check(td, PRIV_NET_SETIFVNET);
		if (error)
			return (error);
		error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
		break;
#endif

	case SIOCSIFMETRIC:
		error = priv_check(td, PRIV_NET_SETIFMETRIC);
		if (error)
			return (error);
		ifp->if_metric = ifr->ifr_metric;
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYS:
		error = priv_check(td, PRIV_NET_SETIFPHYS);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFMTU:
	{
		/* Remembered so an actual MTU change can be detected below. */
		u_long oldmtu = ifp->if_mtu;

		error = priv_check(td, PRIV_NET_SETIFMTU);
		if (error)
			return (error);
		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
			return (EINVAL);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		/* Disallow MTU changes on bridge member interfaces. */
		if (ifp->if_bridge)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0) {
			getmicrotime(&ifp->if_lastchange);
			rt_ifmsg(ifp, 0);
#ifdef INET
			DEBUGNET_NOTIFY_MTU(ifp);
#endif
		}
		/*
		 * If the link MTU changed, do network layer specific procedure.
		 */
		if (ifp->if_mtu != oldmtu)
			if_notifymtu(ifp);
		break;
	}

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if (cmd == SIOCADDMULTI)
			error = priv_check(td, PRIV_NET_ADDMULTI);
		else
			error = priv_check(td, PRIV_NET_DELMULTI);
		if (error)
			return (error);

		/* Don't allow group membership on non-multicast interfaces. */
		if ((ifp->if_flags & IFF_MULTICAST) == 0)
			return (EOPNOTSUPP);

		/* Don't let users screw up protocols' entries. */
		if (ifr->ifr_addr.sa_family != AF_LINK)
			return (EINVAL);

		if (cmd == SIOCADDMULTI) {
			struct epoch_tracker et;
			struct ifmultiaddr *ifma;

			/*
			 * Userland is only permitted to join groups once
			 * via the if_addmulti() KPI, because it cannot hold
			 * struct ifmultiaddr * between calls. It may also
			 * lose a race while we check if the membership
			 * already exists.
			 */
			NET_EPOCH_ENTER(et);
			ifma = if_findmulti(ifp, &ifr->ifr_addr);
			NET_EPOCH_EXIT(et);
			if (ifma != NULL)
				error = EADDRINUSE;
			else
				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
		} else {
			error = if_delmulti(ifp, &ifr->ifr_addr);
		}
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	/* Privileged "set" requests that are handed straight to the driver. */
	case SIOCSIFPHYADDR:
	case SIOCDIFPHYADDR:
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif
	case SIOCSIFMEDIA:
	case SIOCSIFGENERIC:
		error = priv_check(td, PRIV_NET_HWIOCTL);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	/* "Get" requests handed to the driver without a privilege check. */
	case SIOCGIFSTATUS:
	case SIOCGIFPSRCADDR:
	case SIOCGIFPDSTADDR:
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
	case SIOCGIFGENERIC:
	case SIOCGIFRSSKEY:
	case SIOCGIFRSSHASH:
	case SIOCGIFDOWNREASON:
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		break;

	case SIOCSIFLLADDR:
		error = priv_check(td, PRIV_NET_SETLLADDR);
		if (error)
			return (error);
		error = if_setlladdr(ifp,
		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
		break;

	case SIOCGHWADDR:
		error = if_gethwaddr(ifp, ifr);
		break;

	case SIOCAIFGROUP:
		error = priv_check(td, PRIV_NET_ADDIFGROUP);
		if (error)
			return (error);
		error = if_addgroup(ifp,
		    ((struct ifgroupreq *)data)->ifgr_group);
		if (error != 0)
			return (error);
		break;

	case SIOCGIFGROUP:
	{
		struct epoch_tracker et;

		NET_EPOCH_ENTER(et);
		error = if_getgroup((struct ifgroupreq *)data, ifp);
		NET_EPOCH_EXIT(et);
		break;
	}

	case SIOCDIFGROUP:
		error = priv_check(td, PRIV_NET_DELIFGROUP);
		if (error)
			return (error);
		error = if_delgroup(ifp,
		    ((struct ifgroupreq *)data)->ifgr_group);
		if (error != 0)
			return (error);
		break;

	default:
		/* Not one of ours. */
		error = ENOIOCTL;
		break;
	}
	return (error);
}

/*
 * Interface ioctls.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
{
#ifdef COMPAT_FREEBSD32
	union {
		struct ifconf ifc;
		struct ifdrv ifd;
		struct ifgroupreq ifgr;
		struct ifmediareq ifmr;
	} thunk;
	u_long saved_cmd;
	struct ifconf32 *ifc32;
	struct ifdrv32 *ifd32;
	struct ifgroupreq32 *ifgr32;
	struct ifmediareq32 *ifmr32;
#endif
	struct ifnet *ifp;
	struct ifreq *ifr;
	int error;
	int oif_flags;
#ifdef VIMAGE
	bool shutdown;
#endif

	CURVNET_SET(so->so_vnet);
#ifdef VIMAGE
	/* Make sure the VNET is stable.
*/
	shutdown = VNET_IS_SHUTTING_DOWN(so->so_vnet);
	if (shutdown) {
		CURVNET_RESTORE();
		return (EBUSY);
	}
#endif

#ifdef COMPAT_FREEBSD32
	/*
	 * Convert 32-bit request layouts into their native form in 'thunk'
	 * and redirect 'data' and 'cmd' to it.  The original command is
	 * kept in saved_cmd so results can be copied back before returning.
	 */
	saved_cmd = cmd;
	switch (cmd) {
	case SIOCGIFCONF32:
		ifc32 = (struct ifconf32 *)data;
		thunk.ifc.ifc_len = ifc32->ifc_len;
		thunk.ifc.ifc_buf = PTRIN(ifc32->ifc_buf);
		data = (caddr_t)&thunk.ifc;
		cmd = SIOCGIFCONF;
		break;
	case SIOCGDRVSPEC32:
	case SIOCSDRVSPEC32:
		ifd32 = (struct ifdrv32 *)data;
		memcpy(thunk.ifd.ifd_name, ifd32->ifd_name,
		    sizeof(thunk.ifd.ifd_name));
		thunk.ifd.ifd_cmd = ifd32->ifd_cmd;
		thunk.ifd.ifd_len = ifd32->ifd_len;
		thunk.ifd.ifd_data = PTRIN(ifd32->ifd_data);
		data = (caddr_t)&thunk.ifd;
		cmd = _IOC_NEWTYPE(cmd, struct ifdrv);
		break;
	case SIOCAIFGROUP32:
	case SIOCGIFGROUP32:
	case SIOCDIFGROUP32:
	case SIOCGIFGMEMB32:
		ifgr32 = (struct ifgroupreq32 *)data;
		memcpy(thunk.ifgr.ifgr_name, ifgr32->ifgr_name,
		    sizeof(thunk.ifgr.ifgr_name));
		thunk.ifgr.ifgr_len = ifgr32->ifgr_len;
		/* Add/delete carry a group name; the getters carry a pointer. */
		switch (cmd) {
		case SIOCAIFGROUP32:
		case SIOCDIFGROUP32:
			memcpy(thunk.ifgr.ifgr_group, ifgr32->ifgr_group,
			    sizeof(thunk.ifgr.ifgr_group));
			break;
		case SIOCGIFGROUP32:
		case SIOCGIFGMEMB32:
			thunk.ifgr.ifgr_groups = PTRIN(ifgr32->ifgr_groups);
			break;
		}
		data = (caddr_t)&thunk.ifgr;
		cmd = _IOC_NEWTYPE(cmd, struct ifgroupreq);
		break;
	case SIOCGIFMEDIA32:
	case SIOCGIFXMEDIA32:
		ifmr32 = (struct ifmediareq32 *)data;
		memcpy(thunk.ifmr.ifm_name, ifmr32->ifm_name,
		    sizeof(thunk.ifmr.ifm_name));
		thunk.ifmr.ifm_current = ifmr32->ifm_current;
		thunk.ifmr.ifm_mask = ifmr32->ifm_mask;
		thunk.ifmr.ifm_status = ifmr32->ifm_status;
		thunk.ifmr.ifm_active = ifmr32->ifm_active;
		thunk.ifmr.ifm_count = ifmr32->ifm_count;
		thunk.ifmr.ifm_ulist = PTRIN(ifmr32->ifm_ulist);
		data = (caddr_t)&thunk.ifmr;
		cmd = _IOC_NEWTYPE(cmd, struct ifmediareq);
		break;
	}
#endif

	/* SIOCGIFCONF does not name an interface; handle it first. */
	switch (cmd) {
	case SIOCGIFCONF:
		error = ifconf(cmd, data);
		goto out_noref;
	}

	ifr = (struct ifreq *)data;
	/* Requests handled here without taking an interface reference. */
	switch (cmd) {
#ifdef VIMAGE
	case SIOCSIFRVNET:
		error = priv_check(td, PRIV_NET_SETIFVNET);
		if (error == 0)
			error = if_vmove_reclaim(td, ifr->ifr_name,
			    ifr->ifr_jid);
		goto out_noref;
#endif
	case SIOCIFCREATE:
	case SIOCIFCREATE2:
		error = priv_check(td, PRIV_NET_IFCREATE);
		if (error == 0)
			error = if_clone_create(ifr->ifr_name,
			    sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ?
			    ifr_data_get_ptr(ifr) : NULL);
		goto out_noref;
	case SIOCIFDESTROY:
		error = priv_check(td, PRIV_NET_IFDESTROY);

		if (error == 0) {
			sx_xlock(&ifnet_detach_sxlock);
			error = if_clone_destroy(ifr->ifr_name);
			sx_xunlock(&ifnet_detach_sxlock);
		}
		goto out_noref;

	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		goto out_noref;

	case SIOCGIFGMEMB:
		error = if_getgroupmembers((struct ifgroupreq *)data);
		goto out_noref;

#if defined(INET) || defined(INET6)
	case SIOCSVH:
	case SIOCGVH:
		if (carp_ioctl_p == NULL)
			error = EPROTONOSUPPORT;
		else
			error = (*carp_ioctl_p)(ifr, cmd, td);
		goto out_noref;
#endif
	}

	/* Everything else names an interface; hold it for the duration. */
	ifp = ifunit_ref(ifr->ifr_name);
	if (ifp == NULL) {
		error = ENXIO;
		goto out_noref;
	}

	/* Any result other than ENOIOCTL from ifhwioctl() is final. */
	error = ifhwioctl(cmd, ifp, data, td);
	if (error != ENOIOCTL)
		goto out_ref;

	/* Saved to detect an IFF_UP transition made by the code below. */
	oif_flags = ifp->if_flags;
	if (so->so_proto == NULL) {
		error = EOPNOTSUPP;
		goto out_ref;
	}

	/*
	 * Pass the request on to the socket control method, and if the
	 * latter returns EOPNOTSUPP, directly to the interface.
	 *
	 * Make an exception for the legacy SIOCSIF* requests.  Drivers
	 * trust SIOCSIFADDR et al to come from an already privileged
	 * layer, and do not perform any credentials checks or input
	 * validation.
	 */
	error = so->so_proto->pr_control(so, cmd, data, ifp, td);
	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
	    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
	    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
		error = (*ifp->if_ioctl)(ifp, cmd, data);

	if (!(oif_flags & IFF_UP) && (ifp->if_flags & IFF_UP))
		if_up(ifp);
out_ref:
	if_rele(ifp);
out_noref:
	CURVNET_RESTORE();
#ifdef COMPAT_FREEBSD32
	/* On success, copy results back into the caller's 32-bit layout. */
	if (error != 0)
		return (error);
	switch (saved_cmd) {
	case SIOCGIFCONF32:
		ifc32->ifc_len = thunk.ifc.ifc_len;
		break;
	case SIOCGDRVSPEC32:
		/*
		 * SIOCGDRVSPEC is IOWR, but nothing actually touches
		 * the struct so just assert that ifd_len (the only
		 * field it might make sense to update) hasn't
		 * changed.
		 */
		KASSERT(thunk.ifd.ifd_len == ifd32->ifd_len,
		    ("ifd_len was updated %u -> %zu", ifd32->ifd_len,
			thunk.ifd.ifd_len));
		break;
	case SIOCGIFGROUP32:
	case SIOCGIFGMEMB32:
		ifgr32->ifgr_len = thunk.ifgr.ifgr_len;
		break;
	case SIOCGIFMEDIA32:
	case SIOCGIFXMEDIA32:
		ifmr32->ifm_current = thunk.ifmr.ifm_current;
		ifmr32->ifm_mask = thunk.ifmr.ifm_mask;
		ifmr32->ifm_status = thunk.ifmr.ifm_status;
		ifmr32->ifm_active = thunk.ifmr.ifm_active;
		ifmr32->ifm_count = thunk.ifmr.ifm_count;
		break;
	}
#endif
	return (error);
}

int
if_rename(struct ifnet *ifp, char *new_name)
{
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl;
	size_t namelen, onamelen;
	char old_name[IFNAMSIZ];
	char strbuf[IFNAMSIZ + 8];

	if (new_name[0] == '\0')
		return (EINVAL);
	if (strcmp(new_name, ifp->if_xname) == 0)
		return (0);
	if (ifunit(new_name) != NULL)
		return (EEXIST);

	/*
	 * XXX: Locking.  Nothing else seems to lock if_flags,
	 * and there are numerous other races with the
	 * ifunit() checks not being atomic with namespace
	 * changes (renames, vmoves, if_attach, etc).
*/
	ifp->if_flags |= IFF_RENAMING;

	/* Announce departure under the old name before changing it. */
	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);

	if_printf(ifp, "changing name to '%s'\n", new_name);

	IF_ADDR_WLOCK(ifp);
	strlcpy(old_name, ifp->if_xname, sizeof(old_name));
	strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
	ifa = ifp->if_addr;
	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
	namelen = strlen(new_name);
	onamelen = sdl->sdl_nlen;
	/*
	 * Move the address if needed.  This is safe because we
	 * allocate space for a name of length IFNAMSIZ when we
	 * create this in if_attach().
	 */
	if (namelen != onamelen) {
		bcopy(sdl->sdl_data + onamelen,
		    sdl->sdl_data + namelen, sdl->sdl_alen);
	}
	bcopy(new_name, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	/*
	 * Rebuild the name part of the netmask: clear the old name's bytes,
	 * then set one 0xff byte per character of the new name.
	 */
	sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
	bzero(sdl->sdl_data, onamelen);
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	IF_ADDR_WUNLOCK(ifp);

	/* Announce arrival under the new name. */
	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);

	ifp->if_flags &= ~IFF_RENAMING;

	/* The devctl event is keyed by the old name and carries the new. */
	snprintf(strbuf, sizeof(strbuf), "name=%s", new_name);
	devctl_notify("IFNET", old_name, "RENAME", strbuf);

	return (0);
}

/*
 * The code common to handling reference counted flags,
 * e.g., in ifpromisc() and if_allmulti().
 * The "pflag" argument can specify a permanent mode flag to check,
 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
 *
 * Only to be used on stack-owned flags, not driver-owned flags.
 *
 * "refcount" points at the counter belonging to "flag"; a non-zero
 * "onswitch" requests the mode, zero releases it.  Returns 0 on success.
 * If the driver call is impossible (EOPNOTSUPP) or fails, both the
 * counter and if_flags are restored and the error is returned.
 */
static int
if_setflag(struct ifnet *ifp, int flag, int pflag,
	   int *refcount, int onswitch)
{
	struct ifreq ifr;
	int error;
	int oldflags, oldcount;

	/* Sanity checks to catch programming errors */
	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
	    ("%s: setting driver-owned flag %d", __func__, flag));

	if (onswitch)
		KASSERT(*refcount >= 0,
		    ("%s: increment negative refcount %d for flag %d",
		    __func__, *refcount, flag));
	else
		KASSERT(*refcount > 0,
		    ("%s: decrement non-positive refcount %d for flag %d",
		    __func__, *refcount, flag));

	/* In case this mode is permanent, just touch refcount */
	if (ifp->if_flags & pflag) {
		*refcount += onswitch ? 1 : -1;
		return (0);
	}

	/* Save ifnet parameters for if_ioctl() may fail */
	oldcount = *refcount;
	oldflags = ifp->if_flags;

	/*
	 * See if we aren't the only and touching refcount is enough.
	 * Actually toggle interface flag if we are the first or last.
	 */
	if (onswitch) {
		/* Post-increment: only the 0 -> 1 transition falls through. */
		if ((*refcount)++)
			return (0);
		ifp->if_flags |= flag;
	} else {
		/* Pre-decrement: only the 1 -> 0 transition falls through. */
		if (--(*refcount))
			return (0);
		ifp->if_flags &= ~flag;
	}

	/* Call down the driver since we've changed interface flags */
	if (ifp->if_ioctl == NULL) {
		error = EOPNOTSUPP;
		goto recover;
	}
	ifr.ifr_flags = ifp->if_flags & 0xffff;
	ifr.ifr_flagshigh = ifp->if_flags >> 16;
	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
	if (error)
		goto recover;
	/* Notify userland that interface flags have changed */
	rt_ifmsg(ifp, flag);
	return (0);

recover:
	/* Recover after driver error */
	*refcount = oldcount;
	ifp->if_flags = oldflags;
	return (error);
}

/*
 * Set/clear promiscuous mode on interface ifp based on the truth value
 * of pswitch.  The calls are reference counted so that only the first
 * "on" request actually has an effect, as does the final "off" request.
 * Results are undefined if the "off" and "on" requests are not matched.
 *
 * Returns the result of if_setflag().
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	int error;
	/* Snapshot taken to detect whether IFF_PROMISC really toggled. */
	int oldflags = ifp->if_flags;

	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
			   &ifp->if_pcount, pswitch);
	/* If promiscuous mode status has changed, log a message */
	if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) &&
            log_promisc_mode_change)
		if_printf(ifp, "promiscuous mode %s\n",
		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
	return (error);
}

/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
*/
/*ARGSUSED*/
/*
 * 'data' is a struct ifconf whose ifc_len is the size of the user buffer.
 * One ifreq is emitted per visible address of every interface, or a
 * single name-only ifreq for an interface with no visible address.
 * Addresses longer than a struct sockaddr are written out in full after
 * the ifreq header.  On return ifc_len is the number of valid bytes that
 * were produced.  Returns EINVAL for a non-positive ifc_len,
 * ENAMETOOLONG if an interface name does not fit in ifr_name, or the
 * result of copyout().
 */
static int
ifconf(u_long cmd, caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr;
	struct sbuf *sb;
	int error, full = 0, valid_len, max_len;

	/* Limit initial buffer size to maxphys to avoid DoS from userspace. */
	max_len = maxphys - 1;

	/* Prevent hostile input from being able to crash the system */
	if (ifc->ifc_len <= 0)
		return (EINVAL);

again:
	/*
	 * Once the user's length fits within max_len, use it as the buffer
	 * size and mark this attempt as the last one.
	 */
	if (ifc->ifc_len <= max_len) {
		max_len = ifc->ifc_len;
		full = 1;
	}
	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
	/*
	 * From here on max_len counts the bytes we attempted to append and
	 * valid_len the bytes that are known to have fit.
	 */
	max_len = 0;
	valid_len = 0;

	IFNET_RLOCK();
	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		struct epoch_tracker et;
		int addrs;

		/*
		 * Zero the ifr to make sure we don't disclose the contents
		 * of the stack.
		 */
		memset(&ifr, 0, sizeof(ifr));

		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
		    >= sizeof(ifr.ifr_name)) {
			sbuf_delete(sb);
			IFNET_RUNLOCK();
			return (ENAMETOOLONG);
		}

		addrs = 0;
		NET_EPOCH_ENTER(et);
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			struct sockaddr *sa = ifa->ifa_addr;

			/* Skip addresses prison_if() rejects for this caller. */
			if (prison_if(curthread->td_ucred, sa) != 0)
				continue;
			addrs++;
			if (sa->sa_len <= sizeof(*sa)) {
				/* Short addresses are zero-padded first. */
				if (sa->sa_len < sizeof(*sa)) {
					memset(&ifr.ifr_ifru.ifru_addr, 0,
					    sizeof(ifr.ifr_ifru.ifru_addr));
					memcpy(&ifr.ifr_ifru.ifru_addr, sa,
					    sa->sa_len);
				} else
					ifr.ifr_ifru.ifru_addr = *sa;
				sbuf_bcat(sb, &ifr, sizeof(ifr));
				max_len += sizeof(ifr);
			} else {
				/* Oversized: ifreq header, then the address. */
				sbuf_bcat(sb, &ifr,
				    offsetof(struct ifreq, ifr_addr));
				max_len += offsetof(struct ifreq, ifr_addr);
				sbuf_bcat(sb, sa, sa->sa_len);
				max_len += sa->sa_len;
			}

			if (sbuf_error(sb) == 0)
				valid_len = sbuf_len(sb);
		}
		NET_EPOCH_EXIT(et);
		if (addrs == 0) {
			/* No visible address: emit a name-only record. */
			sbuf_bcat(sb, &ifr, sizeof(ifr));
			max_len += sizeof(ifr);

			if (sbuf_error(sb) == 0)
				valid_len = sbuf_len(sb);
		}
	}
	IFNET_RUNLOCK();

	/*
	 * If we didn't allocate enough space (uncommon), try again.  If
	 * we have already allocated as much space as we are allowed,
	 * return what we've got.
	 */
	if (valid_len != max_len && !full) {
		sbuf_delete(sb);
		goto again;
	}

	ifc->ifc_len = valid_len;
	sbuf_finish(sb);
	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
	sbuf_delete(sb);
	return (error);
}

/*
 * Just like ifpromisc(), but for all-multicast-reception mode.
 */
int
if_allmulti(struct ifnet *ifp, int onswitch)
{

	return (if_setflag(ifp, IFF_ALLMULTI, IFF_PALLMULTI, &ifp->if_amcount,
	    onswitch));
}

/*
 * Search the interface's multicast address list for 'sa'.  AF_LINK
 * addresses are compared with sa_dl_equal(), all others with sa_equal().
 * Returns the matching entry or NULL.  The interface address lock must be
 * held; this is asserted.
 */
struct ifmultiaddr *
if_findmulti(struct ifnet *ifp, const struct sockaddr *sa)
{
	struct ifmultiaddr *ifma;

	IF_ADDR_LOCK_ASSERT(ifp);

	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (sa->sa_family == AF_LINK) {
			if (sa_dl_equal(ifma->ifma_addr, sa))
				break;
		} else {
			if (sa_equal(ifma->ifma_addr, sa))
				break;
		}
	}

	return ifma;
}

/*
 * Allocate a new ifmultiaddr and initialize based on passed arguments.  We
 * make copies of passed sockaddrs.  The ifmultiaddr will not be added to
 * the ifnet multicast address list here, so the caller must do that and
 * other setup work (such as notifying the device driver).  The reference
 * count is initialized to 1.
 *
 * 'llsa' may be NULL, in which case no link layer copy is made.  Returns
 * NULL if any allocation fails; partial allocations are released first.
 */
static struct ifmultiaddr *
if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
    int mflags)
{
	struct ifmultiaddr *ifma;
	struct sockaddr *dupsa;

	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
	    M_ZERO);
	if (ifma == NULL)
		return (NULL);

	dupsa = malloc(sa->sa_len, M_IFMADDR, mflags);
	if (dupsa == NULL) {
		free(ifma, M_IFMADDR);
		return (NULL);
	}
	bcopy(sa, dupsa, sa->sa_len);
	ifma->ifma_addr = dupsa;

	ifma->ifma_ifp = ifp;
	ifma->ifma_refcount = 1;
	ifma->ifma_protospec = NULL;

	if (llsa == NULL) {
		ifma->ifma_lladdr = NULL;
		return (ifma);
	}

	dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags);
	if (dupsa == NULL) {
		free(ifma->ifma_addr, M_IFMADDR);
		free(ifma, M_IFMADDR);
		return (NULL);
	}
	bcopy(llsa, dupsa, llsa->sa_len);
	ifma->ifma_lladdr = dupsa;

	return (ifma);
}

/*
 * if_freemulti: free ifmultiaddr structure and possibly attached related
 * addresses.
The caller is responsible for implementing reference * counting, notifying the driver, handling routing messages, and releasing * any dependent link layer state. */ #ifdef MCAST_VERBOSE extern void kdb_backtrace(void); #endif static void if_freemulti_internal(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d", ifma->ifma_refcount)); if (ifma->ifma_lladdr != NULL) free(ifma->ifma_lladdr, M_IFMADDR); #ifdef MCAST_VERBOSE kdb_backtrace(); printf("%s freeing ifma: %p\n", __func__, ifma); #endif free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); } static void if_destroymulti(epoch_context_t ctx) { struct ifmultiaddr *ifma; ifma = __containerof(ctx, struct ifmultiaddr, ifma_epoch_ctx); if_freemulti_internal(ifma); } void if_freemulti(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti_epoch: refcount %d", ifma->ifma_refcount)); NET_EPOCH_CALL(if_destroymulti, &ifma->ifma_epoch_ctx); } /* * Register an additional multicast address with a network interface. * * - If the address is already present, bump the reference count on the * address and return. * - If the address is not link-layer, look up a link layer address. * - Allocate address structures for one or both addresses, and attach to the * multicast address list on the interface. If automatically adding a link * layer address, the protocol address will own a reference to the link * layer address, to be freed when it is freed. * - Notify the network device driver of an addition to the multicast address * list. * * 'sa' points to caller-owned memory with the desired multicast address. * * 'retifma' will be used to return a pointer to the resulting multicast * address reference, if desired. 
*/
int
if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
    struct ifmultiaddr **retifma)
{
	struct ifmultiaddr *ifma, *ll_ifma;
	struct sockaddr *llsa;
	struct sockaddr_dl sdl;
	int error;

#ifdef INET
	IN_MULTI_LIST_UNLOCK_ASSERT();
#endif
#ifdef INET6
	IN6_MULTI_LIST_UNLOCK_ASSERT();
#endif
	/*
	 * If the address is already present, return a new reference to it;
	 * otherwise, allocate storage and set up a new address.
	 */
	IF_ADDR_WLOCK(ifp);
	ifma = if_findmulti(ifp, sa);
	if (ifma != NULL) {
		ifma->ifma_refcount++;
		if (retifma != NULL)
			*retifma = ifma;
		IF_ADDR_WUNLOCK(ifp);
		return (0);
	}

	/*
	 * The address isn't already present; resolve the protocol address
	 * into a link layer address, and then look that up, bump its
	 * refcount or allocate an ifma for that also.
	 * Most link layer resolving functions return address data which
	 * fits inside default sockaddr_dl structure. However callback
	 * can allocate another sockaddr structure, in that case we need to
	 * free it later.
	 */
	llsa = NULL;
	ll_ifma = NULL;
	if (ifp->if_resolvemulti != NULL) {
		/* Provide called function with buffer size information */
		sdl.sdl_len = sizeof(sdl);
		llsa = (struct sockaddr *)&sdl;
		error = ifp->if_resolvemulti(ifp, &llsa, sa);
		if (error)
			goto unlock_out;
	}

	/*
	 * Allocate the new address.  Don't hook it up yet, as we may also
	 * need to allocate a link layer multicast address.
	 */
	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
	if (ifma == NULL) {
		error = ENOMEM;
		goto free_llsa_out;
	}

	/*
	 * If a link layer address is found, we'll need to see if it's
	 * already present in the address list, or allocate it as well.
	 * When this block finishes, the link layer address will be on the
	 * list.
	 */
	if (llsa != NULL) {
		ll_ifma = if_findmulti(ifp, llsa);
		if (ll_ifma == NULL) {
			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
			if (ll_ifma == NULL) {
				/* Drop the only reference before freeing. */
				--ifma->ifma_refcount;
				if_freemulti(ifma);
				error = ENOMEM;
				goto free_llsa_out;
			}
			ll_ifma->ifma_flags |= IFMA_F_ENQUEUED;
			CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
			    ifma_link);
		} else
			ll_ifma->ifma_refcount++;
		ifma->ifma_llifma = ll_ifma;
	}

	/*
	 * We now have a new multicast address, ifma, and possibly a new or
	 * referenced link layer address.  Add the primary address to the
	 * ifnet address list.
	 */
	ifma->ifma_flags |= IFMA_F_ENQUEUED;
	CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);

	if (retifma != NULL)
		*retifma = ifma;

	/*
	 * Must generate the message while holding the lock so that 'ifma'
	 * pointer is still valid.
	 */
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
	IF_ADDR_WUNLOCK(ifp);

	/*
	 * We are certain we have added something, so call down to the
	 * interface to let them know about it.
	 */
	if (ifp->if_ioctl != NULL) {
		/* When this thread may not sleep, defer to the taskqueue. */
		if (THREAD_CAN_SLEEP())
			(void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
		else
			taskqueue_enqueue(taskqueue_swi, &ifp->if_addmultitask);
	}

	/* Free llsa only if the resolver replaced our on-stack sdl. */
	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
		link_free_sdl(llsa);

	return (0);

free_llsa_out:
	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
		link_free_sdl(llsa);

unlock_out:
	IF_ADDR_WUNLOCK(ifp);
	return (error);
}

/*
 * Task handler that issues SIOCADDMULTI to the driver with the
 * interface's vnet set as current.  'arg' is the ifnet; 'pending' is only
 * used for a DIAGNOSTIC message.  The driver's return value is ignored.
 */
static void
if_siocaddmulti(void *arg, int pending)
{
	struct ifnet *ifp;

	ifp = arg;
#ifdef DIAGNOSTIC
	if (pending > 1)
		if_printf(ifp, "%d SIOCADDMULTI coalesced\n", pending);
#endif
	CURVNET_SET(ifp->if_vnet);
	(void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
	CURVNET_RESTORE();
}

/*
 * Delete a multicast group membership by network-layer group address.
 *
 * Returns ENOENT if the entry could not be found. If ifp no longer
 * exists, results are undefined. This entry point should only be used
 * from subsystems which do appropriate locking to hold ifp for the
 * duration of the call.
* Network-layer protocol domains must use if_delmulti_ifma(). */ int if_delmulti(struct ifnet *ifp, struct sockaddr *sa) { struct ifmultiaddr *ifma; int lastref; KASSERT(ifp, ("%s: NULL ifp", __func__)); IF_ADDR_WLOCK(ifp); lastref = 0; ifma = if_findmulti(ifp, sa); if (ifma != NULL) lastref = if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); if (ifma == NULL) return (ENOENT); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } return (0); } /* * Delete all multicast group membership for an interface. * Should be used to quickly flush all multicast filters. */ void if_delallmulti(struct ifnet *ifp) { struct ifmultiaddr *ifma; struct ifmultiaddr *next; IF_ADDR_WLOCK(ifp); CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); } void if_delmulti_ifma(struct ifmultiaddr *ifma) { if_delmulti_ifma_flags(ifma, 0); } /* * Delete a multicast group membership by group membership pointer. * Network-layer protocol domains must use this routine. * * It is safe to call this routine if the ifp disappeared. */ void if_delmulti_ifma_flags(struct ifmultiaddr *ifma, int flags) { struct ifnet *ifp; int lastref; MCDPRINTF("%s freeing ifma: %p\n", __func__, ifma); #ifdef INET IN_MULTI_LIST_UNLOCK_ASSERT(); #endif ifp = ifma->ifma_ifp; #ifdef DIAGNOSTIC if (ifp == NULL) { printf("%s: ifma_ifp seems to be detached\n", __func__); } else { struct epoch_tracker et; struct ifnet *oifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link) if (ifp == oifp) break; NET_EPOCH_EXIT(et); if (ifp != oifp) ifp = NULL; } #endif /* * If and only if the ifnet instance exists: Acquire the address lock. */ if (ifp != NULL) IF_ADDR_WLOCK(ifp); lastref = if_delmulti_locked(ifp, ifma, flags); if (ifp != NULL) { /* * If and only if the ifnet instance exists: * Release the address lock. * If the group was left: update the hardware hash filter. 
*/ IF_ADDR_WUNLOCK(ifp); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } } } /* * Perform deletion of network-layer and/or link-layer multicast address. * * Return 0 if the reference count was decremented. * Return 1 if the final reference was released, indicating that the * hardware hash filter should be reprogrammed. */ static int if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching) { struct ifmultiaddr *ll_ifma; if (ifp != NULL && ifma->ifma_ifp != NULL) { KASSERT(ifma->ifma_ifp == ifp, ("%s: inconsistent ifp %p", __func__, ifp)); IF_ADDR_WLOCK_ASSERT(ifp); } ifp = ifma->ifma_ifp; MCDPRINTF("%s freeing %p from %s \n", __func__, ifma, ifp ? ifp->if_xname : ""); /* * If the ifnet is detaching, null out references to ifnet, * so that upper protocol layers will notice, and not attempt * to obtain locks for an ifnet which no longer exists. The * routing socket announcement must happen before the ifnet * instance is detached from the system. */ if (detaching) { #ifdef DIAGNOSTIC printf("%s: detaching ifnet instance %p\n", __func__, ifp); #endif /* * ifp may already be nulled out if we are being reentered * to delete the ll_ifma. */ if (ifp != NULL) { rt_newmaddrmsg(RTM_DELMADDR, ifma); ifma->ifma_ifp = NULL; } } if (--ifma->ifma_refcount > 0) return 0; if (ifp != NULL && detaching == 0 && (ifma->ifma_flags & IFMA_F_ENQUEUED)) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } /* * If this ifma is a network-layer ifma, a link-layer ifma may * have been associated with it. Release it first if so. 
*/ ll_ifma = ifma->ifma_llifma; if (ll_ifma != NULL) { KASSERT(ifma->ifma_lladdr != NULL, ("%s: llifma w/o lladdr", __func__)); if (detaching) ll_ifma->ifma_ifp = NULL; /* XXX */ if (--ll_ifma->ifma_refcount == 0) { if (ifp != NULL) { if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } } if_freemulti(ll_ifma); } } #ifdef INVARIANTS if (ifp) { struct ifmultiaddr *ifmatmp; CK_STAILQ_FOREACH(ifmatmp, &ifp->if_multiaddrs, ifma_link) MPASS(ifma != ifmatmp); } #endif if_freemulti(ifma); /* * The last reference to this instance of struct ifmultiaddr * was released; the hardware should be notified of this change. */ return 1; } /* * Set the link layer address on an interface. * * At this time we only support certain types of interfaces, * and we don't allow the length of the address to change. * * Set noinline to be dtrace-friendly */ __noinline int if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) { struct sockaddr_dl *sdl; struct ifaddr *ifa; struct ifreq ifr; ifa = ifp->if_addr; if (ifa == NULL) return (EINVAL); sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl == NULL) return (EINVAL); if (len != sdl->sdl_alen) /* don't allow length to change */ return (EINVAL); switch (ifp->if_type) { case IFT_ETHER: case IFT_XETHER: case IFT_L2VLAN: case IFT_BRIDGE: case IFT_IEEE8023ADLAG: bcopy(lladdr, LLADDR(sdl), len); break; default: return (ENODEV); } /* * If the interface is already up, we need * to re-init it in order to reprogram its * address filter. 
*/ if ((ifp->if_flags & IFF_UP) != 0) { if (ifp->if_ioctl) { ifp->if_flags &= ~IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); ifp->if_flags |= IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); } } EVENTHANDLER_INVOKE(iflladdr_event, ifp); return (0); } /* * Compat function for handling basic encapsulation requests. * Not converted stacks (FDDI, IB, ..) supports traditional * output model: ARP (and other similar L2 protocols) are handled * inside output routine, arpresolve/nd6_resolve() returns MAC * address instead of full prepend. * * This function creates calculated header==MAC for IPv4/IPv6 and * returns EAFNOSUPPORT (which is then handled in ARP code) for other * address families. */ static int if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req) { if (req->rtype != IFENCAP_LL) return (EOPNOTSUPP); if (req->bufsize < req->lladdr_len) return (ENOMEM); switch (req->family) { case AF_INET: case AF_INET6: break; default: return (EAFNOSUPPORT); } /* Copy lladdr to storage as is */ memmove(req->buf, req->lladdr, req->lladdr_len); req->bufsize = req->lladdr_len; req->lladdr_off = 0; return (0); } /* * Tunnel interfaces can nest, also they may cause infinite recursion * calls when misconfigured. We'll prevent this by detecting loops. * High nesting level may cause stack exhaustion. We'll prevent this * by introducing upper limit. * * Return 0, if tunnel nesting count is equal or less than limit. 
*/ int if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie, int limit) { struct m_tag *mtag; int count; count = 1; mtag = NULL; while ((mtag = m_tag_locate(m, cookie, 0, mtag)) != NULL) { if (*(struct ifnet **)(mtag + 1) == ifp) { log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp)); return (EIO); } count++; } if (count > limit) { log(LOG_NOTICE, "%s: if_output recursively called too many times(%d)\n", if_name(ifp), count); return (EIO); } mtag = m_tag_alloc(cookie, 0, sizeof(struct ifnet *), M_NOWAIT); if (mtag == NULL) return (ENOMEM); *(struct ifnet **)(mtag + 1) = ifp; m_tag_prepend(m, mtag); return (0); } /* * Get the link layer address that was read from the hardware at attach. * * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type * their component interfaces as IFT_IEEE8023ADLAG. */ int if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr) { if (ifp->if_hw_addr == NULL) return (ENODEV); switch (ifp->if_type) { case IFT_ETHER: case IFT_IEEE8023ADLAG: bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen); return (0); default: return (ENODEV); } } /* * The name argument must be a pointer to storage which will last as * long as the interface does. For physical devices, the result of * device_get_name(dev) is a good choice and for pseudo-devices a * static string works well. */ void if_initname(struct ifnet *ifp, const char *name, int unit) { ifp->if_dname = name; ifp->if_dunit = unit; if (unit != IF_DUNIT_NONE) snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); else strlcpy(ifp->if_xname, name, IFNAMSIZ); } static int if_vlog(struct ifnet *ifp, int pri, const char *fmt, va_list ap) { char if_fmt[256]; snprintf(if_fmt, sizeof(if_fmt), "%s: %s", ifp->if_xname, fmt); vlog(pri, if_fmt, ap); return (0); } int if_printf(struct ifnet *ifp, const char *fmt, ...) { va_list ap; va_start(ap, fmt); if_vlog(ifp, LOG_INFO, fmt, ap); va_end(ap); return (0); } int if_log(struct ifnet *ifp, int pri, const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); if_vlog(ifp, pri, fmt, ap); va_end(ap); return (0); } void if_start(struct ifnet *ifp) { (*(ifp)->if_start)(ifp); } /* * Backwards compatibility interface for drivers * that have not implemented it */ static int if_transmit_default(struct ifnet *ifp, struct mbuf *m) { int error; IFQ_HANDOFF(ifp, m, error); return (error); } static void if_input_default(struct ifnet *ifp __unused, struct mbuf *m) { m_freem(m); } int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust) { int active = 0; IF_LOCK(ifq); if (_IF_QFULL(ifq)) { IF_UNLOCK(ifq); if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); m_freem(m); return (0); } if (ifp != NULL) { if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust); if (m->m_flags & (M_BCAST|M_MCAST)) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); active = ifp->if_drv_flags & IFF_DRV_OACTIVE; } _IF_ENQUEUE(ifq, m); IF_UNLOCK(ifq); if (ifp != NULL && !active) (*(ifp)->if_start)(ifp); return (1); } void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f) { KASSERT(if_com_alloc[type] == NULL, ("if_register_com_alloc: %d already registered", type)); KASSERT(if_com_free[type] == NULL, ("if_register_com_alloc: %d free already registered", type)); if_com_alloc[type] = a; if_com_free[type] = f; } void if_deregister_com_alloc(u_char type) { KASSERT(if_com_alloc[type] != NULL, ("if_deregister_com_alloc: %d not registered", type)); KASSERT(if_com_free[type] != NULL, ("if_deregister_com_alloc: %d free not registered", type)); /* * Ensure all pending EPOCH(9) callbacks have been executed. This * fixes issues about late invocation of if_destroy(), which leads * to memory leak from if_com_alloc[type] allocated if_l2com. 
*/ NET_EPOCH_DRAIN_CALLBACKS(); if_com_alloc[type] = NULL; if_com_free[type] = NULL; } /* API for driver access to network stack owned ifnet.*/ uint64_t if_setbaudrate(struct ifnet *ifp, uint64_t baudrate) { uint64_t oldbrate; oldbrate = ifp->if_baudrate; ifp->if_baudrate = baudrate; return (oldbrate); } uint64_t if_getbaudrate(const if_t ifp) { return (ifp->if_baudrate); } int if_setcapabilities(if_t ifp, int capabilities) { ifp->if_capabilities = capabilities; return (0); } int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit) { ifp->if_capabilities &= ~clearbit; ifp->if_capabilities |= setbit; return (0); } int if_getcapabilities(const if_t ifp) { return (ifp->if_capabilities); } int if_setcapenable(if_t ifp, int capabilities) { ifp->if_capenable = capabilities; return (0); } int if_setcapenablebit(if_t ifp, int setcap, int clearcap) { ifp->if_capenable &= ~clearcap; ifp->if_capenable |= setcap; return (0); } int if_setcapabilities2(if_t ifp, int capabilities) { ifp->if_capabilities2 = capabilities; return (0); } int if_setcapabilities2bit(if_t ifp, int setbit, int clearbit) { ifp->if_capabilities2 &= ~clearbit; ifp->if_capabilities2 |= setbit; return (0); } int if_getcapabilities2(const if_t ifp) { return (ifp->if_capabilities2); } int if_setcapenable2(if_t ifp, int capabilities2) { ifp->if_capenable2 = capabilities2; return (0); } int if_setcapenable2bit(if_t ifp, int setcap, int clearcap) { ifp->if_capenable2 &= ~clearcap; ifp->if_capenable2 |= setcap; return (0); } const char * if_getdname(const if_t ifp) { return (ifp->if_dname); } void if_setdname(if_t ifp, const char *dname) { ifp->if_dname = dname; } const char * if_name(if_t ifp) { return (ifp->if_xname); } int if_setname(if_t ifp, const char *name) { if (strlen(name) > sizeof(ifp->if_xname) - 1) return (ENAMETOOLONG); strcpy(ifp->if_xname, name); return (0); } int if_togglecapenable(if_t ifp, int togglecap) { ifp->if_capenable ^= togglecap; return (0); } int if_getcapenable(const if_t ifp) { 
return (ifp->if_capenable);
}

/* Flip the bits in 'togglecap' within if_capenable2. */
int
if_togglecapenable2(if_t ifp, int togglecap)
{
	ifp->if_capenable2 ^= togglecap;
	return (0);
}

int
if_getcapenable2(const if_t ifp)
{
	return (ifp->if_capenable2);
}

int
if_getdunit(const if_t ifp)
{
	return (ifp->if_dunit);
}

int
if_getindex(const if_t ifp)
{
	return (ifp->if_index);
}

int
if_getidxgen(const if_t ifp)
{
	return (ifp->if_idxgen);
}

/* Returns the stored description pointer without taking ifdescr_sx. */
const char *
if_getdescr(if_t ifp)
{
	return (ifp->if_description);
}

/*
 * Install 'descrbuf' as the interface description.  The pointer swap is done
 * under ifdescr_sx; the previous buffer is released with if_freedescr() after
 * the lock has been dropped.
 */
void
if_setdescr(if_t ifp, char *descrbuf)
{
	sx_xlock(&ifdescr_sx);
	char *odescrbuf = ifp->if_description;
	ifp->if_description = descrbuf;
	sx_xunlock(&ifdescr_sx);

	if_freedescr(odescrbuf);
}

/*
 * Allocate a zeroed description buffer of 'sz' bytes from M_IFDESCR.  Only
 * the M_WAITOK / M_NOWAIT bits of 'malloc_flag' are honoured.
 */
char *
if_allocdescr(size_t sz, int malloc_flag)
{
	malloc_flag &= (M_WAITOK | M_NOWAIT);
	return (malloc(sz, M_IFDESCR, M_ZERO | malloc_flag));
}

/* Release a buffer obtained from M_IFDESCR. */
void
if_freedescr(char *descrbuf)
{
	free(descrbuf, M_IFDESCR);
}

int
if_getalloctype(const if_t ifp)
{
	return (ifp->if_alloctype);
}

/* Stamp if_lastchange with the current time from getmicrotime(). */
void
if_setlastchange(if_t ifp)
{
	getmicrotime(&ifp->if_lastchange);
}

/*
 * This is largely undesirable because it ties ifnet to a device, but does
 * provide flexibility for an embedded product vendor.  Should be used with
 * the understanding that it violates the interface boundaries, and should be
 * a last resort only.
*/ int if_setdev(if_t ifp, void *dev) { return (0); } int if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags) { ifp->if_drv_flags &= ~clear_flags; ifp->if_drv_flags |= set_flags; return (0); } int if_getdrvflags(const if_t ifp) { return (ifp->if_drv_flags); } int if_setdrvflags(if_t ifp, int flags) { ifp->if_drv_flags = flags; return (0); } int if_setflags(if_t ifp, int flags) { ifp->if_flags = flags; return (0); } int if_setflagbits(if_t ifp, int set, int clear) { ifp->if_flags &= ~clear; ifp->if_flags |= set; return (0); } int if_getflags(const if_t ifp) { return (ifp->if_flags); } int if_clearhwassist(if_t ifp) { ifp->if_hwassist = 0; return (0); } int if_sethwassistbits(if_t ifp, int toset, int toclear) { ifp->if_hwassist &= ~toclear; ifp->if_hwassist |= toset; return (0); } int if_sethwassist(if_t ifp, int hwassist_bit) { ifp->if_hwassist = hwassist_bit; return (0); } int if_gethwassist(const if_t ifp) { return (ifp->if_hwassist); } int if_togglehwassist(if_t ifp, int toggle_bits) { ifp->if_hwassist ^= toggle_bits; return (0); } int if_setmtu(if_t ifp, int mtu) { ifp->if_mtu = mtu; return (0); } void if_notifymtu(if_t ifp) { #ifdef INET6 nd6_setmtu(ifp); #endif rt_updatemtu(ifp); } int if_getmtu(const if_t ifp) { return (ifp->if_mtu); } int if_getmtu_family(const if_t ifp, int family) { struct domain *dp; SLIST_FOREACH(dp, &domains, dom_next) { if (dp->dom_family == family && dp->dom_ifmtu != NULL) return (dp->dom_ifmtu(ifp)); } return (ifp->if_mtu); } void if_setppromisc(if_t ifp, bool ppromisc) { int new_flags; if (ppromisc) new_flags = ifp->if_flags | IFF_PPROMISC; else new_flags = ifp->if_flags & ~IFF_PPROMISC; if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) { if (new_flags & IFF_PPROMISC) new_flags |= IFF_PROMISC; /* * Only unset IFF_PROMISC if there are no more consumers of * promiscuity, i.e. the ifp->if_pcount refcount is 0. 
*/
		else if (ifp->if_pcount == 0)
			new_flags &= ~IFF_PROMISC;
		if (log_promisc_mode_change)
			if_printf(ifp, "permanently promiscuous mode %s\n",
			    ((new_flags & IFF_PPROMISC) ?
			    "enabled" : "disabled"));
	}
	ifp->if_flags = new_flags;
}

/*
 * Methods for drivers to access interface unicast and multicast
 * link level addresses.  Driver shall not know 'struct ifaddr' neither
 * 'struct ifmultiaddr'.
 */

/* Count the AF_LINK entries on the interface's address list. */
u_int
if_lladdr_count(if_t ifp)
{
	struct epoch_tracker et;
	struct ifaddr *ifa;
	u_int count;

	count = 0;
	NET_EPOCH_ENTER(et);
	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
		if (ifa->ifa_addr->sa_family == AF_LINK)
			count++;
	NET_EPOCH_EXIT(et);

	return (count);
}

/*
 * Call 'cb' on every interface in V_ifnet until it returns non-zero, and
 * return that value (0 if every call succeeded).  The caller must already be
 * inside the net epoch.
 */
int
if_foreach(if_foreach_cb_t cb, void *cb_arg)
{
	if_t ifp;
	int error;

	NET_EPOCH_ASSERT();
	MPASS(cb);

	error = 0;
	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		error = cb(ifp, cb_arg);
		if (error != 0)
			break;
	}

	return (error);
}

/*
 * Iterates over the list of interfaces, permitting callback function @cb to sleep.
 * Stops iteration if @cb returns non-zero error code.
 * Returns the last error code from @cb.
 * @match_cb: optional match callback limiting the iteration to only matched interfaces
 * @match_arg: argument to pass to @match_cb
 * @cb: iteration callback
 * @cb_arg: argument to pass to @cb
 */
int
if_foreach_sleep(if_foreach_match_t match_cb, void *match_arg, if_foreach_cb_t cb,
    void *cb_arg)
{
	int match_count = 0, array_size = 16; /* 128 bytes for malloc */
	struct ifnet **match_array = NULL;
	int error = 0;

	MPASS(cb);

	while (true) {
		struct ifnet **new_array;
		int new_size = array_size;
		struct epoch_tracker et;
		struct ifnet *ifp;

		/* Grow until the count seen on the previous pass would fit. */
		while (new_size < match_count)
			new_size *= 2;
		/* M_WAITOK allocation happens outside the epoch section. */
		new_array = malloc(new_size * sizeof(void *), M_TEMP, M_WAITOK);
		/*
		 * Pointers copied here are stale (already released); they are
		 * never read because match_count is reset and the slots are
		 * refilled below.
		 */
		if (match_array != NULL)
			memcpy(new_array, match_array, array_size * sizeof(void *));
		free(match_array, M_TEMP);
		match_array = new_array;
		array_size = new_size;

		match_count = 0;
		NET_EPOCH_ENTER(et);
		CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
			if (match_cb != NULL && !match_cb(ifp, match_arg))
				continue;
			if (match_count < array_size) {
				/* Skip interfaces whose reference cannot be taken. */
				if (if_try_ref(ifp))
					match_array[match_count++] = ifp;
			} else
				/* Array full: keep counting to size the retry. */
				match_count++;
		}
		NET_EPOCH_EXIT(et);

		if (match_count > array_size) {
			/* Overflow: drop every reference taken and retry. */
			for (int i = 0; i < array_size; i++)
				if_rele(match_array[i]);
			continue;
		} else {
			/*
			 * Callbacks run after the epoch section has been left.
			 * After the first error no further callbacks are made,
			 * but every reference is still released.
			 */
			for (int i = 0; i < match_count; i++) {
				if (error == 0)
					error = cb(match_array[i], cb_arg);
				if_rele(match_array[i]);
			}
			free(match_array, M_TEMP);
			break;
		}
	}

	return (error);
}

/*
 * Uses just 1 pointer of the 4 available in the public struct.
 */
if_t
if_iter_start(struct if_iter *iter)
{
	if_t ifp;

	NET_EPOCH_ASSERT();

	bzero(iter, sizeof(*iter));
	ifp = CK_STAILQ_FIRST(&V_ifnet);
	/* context[0] always holds the interface to hand out next. */
	if (ifp != NULL)
		iter->context[0] = CK_STAILQ_NEXT(ifp, if_link);
	else
		iter->context[0] = NULL;
	return (ifp);
}

/* Return the saved next interface (NULL at the end) and advance the cursor. */
if_t
if_iter_next(struct if_iter *iter)
{
	if_t cur_ifp = iter->context[0];

	if (cur_ifp != NULL)
		iter->context[0] = CK_STAILQ_NEXT(cur_ifp, if_link);
	return (cur_ifp);
}

void
if_iter_finish(struct if_iter *iter)
{
	/* Nothing to do here for now.
*/ } u_int if_foreach_lladdr(if_t ifp, iflladdr_cb_t cb, void *cb_arg) { struct epoch_tracker et; struct ifaddr *ifa; u_int count; MPASS(cb); count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; count += (*cb)(cb_arg, (struct sockaddr_dl *)ifa->ifa_addr, count); } NET_EPOCH_EXIT(et); return (count); } u_int if_llmaddr_count(if_t ifp) { struct epoch_tracker et; struct ifmultiaddr *ifma; int count; count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) if (ifma->ifma_addr->sa_family == AF_LINK) count++; NET_EPOCH_EXIT(et); return (count); } bool if_maddr_empty(if_t ifp) { return (CK_STAILQ_EMPTY(&ifp->if_multiaddrs)); } u_int if_foreach_llmaddr(if_t ifp, iflladdr_cb_t cb, void *cb_arg) { struct epoch_tracker et; struct ifmultiaddr *ifma; u_int count; MPASS(cb); count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; count += (*cb)(cb_arg, (struct sockaddr_dl *)ifma->ifma_addr, count); } NET_EPOCH_EXIT(et); return (count); } u_int if_foreach_addr_type(if_t ifp, int type, if_addr_cb_t cb, void *cb_arg) { struct epoch_tracker et; struct ifaddr *ifa; u_int count; MPASS(cb); count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != type) continue; count += (*cb)(cb_arg, ifa, count); } NET_EPOCH_EXIT(et); return (count); } struct ifaddr * ifa_iter_start(if_t ifp, struct ifa_iter *iter) { struct ifaddr *ifa; NET_EPOCH_ASSERT(); bzero(iter, sizeof(*iter)); ifa = CK_STAILQ_FIRST(&ifp->if_addrhead); if (ifa != NULL) iter->context[0] = CK_STAILQ_NEXT(ifa, ifa_link); else iter->context[0] = NULL; return (ifa); } struct ifaddr * ifa_iter_next(struct ifa_iter *iter) { struct ifaddr *ifa = iter->context[0]; if (ifa != NULL) iter->context[0] = CK_STAILQ_NEXT(ifa, ifa_link); return (ifa); } void ifa_iter_finish(struct 
ifa_iter *iter) { /* Nothing to do here for now. */ } int if_setsoftc(if_t ifp, void *softc) { ifp->if_softc = softc; return (0); } void * if_getsoftc(const if_t ifp) { return (ifp->if_softc); } void if_setrcvif(struct mbuf *m, if_t ifp) { MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); m->m_pkthdr.rcvif = (struct ifnet *)ifp; } void if_setvtag(struct mbuf *m, uint16_t tag) { m->m_pkthdr.ether_vtag = tag; } uint16_t if_getvtag(struct mbuf *m) { return (m->m_pkthdr.ether_vtag); } int if_sendq_empty(if_t ifp) { return (IFQ_DRV_IS_EMPTY(&ifp->if_snd)); } struct ifaddr * if_getifaddr(const if_t ifp) { return (ifp->if_addr); } int if_setsendqready(if_t ifp) { IFQ_SET_READY(&ifp->if_snd); return (0); } int if_setsendqlen(if_t ifp, int tx_desc_count) { IFQ_SET_MAXLEN(&ifp->if_snd, tx_desc_count); ifp->if_snd.ifq_drv_maxlen = tx_desc_count; return (0); } void if_setnetmapadapter(if_t ifp, struct netmap_adapter *na) { ifp->if_netmap = na; } struct netmap_adapter * if_getnetmapadapter(if_t ifp) { return (ifp->if_netmap); } int if_vlantrunkinuse(if_t ifp) { return (ifp->if_vlantrunk != NULL); } void if_init(if_t ifp, void *ctx) { (*ifp->if_init)(ctx); } void if_input(if_t ifp, struct mbuf* sendmp) { (*ifp->if_input)(ifp, sendmp); } int if_transmit(if_t ifp, struct mbuf *m) { return ((*ifp->if_transmit)(ifp, m)); } int if_resolvemulti(if_t ifp, struct sockaddr **srcs, struct sockaddr *dst) { if (ifp->if_resolvemulti == NULL) return (EOPNOTSUPP); return (ifp->if_resolvemulti(ifp, srcs, dst)); } int if_ioctl(if_t ifp, u_long cmd, void *data) { if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); return (ifp->if_ioctl(ifp, cmd, data)); } struct mbuf * if_dequeue(if_t ifp) { struct mbuf *m; IFQ_DRV_DEQUEUE(&ifp->if_snd, m); return (m); } int if_sendq_prepend(if_t ifp, struct mbuf *m) { IFQ_DRV_PREPEND(&ifp->if_snd, m); return (0); } int if_setifheaderlen(if_t ifp, int len) { ifp->if_hdrlen = len; return (0); } char * if_getlladdr(const if_t ifp) { return (IF_LLADDR(ifp)); } void * 
if_gethandle(u_char type) { return (if_alloc(type)); } void if_vlancap(if_t ifp) { VLAN_CAPABILITIES(ifp); } int if_sethwtsomax(if_t ifp, u_int if_hw_tsomax) { ifp->if_hw_tsomax = if_hw_tsomax; return (0); } int if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount) { ifp->if_hw_tsomaxsegcount = if_hw_tsomaxsegcount; return (0); } int if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize) { ifp->if_hw_tsomaxsegsize = if_hw_tsomaxsegsize; return (0); } u_int if_gethwtsomax(const if_t ifp) { return (ifp->if_hw_tsomax); } u_int if_gethwtsomaxsegcount(const if_t ifp) { return (ifp->if_hw_tsomaxsegcount); } u_int if_gethwtsomaxsegsize(const if_t ifp) { return (ifp->if_hw_tsomaxsegsize); } void if_setinitfn(if_t ifp, if_init_fn_t init_fn) { ifp->if_init = init_fn; } void if_setinputfn(if_t ifp, if_input_fn_t input_fn) { ifp->if_input = input_fn; } if_input_fn_t if_getinputfn(if_t ifp) { return (ifp->if_input); } void if_setioctlfn(if_t ifp, if_ioctl_fn_t ioctl_fn) { ifp->if_ioctl = ioctl_fn; } void if_setoutputfn(if_t ifp, if_output_fn_t output_fn) { ifp->if_output = output_fn; } void if_setstartfn(if_t ifp, if_start_fn_t start_fn) { ifp->if_start = start_fn; } if_start_fn_t if_getstartfn(if_t ifp) { return (ifp->if_start); } void if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn) { ifp->if_transmit = start_fn; } if_transmit_fn_t if_gettransmitfn(if_t ifp) { return (ifp->if_transmit); } void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn) { ifp->if_qflush = flush_fn; } void if_setsndtagallocfn(if_t ifp, if_snd_tag_alloc_t alloc_fn) { ifp->if_snd_tag_alloc = alloc_fn; } int if_snd_tag_alloc(if_t ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **mstp) { if (ifp->if_snd_tag_alloc == NULL) return (EOPNOTSUPP); return (ifp->if_snd_tag_alloc(ifp, params, mstp)); } void if_setgetcounterfn(if_t ifp, if_get_counter_t fn) { ifp->if_get_counter = fn; } void if_setreassignfn(if_t ifp, if_reassign_fn_t fn) { ifp->if_reassign = fn; } void 
if_setratelimitqueryfn(if_t ifp, if_ratelimit_query_t fn) { ifp->if_ratelimit_query = fn; } void if_setdebugnet_methods(if_t ifp, struct debugnet_methods *m) { ifp->if_debugnet_methods = m; } struct label * if_getmaclabel(if_t ifp) { return (ifp->if_label); } void if_setmaclabel(if_t ifp, struct label *label) { ifp->if_label = label; } int if_gettype(if_t ifp) { return (ifp->if_type); } void * if_getllsoftc(if_t ifp) { return (ifp->if_llsoftc); } void if_setllsoftc(if_t ifp, void *llsoftc) { ifp->if_llsoftc = llsoftc; }; int if_getlinkstate(if_t ifp) { return (ifp->if_link_state); } const uint8_t * if_getbroadcastaddr(if_t ifp) { return (ifp->if_broadcastaddr); } void if_setbroadcastaddr(if_t ifp, const uint8_t *addr) { ifp->if_broadcastaddr = addr; } int if_getnumadomain(if_t ifp) { return (ifp->if_numa_domain); } uint64_t if_getcounter(if_t ifp, ift_counter counter) { return (ifp->if_get_counter(ifp, counter)); } bool if_altq_is_enabled(if_t ifp) { return (ALTQ_IS_ENABLED(&ifp->if_snd)); } struct vnet * if_getvnet(if_t ifp) { return (ifp->if_vnet); } -struct vnet * -if_gethomevnet(if_t ifp) -{ - return (ifp->if_home_vnet); -} - void * if_getafdata(if_t ifp, int af) { return (ifp->if_afdata[af]); } u_int if_getfib(if_t ifp) { return (ifp->if_fib); } uint8_t if_getaddrlen(if_t ifp) { return (ifp->if_addrlen); } struct bpf_if * if_getbpf(if_t ifp) { return (ifp->if_bpf); } struct ifvlantrunk * if_getvlantrunk(if_t ifp) { return (ifp->if_vlantrunk); } uint8_t if_getpcp(if_t ifp) { return (ifp->if_pcp); } void * if_getl2com(if_t ifp) { return (ifp->if_l2com); } void if_setipsec_accel_methods(if_t ifp, const struct if_ipsec_accel_methods *m) { ifp->if_ipsec_accel_m = m; } #ifdef DDB static void if_show_ifnet(struct ifnet *ifp) { if (ifp == NULL) return; db_printf("%s:\n", ifp->if_xname); #define IF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, ifp->e); IF_DB_PRINTF("%s", if_dname); IF_DB_PRINTF("%d", if_dunit); IF_DB_PRINTF("%s", if_description); IF_DB_PRINTF("%u", 
if_index); IF_DB_PRINTF("%d", if_idxgen); IF_DB_PRINTF("%u", if_refcount); IF_DB_PRINTF("%p", if_softc); IF_DB_PRINTF("%p", if_l2com); IF_DB_PRINTF("%p", if_llsoftc); IF_DB_PRINTF("%d", if_amcount); IF_DB_PRINTF("%p", if_addr); IF_DB_PRINTF("%p", if_broadcastaddr); IF_DB_PRINTF("%p", if_afdata); IF_DB_PRINTF("%d", if_afdata_initialized); IF_DB_PRINTF("%u", if_fib); IF_DB_PRINTF("%p", if_vnet); IF_DB_PRINTF("%p", if_home_vnet); IF_DB_PRINTF("%p", if_vlantrunk); IF_DB_PRINTF("%p", if_bpf); IF_DB_PRINTF("%u", if_pcount); IF_DB_PRINTF("%p", if_bridge); IF_DB_PRINTF("%p", if_lagg); IF_DB_PRINTF("%p", if_pf_kif); IF_DB_PRINTF("%p", if_carp); IF_DB_PRINTF("%p", if_label); IF_DB_PRINTF("%p", if_netmap); IF_DB_PRINTF("0x%08x", if_flags); IF_DB_PRINTF("0x%08x", if_drv_flags); IF_DB_PRINTF("0x%08x", if_capabilities); IF_DB_PRINTF("0x%08x", if_capenable); IF_DB_PRINTF("%p", if_snd.ifq_head); IF_DB_PRINTF("%p", if_snd.ifq_tail); IF_DB_PRINTF("%d", if_snd.ifq_len); IF_DB_PRINTF("%d", if_snd.ifq_maxlen); IF_DB_PRINTF("%p", if_snd.ifq_drv_head); IF_DB_PRINTF("%p", if_snd.ifq_drv_tail); IF_DB_PRINTF("%d", if_snd.ifq_drv_len); IF_DB_PRINTF("%d", if_snd.ifq_drv_maxlen); IF_DB_PRINTF("%d", if_snd.altq_type); IF_DB_PRINTF("%x", if_snd.altq_flags); #undef IF_DB_PRINTF } DB_SHOW_COMMAND(ifnet, db_show_ifnet) { if (!have_addr) { db_printf("usage: show ifnet \n"); return; } if_show_ifnet((struct ifnet *)addr); } DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets) { struct ifnet *ifp; u_short idx; for (idx = 1; idx <= if_index; idx++) { ifp = ifindex_table[idx].ife_ifnet; if (ifp == NULL) continue; db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp); if (db_pager_quit) break; } } #endif /* DDB */ diff --git a/sys/net/if_var.h b/sys/net/if_var.h index e71fe798fdec..f2df612b19c1 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -1,762 +1,761 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _NET_IF_VAR_H_ #define _NET_IF_VAR_H_ /* * Structures defining a network interface, providing a packet * transport mechanism (ala level 0 of the PUP protocols). * * Each interface accepts output datagrams of a specified maximum * length, and provides higher level routines with input datagrams * received from its medium. * * Output occurs when the routine if_output is called, with three parameters: * (*ifp->if_output)(ifp, m, dst, ro) * Here m is the mbuf chain to be sent and dst is the destination address. 
* The output routine encapsulates the supplied datagram if necessary, * and then transmits it on its medium. * * On input, each interface unwraps the data received by it, and either * places it on the input queue of an internetwork datagram routine * and posts the associated software interrupt, or passes the datagram to a raw * packet input routine. * * Routines exist for locating interfaces by their addresses * or for locating an interface on a certain network, as well as more general * routing and gateway routines maintaining information used to locate * interfaces. These routines live in the files if.c and route.c */ struct rtentry; /* ifa_rtrequest */ struct socket; struct carp_if; struct carp_softc; struct ifvlantrunk; struct route; /* if_output */ struct vnet; struct ifmedia; struct netmap_adapter; struct debugnet_methods; #ifdef _KERNEL #include #include /* ifqueue only? */ #include #include #endif /* _KERNEL */ #include #include #include #include /* XXX */ #include /* struct ifqueue */ #include /* XXX */ #include /* XXX */ #include /* if_link_task */ #define IF_DUNIT_NONE -1 #include CK_STAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */ CK_STAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */ CK_STAILQ_HEAD(ifmultihead, ifmultiaddr); CK_STAILQ_HEAD(ifgrouphead, ifg_group); #ifdef _KERNEL VNET_DECLARE(struct pfil_head *, link_pfil_head); #define V_link_pfil_head VNET(link_pfil_head) #define PFIL_ETHER_NAME "ethernet" #define HHOOK_IPSEC_INET 0 #define HHOOK_IPSEC_INET6 1 #define HHOOK_IPSEC_COUNT 2 VNET_DECLARE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); VNET_DECLARE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); #define V_ipsec_hhh_in VNET(ipsec_hhh_in) #define V_ipsec_hhh_out VNET(ipsec_hhh_out) #endif /* _KERNEL */ typedef enum { IFCOUNTER_IPACKETS = 0, IFCOUNTER_IERRORS, IFCOUNTER_OPACKETS, IFCOUNTER_OERRORS, IFCOUNTER_COLLISIONS, IFCOUNTER_IBYTES, IFCOUNTER_OBYTES, IFCOUNTER_IMCASTS, 
IFCOUNTER_OMCASTS,
	IFCOUNTER_IQDROPS,
	IFCOUNTER_OQDROPS,
	IFCOUNTER_NOPROTO,
	IFCOUNTERS /* Array size. */
} ift_counter;

/*
 * Function pointer types for per-interface callbacks.  Each of these is
 * accepted by one of the if_set*fn() prototypes declared later in this
 * header.
 */
typedef void (*if_start_fn_t)(if_t);
typedef int (*if_ioctl_fn_t)(if_t, u_long, caddr_t);
typedef void (*if_init_fn_t)(void *);
typedef void (*if_input_fn_t)(if_t, struct mbuf *);
typedef int (*if_output_fn_t)(if_t, struct mbuf *, const struct sockaddr *, struct route *);
typedef void (*if_qflush_fn_t)(if_t);
typedef int (*if_transmit_fn_t)(if_t, struct mbuf *);
typedef uint64_t (*if_get_counter_t)(if_t, ift_counter);
typedef void (*if_reassign_fn_t)(if_t, struct vnet *, char *);

/*
 * Callback types used as the members of struct if_ipsec_accel_methods
 * (declared later in this header).
 */
typedef int (*if_spdadd_fn_t)(if_t ifp, void *sp, void *inp, void **priv);
typedef int (*if_spddel_fn_t)(if_t ifp, void *sp, void *priv);
typedef int (*if_sa_newkey_fn_t)(if_t ifp, void *sav, u_int drv_spi, void **privp);
typedef int (*if_sa_deinstall_fn_t)(if_t ifp, u_int drv_spi, void *priv);

struct seclifetime;
/* Flag bit OR'ed with an IF_SA_CNT_*_VAL selector to form its *_UPD variant. */
#define IF_SA_CNT_UPD 0x80000000
enum IF_SA_CNT_WHICH {
	IF_SA_CNT_IFP_HW_VAL = 1,
	IF_SA_CNT_TOTAL_SW_VAL,
	IF_SA_CNT_TOTAL_HW_VAL,
	IF_SA_CNT_IFP_HW_UPD = IF_SA_CNT_IFP_HW_VAL | IF_SA_CNT_UPD,
	IF_SA_CNT_TOTAL_SW_UPD = IF_SA_CNT_TOTAL_SW_VAL | IF_SA_CNT_UPD,
	IF_SA_CNT_TOTAL_HW_UPD = IF_SA_CNT_TOTAL_HW_VAL | IF_SA_CNT_UPD,
};
typedef int (*if_sa_cnt_fn_t)(if_t ifp, void *sa, uint32_t drv_spi, void *priv, struct seclifetime *lt);
typedef int (*if_ipsec_hwassist_fn_t)(if_t ifp, void *sav, u_int drv_spi,void *priv);

/* TSO limits; passed to if_hw_tsomax_common() and if_hw_tsomax_update(). */
struct ifnet_hw_tsomax {
	u_int tsomaxbytes;	/* TSO total burst length limit in bytes */
	u_int tsomaxsegcount;	/* TSO maximum segment count */
	u_int tsomaxsegsize;	/* TSO maximum segment size in bytes */
};

/* Interface encap request types */
typedef enum {
	IFENCAP_LL = 1 /* pre-calculate link-layer header */
} ife_type;

/*
 * The structure below allows requesting various pre-calculated L2/L3
 * headers for different media.  Requests vary by type (rtype field).
 *
 * IFENCAP_LL type: pre-calculates link header based on address family
 *   and destination lladdr.
 *
 * Input data fields:
 *   buf: pointer to destination buffer
 *   bufsize: buffer size
 *   flags: IFENCAP_FLAG_BROADCAST if destination is broadcast
 *   family: address family defined by AF_ constant.
 *   lladdr: pointer to link-layer address
 *   lladdr_len: length of link-layer address
 *   hdata: pointer to L3 header (optional, used for ARP requests).
 * Output data fields:
 *   buf: encap data is stored here
 *   bufsize: resulting encap length is stored here
 *   lladdr_off: offset of link-layer address from encap hdr start
 *   hdata: L3 header may be altered if necessary
 */
struct if_encap_req {
	u_char *buf;		/* Destination buffer (w) */
	size_t bufsize;		/*
				 * size of provided buffer (r).
				 * NOTE(review): the block comment above also
				 * lists bufsize as an output; confirm whether
				 * this should read (rw).
				 */
	ife_type rtype;		/* request type (r) */
	uint32_t flags;		/* Request flags (r) */
	int family;		/* Address family AF_* (r) */
	int lladdr_off;		/* offset from header start (w) */
	int lladdr_len;		/* lladdr length (r) */
	char *lladdr;		/* link-level address pointer (r) */
	char *hdata;		/* Upper layer header data (rw) */
};
#define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */

/*
 * Network interface send tag support. The storage of "struct
 * m_snd_tag" comes from the network driver and it is free to allocate
 * as much additional space as it wants for its own use.
 */
struct ktls_session;
struct m_snd_tag;

/* Send tag types; IF_SND_TAG_TYPE_MAX is one past the last defined type. */
#define IF_SND_TAG_TYPE_RATE_LIMIT 0
#define IF_SND_TAG_TYPE_UNLIMITED 1
#define IF_SND_TAG_TYPE_TLS 2
#define IF_SND_TAG_TYPE_TLS_RATE_LIMIT 3
#define IF_SND_TAG_TYPE_TLS_RX 4
#define IF_SND_TAG_TYPE_MAX 5

/* Common leading member of every send tag allocation request below. */
struct if_snd_tag_alloc_header {
	uint32_t type;		/* send tag type, see IF_SND_TAG_XXX */
	uint32_t flowid;	/* mbuf hash value */
	uint32_t flowtype;	/* mbuf hash type */
	uint8_t numa_domain;	/* numa domain of associated inp */
};

struct if_snd_tag_alloc_rate_limit {
	struct if_snd_tag_alloc_header hdr;
	uint64_t max_rate;	/* in bytes/s */
	uint32_t flags;		/* M_NOWAIT or M_WAITOK */
	uint32_t reserved;	/* alignment */
};

struct if_snd_tag_alloc_tls {
	struct if_snd_tag_alloc_header hdr;
	struct inpcb *inp;
	const struct ktls_session *tls;
};

struct if_snd_tag_alloc_tls_rx {
	struct if_snd_tag_alloc_header hdr;
	struct inpcb *inp;
	const struct ktls_session *tls;
	uint16_t vlan_id;	/* valid if non-zero */
};

struct if_snd_tag_alloc_tls_rate_limit {
	struct if_snd_tag_alloc_header hdr;
	struct inpcb *inp;
	const struct ktls_session *tls;
	uint64_t max_rate;	/* in bytes/s */
};

/* Parameter block shared by the rate-limit modify and query unions below. */
struct if_snd_tag_rate_limit_params {
	uint64_t max_rate;	/* in bytes/s */
	uint32_t queue_level;	/* 0 (empty) .. 65535 (full) */
#define IF_SND_QUEUE_LEVEL_MIN 0
#define IF_SND_QUEUE_LEVEL_MAX 65535
	uint32_t flags;		/* M_NOWAIT or M_WAITOK */
};

struct if_snd_tag_modify_tls_rx {
	/* TCP sequence number of TLS header in host endian format */
	uint32_t tls_hdr_tcp_sn;

	/*
	 * TLS record length, including all headers, data and trailers.
	 * If the tls_rec_length is zero, it means HW encryption resumed.
	 */
	uint32_t tls_rec_length;

	/* TLS sequence number in host endian format */
	uint64_t tls_seq_number;
};

/*
 * Allocation request.  Every member struct starts with a
 * struct if_snd_tag_alloc_header, which is also exposed directly as hdr.
 */
union if_snd_tag_alloc_params {
	struct if_snd_tag_alloc_header hdr;
	struct if_snd_tag_alloc_rate_limit rate_limit;
	struct if_snd_tag_alloc_rate_limit unlimited;
	struct if_snd_tag_alloc_tls tls;
	struct if_snd_tag_alloc_tls_rx tls_rx;
	struct if_snd_tag_alloc_tls_rate_limit tls_rate_limit;
};

union if_snd_tag_modify_params {
	struct if_snd_tag_rate_limit_params rate_limit;
	struct if_snd_tag_rate_limit_params unlimited;
	struct if_snd_tag_rate_limit_params tls_rate_limit;
	struct if_snd_tag_modify_tls_rx tls_rx;
};

union if_snd_tag_query_params {
	struct if_snd_tag_rate_limit_params rate_limit;
	struct if_snd_tag_rate_limit_params unlimited;
	struct if_snd_tag_rate_limit_params tls_rate_limit;
};

/*
 * Send tag method types.  These are function types, not pointer types;
 * struct if_snd_tag_sw below stores pointers to them.
 */
typedef int (if_snd_tag_alloc_t)(if_t, union if_snd_tag_alloc_params *, struct m_snd_tag **);
typedef int (if_snd_tag_modify_t)(struct m_snd_tag *, union if_snd_tag_modify_params *);
typedef int (if_snd_tag_query_t)(struct m_snd_tag *, union if_snd_tag_query_params *);
typedef void (if_snd_tag_free_t)(struct m_snd_tag *);
typedef struct m_snd_tag *(if_next_send_tag_t)(struct m_snd_tag *);
typedef int (if_snd_tag_status_str_t)(struct m_snd_tag *, char *buf, size_t *sz);

/* Method table for one send tag type. */
struct if_snd_tag_sw {
	if_snd_tag_modify_t *snd_tag_modify;
	if_snd_tag_query_t *snd_tag_query;
	if_snd_tag_free_t *snd_tag_free;
	if_next_send_tag_t *next_snd_tag;
	if_snd_tag_status_str_t *snd_tag_status_str;
	u_int type;		/* One of IF_SND_TAG_TYPE_*. */
};

/* Query return flags */
#define RT_NOSUPPORT 0x00000000 /* Not supported */
#define RT_IS_INDIRECT 0x00000001 /*
				   * Interface like a lagg, select
				   * the actual interface for
				   * capabilities.
				   */
#define RT_IS_SELECTABLE 0x00000002 /*
				     * No rate table, you select
				     * rates and the first
				     * number_of_rates are created.
				     */
#define RT_IS_FIXED_TABLE 0x00000004 /* A fixed table is attached */
#define RT_IS_UNUSABLE 0x00000008 /* It is not usable for this */
#define RT_IS_SETUP_REQ 0x00000010 /* The interface setup must be called before use */

/* Result block filled in through an if_ratelimit_query_t callback. */
struct if_ratelimit_query_results {
	const uint64_t *rate_table;	/* Pointer to table if present */
	uint32_t flags;			/* Flags indicating results */
	uint32_t max_flows;		/* Max flows using, 0=unlimited */
	uint32_t number_of_rates;	/* How many unique rates can be created */
	uint32_t min_segment_burst;	/* The amount the adapter bursts at each send */
};

typedef void (if_ratelimit_query_t)(if_t, struct if_ratelimit_query_results *);
typedef int (if_ratelimit_setup_t)(if_t, uint64_t, uint32_t);

#define IF_NODOM 255
/*
 * Locks for address lists on the network interface.
 *
 * IF_ADDR_LOCK_ASSERT() passes when the caller is either inside
 * net_epoch_preempt or owns if_addr_lock; IF_ADDR_WLOCK_ASSERT() requires
 * ownership of the mutex.
 */
#define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_lock, "if_addr_lock", NULL, MTX_DEF)
#define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_lock)
#define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_lock)
#define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_lock)
#define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(if)->if_addr_lock))
#define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_lock, MA_OWNED)

#ifdef _KERNEL
/* interface link layer address change event */
typedef void (*iflladdr_event_handler_t)(void *, if_t);
EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t);
/* interface address change event */
typedef void (*ifaddr_event_handler_t)(void *, if_t);
EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t);
/*
 * Extended address change event.
 * NOTE(review): the trailing int is presumably IFADDR_EVENT_ADD or
 * IFADDR_EVENT_DEL -- confirm against the callers.
 */
typedef void (*ifaddr_event_ext_handler_t)(void *, if_t, struct ifaddr *, int);
EVENTHANDLER_DECLARE(ifaddr_event_ext, ifaddr_event_ext_handler_t);
#define IFADDR_EVENT_ADD 0
#define IFADDR_EVENT_DEL 1
/* new interface arrival event */
typedef void (*ifnet_arrival_event_handler_t)(void *, if_t);
EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t);
/* interface departure event */
typedef void
(*ifnet_departure_event_handler_t)(void *, if_t);
EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t);
/* Interface link state change event */
typedef void (*ifnet_link_event_handler_t)(void *, if_t, int);
EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t);
/* Interface events: up/down, PCP change and baudrate update */
#define IFNET_EVENT_UP 0
#define IFNET_EVENT_DOWN 1
#define IFNET_EVENT_PCP 2 /* priority code point, PCP */
#define IFNET_EVENT_UPDATE_BAUDRATE 3

typedef void (*ifnet_event_fn)(void *, if_t ifp, int event);
EVENTHANDLER_DECLARE(ifnet_event, ifnet_event_fn);

/*
 * interface groups
 */
struct ifg_group {
	char ifg_group[IFNAMSIZ];
	u_int ifg_refcnt;
	void *ifg_pf_kif;
	CK_STAILQ_HEAD(, ifg_member) ifg_members; /* (CK_) */
	CK_STAILQ_ENTRY(ifg_group) ifg_next; /* (CK_) */
};

/* One entry on a group's ifg_members list. */
struct ifg_member {
	CK_STAILQ_ENTRY(ifg_member) ifgm_next; /* (CK_) */
	if_t ifgm_ifp;
};

/* List entry pointing at one ifg_group. */
struct ifg_list {
	struct ifg_group *ifgl_group;
	CK_STAILQ_ENTRY(ifg_list) ifgl_next; /* (CK_) */
};

/* The group events are declared only when _SYS_EVENTHANDLER_H_ is defined. */
#ifdef _SYS_EVENTHANDLER_H_
/* group attach event */
typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *);
EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t);
/* group detach event */
typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *);
EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t);
/* group change event */
typedef void (*group_change_event_handler_t)(void *, const char *);
EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
#endif /* _SYS_EVENTHANDLER_H_ */

/*
 * 72 was chosen below because it is the size of a TCP/IP
 * header (40) + the minimum mss (32).
 */
#define IF_MINMTU 72
#define IF_MAXMTU 65535

/* TOE device pointer; both macros wrap the llsoftc accessors. */
#define TOEDEV(ifp) if_getllsoftc(ifp)
#define SETTOEDEV(ifp, sc) if_setllsoftc((ifp), (sc))

/*
 * The ifaddr structure contains information about one address
 * of an interface.  These structures are maintained by the different
 * address families, are allocated and attached when an address is set,
 * and are linked together so all addresses for an interface can be located.
 *
 * NOTE: a 'struct ifaddr' is always at the beginning of a larger
 * chunk of malloc'ed memory, where we store the three addresses
 * (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here.
 */
struct ifaddr {
	struct sockaddr *ifa_addr;	/* address of interface */
	struct sockaddr *ifa_dstaddr;	/* other end of p-to-p link */
#define ifa_broadaddr ifa_dstaddr /* broadcast address interface */
	struct sockaddr *ifa_netmask;	/* used to determine subnet */
	if_t ifa_ifp;			/* back-pointer to interface */
	struct carp_softc *ifa_carp;	/* pointer to CARP data */
	CK_STAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */
	u_short ifa_flags;		/* mostly rt_flags for cloning */
#define IFA_ROUTE RTF_UP /* route installed */
#define IFA_RTSELF RTF_HOST /* loopback route to self installed */
	u_int ifa_refcnt;		/* references to this structure */

	counter_u64_t ifa_ipackets;
	counter_u64_t ifa_opackets;
	counter_u64_t ifa_ibytes;
	counter_u64_t ifa_obytes;
	struct epoch_context ifa_epoch_ctx;
};

struct ifaddr * ifa_alloc(size_t size, int flags);
void ifa_free(struct ifaddr *ifa);
void ifa_ref(struct ifaddr *ifa);
/* The result must not be ignored (__result_use_check). */
int __result_use_check ifa_try_ref(struct ifaddr *ifa);

/*
 * Multicast address structure. This is analogous to the ifaddr
 * structure except that it keeps track of multicast addresses.
 */
#define IFMA_F_ENQUEUED 0x1
struct ifmultiaddr {
	CK_STAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */
	struct sockaddr *ifma_addr;	/* address this membership is for */
	struct sockaddr *ifma_lladdr;	/* link-layer translation, if any */
	if_t ifma_ifp;			/* back-pointer to interface */
	u_int ifma_refcount;		/* reference count */
	int ifma_flags;			/* presumably IFMA_F_* bits -- TODO confirm */
	void *ifma_protospec;		/* protocol-specific state, if any */
	struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */
	struct epoch_context ifma_epoch_ctx;
};

extern struct sx ifnet_sxlock;

/* Exclusive (W) and shared (R) wrappers around ifnet_sxlock. */
#define IFNET_WLOCK() sx_xlock(&ifnet_sxlock)
#define IFNET_WUNLOCK() sx_xunlock(&ifnet_sxlock)
#define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED)
#define IFNET_WLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_XLOCKED)
#define IFNET_RLOCK() sx_slock(&ifnet_sxlock)
#define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock)

/*
 * Look up an ifnet given its index.  The returned value is protected from
 * being freed by the network epoch.  The _ref variant also acquires a
 * reference that must be released using if_rele().
 */
if_t ifnet_byindex(u_int);
if_t ifnet_byindex_ref(u_int);

/*
 * ifnet_byindexgen() looks up ifnet by index and generation count,
 * attempting to restore a weak pointer that had been stored across
 * the epoch.
 */
if_t ifnet_byindexgen(uint16_t idx, uint16_t gen);

VNET_DECLARE(struct ifnethead, ifnet);
VNET_DECLARE(struct ifgrouphead, ifg_head);
VNET_DECLARE(if_t, loif); /* first loopback interface */

#define V_ifnet VNET(ifnet)
#define V_ifg_head VNET(ifg_head)
#define V_loif VNET(loif)

/* MCDPRINTF is printf when MCAST_VERBOSE is defined and expands to nothing otherwise. */
#ifdef MCAST_VERBOSE
#define MCDPRINTF printf
#else
#define MCDPRINTF(...)
#endif

/* Interface, group and multicast management functions. */
int if_addgroup(if_t, const char *);
int if_delgroup(if_t, const char *);
int if_addmulti(if_t, struct sockaddr *, struct ifmultiaddr **);
int if_allmulti(if_t, int);
if_t if_alloc(u_char);
if_t if_alloc_dev(u_char, device_t dev);
void if_attach(if_t);
void if_dead(if_t);
int if_delmulti(if_t, struct sockaddr *);
void if_delmulti_ifma(struct ifmultiaddr *);
void if_delmulti_ifma_flags(struct ifmultiaddr *, int flags);
void if_detach(if_t);
void if_purgeaddrs(if_t);
void if_delallmulti(if_t);
void if_down(if_t);
struct ifmultiaddr * if_findmulti(if_t, const struct sockaddr *);
void if_freemulti(struct ifmultiaddr *ifma);
void if_free(if_t);
void if_initname(if_t, const char *, int);
void if_link_state_change(if_t, int);
int if_printf(if_t, const char *, ...) __printflike(2, 3);
int if_log(if_t, int, const char *, ...) __printflike(3, 4);
void if_ref(if_t);
void if_rele(if_t);
/* The result must not be ignored (__result_use_check). */
bool __result_use_check if_try_ref(if_t);
int if_setlladdr(if_t, const u_char *, int);
int if_tunnel_check_nesting(if_t, struct mbuf *, uint32_t, int);
void if_up(if_t);
int ifioctl(struct socket *, u_long, caddr_t, struct thread *);
int ifpromisc(if_t, int);
if_t ifunit(const char *);
if_t ifunit_ref(const char *);

/* Functions operating on struct ifaddr. */
int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *);
int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *);
int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *);

struct ifaddr *ifa_ifwithaddr(const struct sockaddr *);
int ifa_ifwithaddr_check(const struct sockaddr *);
struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int);
struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int);
struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int);
struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, const struct sockaddr *, u_int);
struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, if_t);
int ifa_preferred(struct ifaddr *, struct ifaddr *);

int if_simloop(if_t ifp, struct mbuf *m, int af, int hlen);

/*
 * Function types (not pointer types) for the hooks handed to
 * if_register_com_alloc(), which takes pointers to them together with a
 * u_char type.
 */
typedef void *if_com_alloc_t(u_char type, if_t ifp);
typedef void if_com_free_t(void *com, u_char type);
void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f);
void if_deregister_com_alloc(u_char type);
void if_data_copy(if_t, struct if_data *);
uint64_t if_get_counter_default(if_t, ift_counter);
void if_inc_counter(if_t, ift_counter, int64_t);

/* Getters and setters taking an if_t. */
uint64_t if_setbaudrate(if_t ifp, uint64_t baudrate);
uint64_t if_getbaudrate(const if_t ifp);
int if_setcapabilities(if_t ifp, int capabilities);
int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit);
int if_getcapabilities(const if_t ifp);
int if_togglecapenable(if_t ifp, int togglecap);
int if_setcapenable(if_t ifp, int capenable);
int if_setcapenablebit(if_t ifp, int setcap, int clearcap);
int if_getcapenable(const if_t ifp);
int if_setcapabilities2(if_t ifp, int capabilities);
int if_setcapabilities2bit(if_t ifp, int setbit, int clearbit);
int if_getcapabilities2(const if_t ifp);
int if_togglecapenable2(if_t ifp, int togglecap);
int if_setcapenable2(if_t ifp, int capenable);
int if_setcapenable2bit(if_t ifp, int setcap, int clearcap);
int if_getcapenable2(const if_t ifp);
int if_getdunit(const if_t ifp);
int if_getindex(const if_t ifp);
int if_getidxgen(const if_t ifp);
const char *if_getdname(const if_t ifp);
void if_setdname(if_t ifp, const char *name);
const char *if_name(if_t ifp);
int if_setname(if_t ifp, const char *name);
int if_rename(if_t ifp, char *new_name);
const char *if_getdescr(if_t ifp);
void if_setdescr(if_t ifp, char *descrbuf);
char *if_allocdescr(size_t sz, int malloc_flag);
void if_freedescr(char *descrbuf);
void if_setlastchange(if_t ifp);
int if_getalloctype(const if_t ifp);
int if_gettype(const if_t ifp);
int if_setdev(if_t ifp, void *dev);
/*
 * NOTE(review): the second parameter is named if_setflags, the same
 * identifier as the if_setflags() function declared in this header.
 */
int if_setdrvflagbits(if_t ifp, int if_setflags, int clear_flags);
int if_getdrvflags(const if_t ifp);
int if_setdrvflags(if_t ifp, int flags);
int if_getlinkstate(if_t ifp);
int if_clearhwassist(if_t ifp);
int if_sethwassistbits(if_t ifp, int toset, int toclear);
int if_sethwassist(if_t ifp, int hwassist_bit);
int if_gethwassist(const if_t ifp);
int if_togglehwassist(if_t ifp, int toggle_bits);
int if_setsoftc(if_t ifp, void *softc);
void *if_getsoftc(if_t ifp);
int if_setflags(if_t ifp, int flags);
void if_setllsoftc(if_t ifp, void *softc);
void *if_getllsoftc(if_t ifp);
u_int if_getfib(if_t ifp);
uint8_t if_getaddrlen(if_t ifp);
int if_gethwaddr(const if_t ifp, struct ifreq *);
const uint8_t *if_getbroadcastaddr(const if_t ifp);
void if_setbroadcastaddr(if_t ifp, const uint8_t *);
int if_setmtu(if_t ifp, int mtu);
int if_getmtu(const if_t ifp);
int if_getmtu_family(const if_t ifp, int family);
void if_notifymtu(if_t ifp);
void if_setppromisc(const if_t ifp, bool ppromisc);
int if_setflagbits(if_t ifp, int set, int clear);
/*
 * NOTE(review): if_setflags() is already declared earlier in this list;
 * this second, identical prototype is redundant.
 */
int if_setflags(if_t ifp, int flags);
int if_getflags(const if_t ifp);
int if_getnumadomain(if_t ifp);
int if_sendq_empty(if_t ifp);
int if_setsendqready(if_t ifp);
int if_setsendqlen(if_t ifp, int tx_desc_count);
int if_sethwtsomax(if_t ifp, u_int if_hw_tsomax);
int if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount);
int if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize);
u_int if_gethwtsomax(const if_t ifp);
u_int if_gethwtsomaxsegcount(const if_t ifp);
u_int if_gethwtsomaxsegsize(const if_t ifp);
void if_setnetmapadapter(if_t ifp, struct netmap_adapter *na);
struct netmap_adapter *if_getnetmapadapter(if_t ifp);
void if_input(if_t ifp, struct mbuf* sendmp);
int if_sendq_prepend(if_t ifp, struct mbuf *m);
struct mbuf *if_dequeue(if_t ifp);
int if_setifheaderlen(if_t ifp, int len);
void if_setrcvif(struct mbuf *m, if_t ifp);
void if_setvtag(struct mbuf *m, u_int16_t tag);
u_int16_t if_getvtag(struct mbuf *m);
int if_vlantrunkinuse(if_t ifp);
char *if_getlladdr(const if_t ifp);
struct vnet *if_getvnet(const if_t ifp);
-struct vnet *if_gethomevnet(const if_t ifp);
void *if_gethandle(u_char);
void if_vlancap(if_t ifp);
int if_transmit(if_t ifp, struct mbuf *m);
void if_init(if_t ifp, void *ctx);
int if_ioctl(if_t ifp, u_long cmd, void *data);
int if_resolvemulti(if_t ifp, struct sockaddr **, struct sockaddr *);
uint64_t if_getcounter(if_t ifp, ift_counter counter);
struct label *if_getmaclabel(if_t ifp);
void if_setmaclabel(if_t ifp, struct label *label);
struct bpf_if *if_getbpf(if_t ifp);
uint8_t if_getpcp(if_t ifp);
void *if_getl2com(if_t ifp);
struct ifvlantrunk *if_getvlantrunk(if_t ifp);
bool if_altq_is_enabled(if_t ifp);
void *if_getafdata(if_t ifp, int);

int if_snd_tag_alloc(if_t ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **mstp);

/*
 * Traversing through interface address lists.
 *
 * The if_foreach_*() functions take a callback plus an opaque void *
 * argument.  NOTE(review): the u_int passed to and returned by the
 * callbacks looks like a running count -- confirm in if.c.
 */
struct sockaddr_dl;
typedef u_int iflladdr_cb_t(void *, struct sockaddr_dl *, u_int);
u_int if_foreach_lladdr(if_t, iflladdr_cb_t, void *);
u_int if_foreach_llmaddr(if_t, iflladdr_cb_t, void *);
u_int if_lladdr_count(if_t);
u_int if_llmaddr_count(if_t);
bool if_maddr_empty(if_t);

struct ifaddr * if_getifaddr(const if_t ifp);
typedef u_int if_addr_cb_t(void *, struct ifaddr *, u_int);
u_int if_foreach_addr_type(if_t ifp, int type, if_addr_cb_t cb, void *cb_arg);

/* Callback and match-predicate pointer types for if_foreach() and if_foreach_sleep(). */
typedef int (*if_foreach_cb_t)(if_t, void *);
typedef bool (*if_foreach_match_t)(if_t, void *);
int if_foreach(if_foreach_cb_t, void *);
int if_foreach_sleep(if_foreach_match_t, void *, if_foreach_cb_t, void *);

/* Opaque iterator structure for iterating over interfaces. */
struct if_iter {
	void *context[4];
};

if_t if_iter_start(struct if_iter *);
if_t if_iter_next(struct if_iter *);
void if_iter_finish(struct if_iter *);

/*
 * Iterator state for the ifa_iter_*() functions; ifa_iter_start() takes
 * an if_t and the functions return struct ifaddr pointers.
 */
struct ifa_iter {
	void *context[4];
};

struct ifaddr *ifa_iter_start(if_t ifp, struct ifa_iter *iter);
struct ifaddr *ifa_iter_next(struct ifa_iter *iter);
void ifa_iter_finish(struct ifa_iter *iter);

/* Setters (and some getters) for the per-interface callback functions. */
void if_setinitfn(if_t ifp, if_init_fn_t);
void if_setinputfn(if_t ifp, if_input_fn_t);
if_input_fn_t if_getinputfn(if_t ifp);
void if_setioctlfn(if_t ifp, if_ioctl_fn_t);
void if_setoutputfn(if_t ifp, if_output_fn_t);
void if_setstartfn(if_t ifp, if_start_fn_t);
if_start_fn_t if_getstartfn(if_t ifp);
void if_settransmitfn(if_t ifp, if_transmit_fn_t);
if_transmit_fn_t if_gettransmitfn(if_t ifp);
void if_setqflushfn(if_t ifp, if_qflush_fn_t);
void if_setgetcounterfn(if_t ifp, if_get_counter_t);
void if_setsndtagallocfn(if_t ifp, if_snd_tag_alloc_t);
void if_setdebugnet_methods(if_t, struct debugnet_methods *);
void if_setreassignfn(if_t ifp, if_reassign_fn_t);
void if_setratelimitqueryfn(if_t ifp, if_ratelimit_query_t);

/*
 * NB: The interface is not yet stable, drivers implementing IPSEC
 * offload need to be prepared to adapt to changes.
 */
struct if_ipsec_accel_methods {
	if_spdadd_fn_t if_spdadd;
	if_spddel_fn_t if_spddel;
	if_sa_newkey_fn_t if_sa_newkey;
	if_sa_deinstall_fn_t if_sa_deinstall;
	if_sa_cnt_fn_t if_sa_cnt;
	if_ipsec_hwassist_fn_t if_hwassist;
};
void if_setipsec_accel_methods(if_t ifp, const struct if_ipsec_accel_methods *);

/* TSO */
void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *);
int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *);

/* accessors for struct ifreq */
void *ifr_data_get_ptr(void *ifrp);
void *ifr_buffer_get_buffer(void *data);
size_t ifr_buffer_get_length(void *data);

int ifhwioctl(u_long, if_t, caddr_t, struct thread *);

/* Polling support, declared only when DEVICE_POLLING is defined. */
#ifdef DEVICE_POLLING
enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS };

typedef int poll_handler_t(if_t ifp, enum poll_cmd cmd, int count);
int ether_poll_register(poll_handler_t *h, if_t ifp);
int ether_poll_deregister(if_t ifp);
#endif /* DEVICE_POLLING */

#endif /* _KERNEL */

/*
 * NOTE(review): the include target is missing on the next line, which is
 * not a valid directive as written.  Presumably <net/if_private.h> --
 * confirm against the upstream header before restoring it.
 */
#include /* XXXAO: temporary unconditional include */
#endif /* !_NET_IF_VAR_H_ */