diff --git a/sys/dev/usb/usb_pf.c b/sys/dev/usb/usb_pf.c index d0b7f0889cea..54b5b0d55a99 100644 --- a/sys/dev/usb/usb_pf.c +++ b/sys/dev/usb/usb_pf.c @@ -1,541 +1,541 @@ /* $FreeBSD$ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from the Stanford/CMU enet packet filter, * (net/enet.c) distributed as part of 4.3BSD, and code contributed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence * Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifdef USB_GLOBAL_INCLUDE_FILE #include USB_GLOBAL_INCLUDE_FILE #else #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif /* USB_GLOBAL_INCLUDE_FILE */ static void usbpf_init(void *); static void usbpf_uninit(void *); static int usbpf_ioctl(struct ifnet *, u_long, caddr_t); static int usbpf_clone_match(struct if_clone *, const char *); static int usbpf_clone_create(struct if_clone *, char *, size_t, struct ifc_data *, struct ifnet **); static int usbpf_clone_destroy(struct if_clone *, struct ifnet *, uint32_t); static struct usb_bus *usbpf_ifname2ubus(const char *); static uint32_t usbpf_aggregate_xferflags(struct usb_xfer_flags *); static uint32_t usbpf_aggregate_status(struct usb_xfer_flags_int *); static int usbpf_xfer_frame_is_read(struct usb_xfer *, uint32_t); static uint32_t usbpf_xfer_precompute_size(struct usb_xfer *, int); static struct if_clone *usbpf_cloner; static const char usbusname[] = "usbus"; SYSINIT(usbpf_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, usbpf_init, NULL); SYSUNINIT(usbpf_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, usbpf_uninit, NULL); static void usbpf_init(void *arg) { struct if_clone_addreq req = { .match_f = usbpf_clone_match, .create_f = usbpf_clone_create, .destroy_f = usbpf_clone_destroy, }; usbpf_cloner = ifc_attach_cloner(usbusname, &req); } static void usbpf_uninit(void *arg) { int devlcnt; device_t *devlp; devclass_t dc; struct usb_bus *ubus; int error; int i; if_clone_detach(usbpf_cloner); dc = devclass_find(usbusname); if (dc == NULL) return; error = devclass_get_devices(dc, &devlp, &devlcnt); if (error) return; for (i = 0; i < devlcnt; i++) { ubus = device_get_softc(devlp[i]); if (ubus != NULL && ubus->ifp != NULL) usbpf_clone_destroy(usbpf_cloner, ubus->ifp, 0); } free(devlp, M_TEMP); } static int usbpf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { /* No configuration allowed. 
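 * The usbusN ifnet exists only as a BPF attachment point; rejecting
 * every ioctl keeps ifconfig(8) from changing its state.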
*/ return (EINVAL); } static struct usb_bus * usbpf_ifname2ubus(const char *ifname) { device_t dev; devclass_t dc; int unit; int error; if (strncmp(ifname, usbusname, sizeof(usbusname) - 1) != 0) return (NULL); error = ifc_name2unit(ifname, &unit); if (error || unit < 0) return (NULL); dc = devclass_find(usbusname); if (dc == NULL) return (NULL); dev = devclass_get_device(dc, unit); if (dev == NULL) return (NULL); return (device_get_softc(dev)); } static int usbpf_clone_match(struct if_clone *ifc, const char *name) { struct usb_bus *ubus; ubus = usbpf_ifname2ubus(name); if (ubus == NULL) return (0); if (ubus->ifp != NULL) return (0); return (1); } static int usbpf_clone_create(struct if_clone *ifc, char *name, size_t len, struct ifc_data *ifd, struct ifnet **ifpp) { int error; int unit; struct ifnet *ifp; struct usb_bus *ubus; error = ifc_name2unit(name, &unit); if (error) return (error); if (unit < 0) return (EINVAL); ubus = usbpf_ifname2ubus(name); if (ubus == NULL) return (1); if (ubus->ifp != NULL) return (1); error = ifc_alloc_unit(ifc, &unit); if (error) { device_printf(ubus->parent, "usbpf: Could not allocate " "instance\n"); return (error); } ifp = ubus->ifp = if_alloc(IFT_USB); if (ifp == NULL) { ifc_free_unit(ifc, unit); device_printf(ubus->parent, "usbpf: Could not allocate " "instance\n"); return (ENOSPC); } strlcpy(ifp->if_xname, name, sizeof(ifp->if_xname)); ifp->if_softc = ubus; ifp->if_dname = usbusname; ifp->if_dunit = unit; ifp->if_ioctl = usbpf_ioctl; if_attach(ifp); ifp->if_flags |= IFF_UP; - rt_ifmsg(ifp); + rt_ifmsg(ifp, IFF_UP); /* * XXX According to the specification of DLT_USB, it indicates * packets beginning with USB setup header. But not sure all * packets would be. */ bpfattach(ifp, DLT_USB, USBPF_HDR_LEN); *ifpp = ifp; return (0); } static int usbpf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) { struct usb_bus *ubus; int unit; ubus = ifp->if_softc; unit = ifp->if_dunit; /* * Lock USB before clearing the "ifp" pointer, to avoid * clearing the pointer in the middle of a TAP operation: */ USB_BUS_LOCK(ubus); ubus->ifp = NULL; USB_BUS_UNLOCK(ubus); bpfdetach(ifp); if_detach(ifp); if_free(ifp); ifc_free_unit(ifc, unit); return (0); } void usbpf_attach(struct usb_bus *ubus) { if (bootverbose) device_printf(ubus->parent, "usbpf: Attached\n"); } void usbpf_detach(struct usb_bus *ubus) { if (ubus->ifp != NULL) usbpf_clone_destroy(usbpf_cloner, ubus->ifp, 0); if (bootverbose) device_printf(ubus->parent, "usbpf: Detached\n"); } static uint32_t usbpf_aggregate_xferflags(struct usb_xfer_flags *flags) { uint32_t val = 0; if (flags->force_short_xfer == 1) val |= USBPF_FLAG_FORCE_SHORT_XFER; if (flags->short_xfer_ok == 1) val |= USBPF_FLAG_SHORT_XFER_OK; if (flags->short_frames_ok == 1) val |= USBPF_FLAG_SHORT_FRAMES_OK; if (flags->pipe_bof == 1) val |= USBPF_FLAG_PIPE_BOF; if (flags->proxy_buffer == 1) val |= USBPF_FLAG_PROXY_BUFFER; if (flags->ext_buffer == 1) val |= USBPF_FLAG_EXT_BUFFER; if (flags->manual_status == 1) val |= USBPF_FLAG_MANUAL_STATUS; if (flags->no_pipe_ok == 1) val |= USBPF_FLAG_NO_PIPE_OK; if (flags->stall_pipe == 1) val |= USBPF_FLAG_STALL_PIPE; return (val); } static uint32_t usbpf_aggregate_status(struct usb_xfer_flags_int *flags) { uint32_t val = 0; if (flags->open == 1) val |= USBPF_STATUS_OPEN; if (flags->transferring == 1) val |= USBPF_STATUS_TRANSFERRING; if (flags->did_dma_delay == 1) val |= USBPF_STATUS_DID_DMA_DELAY; if (flags->did_close == 1) val |= USBPF_STATUS_DID_CLOSE; if (flags->draining == 1) val |= 
USBPF_STATUS_DRAINING; if (flags->started == 1) val |= USBPF_STATUS_STARTED; if (flags->bandwidth_reclaimed == 1) val |= USBPF_STATUS_BW_RECLAIMED; if (flags->control_xfr == 1) val |= USBPF_STATUS_CONTROL_XFR; if (flags->control_hdr == 1) val |= USBPF_STATUS_CONTROL_HDR; if (flags->control_act == 1) val |= USBPF_STATUS_CONTROL_ACT; if (flags->control_stall == 1) val |= USBPF_STATUS_CONTROL_STALL; if (flags->short_frames_ok == 1) val |= USBPF_STATUS_SHORT_FRAMES_OK; if (flags->short_xfer_ok == 1) val |= USBPF_STATUS_SHORT_XFER_OK; #if USB_HAVE_BUSDMA if (flags->bdma_enable == 1) val |= USBPF_STATUS_BDMA_ENABLE; if (flags->bdma_no_post_sync == 1) val |= USBPF_STATUS_BDMA_NO_POST_SYNC; if (flags->bdma_setup == 1) val |= USBPF_STATUS_BDMA_SETUP; #endif if (flags->isochronous_xfr == 1) val |= USBPF_STATUS_ISOCHRONOUS_XFR; if (flags->curr_dma_set == 1) val |= USBPF_STATUS_CURR_DMA_SET; if (flags->can_cancel_immed == 1) val |= USBPF_STATUS_CAN_CANCEL_IMMED; if (flags->doing_callback == 1) val |= USBPF_STATUS_DOING_CALLBACK; return (val); } static int usbpf_xfer_frame_is_read(struct usb_xfer *xfer, uint32_t frame) { int isread; if ((frame == 0) && (xfer->flags_int.control_xfr != 0) && (xfer->flags_int.control_hdr != 0)) { /* special case */ if (xfer->flags_int.usb_mode == USB_MODE_DEVICE) { /* The device controller writes to memory */ isread = 1; } else { /* The host controller reads from memory */ isread = 0; } } else { isread = USB_GET_DATA_ISREAD(xfer); } return (isread); } static uint32_t usbpf_xfer_precompute_size(struct usb_xfer *xfer, int type) { uint32_t totlen; uint32_t x; uint32_t nframes; if (type == USBPF_XFERTAP_SUBMIT) nframes = xfer->nframes; else nframes = xfer->aframes; totlen = USBPF_HDR_LEN + (USBPF_FRAME_HDR_LEN * nframes); /* precompute all trace lengths */ for (x = 0; x != nframes; x++) { if (usbpf_xfer_frame_is_read(xfer, x)) { if (type != USBPF_XFERTAP_SUBMIT) { totlen += USBPF_FRAME_ALIGN( xfer->frlengths[x]); } } else { if (type == USBPF_XFERTAP_SUBMIT) { totlen += USBPF_FRAME_ALIGN( xfer->frlengths[x]); } } } return (totlen); } void usbpf_xfertap(struct usb_xfer *xfer, int type) { struct usb_bus *bus; struct usbpf_pkthdr *up; struct usbpf_framehdr *uf; usb_frlength_t offset; uint32_t totlen; uint32_t frame; uint32_t temp; uint32_t nframes; uint32_t x; uint8_t *buf; uint8_t *ptr; bus = xfer->xroot->bus; /* sanity checks */ if (bus->ifp == NULL || bus->ifp->if_bpf == NULL) return; if (!bpf_peers_present(bus->ifp->if_bpf)) return; totlen = usbpf_xfer_precompute_size(xfer, type); if (type == USBPF_XFERTAP_SUBMIT) nframes = xfer->nframes; else nframes = xfer->aframes; /* * XXX TODO XXX * * When BPF supports it we could pass a fragmented array of * buffers avoiding the data copy operation here. 
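 *
 * Note that each tap point carries payload in only one direction:
 * write data is logged at SUBMIT time and read data at completion,
 * mirroring the accounting in usbpf_xfer_precompute_size() above.
 * Each frame's payload is zero-padded up to USBPF_FRAME_ALIGN() so
 * that the next frame header stays aligned.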
*/ buf = ptr = malloc(totlen, M_TEMP, M_NOWAIT); if (buf == NULL) { device_printf(bus->parent, "usbpf: Out of memory\n"); return; } up = (struct usbpf_pkthdr *)ptr; ptr += USBPF_HDR_LEN; /* fill out header */ temp = device_get_unit(bus->bdev); up->up_totlen = htole32(totlen); up->up_busunit = htole32(temp); up->up_address = xfer->xroot->udev->device_index; if (xfer->flags_int.usb_mode == USB_MODE_DEVICE) up->up_mode = USBPF_MODE_DEVICE; else up->up_mode = USBPF_MODE_HOST; up->up_type = type; up->up_xfertype = xfer->endpoint->edesc->bmAttributes & UE_XFERTYPE; temp = usbpf_aggregate_xferflags(&xfer->flags); up->up_flags = htole32(temp); temp = usbpf_aggregate_status(&xfer->flags_int); up->up_status = htole32(temp); temp = xfer->error; up->up_error = htole32(temp); temp = xfer->interval; up->up_interval = htole32(temp); up->up_frames = htole32(nframes); temp = xfer->max_packet_size; up->up_packet_size = htole32(temp); temp = xfer->max_packet_count; up->up_packet_count = htole32(temp); temp = xfer->endpointno; up->up_endpoint = htole32(temp); up->up_speed = xfer->xroot->udev->speed; /* clear reserved area */ memset(up->up_reserved, 0, sizeof(up->up_reserved)); /* init offset and frame */ offset = 0; frame = 0; /* iterate all the USB frames and copy data, if any */ for (x = 0; x != nframes; x++) { uint32_t length; int isread; /* get length */ length = xfer->frlengths[x]; /* get frame header pointer */ uf = (struct usbpf_framehdr *)ptr; ptr += USBPF_FRAME_HDR_LEN; /* fill out packet header */ uf->length = htole32(length); uf->flags = 0; /* get information about data read/write */ isread = usbpf_xfer_frame_is_read(xfer, x); /* check if we need to copy any data */ if (isread) { if (type == USBPF_XFERTAP_SUBMIT) length = 0; else { uf->flags |= htole32( USBPF_FRAMEFLAG_DATA_FOLLOWS); } } else { if (type != USBPF_XFERTAP_SUBMIT) length = 0; else { uf->flags |= htole32( USBPF_FRAMEFLAG_DATA_FOLLOWS); } } /* check if data is read direction */ if (isread) uf->flags |= htole32(USBPF_FRAMEFLAG_READ); /* copy USB data, if any */ if (length != 0) { /* copy data */ usbd_copy_out(&xfer->frbuffers[frame], offset, ptr, length); /* align length */ temp = USBPF_FRAME_ALIGN(length); /* zero pad */ if (temp != length) memset(ptr + length, 0, temp - length); ptr += temp; } if (xfer->flags_int.isochronous_xfr) { offset += usbd_xfer_old_frame_length(xfer, x); } else { frame ++; } } bpf_tap(bus->ifp->if_bpf, buf, totlen); free(buf, M_TEMP); } diff --git a/sys/net/if.c b/sys/net/if.c index 8333c85b5eb1..7cbb750d3ab1 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1,4782 +1,4782 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2010 Bjoern A. Zeeb * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.c 8.5 (Berkeley) 1/9/95 * $FreeBSD$ */ #include "opt_bpf.h" #include "opt_inet6.h" #include "opt_inet.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #ifdef INET6 #include #include #endif /* INET6 */ #endif /* INET || INET6 */ #include /* * Consumers of struct ifreq such as tcpdump assume no pad between ifr_name * and ifr_ifru when it is used in SIOCGIFCONF. */ _Static_assert(sizeof(((struct ifreq *)0)->ifr_name) == offsetof(struct ifreq, ifr_ifru), "gap between ifr_name and ifr_ifru"); __read_mostly epoch_t net_epoch_preempt; #ifdef COMPAT_FREEBSD32 #include #include struct ifreq_buffer32 { uint32_t length; /* (size_t) */ uint32_t buffer; /* (void *) */ }; /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq32 { char ifr_name[IFNAMSIZ]; /* if name, e.g. 
"en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer32 ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; uint32_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; } ifr_ifru; }; CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32)); CTASSERT(__offsetof(struct ifreq, ifr_ifru) == __offsetof(struct ifreq32, ifr_ifru)); struct ifconf32 { int32_t ifc_len; union { uint32_t ifcu_buf; uint32_t ifcu_req; } ifc_ifcu; }; #define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32) struct ifdrv32 { char ifd_name[IFNAMSIZ]; uint32_t ifd_cmd; uint32_t ifd_len; uint32_t ifd_data; }; #define SIOCSDRVSPEC32 _IOC_NEWTYPE(SIOCSDRVSPEC, struct ifdrv32) #define SIOCGDRVSPEC32 _IOC_NEWTYPE(SIOCGDRVSPEC, struct ifdrv32) struct ifgroupreq32 { char ifgr_name[IFNAMSIZ]; u_int ifgr_len; union { char ifgru_group[IFNAMSIZ]; uint32_t ifgru_groups; } ifgr_ifgru; }; #define SIOCAIFGROUP32 _IOC_NEWTYPE(SIOCAIFGROUP, struct ifgroupreq32) #define SIOCGIFGROUP32 _IOC_NEWTYPE(SIOCGIFGROUP, struct ifgroupreq32) #define SIOCDIFGROUP32 _IOC_NEWTYPE(SIOCDIFGROUP, struct ifgroupreq32) #define SIOCGIFGMEMB32 _IOC_NEWTYPE(SIOCGIFGMEMB, struct ifgroupreq32) struct ifmediareq32 { char ifm_name[IFNAMSIZ]; int ifm_current; int ifm_mask; int ifm_status; int ifm_active; int ifm_count; uint32_t ifm_ulist; /* (int *) */ }; #define SIOCGIFMEDIA32 _IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32) #define SIOCGIFXMEDIA32 _IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32) #endif /* COMPAT_FREEBSD32 */ union ifreq_union { struct ifreq ifr; #ifdef COMPAT_FREEBSD32 struct ifreq32 ifr32; #endif }; SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Generic link-management"); SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN, &ifqmaxlen, 0, "max send queue size"); /* Log link state change events */ static int log_link_state_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW, &log_link_state_change, 0, "log interface link state change events"); /* Log promiscuous mode change events */ static int log_promisc_mode_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN, &log_promisc_mode_change, 1, "log promiscuous mode change events"); /* Interface description */ static unsigned int ifdescr_maxlen = 1024; SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, &ifdescr_maxlen, 0, "administrative maximum length for interface description"); static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); /* global sx for non-critical path ifdescr */ static struct sx ifdescr_sx; SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr"); void (*ng_ether_link_state_p)(struct ifnet *ifp, int state); void (*lagg_linkstate_p)(struct ifnet *ifp, int state); /* These are external hooks for CARP. 
*/ void (*carp_linkstate_p)(struct ifnet *ifp); void (*carp_demote_adj_p)(int, char *); int (*carp_master_p)(struct ifaddr *); #if defined(INET) || defined(INET6) int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost); int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa); int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); int (*carp_attach_p)(struct ifaddr *, int); void (*carp_detach_p)(struct ifaddr *, bool); #endif #ifdef INET int (*carp_iamatch_p)(struct ifaddr *, uint8_t **); #endif #ifdef INET6 struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6); caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr); #endif struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL; /* * XXX: Style; these should be sorted alphabetically, and unprototyped * static functions should be prototyped. Currently they are sorted by * declaration order. */ static void if_attachdomain(void *); static void if_attachdomain1(struct ifnet *); static int ifconf(u_long, caddr_t); static void if_input_default(struct ifnet *, struct mbuf *); static int if_requestencap_default(struct ifnet *, struct if_encap_req *); static void if_route(struct ifnet *, int flag, int fam); static int if_setflag(struct ifnet *, int, int, int *, int); static int if_transmit(struct ifnet *ifp, struct mbuf *m); static void if_unroute(struct ifnet *, int flag, int fam); static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int); static void do_link_state_change(void *, int); static int if_getgroup(struct ifgroupreq *, struct ifnet *); static int if_getgroupmembers(struct ifgroupreq *); static void if_delgroups(struct ifnet *); static void if_attach_internal(struct ifnet *, bool); static int if_detach_internal(struct ifnet *, bool); static void if_siocaddmulti(void *, int); static void if_link_ifnet(struct ifnet *); static bool if_unlink_ifnet(struct ifnet *, bool); #ifdef VIMAGE static int if_vmove(struct ifnet *, struct vnet *); #endif #ifdef INET6 /* * XXX: declare here to avoid to include many inet6 related files.. * should be more generalized? */ extern void nd6_setmtu(struct ifnet *); #endif /* ipsec helper hooks */ VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); int ifqmaxlen = IFQ_MAXLEN; VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */ VNET_DEFINE(struct ifgrouphead, ifg_head); /* Table of ifnet by index. */ static int if_index; static int if_indexlim = 8; static struct ifindex_entry { struct ifnet *ife_ifnet; uint16_t ife_gencnt; } *ifindex_table; SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Variables global to all interfaces"); static int sysctl_ifcount(SYSCTL_HANDLER_ARGS) { int rv = 0; IFNET_RLOCK(); for (int i = 1; i <= if_index; i++) if (ifindex_table[i].ife_ifnet != NULL && ifindex_table[i].ife_ifnet->if_vnet == curvnet) rv = i; IFNET_RUNLOCK(); return (sysctl_handle_int(oidp, &rv, 0, req)); } SYSCTL_PROC(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RD, NULL, 0, sysctl_ifcount, "I", "Maximum known interface index"); /* * The global network interface list (V_ifnet) and related state (such as * if_index, if_indexlim, and ifindex_table) are protected by an sxlock. * This may be acquired to stabilise the list, or we may rely on NET_EPOCH. 
*/
*/ struct sx ifnet_sxlock; SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE); struct sx ifnet_detach_sxlock; SX_SYSINIT_FLAGS(ifnet_detach, &ifnet_detach_sxlock, "ifnet_detach_sx", SX_RECURSE); #ifdef VIMAGE #define VNET_IS_SHUTTING_DOWN(_vnet) \ ((_vnet)->vnet_shutdown && (_vnet)->vnet_state < SI_SUB_VNET_DONE) #endif static if_com_alloc_t *if_com_alloc[256]; static if_com_free_t *if_com_free[256]; static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals"); MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); struct ifnet * ifnet_byindex(u_int idx) { struct ifnet *ifp; NET_EPOCH_ASSERT(); if (__predict_false(idx > if_index)) return (NULL); ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet); if (curvnet != NULL && ifp != NULL && ifp->if_vnet != curvnet) ifp = NULL; return (ifp); } struct ifnet * ifnet_byindex_ref(u_int idx) { struct ifnet *ifp; ifp = ifnet_byindex(idx); if (ifp == NULL || (ifp->if_flags & IFF_DYING)) return (NULL); if (!if_try_ref(ifp)) return (NULL); return (ifp); } struct ifnet * ifnet_byindexgen(uint16_t idx, uint16_t gen) { struct ifnet *ifp; NET_EPOCH_ASSERT(); if (__predict_false(idx > if_index)) return (NULL); ifp = ck_pr_load_ptr(&ifindex_table[idx].ife_ifnet); if (ifindex_table[idx].ife_gencnt == gen) return (ifp); else return (NULL); } /* * Network interface utility routines. * * Routines with ifa_ifwith* names take sockaddr *'s as * parameters. */ static void if_init(void *arg __unused) { ifindex_table = malloc(if_indexlim * sizeof(*ifindex_table), M_IFNET, M_WAITOK | M_ZERO); } SYSINIT(if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init, NULL); static void vnet_if_init(const void *unused __unused) { CK_STAILQ_INIT(&V_ifnet); CK_STAILQ_INIT(&V_ifg_head); vnet_if_clone_init(); } VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, NULL); static void if_link_ifnet(struct ifnet *ifp) { IFNET_WLOCK(); CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link); #ifdef VIMAGE curvnet->vnet_ifcnt++; #endif IFNET_WUNLOCK(); } static bool if_unlink_ifnet(struct ifnet *ifp, bool vmove) { struct ifnet *iter; int found = 0; IFNET_WLOCK(); CK_STAILQ_FOREACH(iter, &V_ifnet, if_link) if (iter == ifp) { CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link); if (!vmove) ifp->if_flags |= IFF_DYING; found = 1; break; } #ifdef VIMAGE curvnet->vnet_ifcnt--; #endif IFNET_WUNLOCK(); return (found); } #ifdef VIMAGE static void vnet_if_return(const void *unused __unused) { struct ifnet *ifp, *nifp; struct ifnet **pending; int found __diagused; int i; i = 0; /* * We need to protect our access to the V_ifnet tailq. Ordinarily we'd * enter NET_EPOCH, but that's not possible, because if_vmove() calls * if_detach_internal(), which waits for NET_EPOCH callbacks to * complete. We can't do that from within NET_EPOCH. * * However, we can also use the IFNET_xLOCK, which is the V_ifnet * read/write lock. We cannot hold the lock as we call if_vmove() * though, as that presents LOR w.r.t ifnet_sx, in_multi_sx and iflib * ctx lock. */ IFNET_WLOCK(); pending = malloc(sizeof(struct ifnet *) * curvnet->vnet_ifcnt, M_IFNET, M_WAITOK | M_ZERO); /* Return all inherited interfaces to their parent vnets. 
*/ CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { if (ifp->if_home_vnet != ifp->if_vnet) { found = if_unlink_ifnet(ifp, true); MPASS(found); pending[i++] = ifp; } } IFNET_WUNLOCK(); for (int j = 0; j < i; j++) { sx_xlock(&ifnet_detach_sxlock); if_vmove(pending[j], pending[j]->if_home_vnet); sx_xunlock(&ifnet_detach_sxlock); } free(pending, M_IFNET); } VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY, vnet_if_return, NULL); #endif /* * Allocate a struct ifnet and an index for an interface. A layer 2 * common structure will also be allocated if an allocation routine is * registered for the passed type. */ static struct ifnet * if_alloc_domain(u_char type, int numa_domain) { struct ifnet *ifp; u_short idx; KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large")); if (numa_domain == IF_NODOM) ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK | M_ZERO); else ifp = malloc_domainset(sizeof(struct ifnet), M_IFNET, DOMAINSET_PREF(numa_domain), M_WAITOK | M_ZERO); ifp->if_type = type; ifp->if_alloctype = type; ifp->if_numa_domain = numa_domain; #ifdef VIMAGE ifp->if_vnet = curvnet; #endif if (if_com_alloc[type] != NULL) { ifp->if_l2com = if_com_alloc[type](type, ifp); KASSERT(ifp->if_l2com, ("%s: if_com_alloc[%u] failed", __func__, type)); } IF_ADDR_LOCK_INIT(ifp); TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp); TASK_INIT(&ifp->if_addmultitask, 0, if_siocaddmulti, ifp); ifp->if_afdata_initialized = 0; IF_AFDATA_LOCK_INIT(ifp); CK_STAILQ_INIT(&ifp->if_addrhead); CK_STAILQ_INIT(&ifp->if_multiaddrs); CK_STAILQ_INIT(&ifp->if_groups); #ifdef MAC mac_ifnet_init(ifp); #endif ifq_init(&ifp->if_snd, ifp); refcount_init(&ifp->if_refcount, 1); /* Index reference. */ for (int i = 0; i < IFCOUNTERS; i++) ifp->if_counters[i] = counter_u64_alloc(M_WAITOK); ifp->if_get_counter = if_get_counter_default; ifp->if_pcp = IFNET_PCP_NONE; /* Allocate an ifindex array entry. */ IFNET_WLOCK(); /* * Try to find an empty slot below if_index. If we fail, take the * next slot. */ for (idx = 1; idx <= if_index; idx++) { if (ifindex_table[idx].ife_ifnet == NULL) break; } /* Catch if_index overflow. */ if (idx >= if_indexlim) { struct ifindex_entry *new, *old; int newlim; newlim = if_indexlim * 2; new = malloc(newlim * sizeof(*new), M_IFNET, M_WAITOK | M_ZERO); memcpy(new, ifindex_table, if_indexlim * sizeof(*new)); old = ifindex_table; ck_pr_store_ptr(&ifindex_table, new); if_indexlim = newlim; epoch_wait_preempt(net_epoch_preempt); free(old, M_IFNET); } if (idx > if_index) if_index = idx; ifp->if_index = idx; ifp->if_idxgen = ifindex_table[idx].ife_gencnt; ck_pr_store_ptr(&ifindex_table[idx].ife_ifnet, ifp); IFNET_WUNLOCK(); return (ifp); } struct ifnet * if_alloc_dev(u_char type, device_t dev) { int numa_domain; if (dev == NULL || bus_get_domain(dev, &numa_domain) != 0) return (if_alloc_domain(type, IF_NODOM)); return (if_alloc_domain(type, numa_domain)); } struct ifnet * if_alloc(u_char type) { return (if_alloc_domain(type, IF_NODOM)); } /* * Do the actual work of freeing a struct ifnet, and layer 2 common * structure. This call is made when the network epoch guarantees * us that nobody holds a pointer to the interface. 
*/
*/ static void if_free_deferred(epoch_context_t ctx) { struct ifnet *ifp = __containerof(ctx, struct ifnet, if_epoch_ctx); KASSERT((ifp->if_flags & IFF_DYING), ("%s: interface not dying", __func__)); if (if_com_free[ifp->if_alloctype] != NULL) if_com_free[ifp->if_alloctype](ifp->if_l2com, ifp->if_alloctype); #ifdef MAC mac_ifnet_destroy(ifp); #endif /* MAC */ IF_AFDATA_DESTROY(ifp); IF_ADDR_LOCK_DESTROY(ifp); ifq_delete(&ifp->if_snd); for (int i = 0; i < IFCOUNTERS; i++) counter_u64_free(ifp->if_counters[i]); if_freedescr(ifp->if_description); free(ifp->if_hw_addr, M_IFADDR); free(ifp, M_IFNET); } /* * Deregister an interface and free the associated storage. */ void if_free(struct ifnet *ifp) { ifp->if_flags |= IFF_DYING; /* XXX: Locking */ /* * XXXGL: An interface index is really an alias to ifp pointer. * Why would we clear the alias now, and not in the deferred * context? Indeed there is nothing wrong with some network * thread obtaining ifp via ifnet_byindex() inside the network * epoch and then dereferencing ifp while we perform if_free(), * and after if_free() finished, too. * * This early index freeing was important back when ifindex was * virtualized and interface would outlive the vnet. */ IFNET_WLOCK(); MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp); ck_pr_store_ptr(&ifindex_table[ifp->if_index].ife_ifnet, NULL); ifindex_table[ifp->if_index].ife_gencnt++; while (if_index > 0 && ifindex_table[if_index].ife_ifnet == NULL) if_index--; IFNET_WUNLOCK(); if (refcount_release(&ifp->if_refcount)) NET_EPOCH_CALL(if_free_deferred, &ifp->if_epoch_ctx); } /* * Interfaces to keep an ifnet type-stable despite the possibility of the * driver calling if_free(). If there are additional references, we defer * freeing the underlying data structure. */ void if_ref(struct ifnet *ifp) { u_int old __diagused; /* We don't assert the ifnet list lock here, but arguably should. */ old = refcount_acquire(&ifp->if_refcount); KASSERT(old > 0, ("%s: ifp %p has 0 refs", __func__, ifp)); } bool if_try_ref(struct ifnet *ifp) { NET_EPOCH_ASSERT(); return (refcount_acquire_if_not_zero(&ifp->if_refcount)); } void if_rele(struct ifnet *ifp) { if (!refcount_release(&ifp->if_refcount)) return; NET_EPOCH_CALL(if_free_deferred, &ifp->if_epoch_ctx); } void ifq_init(struct ifaltq *ifq, struct ifnet *ifp) { mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF); if (ifq->ifq_maxlen == 0) ifq->ifq_maxlen = ifqmaxlen; ifq->altq_type = 0; ifq->altq_disc = NULL; ifq->altq_flags &= ALTQF_CANTCHANGE; ifq->altq_tbr = NULL; ifq->altq_ifp = ifp; } void ifq_delete(struct ifaltq *ifq) { mtx_destroy(&ifq->ifq_mtx); } /* * Perform generic interface initialization tasks and attach the interface * to the list of "active" interfaces. If vmove flag is set on entry * to if_attach_internal(), perform only a limited subset of initialization * tasks, given that we are moving from one vnet to another an ifnet which * has already been fully initialized. * * Note that if_detach_internal() removes group membership unconditionally * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL. * Thus, when if_vmove() is applied to a cloned interface, group membership * is lost while a cloned one always joins a group whose name is * ifc->ifc_name. To recover this after if_detach_internal() and * if_attach_internal(), the cloner should be specified to * if_attach_internal() via ifc. If it is non-NULL, if_attach_internal() * attempts to join a group whose name is ifc->ifc_name. 
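 *
 * A minimal sketch, not part of this change, of the attach sequence as a
 * pseudo-device might drive it; the name "example0" is hypothetical and
 * the guard macro is never defined, but the calls mirror
 * usbpf_clone_create() in this change, including the rt_ifmsg() flag
 * argument introduced here:
 */
#ifdef EXAMPLE_SKETCH
static void
example_create(void)
{
	struct ifnet *ifp;

	ifp = if_alloc(IFT_USB);
	if (ifp == NULL)
		return;
	strlcpy(ifp->if_xname, "example0", sizeof(ifp->if_xname));
	if_attach(ifp);
	ifp->if_flags |= IFF_UP;
	rt_ifmsg(ifp, IFF_UP);	/* announce the flag change */
}
#endif
/*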
* * XXX: * - The decision to return void and thus require this function to * succeed is questionable. * - We should probably do more sanity checking. For instance we don't * do anything to ensure if_xname is unique or non-empty. */ void if_attach(struct ifnet *ifp) { if_attach_internal(ifp, false); } /* * Compute the least common TSO limit. */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax) { /* * 1) If there is no limit currently, take the limit from * the network adapter. * * 2) If the network adapter has a limit below the current * limit, apply it. */ if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 && ifp->if_hw_tsomax < pmax->tsomaxbytes)) { pmax->tsomaxbytes = ifp->if_hw_tsomax; } if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 && ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) { pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; } if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 && ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) { pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } /* * Update the TSO limit of a network adapter. * * Returns zero if there was no change, else non-zero. */ int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax) { int retval = 0; if (ifp->if_hw_tsomax != pmax->tsomaxbytes) { ifp->if_hw_tsomax = pmax->tsomaxbytes; retval++; } if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) { ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize; retval++; } if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) { ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount; retval++; } return (retval); } static void if_attach_internal(struct ifnet *ifp, bool vmove) { unsigned socksize, ifasize; int namelen, masklen; struct sockaddr_dl *sdl; struct ifaddr *ifa; MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp); #ifdef VIMAGE ifp->if_vnet = curvnet; if (ifp->if_home_vnet == NULL) ifp->if_home_vnet = curvnet; #endif if_addgroup(ifp, IFG_ALL); #ifdef VIMAGE /* Restore group membership for cloned interface. */ if (vmove) if_clone_restoregroup(ifp); #endif getmicrotime(&ifp->if_lastchange); ifp->if_epoch = time_uptime; KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) || (ifp->if_transmit != NULL && ifp->if_qflush != NULL), ("transmit and qflush must both either be set or both be NULL")); if (ifp->if_transmit == NULL) { ifp->if_transmit = if_transmit; ifp->if_qflush = if_qflush; } if (ifp->if_input == NULL) ifp->if_input = if_input_default; if (ifp->if_requestencap == NULL) ifp->if_requestencap = if_requestencap_default; if (!vmove) { #ifdef MAC mac_ifnet_create(ifp); #endif /* * Create a Link Level name for this device. */ namelen = strlen(ifp->if_xname); /* * Always save enough space for any possible name so we * can do a rename in place later. */
masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ; socksize = masklen + ifp->if_addrlen; if (socksize < sizeof(*sdl)) socksize = sizeof(*sdl); socksize = roundup2(socksize, sizeof(long)); ifasize = sizeof(*ifa) + 2 * socksize; ifa = ifa_alloc(ifasize, M_WAITOK); sdl = (struct sockaddr_dl *)(ifa + 1); sdl->sdl_len = socksize; sdl->sdl_family = AF_LINK; bcopy(ifp->if_xname, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; ifp->if_addr = ifa; ifa->ifa_ifp = ifp; ifa->ifa_addr = (struct sockaddr *)sdl; sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); ifa->ifa_netmask = (struct sockaddr *)sdl; sdl->sdl_len = masklen; while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; CK_STAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); /* Reliably crash if used uninitialized. */ ifp->if_broadcastaddr = NULL; if (ifp->if_type == IFT_ETHER) { ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR, M_WAITOK | M_ZERO); } #if defined(INET) || defined(INET6) /* Use defaults for TSO, if nothing is set */ if (ifp->if_hw_tsomax == 0 && ifp->if_hw_tsomaxsegcount == 0 && ifp->if_hw_tsomaxsegsize == 0) { /* * The TSO defaults need to be such that an * NFS mbuf list of 35 mbufs totalling just * below 64K works and that a chain of mbufs * can be defragged into at most 32 segments: */ ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); ifp->if_hw_tsomaxsegcount = 35; ifp->if_hw_tsomaxsegsize = 2048; /* 2K */ /* XXX some drivers set IFCAP_TSO after ethernet attach */ if (ifp->if_capabilities & IFCAP_TSO) { if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); } } #endif } #ifdef VIMAGE else { /* * Update the interface index in the link layer address * of the interface. */ for (ifa = ifp->if_addr; ifa != NULL; ifa = CK_STAILQ_NEXT(ifa, ifa_link)) { if (ifa->ifa_addr->sa_family == AF_LINK) { sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_index = ifp->if_index; } } } #endif if_link_ifnet(ifp); if (domain_init_status >= 2) if_attachdomain1(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); } static void if_epochalloc(void *dummy __unused) { net_epoch_preempt = epoch_alloc("Net preemptible", EPOCH_PREEMPT); } SYSINIT(ifepochalloc, SI_SUB_EPOCH, SI_ORDER_ANY, if_epochalloc, NULL); static void if_attachdomain(void *dummy) { struct ifnet *ifp; CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) if_attachdomain1(ifp); } SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND, if_attachdomain, NULL); static void if_attachdomain1(struct ifnet *ifp) { struct domain *dp; /* * Since dp->dom_ifattach calls malloc() with M_WAITOK, we * cannot fully lock ifp->if_afdata initialization. */ IF_AFDATA_LOCK(ifp); if (ifp->if_afdata_initialized >= domain_init_status) { IF_AFDATA_UNLOCK(ifp); log(LOG_WARNING, "%s called more than once on %s\n", __func__, ifp->if_xname); return; } ifp->if_afdata_initialized = domain_init_status; IF_AFDATA_UNLOCK(ifp); /* address family dependent data region */ bzero(ifp->if_afdata, sizeof(ifp->if_afdata)); SLIST_FOREACH(dp, &domains, dom_next) { if (dp->dom_ifattach) ifp->if_afdata[dp->dom_family] = (*dp->dom_ifattach)(ifp); } } /* * Remove any unicast or broadcast network addresses from an interface.
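 *
 * A hedged sketch of how an aggregating driver might combine member TSO
 * limits using the helpers above; the function and port names are
 * hypothetical and the guard macro is never defined:
 */
#ifdef EXAMPLE_SKETCH
static void
example_tso_merge(struct ifnet *port0, struct ifnet *port1,
    struct ifnet *sc_ifp)
{
	struct ifnet_hw_tsomax hw_tsomax;

	memset(&hw_tsomax, 0, sizeof(hw_tsomax));
	if_hw_tsomax_common(port0, &hw_tsomax);	/* least common limits */
	if_hw_tsomax_common(port1, &hw_tsomax);
	if (if_hw_tsomax_update(sc_ifp, &hw_tsomax) != 0)
		if_printf(sc_ifp, "TSO limits changed\n");
}
#endif
/*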
*/ void if_purgeaddrs(struct ifnet *ifp) { struct ifaddr *ifa; #ifdef INET6 /* * We need to leave the multicast groups of proxy NDP llentries * before in6_purgeifaddr(), because the llentries are the keys * for the in6_multi objects of proxy NDP entries. * in6_purgeifaddr() cleans up llentries, including proxy NDPs, * so if it were called earlier we would lose the keys. */ in6_purge_proxy_ndp(ifp); #endif while (1) { struct epoch_tracker et; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) break; } NET_EPOCH_EXIT(et); if (ifa == NULL) break; #ifdef INET /* XXX: Ugly!! ad hoc just for INET */ if (ifa->ifa_addr->sa_family == AF_INET) { struct ifaliasreq ifr; bzero(&ifr, sizeof(ifr)); ifr.ifra_addr = *ifa->ifa_addr; if (ifa->ifa_dstaddr) ifr.ifra_broadaddr = *ifa->ifa_dstaddr; if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp, NULL) == 0) continue; } #endif /* INET */ #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) { in6_purgeifaddr((struct in6_ifaddr *)ifa); /* ifp_addrhead is already updated */ continue; } #endif /* INET6 */ IF_ADDR_WLOCK(ifp); CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } } /* * Remove any multicast network addresses from an interface when an ifnet * is going away. */ static void if_purgemaddrs(struct ifnet *ifp) { struct ifmultiaddr *ifma; IF_ADDR_WLOCK(ifp); while (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) { ifma = CK_STAILQ_FIRST(&ifp->if_multiaddrs); CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); if_delmulti_locked(ifp, ifma, 1); } IF_ADDR_WUNLOCK(ifp); } /* * Detach an interface, removing it from the list of "active" interfaces. * If vmove flag is set on entry to if_detach_internal(), perform only a * limited subset of cleanup tasks, given that we are moving an ifnet from * one vnet to another, where it must be fully operational. * * XXXRW: There are some significant questions about event ordering, and * how to prevent things from starting to use the interface during detach. */ void if_detach(struct ifnet *ifp) { bool found; CURVNET_SET_QUIET(ifp->if_vnet); found = if_unlink_ifnet(ifp, false); if (found) { sx_xlock(&ifnet_detach_sxlock); if_detach_internal(ifp, false); sx_xunlock(&ifnet_detach_sxlock); } CURVNET_RESTORE(); } /* * The vmove flag, if set, indicates that we are called from a callpath * that is moving an interface to a different vnet instance. * * The shutdown flag, if set, indicates that we are called in the * process of shutting down a vnet instance. Currently only the * vnet_if_return SYSUNINIT function sets it. Note: we can be called * on a vnet instance shutdown without this flag being set, e.g., when * the cloned interfaces are destroyed as the first thing of teardown. */ static int if_detach_internal(struct ifnet *ifp, bool vmove) { struct ifaddr *ifa; int i; struct domain *dp; #ifdef VIMAGE bool shutdown; shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet); #endif /* * At this point we know the interface was still on the ifnet list * and we removed it, so we are in a stable state. */ epoch_wait_preempt(net_epoch_preempt); /* * Ensure all pending EPOCH(9) callbacks have been executed. This * fixes issues with late destruction of multicast options * which lead to leave group calls, which in turn access the * belonging ifnet structure: */ NET_EPOCH_DRAIN_CALLBACKS(); /* * In any case (destroy or vmove) detach us from the groups * and remove/wait for pending events on the taskq.
* XXX-BZ in theory an interface could still enqueue a taskq change? */ if_delgroups(ifp); taskqueue_drain(taskqueue_swi, &ifp->if_linktask); taskqueue_drain(taskqueue_swi, &ifp->if_addmultitask); if_down(ifp); #ifdef VIMAGE /* * On VNET shutdown abort here as the stack teardown will do all * the work top-down for us. */ if (shutdown) { /* Give interface users the chance to clean up. */ EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); /* * In case of a vmove we are done here without error. * If we would signal an error it would lead to the same * abort as if we did not find the ifnet anymore. * if_detach() calls us in void context and does not care * about an early abort notification, so life is splendid :) */ goto finish_vnet_shutdown; } #endif /* * At this point we are not tearing down a VNET and are either * going to destroy or vmove the interface and have to clean up * accordingly. */ /* * Remove routes and flush queues. */ #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) altq_disable(&ifp->if_snd); if (ALTQ_IS_ATTACHED(&ifp->if_snd)) altq_detach(&ifp->if_snd); #endif if_purgeaddrs(ifp); #ifdef INET in_ifdetach(ifp); #endif #ifdef INET6 /* * Remove all IPv6 kernel structs related to ifp. This should be done * before removing routing entries below, since IPv6 interface direct * routes are expected to be removed by the IPv6-specific kernel API. * Otherwise, the kernel will detect some inconsistency and complain. */ in6_ifdetach(ifp); #endif if_purgemaddrs(ifp); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); if (!vmove) { /* * Prevent further calls into the device driver via ifnet. */ if_dead(ifp); /* * Clean up all addresses. */ IF_ADDR_WLOCK(ifp); if (!CK_STAILQ_EMPTY(&ifp->if_addrhead)) { ifa = CK_STAILQ_FIRST(&ifp->if_addrhead); CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } else IF_ADDR_WUNLOCK(ifp); } rt_flushifroutes(ifp); #ifdef VIMAGE finish_vnet_shutdown: #endif /* * We cannot hold the lock over dom_ifdetach calls as they might * sleep, for example trying to drain a callout, thus opening up a * theoretical race with re-attaching. */ IF_AFDATA_LOCK(ifp); i = ifp->if_afdata_initialized; ifp->if_afdata_initialized = 0; IF_AFDATA_UNLOCK(ifp); if (i == 0) return (0); SLIST_FOREACH(dp, &domains, dom_next) { if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) { (*dp->dom_ifdetach)(ifp, ifp->if_afdata[dp->dom_family]); ifp->if_afdata[dp->dom_family] = NULL; } } return (0); } #ifdef VIMAGE /* * if_vmove() performs a limited version of if_detach() in the current * vnet and if_attach()es the ifnet to the vnet specified as the 2nd arg. */ static int if_vmove(struct ifnet *ifp, struct vnet *new_vnet) { #ifdef DEV_BPF u_int bif_dlt, bif_hdrlen; #endif int rc; #ifdef DEV_BPF /* * if_detach_internal() will call the eventhandler to notify * interface departure. That will detach if_bpf. We need to * save the dlt and hdrlen so we can re-attach it later. */ bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen); #endif /* * Detach from the current vnet, but preserve LLADDR info, and do not * mark the ifnet as dead etc., so that it can be reattached later. * If we cannot find it, we lost the race to someone else. */ rc = if_detach_internal(ifp, true); if (rc != 0) return (rc); /* * Perform interface-specific reassignment tasks, if provided by * the driver. */ if (ifp->if_reassign != NULL) ifp->if_reassign(ifp, new_vnet, NULL); /* * Switch to the context of the target vnet. */
CURVNET_SET_QUIET(new_vnet); if_attach_internal(ifp, true); #ifdef DEV_BPF if (ifp->if_bpf == NULL) bpfattach(ifp, bif_dlt, bif_hdrlen); #endif CURVNET_RESTORE(); return (0); } /* * Move an ifnet to or from another child prison/vnet, specified by the jail id. */ static int if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) { struct prison *pr; struct ifnet *difp; int error; bool found __diagused; bool shutdown; MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp); /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Do not try to move the iface from and to the same prison. */ if (pr->pr_vnet == ifp->if_vnet) { prison_free(pr); return (EEXIST); } /* Make sure the named iface does not exist in the destination prison/vnet. */ /* XXX Lock interfaces to avoid races. */ CURVNET_SET_QUIET(pr->pr_vnet); difp = ifunit(ifname); if (difp != NULL) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } sx_xlock(&ifnet_detach_sxlock); /* Make sure the VNET is stable. */ shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet); if (shutdown) { sx_xunlock(&ifnet_detach_sxlock); CURVNET_RESTORE(); prison_free(pr); return (EBUSY); } CURVNET_RESTORE(); found = if_unlink_ifnet(ifp, true); if (! found) { sx_xunlock(&ifnet_detach_sxlock); CURVNET_RESTORE(); prison_free(pr); return (ENODEV); } /* Move the interface into the child jail/vnet. */ error = if_vmove(ifp, pr->pr_vnet); /* Report the new if_xname back to userland on success. */ if (error == 0) sprintf(ifname, "%s", ifp->if_xname); sx_xunlock(&ifnet_detach_sxlock); prison_free(pr); return (error); } static int if_vmove_reclaim(struct thread *td, char *ifname, int jid) { struct prison *pr; struct vnet *vnet_dst; struct ifnet *ifp; int error, found __diagused; bool shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Make sure the named iface exists in the source prison/vnet. */ CURVNET_SET(pr->pr_vnet); ifp = ifunit(ifname); /* XXX Lock to avoid races. */ if (ifp == NULL) { CURVNET_RESTORE(); prison_free(pr); return (ENXIO); } /* Do not try to move the iface from and to the same prison. */ vnet_dst = TD_TO_VNET(td); if (vnet_dst == ifp->if_vnet) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } /* Make sure the VNET is stable. */ shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet); if (shutdown) { CURVNET_RESTORE(); prison_free(pr); return (EBUSY); } /* Get the interface back from the child jail/vnet. */ found = if_unlink_ifnet(ifp, true); MPASS(found); sx_xlock(&ifnet_detach_sxlock); error = if_vmove(ifp, vnet_dst); sx_xunlock(&ifnet_detach_sxlock); CURVNET_RESTORE(); /* Report the new if_xname back to userland on success.
*/ if (error == 0) sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (error); } #endif /* VIMAGE */ /* * Add a group to an interface */ int if_addgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_group *ifg = NULL; struct ifg_member *ifgm; int new = 0; if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' && groupname[strlen(groupname) - 1] <= '9') return (EINVAL); IFNET_WLOCK(); CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) { IFNET_WUNLOCK(); return (EEXIST); } if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL) { IFNET_WUNLOCK(); return (ENOMEM); } if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, groupname)) break; if (ifg == NULL) { if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); free(ifgm, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); ifg->ifg_refcnt = 0; CK_STAILQ_INIT(&ifg->ifg_members); CK_STAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next); new = 1; } ifg->ifg_refcnt++; ifgl->ifgl_group = ifg; ifgm->ifgm_ifp = ifp; IF_ADDR_WLOCK(ifp); CK_STAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); CK_STAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); IFNET_WUNLOCK(); if (new) EVENTHANDLER_INVOKE(group_attach_event, ifg); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Helper function to remove a group out of an interface. Expects the global * ifnet lock to be write-locked, and drops it before returning. */ static void _if_delgroup_locked(struct ifnet *ifp, struct ifg_list *ifgl, const char *groupname) { struct ifg_member *ifgm; bool freeifgl; IFNET_WLOCK_ASSERT(); IF_ADDR_WLOCK(ifp); CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next); IF_ADDR_WUNLOCK(ifp); CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) { if (ifgm->ifgm_ifp == ifp) { CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifg_member, ifgm_next); break; } } if (--ifgl->ifgl_group->ifg_refcnt == 0) { CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group, ifg_next); freeifgl = true; } else { freeifgl = false; } IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); if (freeifgl) { EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } free(ifgm, M_TEMP); free(ifgl, M_TEMP); EVENTHANDLER_INVOKE(group_change_event, groupname); } /* * Remove a group from an interface */ int if_delgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; IFNET_WLOCK(); CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) break; if (ifgl == NULL) { IFNET_WUNLOCK(); return (ENOENT); } _if_delgroup_locked(ifp, ifgl, groupname); return (0); } /* * Remove an interface from all groups */ static void if_delgroups(struct ifnet *ifp) { struct ifg_list *ifgl; char groupname[IFNAMSIZ]; IFNET_WLOCK(); while ((ifgl = CK_STAILQ_FIRST(&ifp->if_groups)) != NULL) { strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ); _if_delgroup_locked(ifp, ifgl, groupname); IFNET_WLOCK(); } IFNET_WUNLOCK(); } /* * Stores all groups from an interface in memory pointed to by ifgr. 
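 *
 * A usage sketch for the group API above; the group name is made up and
 * the guard macro is never defined. Note that if_addgroup() rejects
 * names ending in a digit:
 */
#ifdef EXAMPLE_SKETCH
static int
example_join_and_leave(struct ifnet *ifp)
{
	int error;

	error = if_addgroup(ifp, "examplegrp");
	if (error != 0)
		return (error);
	/* ... the interface is now a member of "examplegrp" ... */
	return (if_delgroup(ifp, "examplegrp"));
}
#endif
/*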
*/ static int if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp) { int len, error; struct ifg_list *ifgl; struct ifg_req ifgrq, *ifgp; NET_EPOCH_ASSERT(); if (ifgr->ifgr_len == 0) { CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) ifgr->ifgr_len += sizeof(struct ifg_req); return (0); } len = ifgr->ifgr_len; ifgp = ifgr->ifgr_groups; /* XXX: wire */ CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { if (len < sizeof(ifgrq)) return (EINVAL); bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group, sizeof(ifgrq.ifgrq_group)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) return (error); len -= sizeof(ifgrq); ifgp++; } return (0); } /* * Stores all members of a group in memory pointed to by ifgr */ static int if_getgroupmembers(struct ifgroupreq *ifgr) { struct ifg_group *ifg; struct ifg_member *ifgm; struct ifg_req ifgrq, *ifgp; int len, error; IFNET_RLOCK(); CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0) break; if (ifg == NULL) { IFNET_RUNLOCK(); return (ENOENT); } if (ifgr->ifgr_len == 0) { CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) ifgr->ifgr_len += sizeof(ifgrq); IFNET_RUNLOCK(); return (0); } len = ifgr->ifgr_len; ifgp = ifgr->ifgr_groups; CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { if (len < sizeof(ifgrq)) { IFNET_RUNLOCK(); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname, sizeof(ifgrq.ifgrq_member)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { IFNET_RUNLOCK(); return (error); } len -= sizeof(ifgrq); ifgp++; } IFNET_RUNLOCK(); return (0); } /* * Return counter values from counter(9)s stored in ifnet. */ uint64_t if_get_counter_default(struct ifnet *ifp, ift_counter cnt) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); return (counter_u64_fetch(ifp->if_counters[cnt])); } /* * Increase an ifnet counter. Usually used for counters shared * between the stack and a driver, but the function supports them all. */ void if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); counter_u64_add(ifp->if_counters[cnt], inc); } /* * Copy data from ifnet to userland API structure if_data.
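 *
 * A brief sketch of the counter(9) helpers above from a hypothetical
 * driver RX path; the guard macro is never defined:
 */
#ifdef EXAMPLE_SKETCH
static void
example_count_rx(struct ifnet *ifp, u_int len)
{
	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(ifp, IFCOUNTER_IBYTES, len);
	if_printf(ifp, "ipackets %ju\n",
	    (uintmax_t)ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS));
}
#endif
/*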
*/ void if_data_copy(struct ifnet *ifp, struct if_data *ifd) { ifd->ifi_type = ifp->if_type; ifd->ifi_physical = 0; ifd->ifi_addrlen = ifp->if_addrlen; ifd->ifi_hdrlen = ifp->if_hdrlen; ifd->ifi_link_state = ifp->if_link_state; ifd->ifi_vhid = 0; ifd->ifi_datalen = sizeof(struct if_data); ifd->ifi_mtu = ifp->if_mtu; ifd->ifi_metric = ifp->if_metric; ifd->ifi_baudrate = ifp->if_baudrate; ifd->ifi_hwassist = ifp->if_hwassist; ifd->ifi_epoch = ifp->if_epoch; ifd->ifi_lastchange = ifp->if_lastchange; ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS); ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS); ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS); ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES); ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES); ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS); ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS); ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS); ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS); ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO); } /* * Initialization, destruction and refcounting functions for ifaddrs. */ struct ifaddr * ifa_alloc(size_t size, int flags) { struct ifaddr *ifa; KASSERT(size >= sizeof(struct ifaddr), ("%s: invalid size %zu", __func__, size)); ifa = malloc(size, M_IFADDR, M_ZERO | flags); if (ifa == NULL) return (NULL); if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL) goto fail; refcount_init(&ifa->ifa_refcnt, 1); return (ifa); fail: /* free(NULL) is okay */ counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); return (NULL); } void ifa_ref(struct ifaddr *ifa) { u_int old __diagused; old = refcount_acquire(&ifa->ifa_refcnt); KASSERT(old > 0, ("%s: ifa %p has 0 refs", __func__, ifa)); } int ifa_try_ref(struct ifaddr *ifa) { NET_EPOCH_ASSERT(); return (refcount_acquire_if_not_zero(&ifa->ifa_refcnt)); } static void ifa_destroy(epoch_context_t ctx) { struct ifaddr *ifa; ifa = __containerof(ctx, struct ifaddr, ifa_epoch_ctx); counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); } void ifa_free(struct ifaddr *ifa) { if (refcount_release(&ifa->ifa_refcnt)) NET_EPOCH_CALL(ifa_destroy, &ifa->ifa_epoch_ctx); } /* * XXX: Because sockaddr_dl has deeper structure than the sockaddr * structs used to represent other address families, it is necessary * to perform a different comparison. */ #define sa_dl_equal(a1, a2) \ ((((const struct sockaddr_dl *)(a1))->sdl_len == \ ((const struct sockaddr_dl *)(a2))->sdl_len) && \ (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)), \ CLLADDR((const struct sockaddr_dl *)(a2)), \ ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0)) /* * Locate an interface based on a complete address. 
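 *
 * Callers that need the result past the lookup must take a reference
 * before leaving the epoch section; a sketch (helper name hypothetical,
 * guard macro never defined):
 */
#ifdef EXAMPLE_SKETCH
static struct ifaddr *
example_lookup_ref(const struct sockaddr *sa)
{
	struct epoch_tracker et;
	struct ifaddr *ifa;

	NET_EPOCH_ENTER(et);
	ifa = ifa_ifwithaddr(sa);
	if (ifa != NULL)
		ifa_ref(ifa);	/* release later with ifa_free() */
	NET_EPOCH_EXIT(et);
	return (ifa);
}
#endif
/*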
*/ /*ARGSUSED*/ struct ifaddr * ifa_ifwithaddr(const struct sockaddr *addr) { struct ifnet *ifp; struct ifaddr *ifa; NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (sa_equal(addr, ifa->ifa_addr)) { goto done; } /* IP6 doesn't have broadcast */ if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { goto done; } } } ifa = NULL; done: return (ifa); } int ifa_ifwithaddr_check(const struct sockaddr *addr) { struct epoch_tracker et; int rc; NET_EPOCH_ENTER(et); rc = (ifa_ifwithaddr(addr) != NULL); NET_EPOCH_EXIT(et); return (rc); } /* * Locate an interface based on the broadcast address. */ /* ARGSUSED */ struct ifaddr * ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { goto done; } } } ifa = NULL; done: return (ifa); } /* * Locate the point to point interface with a given destination address. */ /*ARGSUSED*/ struct ifaddr * ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0) continue; if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { goto done; } } } ifa = NULL; done: return (ifa); } /* * Find an interface on a specific network. If many match, the most * specific found is chosen. */ struct ifaddr * ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; const char *addr_data = addr->sa_data, *cplim; NET_EPOCH_ASSERT(); /* * AF_LINK addresses can be looked up directly by their index number, * so do that if we can. */ if (af == AF_LINK) { ifp = ifnet_byindex( ((const struct sockaddr_dl *)addr)->sdl_index); return (ifp ? ifp->if_addr : NULL); } /* * Scan through each interface, looking for ones that have addresses * in this address family and the requested fib. */ CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { const char *cp, *cp2, *cp3; if (ifa->ifa_addr->sa_family != af) next: continue; if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) { /* * This is a bit broken as it doesn't * take into account that the remote end may * be a single node in the network we are * looking for. * The trouble is that we don't know the * netmask for the remote end. */ if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { goto done; } } else { /* * Scan all the bits in the ifa's address. * If a bit disagrees with what we are * looking for, mask it with the netmask * to see if it really matters.
* (A byte at a time) */ if (ifa->ifa_netmask == 0) continue; cp = addr_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; while (cp3 < cplim) if ((*cp++ ^ *cp2++) & *cp3++) goto next; /* next address! */ /* * If the netmask of what we just found * is more specific than what we had before * (if we had one), or if the virtual status * of new prefix is better than of the old one, * then remember the new one before continuing * to search for an even better one. */ if (ifa_maybe == NULL || ifa_preferred(ifa_maybe, ifa) || rn_refines((caddr_t)ifa->ifa_netmask, (caddr_t)ifa_maybe->ifa_netmask)) { ifa_maybe = ifa; } } } } ifa = ifa_maybe; ifa_maybe = NULL; done: return (ifa); } /* * Find an interface address specific to an interface best matching * a given address. */ struct ifaddr * ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) { struct ifaddr *ifa; const char *cp, *cp2, *cp3; char *cplim; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; if (af >= AF_MAX) return (NULL); NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != af) continue; if (ifa_maybe == NULL) ifa_maybe = ifa; if (ifa->ifa_netmask == 0) { if (sa_equal(addr, ifa->ifa_addr) || (ifa->ifa_dstaddr && sa_equal(addr, ifa->ifa_dstaddr))) goto done; continue; } if (ifp->if_flags & IFF_POINTOPOINT) { if (sa_equal(addr, ifa->ifa_dstaddr)) goto done; } else { cp = addr->sa_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; for (; cp3 < cplim; cp3++) if ((*cp++ ^ *cp2++) & *cp3) break; if (cp3 == cplim) goto done; } } ifa = ifa_maybe; done: return (ifa); } /* * See whether new ifa is better than current one: * 1) A non-virtual one is preferred over virtual. * 2) A virtual in master state preferred over any other state. * * Used in several address selecting functions. */ int ifa_preferred(struct ifaddr *cur, struct ifaddr *next) { return (cur->ifa_carp && (!next->ifa_carp || ((*carp_master_p)(next) && !(*carp_master_p)(cur)))); } struct sockaddr_dl * link_alloc_sdl(size_t size, int flags) { return (malloc(size, M_TEMP, flags)); } void link_free_sdl(struct sockaddr *sa) { free(sa, M_TEMP); } /* * Fills in given sdl with interface basic info. * Returns pointer to filled sdl. */ struct sockaddr_dl * link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)paddr; memset(sdl, 0, sizeof(struct sockaddr_dl)); sdl->sdl_len = sizeof(struct sockaddr_dl); sdl->sdl_family = AF_LINK; sdl->sdl_index = ifp->if_index; sdl->sdl_type = iftype; return (sdl); } /* * Mark an interface down and notify protocols of * the transition. */ static void if_unroute(struct ifnet *ifp, int flag, int fam) { KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP")); ifp->if_flags &= ~flag; getmicrotime(&ifp->if_lastchange); ifp->if_qflush(ifp); if (ifp->if_carp) (*carp_linkstate_p)(ifp); - rt_ifmsg(ifp); + rt_ifmsg(ifp, IFF_UP); } /* * Mark an interface up and notify protocols of * the transition. 
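 *
 * Judging from the call sites updated in this change, the second
 * argument that rt_ifmsg() now takes names the interface flag whose
 * transition the routing message reports, or 0 when no flag changed:
 *
 *	rt_ifmsg(ifp, IFF_UP);	up/down transition (if_route/if_unroute)
 *	rt_ifmsg(ifp, flag);	if_setflag() reporting the toggled flag
 *	rt_ifmsg(ifp, 0);	link-state or MTU change, flags untouched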
*/ static void if_route(struct ifnet *ifp, int flag, int fam) { KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP")); ifp->if_flags |= flag; getmicrotime(&ifp->if_lastchange); if (ifp->if_carp) (*carp_linkstate_p)(ifp); - rt_ifmsg(ifp); + rt_ifmsg(ifp, IFF_UP); #ifdef INET6 in6_if_up(ifp); #endif } void (*vlan_link_state_p)(struct ifnet *); /* XXX: private from if_vlan */ void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */ struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t); int (*vlan_tag_p)(struct ifnet *, uint16_t *); int (*vlan_pcp_p)(struct ifnet *, uint16_t *); int (*vlan_setcookie_p)(struct ifnet *, void *); void *(*vlan_cookie_p)(struct ifnet *); /* * Handle a change in the interface link state. To avoid LORs * between driver lock and upper layer locks, as well as possible * recursions, we post event to taskqueue, and all job * is done in static do_link_state_change(). */ void if_link_state_change(struct ifnet *ifp, int link_state) { /* Return if state hasn't changed. */ if (ifp->if_link_state == link_state) return; ifp->if_link_state = link_state; /* XXXGL: reference ifp? */ taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask); } static void do_link_state_change(void *arg, int pending) { struct ifnet *ifp; int link_state; ifp = arg; link_state = ifp->if_link_state; CURVNET_SET(ifp->if_vnet); - rt_ifmsg(ifp); + rt_ifmsg(ifp, 0); if (ifp->if_vlantrunk != NULL) (*vlan_link_state_p)(ifp); if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) && ifp->if_l2com != NULL) (*ng_ether_link_state_p)(ifp, link_state); if (ifp->if_carp) (*carp_linkstate_p)(ifp); if (ifp->if_bridge) ifp->if_bridge_linkstate(ifp); if (ifp->if_lagg) (*lagg_linkstate_p)(ifp, link_state); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL); if (pending > 1) if_printf(ifp, "%d link states coalesced\n", pending); if (log_link_state_change) if_printf(ifp, "link state changed to %s\n", (link_state == LINK_STATE_UP) ? "UP" : "DOWN" ); EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state); CURVNET_RESTORE(); } /* * Mark an interface down and notify protocols of * the transition. */ void if_down(struct ifnet *ifp) { EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN); if_unroute(ifp, IFF_UP, AF_UNSPEC); } /* * Mark an interface up and notify protocols of * the transition. */ void if_up(struct ifnet *ifp) { if_route(ifp, IFF_UP, AF_UNSPEC); EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP); } /* * Flush an interface queue. */ void if_qflush(struct ifnet *ifp) { struct mbuf *m, *n; struct ifaltq *ifq; ifq = &ifp->if_snd; IFQ_LOCK(ifq); #ifdef ALTQ if (ALTQ_IS_ENABLED(ifq)) ALTQ_PURGE(ifq); #endif n = ifq->ifq_head; while ((m = n) != NULL) { n = m->m_nextpkt; m_freem(m); } ifq->ifq_head = 0; ifq->ifq_tail = 0; ifq->ifq_len = 0; IFQ_UNLOCK(ifq); } /* * Map interface name to interface structure pointer, with or without * returning a reference. 
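 *
 * A usage sketch for the reference counted variant (hypothetical
 * caller); the reference obtained from ifunit_ref() must be released
 * with if_rele(), as ifioctl() below does:
 *
 *	struct ifnet *ifp;
 *
 *	ifp = ifunit_ref("em0");
 *	if (ifp == NULL)
 *		return (ENXIO);
 *	(safe to use ifp here, even across sleeps)
 *	if_rele(ifp);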
*/ struct ifnet * ifunit_ref(const char *name) { struct epoch_tracker et; struct ifnet *ifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 && !(ifp->if_flags & IFF_DYING)) break; } if (ifp != NULL) { if_ref(ifp); MPASS(ifindex_table[ifp->if_index].ife_ifnet == ifp); } NET_EPOCH_EXIT(et); return (ifp); } struct ifnet * ifunit(const char *name) { struct epoch_tracker et; struct ifnet *ifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0) break; } NET_EPOCH_EXIT(et); return (ifp); } void * ifr_buffer_get_buffer(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((void *)(uintptr_t) ifrup->ifr32.ifr_ifru.ifru_buffer.buffer); #endif return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer); } static void ifr_buffer_set_buffer_null(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0; else #endif ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL; } size_t ifr_buffer_get_length(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return (ifrup->ifr32.ifr_ifru.ifru_buffer.length); #endif return (ifrup->ifr.ifr_ifru.ifru_buffer.length); } static void ifr_buffer_set_length(void *data, size_t len) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) ifrup->ifr32.ifr_ifru.ifru_buffer.length = len; else #endif ifrup->ifr.ifr_ifru.ifru_buffer.length = len; } void * ifr_data_get_ptr(void *ifrp) { union ifreq_union *ifrup; ifrup = ifrp; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((void *)(uintptr_t) ifrup->ifr32.ifr_ifru.ifru_data); #endif return (ifrup->ifr.ifr_ifru.ifru_data); } struct ifcap_nv_bit_name { int cap_bit; const char *cap_name; }; #define CAPNV(x) {.cap_bit = IFCAP_##x, \ .cap_name = __CONCAT(IFCAP_, __CONCAT(x, _NAME)) } const struct ifcap_nv_bit_name ifcap_nv_bit_names[] = { CAPNV(RXCSUM), CAPNV(TXCSUM), CAPNV(NETCONS), CAPNV(VLAN_MTU), CAPNV(VLAN_HWTAGGING), CAPNV(JUMBO_MTU), CAPNV(POLLING), CAPNV(VLAN_HWCSUM), CAPNV(TSO4), CAPNV(TSO6), CAPNV(LRO), CAPNV(WOL_UCAST), CAPNV(WOL_MCAST), CAPNV(WOL_MAGIC), CAPNV(TOE4), CAPNV(TOE6), CAPNV(VLAN_HWFILTER), CAPNV(VLAN_HWTSO), CAPNV(LINKSTATE), CAPNV(NETMAP), CAPNV(RXCSUM_IPV6), CAPNV(TXCSUM_IPV6), CAPNV(HWSTATS), CAPNV(TXRTLMT), CAPNV(HWRXTSTMP), CAPNV(MEXTPG), CAPNV(TXTLS4), CAPNV(TXTLS6), CAPNV(VXLAN_HWCSUM), CAPNV(VXLAN_HWTSO), CAPNV(TXTLS_RTLMT), {0, NULL} }; #define CAP2NV(x) {.cap_bit = IFCAP2_##x, \ .cap_name = __CONCAT(IFCAP2_, __CONCAT(x, _NAME)) } const struct ifcap_nv_bit_name ifcap2_nv_bit_names[] = { CAP2NV(RXTLS4), CAP2NV(RXTLS6), {0, NULL} }; #undef CAPNV #undef CAP2NV int if_capnv_to_capint(const nvlist_t *nv, int *old_cap, const struct ifcap_nv_bit_name *nn, bool all) { int i, res; res = 0; for (i = 0; nn[i].cap_name != NULL; i++) { if (nvlist_exists_bool(nv, nn[i].cap_name)) { if (all || nvlist_get_bool(nv, nn[i].cap_name)) res |= nn[i].cap_bit; } else { res |= *old_cap & nn[i].cap_bit; } } return (res); } void if_capint_to_capnv(nvlist_t *nv, const struct ifcap_nv_bit_name *nn, int ifr_cap, int ifr_req) { int i; for (i = 0; nn[i].cap_name != NULL; i++) { if ((nn[i].cap_bit & ifr_cap) != 0) { nvlist_add_bool(nv, nn[i].cap_name, (nn[i].cap_bit & ifr_req) != 0); } } } /* * Hardware specific interface ioctls. 
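 *
 * The nvlist helpers above translate between IFCAP_* bit masks and
 * boolean nvlist entries keyed by the IFCAP_*_NAME strings.  In
 * outline, the SIOCGIFCAPNV handler below builds its reply like this
 * (simplified from the actual code):
 *
 *	nvlist_t *nvcap = nvlist_create(0);
 *
 *	if_capint_to_capnv(nvcap, ifcap_nv_bit_names,
 *	    ifp->if_capabilities, ifp->if_capenable);
 *	buf = nvlist_pack(nvcap, &nvbuflen);
 *	(copyout() the packed buffer to the user request)
 *	nvlist_destroy(nvcap);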
*/ int ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) { struct ifreq *ifr; int error = 0, do_ifup = 0; int new_flags, temp_flags; size_t namelen, onamelen; size_t descrlen, nvbuflen; char *descrbuf; char new_name[IFNAMSIZ]; char old_name[IFNAMSIZ], strbuf[IFNAMSIZ + 8]; struct ifaddr *ifa; struct sockaddr_dl *sdl; void *buf; nvlist_t *nvcap; struct siocsifcapnv_driver_data drv_ioctl_data; ifr = (struct ifreq *)data; switch (cmd) { case SIOCGIFINDEX: ifr->ifr_index = ifp->if_index; break; case SIOCGIFFLAGS: temp_flags = ifp->if_flags | ifp->if_drv_flags; ifr->ifr_flags = temp_flags & 0xffff; ifr->ifr_flagshigh = temp_flags >> 16; break; case SIOCGIFCAP: ifr->ifr_reqcap = ifp->if_capabilities; ifr->ifr_curcap = ifp->if_capenable; break; case SIOCGIFCAPNV: if ((ifp->if_capabilities & IFCAP_NV) == 0) { error = EINVAL; break; } buf = NULL; nvcap = nvlist_create(0); for (;;) { if_capint_to_capnv(nvcap, ifcap_nv_bit_names, ifp->if_capabilities, ifp->if_capenable); if_capint_to_capnv(nvcap, ifcap2_nv_bit_names, ifp->if_capabilities2, ifp->if_capenable2); error = (*ifp->if_ioctl)(ifp, SIOCGIFCAPNV, __DECONST(caddr_t, nvcap)); if (error != 0) { if_printf(ifp, "SIOCGIFCAPNV driver mistake: nvlist error %d\n", error); break; } buf = nvlist_pack(nvcap, &nvbuflen); if (buf == NULL) { error = nvlist_error(nvcap); if (error == 0) error = EDOOFUS; break; } if (nvbuflen > ifr->ifr_cap_nv.buf_length) { ifr->ifr_cap_nv.length = nvbuflen; ifr->ifr_cap_nv.buffer = NULL; error = EFBIG; break; } ifr->ifr_cap_nv.length = nvbuflen; error = copyout(buf, ifr->ifr_cap_nv.buffer, nvbuflen); break; } free(buf, M_NVLIST); nvlist_destroy(nvcap); break; case SIOCGIFDATA: { struct if_data ifd; /* Ensure uninitialised padding is not leaked. */ memset(&ifd, 0, sizeof(ifd)); if_data_copy(ifp, &ifd); error = copyout(&ifd, ifr_data_get_ptr(ifr), sizeof(ifd)); break; } #ifdef MAC case SIOCGIFMAC: error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp); break; #endif case SIOCGIFMETRIC: ifr->ifr_metric = ifp->if_metric; break; case SIOCGIFMTU: ifr->ifr_mtu = ifp->if_mtu; break; case SIOCGIFPHYS: /* XXXGL: did this ever worked? */ ifr->ifr_phys = 0; break; case SIOCGIFDESCR: error = 0; sx_slock(&ifdescr_sx); if (ifp->if_description == NULL) error = ENOMSG; else { /* space for terminating nul */ descrlen = strlen(ifp->if_description) + 1; if (ifr_buffer_get_length(ifr) < descrlen) ifr_buffer_set_buffer_null(ifr); else error = copyout(ifp->if_description, ifr_buffer_get_buffer(ifr), descrlen); ifr_buffer_set_length(ifr, descrlen); } sx_sunlock(&ifdescr_sx); break; case SIOCSIFDESCR: error = priv_check(td, PRIV_NET_SETIFDESCR); if (error) return (error); /* * Copy only (length-1) bytes to make sure that * if_description is always nul terminated. The * length parameter is supposed to count the * terminating nul in. 
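 *
 * From userland the same buffer protocol looks roughly like this
 * (hypothetical ifconfig(8)-style snippet; note the length includes
 * the terminating nul):
 *
 *	struct ifreq ifr;
 *	static char descr[] = "uplink to core switch";
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ifr.ifr_buffer.buffer = descr;
 *	ifr.ifr_buffer.length = strlen(descr) + 1;
 *	ioctl(s, SIOCSIFDESCR, &ifr);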
*/ if (ifr_buffer_get_length(ifr) > ifdescr_maxlen) return (ENAMETOOLONG); else if (ifr_buffer_get_length(ifr) == 0) descrbuf = NULL; else { descrbuf = if_allocdescr(ifr_buffer_get_length(ifr), M_WAITOK); error = copyin(ifr_buffer_get_buffer(ifr), descrbuf, ifr_buffer_get_length(ifr) - 1); if (error) { if_freedescr(descrbuf); break; } } if_setdescr(ifp, descrbuf); getmicrotime(&ifp->if_lastchange); break; case SIOCGIFFIB: ifr->ifr_fib = ifp->if_fib; break; case SIOCSIFFIB: error = priv_check(td, PRIV_NET_SETIFFIB); if (error) return (error); if (ifr->ifr_fib >= rt_numfibs) return (EINVAL); ifp->if_fib = ifr->ifr_fib; break; case SIOCSIFFLAGS: error = priv_check(td, PRIV_NET_SETIFFLAGS); if (error) return (error); /* * Currently, no driver owned flags pass the IFF_CANTCHANGE * check, so we don't need special handling here yet. */ new_flags = (ifr->ifr_flags & 0xffff) | (ifr->ifr_flagshigh << 16); if (ifp->if_flags & IFF_UP && (new_flags & IFF_UP) == 0) { if_down(ifp); } else if (new_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) { do_ifup = 1; } /* See if permanently promiscuous mode bit is about to flip */ if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) { if (new_flags & IFF_PPROMISC) ifp->if_flags |= IFF_PROMISC; else if (ifp->if_pcount == 0) ifp->if_flags &= ~IFF_PROMISC; if (log_promisc_mode_change) if_printf(ifp, "permanently promiscuous mode %s\n", ((new_flags & IFF_PPROMISC) ? "enabled" : "disabled")); } ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | (new_flags &~ IFF_CANTCHANGE); if (ifp->if_ioctl) { (void) (*ifp->if_ioctl)(ifp, cmd, data); } if (do_ifup) if_up(ifp); getmicrotime(&ifp->if_lastchange); break; case SIOCSIFCAP: error = priv_check(td, PRIV_NET_SETIFCAP); if (error != 0) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); if (ifr->ifr_reqcap & ~ifp->if_capabilities) return (EINVAL); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFCAPNV: error = priv_check(td, PRIV_NET_SETIFCAP); if (error != 0) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); if ((ifp->if_capabilities & IFCAP_NV) == 0) return (EINVAL); if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) return (EINVAL); nvcap = NULL; buf = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK); for (;;) { error = copyin(ifr->ifr_cap_nv.buffer, buf, ifr->ifr_cap_nv.length); if (error != 0) break; nvcap = nvlist_unpack(buf, ifr->ifr_cap_nv.length, 0); if (nvcap == NULL) { error = EINVAL; break; } drv_ioctl_data.reqcap = if_capnv_to_capint(nvcap, &ifp->if_capenable, ifcap_nv_bit_names, false); if ((drv_ioctl_data.reqcap & ~ifp->if_capabilities) != 0) { error = EINVAL; break; } drv_ioctl_data.reqcap2 = if_capnv_to_capint(nvcap, &ifp->if_capenable2, ifcap2_nv_bit_names, false); if ((drv_ioctl_data.reqcap2 & ~ifp->if_capabilities2) != 0) { error = EINVAL; break; } drv_ioctl_data.nvcap = nvcap; error = (*ifp->if_ioctl)(ifp, SIOCSIFCAPNV, (caddr_t)&drv_ioctl_data); break; } nvlist_destroy(nvcap); free(buf, M_TEMP); if (error == 0) getmicrotime(&ifp->if_lastchange); break; #ifdef MAC case SIOCSIFMAC: error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp); break; #endif case SIOCSIFNAME: error = priv_check(td, PRIV_NET_SETIFNAME); if (error) return (error); error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ, NULL); if (error != 0) return (error); if (new_name[0] == '\0') return (EINVAL); if (strcmp(new_name, ifp->if_xname) == 0) break; if (ifunit(new_name) != NULL) return (EEXIST); /* * XXX: Locking. 
Nothing else seems to lock if_flags, * and there are numerous other races with the * ifunit() checks not being atomic with namespace * changes (renames, vmoves, if_attach, etc). */ ifp->if_flags |= IFF_RENAMING; EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); if_printf(ifp, "changing name to '%s'\n", new_name); IF_ADDR_WLOCK(ifp); strlcpy(old_name, ifp->if_xname, sizeof(old_name)); strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); ifa = ifp->if_addr; sdl = (struct sockaddr_dl *)ifa->ifa_addr; namelen = strlen(new_name); onamelen = sdl->sdl_nlen; /* * Move the address if needed. This is safe because we * allocate space for a name of length IFNAMSIZ when we * create this in if_attach(). */ if (namelen != onamelen) { bcopy(sdl->sdl_data + onamelen, sdl->sdl_data + namelen, sdl->sdl_alen); } bcopy(new_name, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl = (struct sockaddr_dl *)ifa->ifa_netmask; bzero(sdl->sdl_data, onamelen); while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; IF_ADDR_WUNLOCK(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); ifp->if_flags &= ~IFF_RENAMING; snprintf(strbuf, sizeof(strbuf), "name=%s", new_name); devctl_notify("IFNET", old_name, "RENAME", strbuf); break; #ifdef VIMAGE case SIOCSIFVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error) return (error); error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid); break; #endif case SIOCSIFMETRIC: error = priv_check(td, PRIV_NET_SETIFMETRIC); if (error) return (error); ifp->if_metric = ifr->ifr_metric; getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYS: error = priv_check(td, PRIV_NET_SETIFPHYS); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFMTU: { u_long oldmtu = ifp->if_mtu; error = priv_check(td, PRIV_NET_SETIFMTU); if (error) return (error); if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) return (EINVAL); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); /* Disallow MTU changes on bridge member interfaces. */ if (ifp->if_bridge) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) { getmicrotime(&ifp->if_lastchange); - rt_ifmsg(ifp); + rt_ifmsg(ifp, 0); #ifdef INET DEBUGNET_NOTIFY_MTU(ifp); #endif } /* * If the link MTU changed, do network layer specific procedure. */ if (ifp->if_mtu != oldmtu) { #ifdef INET6 nd6_setmtu(ifp); #endif rt_updatemtu(ifp); } break; } case SIOCADDMULTI: case SIOCDELMULTI: if (cmd == SIOCADDMULTI) error = priv_check(td, PRIV_NET_ADDMULTI); else error = priv_check(td, PRIV_NET_DELMULTI); if (error) return (error); /* Don't allow group membership on non-multicast interfaces. */ if ((ifp->if_flags & IFF_MULTICAST) == 0) return (EOPNOTSUPP); /* Don't let users screw up protocols' entries. */ if (ifr->ifr_addr.sa_family != AF_LINK) return (EINVAL); if (cmd == SIOCADDMULTI) { struct epoch_tracker et; struct ifmultiaddr *ifma; /* * Userland is only permitted to join groups once * via the if_addmulti() KPI, because it cannot hold * struct ifmultiaddr * between calls. It may also * lose a race while we check if the membership * already exists. 
*/ NET_EPOCH_ENTER(et); ifma = if_findmulti(ifp, &ifr->ifr_addr); NET_EPOCH_EXIT(et); if (ifma != NULL) error = EADDRINUSE; else error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); } else { error = if_delmulti(ifp, &ifr->ifr_addr); } if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYADDR: case SIOCDIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif case SIOCSIFMEDIA: case SIOCSIFGENERIC: error = priv_check(td, PRIV_NET_HWIOCTL); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCGIFSTATUS: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: case SIOCGIFGENERIC: case SIOCGIFRSSKEY: case SIOCGIFRSSHASH: case SIOCGIFDOWNREASON: if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); break; case SIOCSIFLLADDR: error = priv_check(td, PRIV_NET_SETLLADDR); if (error) return (error); error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len); break; case SIOCGHWADDR: error = if_gethwaddr(ifp, ifr); break; case SIOCAIFGROUP: error = priv_check(td, PRIV_NET_ADDIFGROUP); if (error) return (error); error = if_addgroup(ifp, ((struct ifgroupreq *)data)->ifgr_group); if (error != 0) return (error); break; case SIOCGIFGROUP: { struct epoch_tracker et; NET_EPOCH_ENTER(et); error = if_getgroup((struct ifgroupreq *)data, ifp); NET_EPOCH_EXIT(et); break; } case SIOCDIFGROUP: error = priv_check(td, PRIV_NET_DELIFGROUP); if (error) return (error); error = if_delgroup(ifp, ((struct ifgroupreq *)data)->ifgr_group); if (error != 0) return (error); break; default: error = ENOIOCTL; break; } return (error); } /* * Interface ioctls. */ int ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) { #ifdef COMPAT_FREEBSD32 union { struct ifconf ifc; struct ifdrv ifd; struct ifgroupreq ifgr; struct ifmediareq ifmr; } thunk; u_long saved_cmd; struct ifconf32 *ifc32; struct ifdrv32 *ifd32; struct ifgroupreq32 *ifgr32; struct ifmediareq32 *ifmr32; #endif struct ifnet *ifp; struct ifreq *ifr; int error; int oif_flags; #ifdef VIMAGE bool shutdown; #endif CURVNET_SET(so->so_vnet); #ifdef VIMAGE /* Make sure the VNET is stable. 
*/ shutdown = VNET_IS_SHUTTING_DOWN(so->so_vnet); if (shutdown) { CURVNET_RESTORE(); return (EBUSY); } #endif #ifdef COMPAT_FREEBSD32 saved_cmd = cmd; switch (cmd) { case SIOCGIFCONF32: ifc32 = (struct ifconf32 *)data; thunk.ifc.ifc_len = ifc32->ifc_len; thunk.ifc.ifc_buf = PTRIN(ifc32->ifc_buf); data = (caddr_t)&thunk.ifc; cmd = SIOCGIFCONF; break; case SIOCGDRVSPEC32: case SIOCSDRVSPEC32: ifd32 = (struct ifdrv32 *)data; memcpy(thunk.ifd.ifd_name, ifd32->ifd_name, sizeof(thunk.ifd.ifd_name)); thunk.ifd.ifd_cmd = ifd32->ifd_cmd; thunk.ifd.ifd_len = ifd32->ifd_len; thunk.ifd.ifd_data = PTRIN(ifd32->ifd_data); data = (caddr_t)&thunk.ifd; cmd = _IOC_NEWTYPE(cmd, struct ifdrv); break; case SIOCAIFGROUP32: case SIOCGIFGROUP32: case SIOCDIFGROUP32: case SIOCGIFGMEMB32: ifgr32 = (struct ifgroupreq32 *)data; memcpy(thunk.ifgr.ifgr_name, ifgr32->ifgr_name, sizeof(thunk.ifgr.ifgr_name)); thunk.ifgr.ifgr_len = ifgr32->ifgr_len; switch (cmd) { case SIOCAIFGROUP32: case SIOCDIFGROUP32: memcpy(thunk.ifgr.ifgr_group, ifgr32->ifgr_group, sizeof(thunk.ifgr.ifgr_group)); break; case SIOCGIFGROUP32: case SIOCGIFGMEMB32: thunk.ifgr.ifgr_groups = PTRIN(ifgr32->ifgr_groups); break; } data = (caddr_t)&thunk.ifgr; cmd = _IOC_NEWTYPE(cmd, struct ifgroupreq); break; case SIOCGIFMEDIA32: case SIOCGIFXMEDIA32: ifmr32 = (struct ifmediareq32 *)data; memcpy(thunk.ifmr.ifm_name, ifmr32->ifm_name, sizeof(thunk.ifmr.ifm_name)); thunk.ifmr.ifm_current = ifmr32->ifm_current; thunk.ifmr.ifm_mask = ifmr32->ifm_mask; thunk.ifmr.ifm_status = ifmr32->ifm_status; thunk.ifmr.ifm_active = ifmr32->ifm_active; thunk.ifmr.ifm_count = ifmr32->ifm_count; thunk.ifmr.ifm_ulist = PTRIN(ifmr32->ifm_ulist); data = (caddr_t)&thunk.ifmr; cmd = _IOC_NEWTYPE(cmd, struct ifmediareq); break; } #endif switch (cmd) { case SIOCGIFCONF: error = ifconf(cmd, data); goto out_noref; } ifr = (struct ifreq *)data; switch (cmd) { #ifdef VIMAGE case SIOCSIFRVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error == 0) error = if_vmove_reclaim(td, ifr->ifr_name, ifr->ifr_jid); goto out_noref; #endif case SIOCIFCREATE: case SIOCIFCREATE2: error = priv_check(td, PRIV_NET_IFCREATE); if (error == 0) error = if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ? ifr_data_get_ptr(ifr) : NULL); goto out_noref; case SIOCIFDESTROY: error = priv_check(td, PRIV_NET_IFDESTROY); if (error == 0) { sx_xlock(&ifnet_detach_sxlock); error = if_clone_destroy(ifr->ifr_name); sx_xunlock(&ifnet_detach_sxlock); } goto out_noref; case SIOCIFGCLONERS: error = if_clone_list((struct if_clonereq *)data); goto out_noref; case SIOCGIFGMEMB: error = if_getgroupmembers((struct ifgroupreq *)data); goto out_noref; #if defined(INET) || defined(INET6) case SIOCSVH: case SIOCGVH: if (carp_ioctl_p == NULL) error = EPROTONOSUPPORT; else error = (*carp_ioctl_p)(ifr, cmd, td); goto out_noref; #endif } ifp = ifunit_ref(ifr->ifr_name); if (ifp == NULL) { error = ENXIO; goto out_noref; } error = ifhwioctl(cmd, ifp, data, td); if (error != ENOIOCTL) goto out_ref; oif_flags = ifp->if_flags; if (so->so_proto == NULL) { error = EOPNOTSUPP; goto out_ref; } /* * Pass the request on to the socket control method, and if the * latter returns EOPNOTSUPP, directly to the interface. * * Make an exception for the legacy SIOCSIF* requests. Drivers * trust SIOCSIFADDR et al to come from an already privileged * layer, and do not perform any credentials checks or input * validation. 
*/ error = so->so_proto->pr_control(so, cmd, data, ifp, td); if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL && cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR && cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK) error = (*ifp->if_ioctl)(ifp, cmd, data); if ((oif_flags ^ ifp->if_flags) & IFF_UP) { #ifdef INET6 if (ifp->if_flags & IFF_UP) in6_if_up(ifp); #endif } out_ref: if_rele(ifp); out_noref: CURVNET_RESTORE(); #ifdef COMPAT_FREEBSD32 if (error != 0) return (error); switch (saved_cmd) { case SIOCGIFCONF32: ifc32->ifc_len = thunk.ifc.ifc_len; break; case SIOCGDRVSPEC32: /* * SIOCGDRVSPEC is IOWR, but nothing actually touches * the struct so just assert that ifd_len (the only * field it might make sense to update) hasn't * changed. */ KASSERT(thunk.ifd.ifd_len == ifd32->ifd_len, ("ifd_len was updated %u -> %zu", ifd32->ifd_len, thunk.ifd.ifd_len)); break; case SIOCGIFGROUP32: case SIOCGIFGMEMB32: ifgr32->ifgr_len = thunk.ifgr.ifgr_len; break; case SIOCGIFMEDIA32: case SIOCGIFXMEDIA32: ifmr32->ifm_current = thunk.ifmr.ifm_current; ifmr32->ifm_mask = thunk.ifmr.ifm_mask; ifmr32->ifm_status = thunk.ifmr.ifm_status; ifmr32->ifm_active = thunk.ifmr.ifm_active; ifmr32->ifm_count = thunk.ifmr.ifm_count; break; } #endif return (error); } /* * The code common to handling reference counted flags, * e.g., in ifpromisc() and if_allmulti(). * The "pflag" argument can specify a permanent mode flag to check, * such as IFF_PPROMISC for promiscuous mode; should be 0 if none. * * Only to be used on stack-owned flags, not driver-owned flags. */ static int if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch) { struct ifreq ifr; int error; int oldflags, oldcount; /* Sanity checks to catch programming errors */ KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0, ("%s: setting driver-owned flag %d", __func__, flag)); if (onswitch) KASSERT(*refcount >= 0, ("%s: increment negative refcount %d for flag %d", __func__, *refcount, flag)); else KASSERT(*refcount > 0, ("%s: decrement non-positive refcount %d for flag %d", __func__, *refcount, flag)); /* In case this mode is permanent, just touch refcount */ if (ifp->if_flags & pflag) { *refcount += onswitch ? 1 : -1; return (0); } /* Save ifnet parameters for if_ioctl() may fail */ oldcount = *refcount; oldflags = ifp->if_flags; /* * See if we aren't the only and touching refcount is enough. * Actually toggle interface flag if we are the first or last. */ if (onswitch) { if ((*refcount)++) return (0); ifp->if_flags |= flag; } else { if (--(*refcount)) return (0); ifp->if_flags &= ~flag; } /* Call down the driver since we've changed interface flags */ if (ifp->if_ioctl == NULL) { error = EOPNOTSUPP; goto recover; } ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); if (error) goto recover; /* Notify userland that interface flags have changed */ - rt_ifmsg(ifp); + rt_ifmsg(ifp, flag); return (0); recover: /* Recover after driver error */ *refcount = oldcount; ifp->if_flags = oldflags; return (error); } /* * Set/clear promiscuous mode on interface ifp based on the truth value * of pswitch. The calls are reference counted so that only the first * "on" request actually has an effect, as does the final "off" request. * Results are undefined if the "off" and "on" requests are not matched. 
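 *
 * A sketch of the expected pairing (hypothetical consumer, e.g. a
 * packet tap); every successful "on" must eventually be matched by
 * exactly one "off":
 *
 *	error = ifpromisc(ifp, 1);	(attach: first caller sets IFF_PROMISC)
 *	if (error != 0)
 *		return (error);
 *	(... tap traffic ...)
 *	(void)ifpromisc(ifp, 0);	(detach: last caller clears it)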
*/ int ifpromisc(struct ifnet *ifp, int pswitch) { int error; int oldflags = ifp->if_flags; error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC, &ifp->if_pcount, pswitch); /* If promiscuous mode status has changed, log a message */ if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) && log_promisc_mode_change) if_printf(ifp, "promiscuous mode %s\n", (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled"); return (error); } /* * Return interface configuration * of system. List may be used * in later ioctl's (above) to get * other information. */ /*ARGSUSED*/ static int ifconf(u_long cmd, caddr_t data) { struct ifconf *ifc = (struct ifconf *)data; struct ifnet *ifp; struct ifaddr *ifa; struct ifreq ifr; struct sbuf *sb; int error, full = 0, valid_len, max_len; /* Limit initial buffer size to maxphys to avoid DoS from userspace. */ max_len = maxphys - 1; /* Prevent hostile input from being able to crash the system */ if (ifc->ifc_len <= 0) return (EINVAL); again: if (ifc->ifc_len <= max_len) { max_len = ifc->ifc_len; full = 1; } sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN); max_len = 0; valid_len = 0; IFNET_RLOCK(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { struct epoch_tracker et; int addrs; /* * Zero the ifr to make sure we don't disclose the contents * of the stack. */ memset(&ifr, 0, sizeof(ifr)); if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) >= sizeof(ifr.ifr_name)) { sbuf_delete(sb); IFNET_RUNLOCK(); return (ENAMETOOLONG); } addrs = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa = ifa->ifa_addr; if (prison_if(curthread->td_ucred, sa) != 0) continue; addrs++; if (sa->sa_len <= sizeof(*sa)) { if (sa->sa_len < sizeof(*sa)) { memset(&ifr.ifr_ifru.ifru_addr, 0, sizeof(ifr.ifr_ifru.ifru_addr)); memcpy(&ifr.ifr_ifru.ifru_addr, sa, sa->sa_len); } else ifr.ifr_ifru.ifru_addr = *sa; sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); } else { sbuf_bcat(sb, &ifr, offsetof(struct ifreq, ifr_addr)); max_len += offsetof(struct ifreq, ifr_addr); sbuf_bcat(sb, sa, sa->sa_len); max_len += sa->sa_len; } if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } NET_EPOCH_EXIT(et); if (addrs == 0) { sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } } IFNET_RUNLOCK(); /* * If we didn't allocate enough space (uncommon), try again. If * we have already allocated as much space as we are allowed, * return what we've got. */ if (valid_len != max_len && !full) { sbuf_delete(sb); goto again; } ifc->ifc_len = valid_len; sbuf_finish(sb); error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len); sbuf_delete(sb); return (error); } /* * Just like ifpromisc(), but for all-multicast-reception mode. */ int if_allmulti(struct ifnet *ifp, int onswitch) { return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch)); } struct ifmultiaddr * if_findmulti(struct ifnet *ifp, const struct sockaddr *sa) { struct ifmultiaddr *ifma; IF_ADDR_LOCK_ASSERT(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (sa->sa_family == AF_LINK) { if (sa_dl_equal(ifma->ifma_addr, sa)) break; } else { if (sa_equal(ifma->ifma_addr, sa)) break; } } return ifma; } /* * Allocate a new ifmultiaddr and initialize based on passed arguments. We * make copies of passed sockaddrs. The ifmultiaddr will not be added to * the ifnet multicast address list here, so the caller must do that and * other setup work (such as notifying the device driver). The reference * count is initialized to 1. 
*/ static struct ifmultiaddr * if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa, int mflags) { struct ifmultiaddr *ifma; struct sockaddr *dupsa; ifma = malloc(sizeof *ifma, M_IFMADDR, mflags | M_ZERO); if (ifma == NULL) return (NULL); dupsa = malloc(sa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma, M_IFMADDR); return (NULL); } bcopy(sa, dupsa, sa->sa_len); ifma->ifma_addr = dupsa; ifma->ifma_ifp = ifp; ifma->ifma_refcount = 1; ifma->ifma_protospec = NULL; if (llsa == NULL) { ifma->ifma_lladdr = NULL; return (ifma); } dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); return (NULL); } bcopy(llsa, dupsa, llsa->sa_len); ifma->ifma_lladdr = dupsa; return (ifma); } /* * if_freemulti: free ifmultiaddr structure and possibly attached related * addresses. The caller is responsible for implementing reference * counting, notifying the driver, handling routing messages, and releasing * any dependent link layer state. */ #ifdef MCAST_VERBOSE extern void kdb_backtrace(void); #endif static void if_freemulti_internal(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d", ifma->ifma_refcount)); if (ifma->ifma_lladdr != NULL) free(ifma->ifma_lladdr, M_IFMADDR); #ifdef MCAST_VERBOSE kdb_backtrace(); printf("%s freeing ifma: %p\n", __func__, ifma); #endif free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); } static void if_destroymulti(epoch_context_t ctx) { struct ifmultiaddr *ifma; ifma = __containerof(ctx, struct ifmultiaddr, ifma_epoch_ctx); if_freemulti_internal(ifma); } void if_freemulti(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti_epoch: refcount %d", ifma->ifma_refcount)); NET_EPOCH_CALL(if_destroymulti, &ifma->ifma_epoch_ctx); } /* * Register an additional multicast address with a network interface. * * - If the address is already present, bump the reference count on the * address and return. * - If the address is not link-layer, look up a link layer address. * - Allocate address structures for one or both addresses, and attach to the * multicast address list on the interface. If automatically adding a link * layer address, the protocol address will own a reference to the link * layer address, to be freed when it is freed. * - Notify the network device driver of an addition to the multicast address * list. * * 'sa' points to caller-owned memory with the desired multicast address. * * 'retifma' will be used to return a pointer to the resulting multicast * address reference, if desired. */ int if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma) { struct ifmultiaddr *ifma, *ll_ifma; struct sockaddr *llsa; struct sockaddr_dl sdl; int error; #ifdef INET IN_MULTI_LIST_UNLOCK_ASSERT(); #endif #ifdef INET6 IN6_MULTI_LIST_UNLOCK_ASSERT(); #endif /* * If the address is already present, return a new reference to it; * otherwise, allocate storage and set up a new address. */ IF_ADDR_WLOCK(ifp); ifma = if_findmulti(ifp, sa); if (ifma != NULL) { ifma->ifma_refcount++; if (retifma != NULL) *retifma = ifma; IF_ADDR_WUNLOCK(ifp); return (0); } /* * The address isn't already present; resolve the protocol address * into a link layer address, and then look that up, bump its * refcount or allocate an ifma for that also. * Most link layer resolving functions returns address data which * fits inside default sockaddr_dl structure. 
However callback * can allocate another sockaddr structure, in that case we need to * free it later. */ llsa = NULL; ll_ifma = NULL; if (ifp->if_resolvemulti != NULL) { /* Provide called function with buffer size information */ sdl.sdl_len = sizeof(sdl); llsa = (struct sockaddr *)&sdl; error = ifp->if_resolvemulti(ifp, &llsa, sa); if (error) goto unlock_out; } /* * Allocate the new address. Don't hook it up yet, as we may also * need to allocate a link layer multicast address. */ ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT); if (ifma == NULL) { error = ENOMEM; goto free_llsa_out; } /* * If a link layer address is found, we'll need to see if it's * already present in the address list, or allocate is as well. * When this block finishes, the link layer address will be on the * list. */ if (llsa != NULL) { ll_ifma = if_findmulti(ifp, llsa); if (ll_ifma == NULL) { ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT); if (ll_ifma == NULL) { --ifma->ifma_refcount; if_freemulti(ifma); error = ENOMEM; goto free_llsa_out; } ll_ifma->ifma_flags |= IFMA_F_ENQUEUED; CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma, ifma_link); } else ll_ifma->ifma_refcount++; ifma->ifma_llifma = ll_ifma; } /* * We now have a new multicast address, ifma, and possibly a new or * referenced link layer address. Add the primary address to the * ifnet address list. */ ifma->ifma_flags |= IFMA_F_ENQUEUED; CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); if (retifma != NULL) *retifma = ifma; /* * Must generate the message while holding the lock so that 'ifma' * pointer is still valid. */ rt_newmaddrmsg(RTM_NEWMADDR, ifma); IF_ADDR_WUNLOCK(ifp); /* * We are certain we have added something, so call down to the * interface to let them know about it. */ if (ifp->if_ioctl != NULL) { if (THREAD_CAN_SLEEP()) (void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0); else taskqueue_enqueue(taskqueue_swi, &ifp->if_addmultitask); } if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); return (0); free_llsa_out: if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); unlock_out: IF_ADDR_WUNLOCK(ifp); return (error); } static void if_siocaddmulti(void *arg, int pending) { struct ifnet *ifp; ifp = arg; #ifdef DIAGNOSTIC if (pending > 1) if_printf(ifp, "%d SIOCADDMULTI coalesced\n", pending); #endif CURVNET_SET(ifp->if_vnet); (void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0); CURVNET_RESTORE(); } /* * Delete a multicast group membership by network-layer group address. * * Returns ENOENT if the entry could not be found. If ifp no longer * exists, results are undefined. This entry point should only be used * from subsystems which do appropriate locking to hold ifp for the * duration of the call. * Network-layer protocol domains must use if_delmulti_ifma(). */ int if_delmulti(struct ifnet *ifp, struct sockaddr *sa) { struct ifmultiaddr *ifma; int lastref; KASSERT(ifp, ("%s: NULL ifp", __func__)); IF_ADDR_WLOCK(ifp); lastref = 0; ifma = if_findmulti(ifp, sa); if (ifma != NULL) lastref = if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); if (ifma == NULL) return (ENOENT); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } return (0); } /* * Delete all multicast group membership for an interface. * Should be used to quickly flush all multicast filters. 
*/ void if_delallmulti(struct ifnet *ifp) { struct ifmultiaddr *ifma; struct ifmultiaddr *next; IF_ADDR_WLOCK(ifp); CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); } void if_delmulti_ifma(struct ifmultiaddr *ifma) { if_delmulti_ifma_flags(ifma, 0); } /* * Delete a multicast group membership by group membership pointer. * Network-layer protocol domains must use this routine. * * It is safe to call this routine if the ifp disappeared. */ void if_delmulti_ifma_flags(struct ifmultiaddr *ifma, int flags) { struct ifnet *ifp; int lastref; MCDPRINTF("%s freeing ifma: %p\n", __func__, ifma); #ifdef INET IN_MULTI_LIST_UNLOCK_ASSERT(); #endif ifp = ifma->ifma_ifp; #ifdef DIAGNOSTIC if (ifp == NULL) { printf("%s: ifma_ifp seems to be detached\n", __func__); } else { struct epoch_tracker et; struct ifnet *oifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link) if (ifp == oifp) break; NET_EPOCH_EXIT(et); if (ifp != oifp) ifp = NULL; } #endif /* * If and only if the ifnet instance exists: Acquire the address lock. */ if (ifp != NULL) IF_ADDR_WLOCK(ifp); lastref = if_delmulti_locked(ifp, ifma, flags); if (ifp != NULL) { /* * If and only if the ifnet instance exists: * Release the address lock. * If the group was left: update the hardware hash filter. */ IF_ADDR_WUNLOCK(ifp); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } } } /* * Perform deletion of network-layer and/or link-layer multicast address. * * Return 0 if the reference count was decremented. * Return 1 if the final reference was released, indicating that the * hardware hash filter should be reprogrammed. */ static int if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching) { struct ifmultiaddr *ll_ifma; if (ifp != NULL && ifma->ifma_ifp != NULL) { KASSERT(ifma->ifma_ifp == ifp, ("%s: inconsistent ifp %p", __func__, ifp)); IF_ADDR_WLOCK_ASSERT(ifp); } ifp = ifma->ifma_ifp; MCDPRINTF("%s freeing %p from %s \n", __func__, ifma, ifp ? ifp->if_xname : ""); /* * If the ifnet is detaching, null out references to ifnet, * so that upper protocol layers will notice, and not attempt * to obtain locks for an ifnet which no longer exists. The * routing socket announcement must happen before the ifnet * instance is detached from the system. */ if (detaching) { #ifdef DIAGNOSTIC printf("%s: detaching ifnet instance %p\n", __func__, ifp); #endif /* * ifp may already be nulled out if we are being reentered * to delete the ll_ifma. */ if (ifp != NULL) { rt_newmaddrmsg(RTM_DELMADDR, ifma); ifma->ifma_ifp = NULL; } } if (--ifma->ifma_refcount > 0) return 0; if (ifp != NULL && detaching == 0 && (ifma->ifma_flags & IFMA_F_ENQUEUED)) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } /* * If this ifma is a network-layer ifma, a link-layer ifma may * have been associated with it. Release it first if so. 
*/ ll_ifma = ifma->ifma_llifma; if (ll_ifma != NULL) { KASSERT(ifma->ifma_lladdr != NULL, ("%s: llifma w/o lladdr", __func__)); if (detaching) ll_ifma->ifma_ifp = NULL; /* XXX */ if (--ll_ifma->ifma_refcount == 0) { if (ifp != NULL) { if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } } if_freemulti(ll_ifma); } } #ifdef INVARIANTS if (ifp) { struct ifmultiaddr *ifmatmp; CK_STAILQ_FOREACH(ifmatmp, &ifp->if_multiaddrs, ifma_link) MPASS(ifma != ifmatmp); } #endif if_freemulti(ifma); /* * The last reference to this instance of struct ifmultiaddr * was released; the hardware should be notified of this change. */ return 1; } /* * Set the link layer address on an interface. * * At this time we only support certain types of interfaces, * and we don't allow the length of the address to change. * * Set noinline to be dtrace-friendly */ __noinline int if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) { struct sockaddr_dl *sdl; struct ifaddr *ifa; struct ifreq ifr; ifa = ifp->if_addr; if (ifa == NULL) return (EINVAL); sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl == NULL) return (EINVAL); if (len != sdl->sdl_alen) /* don't allow length to change */ return (EINVAL); switch (ifp->if_type) { case IFT_ETHER: case IFT_XETHER: case IFT_L2VLAN: case IFT_BRIDGE: case IFT_IEEE8023ADLAG: bcopy(lladdr, LLADDR(sdl), len); break; default: return (ENODEV); } /* * If the interface is already up, we need * to re-init it in order to reprogram its * address filter. */ if ((ifp->if_flags & IFF_UP) != 0) { if (ifp->if_ioctl) { ifp->if_flags &= ~IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); ifp->if_flags |= IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); } } EVENTHANDLER_INVOKE(iflladdr_event, ifp); return (0); } /* * Compat function for handling basic encapsulation requests. * Not converted stacks (FDDI, IB, ..) supports traditional * output model: ARP (and other similar L2 protocols) are handled * inside output routine, arpresolve/nd6_resolve() returns MAC * address instead of full prepend. * * This function creates calculated header==MAC for IPv4/IPv6 and * returns EAFNOSUPPORT (which is then handled in ARP code) for other * address families. */ static int if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req) { if (req->rtype != IFENCAP_LL) return (EOPNOTSUPP); if (req->bufsize < req->lladdr_len) return (ENOMEM); switch (req->family) { case AF_INET: case AF_INET6: break; default: return (EAFNOSUPPORT); } /* Copy lladdr to storage as is */ memmove(req->buf, req->lladdr, req->lladdr_len); req->bufsize = req->lladdr_len; req->lladdr_off = 0; return (0); } /* * Tunnel interfaces can nest, also they may cause infinite recursion * calls when misconfigured. We'll prevent this by detecting loops. * High nesting level may cause stack exhaustion. We'll prevent this * by introducing upper limit. * * Return 0, if tunnel nesting count is equal or less than limit. 
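 *
 * A sketch of the intended use from a tunnel output path; the cookie
 * and the nesting limit belong to the caller, and all foo_* names
 * here are hypothetical:
 *
 *	static int
 *	foo_transmit(struct ifnet *ifp, struct mbuf *m)
 *	{
 *		int error;
 *
 *		error = if_tunnel_check_nesting(ifp, m, FOO_MTAG_COOKIE,
 *		    V_max_foo_nesting);
 *		if (error != 0) {
 *			m_freem(m);
 *			return (error);
 *		}
 *		(encapsulate and hand off the packet)
 *		return (0);
 *	}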
*/ int if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie, int limit) { struct m_tag *mtag; int count; count = 1; mtag = NULL; while ((mtag = m_tag_locate(m, cookie, 0, mtag)) != NULL) { if (*(struct ifnet **)(mtag + 1) == ifp) { log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp)); return (EIO); } count++; } if (count > limit) { log(LOG_NOTICE, "%s: if_output recursively called too many times(%d)\n", if_name(ifp), count); return (EIO); } mtag = m_tag_alloc(cookie, 0, sizeof(struct ifnet *), M_NOWAIT); if (mtag == NULL) return (ENOMEM); *(struct ifnet **)(mtag + 1) = ifp; m_tag_prepend(m, mtag); return (0); } /* * Get the link layer address that was read from the hardware at attach. * * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type * their component interfaces as IFT_IEEE8023ADLAG. */ int if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr) { if (ifp->if_hw_addr == NULL) return (ENODEV); switch (ifp->if_type) { case IFT_ETHER: case IFT_IEEE8023ADLAG: bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen); return (0); default: return (ENODEV); } } /* * The name argument must be a pointer to storage which will last as * long as the interface does. For physical devices, the result of * device_get_name(dev) is a good choice and for pseudo-devices a * static string works well. */ void if_initname(struct ifnet *ifp, const char *name, int unit) { ifp->if_dname = name; ifp->if_dunit = unit; if (unit != IF_DUNIT_NONE) snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); else strlcpy(ifp->if_xname, name, IFNAMSIZ); } static int if_vlog(struct ifnet *ifp, int pri, const char *fmt, va_list ap) { char if_fmt[256]; snprintf(if_fmt, sizeof(if_fmt), "%s: %s", ifp->if_xname, fmt); vlog(pri, if_fmt, ap); return (0); } int if_printf(struct ifnet *ifp, const char *fmt, ...) { va_list ap; va_start(ap, fmt); if_vlog(ifp, LOG_INFO, fmt, ap); va_end(ap); return (0); } int if_log(struct ifnet *ifp, int pri, const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); if_vlog(ifp, pri, fmt, ap); va_end(ap); return (0); } void if_start(struct ifnet *ifp) { (*(ifp)->if_start)(ifp); } /* * Backwards compatibility interface for drivers * that have not implemented it */ static int if_transmit(struct ifnet *ifp, struct mbuf *m) { int error; IFQ_HANDOFF(ifp, m, error); return (error); } static void if_input_default(struct ifnet *ifp __unused, struct mbuf *m) { m_freem(m); } int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust) { int active = 0; IF_LOCK(ifq); if (_IF_QFULL(ifq)) { IF_UNLOCK(ifq); if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); m_freem(m); return (0); } if (ifp != NULL) { if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust); if (m->m_flags & (M_BCAST|M_MCAST)) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); active = ifp->if_drv_flags & IFF_DRV_OACTIVE; } _IF_ENQUEUE(ifq, m); IF_UNLOCK(ifq); if (ifp != NULL && !active) (*(ifp)->if_start)(ifp); return (1); } void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f) { KASSERT(if_com_alloc[type] == NULL, ("if_register_com_alloc: %d already registered", type)); KASSERT(if_com_free[type] == NULL, ("if_register_com_alloc: %d free already registered", type)); if_com_alloc[type] = a; if_com_free[type] = f; } void if_deregister_com_alloc(u_char type) { KASSERT(if_com_alloc[type] != NULL, ("if_deregister_com_alloc: %d not registered", type)); KASSERT(if_com_free[type] != NULL, ("if_deregister_com_alloc: %d free not registered", type)); /* * Ensure all pending EPOCH(9) callbacks have been executed. This * fixes issues about late invocation of if_destroy(), which leads * to memory leak from if_com_alloc[type] allocated if_l2com. */ NET_EPOCH_DRAIN_CALLBACKS(); if_com_alloc[type] = NULL; if_com_free[type] = NULL; } /* API for driver access to network stack owned ifnet.*/ uint64_t if_setbaudrate(struct ifnet *ifp, uint64_t baudrate) { uint64_t oldbrate; oldbrate = ifp->if_baudrate; ifp->if_baudrate = baudrate; return (oldbrate); } uint64_t if_getbaudrate(if_t ifp) { return (((struct ifnet *)ifp)->if_baudrate); } int if_setcapabilities(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capabilities = capabilities; return (0); } int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit) { ((struct ifnet *)ifp)->if_capabilities |= setbit; ((struct ifnet *)ifp)->if_capabilities &= ~clearbit; return (0); } int if_getcapabilities(if_t ifp) { return ((struct ifnet *)ifp)->if_capabilities; } int if_setcapenable(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capenable = capabilities; return (0); } int if_setcapenablebit(if_t ifp, int setcap, int clearcap) { if(setcap) ((struct ifnet *)ifp)->if_capenable |= setcap; if(clearcap) ((struct ifnet *)ifp)->if_capenable &= ~clearcap; return (0); } const char * if_getdname(if_t ifp) { return ((struct ifnet *)ifp)->if_dname; } int if_togglecapenable(if_t ifp, int togglecap) { ((struct ifnet *)ifp)->if_capenable ^= togglecap; return (0); } int if_getcapenable(if_t ifp) { return ((struct ifnet *)ifp)->if_capenable; } void if_setdescr(if_t ifp, char *descrbuf) { sx_xlock(&ifdescr_sx); char *odescrbuf = ifp->if_description; ifp->if_description = descrbuf; sx_xunlock(&ifdescr_sx); if_freedescr(odescrbuf); } char * if_allocdescr(size_t sz, int malloc_flag) { malloc_flag &= (M_WAITOK | M_NOWAIT); return (malloc(sz, M_IFDESCR, M_ZERO | malloc_flag)); } void if_freedescr(char *descrbuf) { free(descrbuf, M_IFDESCR); } /* * This is largely undesirable because it ties ifnet to a device, but 
does * provide flexibility for an embedded product vendor. Should be used with * the understanding that it violates the interface boundaries, and should be * a last resort only. */ int if_setdev(if_t ifp, void *dev) { return (0); } int if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags) { ((struct ifnet *)ifp)->if_drv_flags |= set_flags; ((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags; return (0); } int if_getdrvflags(if_t ifp) { return ((struct ifnet *)ifp)->if_drv_flags; } int if_setdrvflags(if_t ifp, int flags) { ((struct ifnet *)ifp)->if_drv_flags = flags; return (0); } int if_setflags(if_t ifp, int flags) { ifp->if_flags = flags; return (0); } int if_setflagbits(if_t ifp, int set, int clear) { ((struct ifnet *)ifp)->if_flags |= set; ((struct ifnet *)ifp)->if_flags &= ~clear; return (0); } int if_getflags(if_t ifp) { return ((struct ifnet *)ifp)->if_flags; } int if_clearhwassist(if_t ifp) { ((struct ifnet *)ifp)->if_hwassist = 0; return (0); } int if_sethwassistbits(if_t ifp, int toset, int toclear) { ((struct ifnet *)ifp)->if_hwassist |= toset; ((struct ifnet *)ifp)->if_hwassist &= ~toclear; return (0); } int if_sethwassist(if_t ifp, int hwassist_bit) { ((struct ifnet *)ifp)->if_hwassist = hwassist_bit; return (0); } int if_gethwassist(if_t ifp) { return ((struct ifnet *)ifp)->if_hwassist; } int if_setmtu(if_t ifp, int mtu) { ((struct ifnet *)ifp)->if_mtu = mtu; return (0); } int if_getmtu(if_t ifp) { return ((struct ifnet *)ifp)->if_mtu; } int if_getmtu_family(if_t ifp, int family) { struct domain *dp; SLIST_FOREACH(dp, &domains, dom_next) { if (dp->dom_family == family && dp->dom_ifmtu != NULL) return (dp->dom_ifmtu((struct ifnet *)ifp)); } return (((struct ifnet *)ifp)->if_mtu); } /* * Methods for drivers to access interface unicast and multicast * link level addresses. Drivers shall know neither 'struct ifaddr' * nor 'struct ifmultiaddr'.
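 *
 * A sketch of the typical consumer: a hypothetical driver reprogramming
 * its multicast hash filter with if_foreach_llmaddr() below (foo_*
 * names are invented):
 *
 *	static u_int
 *	foo_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
 *	{
 *		struct foo_softc *sc = arg;
 *
 *		foo_hash_set(sc, LLADDR(sdl));
 *		return (1);
 *	}
 *
 * called as if_foreach_llmaddr(ifp, foo_hash_maddr, sc), whose return
 * value is the number of addresses programmed.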
*/ u_int if_lladdr_count(if_t ifp) { struct epoch_tracker et; struct ifaddr *ifa; u_int count; count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_LINK) count++; NET_EPOCH_EXIT(et); return (count); } u_int if_foreach_lladdr(if_t ifp, iflladdr_cb_t cb, void *cb_arg) { struct epoch_tracker et; struct ifaddr *ifa; u_int count; MPASS(cb); count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; count += (*cb)(cb_arg, (struct sockaddr_dl *)ifa->ifa_addr, count); } NET_EPOCH_EXIT(et); return (count); } u_int if_llmaddr_count(if_t ifp) { struct epoch_tracker et; struct ifmultiaddr *ifma; int count; count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) if (ifma->ifma_addr->sa_family == AF_LINK) count++; NET_EPOCH_EXIT(et); return (count); } u_int if_foreach_llmaddr(if_t ifp, iflladdr_cb_t cb, void *cb_arg) { struct epoch_tracker et; struct ifmultiaddr *ifma; u_int count; MPASS(cb); count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; count += (*cb)(cb_arg, (struct sockaddr_dl *)ifma->ifma_addr, count); } NET_EPOCH_EXIT(et); return (count); } int if_setsoftc(if_t ifp, void *softc) { ((struct ifnet *)ifp)->if_softc = softc; return (0); } void * if_getsoftc(if_t ifp) { return ((struct ifnet *)ifp)->if_softc; } void if_setrcvif(struct mbuf *m, if_t ifp) { MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); m->m_pkthdr.rcvif = (struct ifnet *)ifp; } void if_setvtag(struct mbuf *m, uint16_t tag) { m->m_pkthdr.ether_vtag = tag; } uint16_t if_getvtag(struct mbuf *m) { return (m->m_pkthdr.ether_vtag); } int if_sendq_empty(if_t ifp) { return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd); } struct ifaddr * if_getifaddr(if_t ifp) { return ((struct ifnet *)ifp)->if_addr; } int if_getamcount(if_t ifp) { return ((struct ifnet *)ifp)->if_amcount; } int if_setsendqready(if_t ifp) { IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd); return (0); } int if_setsendqlen(if_t ifp, int tx_desc_count) { IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count); ((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count; return (0); } int if_vlantrunkinuse(if_t ifp) { return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0; } int if_input(if_t ifp, struct mbuf* sendmp) { (*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp); return (0); } struct mbuf * if_dequeue(if_t ifp) { struct mbuf *m; IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m); return (m); } int if_sendq_prepend(if_t ifp, struct mbuf *m) { IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m); return (0); } int if_setifheaderlen(if_t ifp, int len) { ((struct ifnet *)ifp)->if_hdrlen = len; return (0); } caddr_t if_getlladdr(if_t ifp) { return (IF_LLADDR((struct ifnet *)ifp)); } void * if_gethandle(u_char type) { return (if_alloc(type)); } void if_bpfmtap(if_t ifh, struct mbuf *m) { struct ifnet *ifp = (struct ifnet *)ifh; BPF_MTAP(ifp, m); } void if_etherbpfmtap(if_t ifh, struct mbuf *m) { struct ifnet *ifp = (struct ifnet *)ifh; ETHER_BPF_MTAP(ifp, m); } void if_vlancap(if_t ifh) { struct ifnet *ifp = (struct ifnet *)ifh; VLAN_CAPABILITIES(ifp); } int if_sethwtsomax(if_t ifp, u_int if_hw_tsomax) { ((struct ifnet *)ifp)->if_hw_tsomax = if_hw_tsomax; return (0); } int if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount) { ((struct ifnet *)ifp)->if_hw_tsomaxsegcount = 
if_hw_tsomaxsegcount; return (0); } int if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize) { ((struct ifnet *)ifp)->if_hw_tsomaxsegsize = if_hw_tsomaxsegsize; return (0); } u_int if_gethwtsomax(if_t ifp) { return (((struct ifnet *)ifp)->if_hw_tsomax); } u_int if_gethwtsomaxsegcount(if_t ifp) { return (((struct ifnet *)ifp)->if_hw_tsomaxsegcount); } u_int if_gethwtsomaxsegsize(if_t ifp) { return (((struct ifnet *)ifp)->if_hw_tsomaxsegsize); } void if_setinitfn(if_t ifp, void (*init_fn)(void *)) { ((struct ifnet *)ifp)->if_init = init_fn; } void if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t)) { ((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn; } void if_setstartfn(if_t ifp, void (*start_fn)(if_t)) { ((struct ifnet *)ifp)->if_start = (void *)start_fn; } void if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn) { ((struct ifnet *)ifp)->if_transmit = start_fn; } void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn) { ((struct ifnet *)ifp)->if_qflush = flush_fn; } void if_setgetcounterfn(if_t ifp, if_get_counter_t fn) { ifp->if_get_counter = fn; } #ifdef DDB static void if_show_ifnet(struct ifnet *ifp) { if (ifp == NULL) return; db_printf("%s:\n", ifp->if_xname); #define IF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, ifp->e); IF_DB_PRINTF("%s", if_dname); IF_DB_PRINTF("%d", if_dunit); IF_DB_PRINTF("%s", if_description); IF_DB_PRINTF("%u", if_index); IF_DB_PRINTF("%d", if_idxgen); IF_DB_PRINTF("%u", if_refcount); IF_DB_PRINTF("%p", if_softc); IF_DB_PRINTF("%p", if_l2com); IF_DB_PRINTF("%p", if_llsoftc); IF_DB_PRINTF("%d", if_amcount); IF_DB_PRINTF("%p", if_addr); IF_DB_PRINTF("%p", if_broadcastaddr); IF_DB_PRINTF("%p", if_afdata); IF_DB_PRINTF("%d", if_afdata_initialized); IF_DB_PRINTF("%u", if_fib); IF_DB_PRINTF("%p", if_vnet); IF_DB_PRINTF("%p", if_home_vnet); IF_DB_PRINTF("%p", if_vlantrunk); IF_DB_PRINTF("%p", if_bpf); IF_DB_PRINTF("%u", if_pcount); IF_DB_PRINTF("%p", if_bridge); IF_DB_PRINTF("%p", if_lagg); IF_DB_PRINTF("%p", if_pf_kif); IF_DB_PRINTF("%p", if_carp); IF_DB_PRINTF("%p", if_label); IF_DB_PRINTF("%p", if_netmap); IF_DB_PRINTF("0x%08x", if_flags); IF_DB_PRINTF("0x%08x", if_drv_flags); IF_DB_PRINTF("0x%08x", if_capabilities); IF_DB_PRINTF("0x%08x", if_capenable); IF_DB_PRINTF("%p", if_snd.ifq_head); IF_DB_PRINTF("%p", if_snd.ifq_tail); IF_DB_PRINTF("%d", if_snd.ifq_len); IF_DB_PRINTF("%d", if_snd.ifq_maxlen); IF_DB_PRINTF("%p", if_snd.ifq_drv_head); IF_DB_PRINTF("%p", if_snd.ifq_drv_tail); IF_DB_PRINTF("%d", if_snd.ifq_drv_len); IF_DB_PRINTF("%d", if_snd.ifq_drv_maxlen); IF_DB_PRINTF("%d", if_snd.altq_type); IF_DB_PRINTF("%x", if_snd.altq_flags); #undef IF_DB_PRINTF } DB_SHOW_COMMAND(ifnet, db_show_ifnet) { if (!have_addr) { db_printf("usage: show ifnet <struct ifnet *>\n"); return; } if_show_ifnet((struct ifnet *)addr); } DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets) { struct ifnet *ifp; u_short idx; for (idx = 1; idx <= if_index; idx++) { ifp = ifindex_table[idx].ife_ifnet; if (ifp == NULL) continue; db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp); if (db_pager_quit) break; } } #endif /* DDB */ diff --git a/sys/net/if.h b/sys/net/if.h index a559219d700b..117b0e14ef05 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -1,685 +1,686 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_H_ #define _NET_IF_H_ #include <sys/cdefs.h> #if __BSD_VISIBLE /* * <net/if.h> does not depend on <sys/time.h> on most other systems. This * helps userland compatibility. (struct timeval ifi_lastchange) * The same holds for <sys/socket.h>. (struct sockaddr ifru_addr) */ #ifndef _KERNEL #include <sys/time.h> #include <sys/socket.h> #endif #endif /* * Length of interface external name, including terminating '\0'. * Note: this is the same size as a generic device's external name. */ #define IF_NAMESIZE 16 #if __BSD_VISIBLE #define IFNAMSIZ IF_NAMESIZE #define IF_MAXUNIT 0x7fff /* historical value */ #endif #if __BSD_VISIBLE /* * Structure used to query names of interface cloners. */ struct if_clonereq { int ifcr_total; /* total cloners (out) */ int ifcr_count; /* room for this many in user buffer */ char *ifcr_buffer; /* buffer for cloner names */ }; /* * Structure describing information about an interface * which may be of interest to management entities.
*/ struct if_data { /* generic interface information */ uint8_t ifi_type; /* ethernet, tokenring, etc */ uint8_t ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */ uint8_t ifi_addrlen; /* media address length */ uint8_t ifi_hdrlen; /* media header length */ uint8_t ifi_link_state; /* current link state */ uint8_t ifi_vhid; /* carp vhid */ uint16_t ifi_datalen; /* length of this data struct */ uint32_t ifi_mtu; /* maximum transmission unit */ uint32_t ifi_metric; /* routing metric (external only) */ uint64_t ifi_baudrate; /* linespeed */ /* volatile statistics */ uint64_t ifi_ipackets; /* packets received on interface */ uint64_t ifi_ierrors; /* input errors on interface */ uint64_t ifi_opackets; /* packets sent on interface */ uint64_t ifi_oerrors; /* output errors on interface */ uint64_t ifi_collisions; /* collisions on csma interfaces */ uint64_t ifi_ibytes; /* total number of octets received */ uint64_t ifi_obytes; /* total number of octets sent */ uint64_t ifi_imcasts; /* packets received via multicast */ uint64_t ifi_omcasts; /* packets sent via multicast */ uint64_t ifi_iqdrops; /* dropped on input */ uint64_t ifi_oqdrops; /* dropped on output */ uint64_t ifi_noproto; /* destined for unsupported protocol */ uint64_t ifi_hwassist; /* HW offload capabilities, see IFCAP */ /* Unions are here to make sizes MI. */ union { /* uptime at attach or stat reset */ time_t tt; uint64_t ph; } __ifi_epoch; #define ifi_epoch __ifi_epoch.tt union { /* time of last administrative change */ struct timeval tv; struct { uint64_t ph1; uint64_t ph2; } ph; } __ifi_lastchange; #define ifi_lastchange __ifi_lastchange.tv }; /*- * Interface flags are of two types: network stack owned flags, and driver * owned flags. Historically, these values were stored in the same ifnet * flags field, but with the advent of fine-grained locking, they have been * broken out such that the network stack is responsible for synchronizing * the stack-owned fields, and the device driver the device-owned fields. * Both halves can perform lockless reads of the other half's field, subject * to accepting the involved races. * * Both sets of flags come from the same number space, and should not be * permitted to conflict, as they are exposed to user space via a single * field. * * The following symbols identify read and write requirements for fields: * * (i) if_flags field set by device driver before attach, read-only there * after. * (n) if_flags field written only by the network stack, read by either the * stack or driver. * (d) if_drv_flags field written only by the device driver, read by either * the stack or driver. 
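 *
 * For example (an illustrative sketch), a driver manipulates only the
 * driver-owned (d) half through the accessors, leaving IFF_UP under
 * stack control:
 *
 *	// driver init path, under the driver's own lock:
 *	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
 *
 *	// driver stop path:
 *	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING | IFF_DRV_OACTIVE);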
*/ #define IFF_UP 0x1 /* (n) interface is up */ #define IFF_BROADCAST 0x2 /* (i) broadcast address valid */ #define IFF_DEBUG 0x4 /* (n) turn on debugging */ #define IFF_LOOPBACK 0x8 /* (i) is a loopback net */ #define IFF_POINTOPOINT 0x10 /* (i) is a point-to-point link */ #define IFF_KNOWSEPOCH 0x20 /* (i) calls if_input in net epoch */ #define IFF_DRV_RUNNING 0x40 /* (d) resources allocated */ #define IFF_NOARP 0x80 /* (n) no address resolution protocol */ #define IFF_PROMISC 0x100 /* (n) receive all packets */ #define IFF_ALLMULTI 0x200 /* (n) receive all multicast packets */ #define IFF_DRV_OACTIVE 0x400 /* (d) tx hardware queue is full */ #define IFF_SIMPLEX 0x800 /* (i) can't hear own transmissions */ #define IFF_LINK0 0x1000 /* per link layer defined bit */ #define IFF_LINK1 0x2000 /* per link layer defined bit */ #define IFF_LINK2 0x4000 /* per link layer defined bit */ #define IFF_ALTPHYS IFF_LINK2 /* use alternate physical connection */ #define IFF_MULTICAST 0x8000 /* (i) supports multicast */ #define IFF_CANTCONFIG 0x10000 /* (i) unconfigurable using ioctl(2) */ #define IFF_PPROMISC 0x20000 /* (n) user-requested promisc mode */ #define IFF_MONITOR 0x40000 /* (n) user-requested monitor mode */ #define IFF_STATICARP 0x80000 /* (n) static ARP */ #define IFF_STICKYARP 0x100000 /* (n) sticky ARP */ #define IFF_DYING 0x200000 /* (n) interface is winding down */ #define IFF_RENAMING 0x400000 /* (n) interface is being renamed */ #define IFF_NOGROUP 0x800000 /* (n) interface is not part of any groups */ +#define IFF_NETLINK_1 0x1000000 /* (n) used by netlink */ /* * Old names for driver flags so that user space tools can continue to use * the old (portable) names. */ #ifndef _KERNEL #define IFF_RUNNING IFF_DRV_RUNNING #define IFF_OACTIVE IFF_DRV_OACTIVE #endif /* flags set internally only: */ #define IFF_CANTCHANGE \ (IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_PROMISC|\ IFF_DYING|IFF_CANTCONFIG|IFF_KNOWSEPOCH) /* * Values for if_link_state. */ #define LINK_STATE_UNKNOWN 0 /* link invalid/unknown */ #define LINK_STATE_DOWN 1 /* link is down */ #define LINK_STATE_UP 2 /* link is up */ /* * Some convenience macros used for setting ifi_baudrate. * XXX 1000 vs. 1024? --thorpej@netbsd.org */ #define IF_Kbps(x) ((uintmax_t)(x) * 1000) /* kilobits/sec. */ #define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */ #define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. */ /* * Capabilities that interfaces can advertise. * * struct ifnet.if_capabilities * contains the optional features & capabilities a particular interface * supports (not only the driver but also the detected hw revision). * Capabilities are defined by IFCAP_* below. * struct ifnet.if_capenable * contains the enabled (either by default or through ifconfig) optional * features & capabilities on this interface. * Capabilities are defined by IFCAP_* below. * struct if_data.ifi_hwassist in mbuf CSUM_ flag form, controlled by above * contains the enabled optional feature & capabilities that can be used * individually per packet and are specified in the mbuf pkthdr.csum_flags * field. IFCAP_* and CSUM_* do not match one to one and CSUM_* may be * more detailed or differentiated than IFCAP_*. * Hwassist features are defined CSUM_* in sys/mbuf.h * * Capabilities that cannot be arbitrarily changed with ifconfig/ioctl * are listed in IFCAP_CANTCHANGE, similar to IFF_CANTCHANGE.
* This is not strictly necessary because the common code never * changes capabilities, and it is left to the individual driver * to do the right thing. However, having the filter here * avoids replication of the same code in all individual drivers. */ #define IFCAP_RXCSUM 0x00001 /* can offload checksum on RX */ #define IFCAP_TXCSUM 0x00002 /* can offload checksum on TX */ #define IFCAP_NETCONS 0x00004 /* can be a network console */ #define IFCAP_VLAN_MTU 0x00008 /* VLAN-compatible MTU */ #define IFCAP_VLAN_HWTAGGING 0x00010 /* hardware VLAN tag support */ #define IFCAP_JUMBO_MTU 0x00020 /* 9000 byte MTU supported */ #define IFCAP_POLLING 0x00040 /* driver supports polling */ #define IFCAP_VLAN_HWCSUM 0x00080 /* can do IFCAP_HWCSUM on VLANs */ #define IFCAP_TSO4 0x00100 /* can do TCP Segmentation Offload */ #define IFCAP_TSO6 0x00200 /* can do TCP6 Segmentation Offload */ #define IFCAP_LRO 0x00400 /* can do Large Receive Offload */ #define IFCAP_WOL_UCAST 0x00800 /* wake on any unicast frame */ #define IFCAP_WOL_MCAST 0x01000 /* wake on any multicast frame */ #define IFCAP_WOL_MAGIC 0x02000 /* wake on any Magic Packet */ #define IFCAP_TOE4 0x04000 /* interface can offload TCP */ #define IFCAP_TOE6 0x08000 /* interface can offload TCP6 */ #define IFCAP_VLAN_HWFILTER 0x10000 /* interface hw can filter vlan tag */ #define IFCAP_NV 0x20000 /* can do SIOCGIFCAPNV/SIOCSIFCAPNV */ #define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */ #define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */ #define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */ #define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */ #define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */ #define IFCAP_HWSTATS 0x800000 /* manages counters internally */ #define IFCAP_TXRTLMT 0x1000000 /* hardware supports TX rate limiting */ #define IFCAP_HWRXTSTMP 0x2000000 /* hardware rx timestamping */ #define IFCAP_MEXTPG 0x4000000 /* understands M_EXTPG mbufs */ #define IFCAP_TXTLS4 0x8000000 /* can do TLS encryption and segmentation for TCP */ #define IFCAP_TXTLS6 0x10000000 /* can do TLS encryption and segmentation for TCP6 */ #define IFCAP_VXLAN_HWCSUM 0x20000000 /* can do IFCAP_HWCSUM on VXLANs */ #define IFCAP_VXLAN_HWTSO 0x40000000 /* can do IFCAP_TSO on VXLANs */ #define IFCAP_TXTLS_RTLMT 0x80000000 /* can do TLS with rate limiting */ #define IFCAP2_RXTLS4 0x00001 #define IFCAP2_RXTLS6 0x00002 #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) #define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) #define IFCAP_WOL (IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC) #define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6) #define IFCAP_TXTLS (IFCAP_TXTLS4 | IFCAP_TXTLS6) #define IFCAP2_RXTLS (IFCAP2_RXTLS4 | IFCAP2_RXTLS6) #define IFCAP_CANTCHANGE (IFCAP_NETMAP | IFCAP_NV) #define IFCAP_ALLCAPS 0xffffffff #define IFCAP_RXCSUM_NAME "RXCSUM" #define IFCAP_TXCSUM_NAME "TXCSUM" #define IFCAP_NETCONS_NAME "NETCONS" #define IFCAP_VLAN_MTU_NAME "VLAN_MTU" #define IFCAP_VLAN_HWTAGGING_NAME "VLAN_HWTAGGING" #define IFCAP_JUMBO_MTU_NAME "JUMBO_MTU" #define IFCAP_POLLING_NAME "POLLING" #define IFCAP_VLAN_HWCSUM_NAME "VLAN_HWCSUM" #define IFCAP_TSO4_NAME "TSO4" #define IFCAP_TSO6_NAME "TSO6" #define IFCAP_LRO_NAME "LRO" #define IFCAP_WOL_UCAST_NAME "WOL_UCAST" #define IFCAP_WOL_MCAST_NAME "WOL_MCAST" #define IFCAP_WOL_MAGIC_NAME "WOL_MAGIC" #define IFCAP_TOE4_NAME "TOE4" #define IFCAP_TOE6_NAME "TOE6" #define IFCAP_VLAN_HWFILTER_NAME
"VLAN_HWFILTER" #define IFCAP_VLAN_HWTSO_NAME "VLAN_HWTSO" #define IFCAP_LINKSTATE_NAME "LINKSTATE" #define IFCAP_NETMAP_NAME "NETMAP" #define IFCAP_RXCSUM_IPV6_NAME "RXCSUM_IPV6" #define IFCAP_TXCSUM_IPV6_NAME "TXCSUM_IPV6" #define IFCAP_HWSTATS_NAME "HWSTATS" #define IFCAP_TXRTLMT_NAME "TXRTLMT" #define IFCAP_HWRXTSTMP_NAME "HWRXTSTMP" #define IFCAP_MEXTPG_NAME "MEXTPG" #define IFCAP_TXTLS4_NAME "TXTLS4" #define IFCAP_TXTLS6_NAME "TXTLS6" #define IFCAP_VXLAN_HWCSUM_NAME "VXLAN_HWCSUM" #define IFCAP_VXLAN_HWTSO_NAME "VXLAN_HWTSO" #define IFCAP_TXTLS_RTLMT_NAME "TXTLS_RTLMT" #define IFCAP2_RXTLS4_NAME "RXTLS4" #define IFCAP2_RXTLS6_NAME "RXTLS6" #define IFQ_MAXLEN 50 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ /* * Message format for use in obtaining information about interfaces * from getkerninfo and the routing socket * For the new, extensible interface see struct if_msghdrl below. */ struct if_msghdr { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ u_short _ifm_spare1; struct if_data ifm_data;/* statistics and other data about if */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifm_data_off or within ifm_data. Both the if_msghdr and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifm_data and the start of the socket address strucutres immediately following * struct if_msghdrl given a pointer to struct if_msghdrl. */ #define IF_MSGHDRL_IFM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifm_data_off) #define IF_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifm_len) struct if_msghdrl { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ u_short _ifm_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifm_len; /* length of if_msghdrl incl. if_data */ u_short ifm_data_off; /* offset of if_data from beginning */ int _ifm_spare2; struct if_data ifm_data;/* statistics and other data about if */ }; /* * Message format for use in obtaining information about interface addresses * from getkerninfo and the routing socket * For the new, extensible interface see struct ifa_msghdrl below. */ struct ifa_msghdr { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ u_short _ifam_spare1; int ifam_metric; /* value of ifa_ifp->if_metric */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifam_metric or within ifam_data. Both the ifa_msghdrl and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifm_data and the start of the socket address strucutres immediately following * struct ifa_msghdrl given a pointer to struct ifa_msghdrl. 
*/ #define IFA_MSGHDRL_IFAM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifam_data_off) #define IFA_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifam_len) struct ifa_msghdrl { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ u_short _ifam_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifam_len; /* length of ifa_msghdrl incl. if_data */ u_short ifam_data_off; /* offset of if_data from beginning */ int ifam_metric; /* value of ifa_ifp->if_metric */ struct if_data ifam_data;/* statistics and other data about if or * address */ }; /* * Message format for use in obtaining information about multicast addresses * from the routing socket */ struct ifma_msghdr { u_short ifmam_msglen; /* to skip over non-understood messages */ u_char ifmam_version; /* future binary compatibility */ u_char ifmam_type; /* message type */ int ifmam_addrs; /* like rtm_addrs */ int ifmam_flags; /* value of ifa_flags */ u_short ifmam_index; /* index for associated ifp */ u_short _ifmam_spare1; }; /* * Message format announcing the arrival or departure of a network interface. */ struct if_announcemsghdr { u_short ifan_msglen; /* to skip over non-understood messages */ u_char ifan_version; /* future binary compatibility */ u_char ifan_type; /* message type */ u_short ifan_index; /* index for associated ifp */ char ifan_name[IFNAMSIZ]; /* if name, e.g. "en0" */ u_short ifan_what; /* what type of announcement */ }; #define IFAN_ARRIVAL 0 /* interface arrival */ #define IFAN_DEPARTURE 1 /* interface departure */ /* * Buffer with length to be used in SIOCGIFDESCR/SIOCSIFDESCR requests */ struct ifreq_buffer { size_t length; void *buffer; }; struct ifreq_nv_req { u_int buf_length; /* Total size of buffer, u_int for ABI struct ifreq */ u_int length; /* Length of the filled part */ void *buffer; /* Buffer itself, containing packed nv */ }; #define IFR_CAP_NV_MAXBUFSIZE (2 * 1024 * 1024) /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq { char ifr_name[IFNAMSIZ]; /* if name, e.g. 
"en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; caddr_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; struct ifreq_nv_req ifru_nv; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ #define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ #ifndef _KERNEL #define ifr_buffer ifr_ifru.ifru_buffer /* user supplied buffer with its length */ #endif #define ifr_flags ifr_ifru.ifru_flags[0] /* flags (low 16 bits) */ #define ifr_flagshigh ifr_ifru.ifru_flags[1] /* flags (high 16 bits) */ #define ifr_jid ifr_ifru.ifru_jid /* jail/vnet */ #define ifr_metric ifr_ifru.ifru_metric /* metric */ #define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ #define ifr_phys ifr_ifru.ifru_phys /* physical wire */ #define ifr_media ifr_ifru.ifru_media /* physical media */ #ifndef _KERNEL #define ifr_data ifr_ifru.ifru_data /* for use by interface */ #endif #define ifr_reqcap ifr_ifru.ifru_cap[0] /* requested capabilities */ #define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ #define ifr_index ifr_ifru.ifru_index /* interface index */ #define ifr_fib ifr_ifru.ifru_fib /* interface fib */ #define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ #define ifr_lan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ #define ifr_cap_nv ifr_ifru.ifru_nv /* nv-based cap interface */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ ((ifr).ifr_addr.sa_len > sizeof(struct sockaddr) ? \ (sizeof(struct ifreq) - sizeof(struct sockaddr) + \ (ifr).ifr_addr.sa_len) : sizeof(struct ifreq)) struct ifaliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; int ifra_vhid; }; /* 9.x compat */ struct oifaliasreq { char ifra_name[IFNAMSIZ]; struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; }; struct ifmediareq { char ifm_name[IFNAMSIZ]; /* if name, e.g. "en0" */ int ifm_current; /* current media options */ int ifm_mask; /* don't care mask */ int ifm_status; /* media status */ int ifm_active; /* active options */ int ifm_count; /* # entries in ifm_ulist array */ int *ifm_ulist; /* media words */ }; struct ifdrv { char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ unsigned long ifd_cmd; size_t ifd_len; void *ifd_data; }; /* * Structure used to retrieve aux status data from interfaces. * Kernel suppliers to this interface should respect the formatting * needed by ifconfig(8): each line starts with a TAB and ends with * a newline. The canonical example to copy and paste is in if_tun.c. */ #define IFSTATMAX 800 /* 10 lines of text */ struct ifstat { char ifs_name[IFNAMSIZ]; /* if name, e.g. "en0" */ char ascii[IFSTATMAX + 1]; }; /* * Structure used in SIOCGIFCONF request. * Used to retrieve interface configuration * for machine (useful for programs which * must know all networks accessible). */ struct ifconf { int ifc_len; /* size of associated buffer */ union { caddr_t ifcu_buf; struct ifreq *ifcu_req; } ifc_ifcu; #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */ }; /* * interface groups */ #define IFG_ALL "all" /* group contains all interfaces */ /* XXX: will we implement this? 
*/ #define IFG_EGRESS "egress" /* if(s) default route(s) point to */ struct ifg_req { union { char ifgrqu_group[IFNAMSIZ]; char ifgrqu_member[IFNAMSIZ]; } ifgrq_ifgrqu; #define ifgrq_group ifgrq_ifgrqu.ifgrqu_group #define ifgrq_member ifgrq_ifgrqu.ifgrqu_member }; /* * Used to lookup groups for an interface */ struct ifgroupreq { char ifgr_name[IFNAMSIZ]; u_int ifgr_len; union { char ifgru_group[IFNAMSIZ]; struct ifg_req *ifgru_groups; } ifgr_ifgru; #define ifgr_group ifgr_ifgru.ifgru_group #define ifgr_groups ifgr_ifgru.ifgru_groups }; /* * Structure used to request i2c data * from interface transceivers. */ struct ifi2creq { uint8_t dev_addr; /* i2c address (0xA0, 0xA2) */ uint8_t offset; /* read offset */ uint8_t len; /* read length */ uint8_t spare0; uint32_t spare1; uint8_t data[8]; /* read buffer */ }; /* * RSS hash. */ #define RSS_FUNC_NONE 0 /* RSS disabled */ #define RSS_FUNC_PRIVATE 1 /* non-standard */ #define RSS_FUNC_TOEPLITZ 2 #define RSS_TYPE_IPV4 0x00000001 #define RSS_TYPE_TCP_IPV4 0x00000002 #define RSS_TYPE_IPV6 0x00000004 #define RSS_TYPE_IPV6_EX 0x00000008 #define RSS_TYPE_TCP_IPV6 0x00000010 #define RSS_TYPE_TCP_IPV6_EX 0x00000020 #define RSS_TYPE_UDP_IPV4 0x00000040 #define RSS_TYPE_UDP_IPV6 0x00000080 #define RSS_TYPE_UDP_IPV6_EX 0x00000100 #define RSS_KEYLEN 128 struct ifrsskey { char ifrk_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint8_t ifrk_func; /* RSS_FUNC_ */ uint8_t ifrk_spare0; uint16_t ifrk_keylen; uint8_t ifrk_key[RSS_KEYLEN]; }; struct ifrsshash { char ifrh_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint8_t ifrh_func; /* RSS_FUNC_ */ uint8_t ifrh_spare0; uint16_t ifrh_spare1; uint32_t ifrh_types; /* RSS_TYPE_ */ }; #define IFNET_PCP_NONE 0xff /* PCP disabled */ #define IFDR_MSG_SIZE 64 #define IFDR_REASON_MSG 1 #define IFDR_REASON_VENDOR 2 struct ifdownreason { char ifdr_name[IFNAMSIZ]; uint32_t ifdr_reason; uint32_t ifdr_vendor; char ifdr_msg[IFDR_MSG_SIZE]; }; #endif /* __BSD_VISIBLE */ #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IFADDR); MALLOC_DECLARE(M_IFMADDR); #endif extern struct sx ifnet_detach_sxlock; struct nvlist; struct ifcap_nv_bit_name; int if_capnv_to_capint(const struct nvlist *nv, int *old_cap, const struct ifcap_nv_bit_name *nn, bool all); void if_capint_to_capnv(struct nvlist *nv, const struct ifcap_nv_bit_name *nn, int ifr_cap, int ifr_req); struct siocsifcapnv_driver_data { int reqcap; int reqcap2; struct nvlist *nvcap; }; #endif #ifndef _KERNEL struct if_nameindex { unsigned int if_index; /* 1, 2, ... */ char *if_name; /* null terminated name: "le0", ... */ }; __BEGIN_DECLS void if_freenameindex(struct if_nameindex *); char *if_indextoname(unsigned int, char *); struct if_nameindex *if_nameindex(void); unsigned int if_nametoindex(const char *); __END_DECLS #endif #endif /* !_NET_IF_H_ */ diff --git a/sys/net/route.c b/sys/net/route.c index 9773f899f5af..b79e8e6e2a66 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -1,707 +1,723 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 * $FreeBSD$ */ /************************************************************************ * Note: In this file a 'fib' is a "forwarding information base" * * Which is the new name for an in kernel routing (next hop) table. * ***********************************************************************/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_mrouting.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include VNET_PCPUSTAT_DEFINE(struct rtstat, rtstat); VNET_PCPUSTAT_SYSINIT(rtstat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(rtstat); #endif EVENTHANDLER_LIST_DEFINE(rt_addrmsg); static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *, void *arg); /* * route initialization must occur before ip6_init2(), which happens at * SI_ORDER_MIDDLE. */ static void route_init(void) { nhops_init(); } SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, NULL); struct rib_head * rt_table_init(int offset, int family, u_int fibnum) { struct rib_head *rh; rh = malloc(sizeof(struct rib_head), M_RTABLE, M_WAITOK | M_ZERO); /* TODO: These details should be hidden inside radix.c */ /* Init masks tree */ rn_inithead_internal(&rh->head, rh->rnh_nodes, offset); rn_inithead_internal(&rh->rmhead.head, rh->rmhead.mask_nodes, 0); rh->head.rnh_masks = &rh->rmhead; /* Save metadata associated with this routing table.
*/ rh->rib_family = family; rh->rib_fibnum = fibnum; #ifdef VIMAGE rh->rib_vnet = curvnet; #endif tmproutes_init(rh); /* Init locks */ RIB_LOCK_INIT(rh); nhops_init_rib(rh); /* Init subscription system */ rib_init_subscriptions(rh); /* Finally, set base callbacks */ rh->rnh_addaddr = rn_addroute; rh->rnh_deladdr = rn_delete; rh->rnh_matchaddr = rn_match; rh->rnh_lookup = rn_lookup; rh->rnh_walktree = rn_walktree; rh->rnh_walktree_from = rn_walktree_from; return (rh); } static int rt_freeentry(struct radix_node *rn, void *arg) { struct radix_head * const rnh = arg; struct radix_node *x; x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh); if (x != NULL) R_Free(x); return (0); } void rt_table_destroy(struct rib_head *rh) { RIB_WLOCK(rh); rh->rib_dying = true; RIB_WUNLOCK(rh); #ifdef FIB_ALGO fib_destroy_rib(rh); #endif tmproutes_destroy(rh); rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head); nhops_destroy_rib(rh); rib_destroy_subscriptions(rh); /* Assume table is already empty */ RIB_LOCK_DESTROY(rh); free(rh, M_RTABLE); } /* * Adds a temporary redirect entry to the routing table. * @fibnum: fib number * @dst: destination to install redirect to * @gateway: gateway to go via * @author: sockaddr of originating router, can be NULL * @ifp: interface to use for the redirected route * @flags: set of flags to add. Allowed: RTF_GATEWAY * @lifetime_sec: time in seconds to expire this redirect. * * Returns 0 on success, errno otherwise. */ int rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *author, struct ifnet *ifp, int flags, int lifetime_sec) { struct route_nhop_data rnd = { .rnd_weight = RT_DEFAULT_WEIGHT }; struct rib_cmd_info rc; struct ifaddr *ifa; int error; NET_EPOCH_ASSERT(); if (rt_tables_get_rnh(fibnum, dst->sa_family) == NULL) return (EAFNOSUPPORT); /* Verify the allowed flag mask. */ KASSERT(((flags & ~(RTF_GATEWAY)) == 0), ("invalid redirect flags: %x", flags)); flags |= RTF_HOST | RTF_DYNAMIC; /* Get the best ifa for the given interface and gateway. */ if ((ifa = ifaof_ifpforaddr(gateway, ifp)) == NULL) return (ENETUNREACH); struct nhop_object *nh = nhop_alloc(fibnum, dst->sa_family); if (nh == NULL) return (ENOMEM); nhop_set_gw(nh, gateway, flags & RTF_GATEWAY); nhop_set_transmit_ifp(nh, ifp); nhop_set_src(nh, ifa); nhop_set_pxtype_flag(nh, NHF_HOST); nhop_set_expire(nh, lifetime_sec + time_uptime); nhop_set_redirect(nh, true); nhop_set_origin(nh, NH_ORIGIN_REDIRECT); rnd.rnd_nhop = nhop_get_nhop(nh, &error); if (error == 0) { error = rib_add_route_px(fibnum, dst, -1, &rnd, RTM_F_CREATE, &rc); } if (error != 0) { /* TODO: add per-fib redirect stats. */ return (error); } RTSTAT_INC(rts_dynamic); /* Send notification of a route addition to userland. */ struct rt_addrinfo info = { .rti_info[RTAX_DST] = dst, .rti_info[RTAX_GATEWAY] = gateway, .rti_info[RTAX_AUTHOR] = author, }; rt_missmsg_fib(RTM_REDIRECT, &info, flags | RTF_UP, error, fibnum); return (0); } /* * Routing table ioctl interface. */ int rtioctl_fib(u_long req, caddr_t data, u_int fibnum) { /* * If more ioctl commands are added here, make sure the proper * super-user checks are being performed because it is possible for * prison-root to make it this far if raw sockets have been enabled * in jails. */ #ifdef INET /* Multicast goop, grrr... */ return mrt_ioctl ?
mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; #else /* INET */ return ENXIO; #endif /* INET */ } struct ifaddr * ifa_ifwithroute(int flags, const struct sockaddr *dst, const struct sockaddr *gateway, u_int fibnum) { struct ifaddr *ifa; NET_EPOCH_ASSERT(); if ((flags & RTF_GATEWAY) == 0) { /* * If we are adding a route to an interface, * and the interface is a pt to pt link * we should search for the destination * as our clue to the interface. Otherwise * we can use the local address. */ ifa = NULL; if (flags & RTF_HOST) ifa = ifa_ifwithdstaddr(dst, fibnum); if (ifa == NULL) ifa = ifa_ifwithaddr(gateway); } else { /* * If we are adding a route to a remote net * or host, the gateway may still be on the * other end of a pt to pt link. */ ifa = ifa_ifwithdstaddr(gateway, fibnum); } if (ifa == NULL) ifa = ifa_ifwithnet(gateway, 0, fibnum); if (ifa == NULL) { struct nhop_object *nh; nh = rib_lookup(fibnum, gateway, NHR_NONE, 0); /* * dismiss a gateway that is reachable only * through the default router */ if ((nh == NULL) || (nh->nh_flags & NHF_DEFAULT)) return (NULL); ifa = nh->nh_ifa; } if (ifa->ifa_addr->sa_family != dst->sa_family) { struct ifaddr *oifa = ifa; ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); if (ifa == NULL) ifa = oifa; } return (ifa); } /* * Delete Routes for a Network Interface * * Called for each routing entry via the rnh->rnh_walktree() call above * to delete all route entries referencing a detaching network interface. * * Arguments: * rt pointer to rtentry * nh pointer to nhop * arg argument passed to rnh->rnh_walktree() - detaching interface * * Returns: * 0 successful * errno failed - reason indicated */ static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *nh, void *arg) { struct ifnet *ifp = arg; if (nh->nh_ifp != ifp) return (0); /* * Protect (sorta) against walktree recursion problems * with cloned routes */ if ((rt->rte_flags & RTF_UP) == 0) return (0); return (1); } void rt_flushifroutes(struct ifnet *ifp) { rib_foreach_table_walk_del(AF_UNSPEC, rt_ifdelroute, ifp); } /* * Tries to extract interface from RTAX_IFP passed in rt_addrinfo. * Interface can be specified either as an interface index (sdl_index) or * the interface name (sdl_data). * * Returns found ifp or NULL */ static struct ifnet * info_get_ifp(struct rt_addrinfo *info) { const struct sockaddr_dl *sdl; sdl = (const struct sockaddr_dl *)info->rti_info[RTAX_IFP]; if (sdl->sdl_family != AF_LINK) return (NULL); if (sdl->sdl_index != 0) return (ifnet_byindex(sdl->sdl_index)); if (sdl->sdl_nlen > 0) { char if_name[IF_NAMESIZE]; if (sdl->sdl_nlen + offsetof(struct sockaddr_dl, sdl_data) > sdl->sdl_len) return (NULL); if (sdl->sdl_nlen >= IF_NAMESIZE) return (NULL); bzero(if_name, sizeof(if_name)); memcpy(if_name, sdl->sdl_data, sdl->sdl_nlen); return (ifunit(if_name)); } return (NULL); } /* * Calculates proper ifa/ifp for the cases when gateway AF is different * from dst AF. * * Returns 0 on success. */ __noinline static int rt_getifa_family(struct rt_addrinfo *info, uint32_t fibnum) { if (info->rti_ifp == NULL) { struct ifaddr *ifa = NULL; /* * No transmit interface specified. Guess it by checking gw sa.
*/ const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY]; ifa = ifa_ifwithroute(RTF_GATEWAY, gw, gw, fibnum); if (ifa == NULL) return (ENETUNREACH); info->rti_ifp = ifa->ifa_ifp; } /* Prefer address from outgoing interface */ info->rti_ifa = ifaof_ifpforaddr(info->rti_info[RTAX_DST], info->rti_ifp); #ifdef INET if (info->rti_ifa == NULL) { /* Use first found IPv4 address */ bool loopback_ok = info->rti_ifp->if_flags & IFF_LOOPBACK; info->rti_ifa = (struct ifaddr *)in_findlocal(fibnum, loopback_ok); } #endif if (info->rti_ifa == NULL) return (ENETUNREACH); return (0); } /* * Fills in rti_ifp and rti_ifa for the provided fib. * * Assume basic consistency checks are executed by callers: * RTAX_DST exists, if RTF_GATEWAY is set, RTAX_GATEWAY exists as well. */ int rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) { const struct sockaddr *dst, *gateway, *ifaaddr; int error, flags; dst = info->rti_info[RTAX_DST]; gateway = info->rti_info[RTAX_GATEWAY]; ifaaddr = info->rti_info[RTAX_IFA]; flags = info->rti_flags; /* * ifp may be specified by sockaddr_dl * when protocol address is ambiguous. */ error = 0; /* If we have interface specified by RTAX_IFP address, try to use it */ if ((info->rti_ifp == NULL) && (info->rti_info[RTAX_IFP] != NULL)) info->rti_ifp = info_get_ifp(info); /* * If we have source address specified, try to find it * TODO: avoid enumerating all ifas on all interfaces. */ if (info->rti_ifa == NULL && ifaaddr != NULL) info->rti_ifa = ifa_ifwithaddr(ifaaddr); if ((info->rti_ifa == NULL) && ((info->rti_flags & RTF_GATEWAY) != 0) && (gateway->sa_family != dst->sa_family)) return (rt_getifa_family(info, fibnum)); if (info->rti_ifa == NULL) { const struct sockaddr *sa; /* * Most common use case for the userland-supplied routes. * * Choose sockaddr to select ifa. * -- if ifp is set -- * Order of preference: * 1) IFA address * 2) gateway address * Note: for interface routes link-level gateway address * is specified to indicate the interface index without * specifying RTF_GATEWAY. In this case, ignore gateway * Note: gateway AF may be different from dst AF. In this case, * ignore gateway * 3) final destination. * 4) if all of these fail, try to get at least link-level ifa. * -- else -- * try to lookup gateway or dst in the routing table to get ifa */ if (info->rti_info[RTAX_IFA] != NULL) sa = info->rti_info[RTAX_IFA]; else if ((info->rti_flags & RTF_GATEWAY) != 0 && gateway->sa_family == dst->sa_family) sa = gateway; else sa = dst; if (info->rti_ifp != NULL) { info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); /* Case 4 */ if (info->rti_ifa == NULL && gateway != NULL) info->rti_ifa = ifaof_ifpforaddr(gateway, info->rti_ifp); } else if (dst != NULL && gateway != NULL) info->rti_ifa = ifa_ifwithroute(flags, dst, gateway, fibnum); else if (sa != NULL) info->rti_ifa = ifa_ifwithroute(flags, sa, sa, fibnum); } if (info->rti_ifa != NULL) { if (info->rti_ifp == NULL) info->rti_ifp = info->rti_ifa->ifa_ifp; } else error = ENETUNREACH; return (error); } void rt_updatemtu(struct ifnet *ifp) { struct rib_head *rnh; int mtu; int i, j; /* * Try to update rt_mtu for all routes using this interface * Unfortunately the only way to do this is to traverse all * routing tables in all fibs/domains.
*/ for (i = 1; i <= AF_MAX; i++) { mtu = if_getmtu_family(ifp, i); for (j = 0; j < rt_numfibs; j++) { rnh = rt_tables_get_rnh(j, i); if (rnh == NULL) continue; nhops_update_ifmtu(rnh, ifp, mtu); } } } #if 0 int p_sockaddr(char *buf, int buflen, struct sockaddr *s); int rt_print(char *buf, int buflen, struct rtentry *rt); int p_sockaddr(char *buf, int buflen, struct sockaddr *s) { void *paddr = NULL; switch (s->sa_family) { case AF_INET: paddr = &((struct sockaddr_in *)s)->sin_addr; break; case AF_INET6: paddr = &((struct sockaddr_in6 *)s)->sin6_addr; break; } if (paddr == NULL) return (0); if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL) return (0); return (strlen(buf)); } int rt_print(char *buf, int buflen, struct rtentry *rt) { struct sockaddr *addr, *mask; int i = 0; addr = rt_key(rt); mask = rt_mask(rt); i = p_sockaddr(buf, buflen, addr); if (!(rt->rt_flags & RTF_HOST)) { buf[i++] = '/'; i += p_sockaddr(buf + i, buflen - i, mask); } if (rt->rt_flags & RTF_GATEWAY) { buf[i++] = '>'; i += p_sockaddr(buf + i, buflen - i, &rt->rt_nhop->gw_sa); } return (i); } #endif void rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst, const struct sockaddr *netmask) { const u_char *cp1 = (const u_char *)src; u_char *cp2 = (u_char *)dst; const u_char *cp3 = (const u_char *)netmask; u_char *cplim = cp2 + *cp3; u_char *cplim2 = cp2 + *cp1; *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ cp3 += 2; if (cplim > cplim2) cplim = cplim2; while (cp2 < cplim) *cp2++ = *cp1++ & *cp3++; if (cp2 < cplim2) bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); } /* * Announce interface address arrival/withdraw * Returns 0 on success. */ int rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) { #if defined(INET) || defined(INET6) struct sockaddr *sa = ifa->ifa_addr; struct ifnet *ifp = ifa->ifa_ifp; #endif KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %d", cmd)); KASSERT((fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); EVENTHANDLER_DIRECT_INVOKE(rt_addrmsg, ifa, cmd); #ifdef INET if (sa->sa_family == AF_INET) { char addrstr[INET_ADDRSTRLEN]; char strbuf[INET_ADDRSTRLEN + 12]; inet_ntoa_r(((struct sockaddr_in *)sa)->sin_addr, addrstr); snprintf(strbuf, sizeof(strbuf), "address=%s", addrstr); devctl_notify("IFNET", ifp->if_xname, (cmd == RTM_ADD) ? "ADDR_ADD" : "ADDR_DEL", strbuf); } #endif #ifdef INET6 if (sa->sa_family == AF_INET6) { char addrstr[INET6_ADDRSTRLEN]; char strbuf[INET6_ADDRSTRLEN + 12]; ip6_sprintf(addrstr, IFA_IN6(ifa)); snprintf(strbuf, sizeof(strbuf), "address=%s", addrstr); devctl_notify("IFNET", ifp->if_xname, (cmd == RTM_ADD) ? "ADDR_ADD" : "ADDR_DEL", strbuf); } #endif if (V_rt_add_addr_allfibs) fibnum = RT_ALL_FIBS; return (rtsock_addrmsg(cmd, ifa, fibnum)); } /* * Announce kernel-originated route addition/removal to rtsock based on @rt data. * cmd: RTM_ cmd * @rt: valid rtentry * @nh: nhop object to announce * @fibnum: fib id or RT_ALL_FIBS * * Returns 0 on success. */ int rt_routemsg(int cmd, struct rtentry *rt, struct nhop_object *nh, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %d", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); KASSERT(rt_key(rt) != NULL, (":%s: rt_key must be supplied", __func__)); return (rtsock_routemsg(cmd, rt, nh, fibnum)); } /* * Announce kernel-originated route addition/removal to rtsock based on @rt data. 
* cmd: RTM_ cmd * @info: addrinfo structure with valid data. * @fibnum: fib id or RT_ALL_FIBS * * Returns 0 on success. */ int rt_routemsg_info(int cmd, struct rt_addrinfo *info, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE || cmd == RTM_CHANGE, ("unexpected cmd %d", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); KASSERT(info->rti_info[RTAX_DST] != NULL, (":%s: RTAX_DST must be supplied", __func__)); return (rtsock_routemsg_info(cmd, info, fibnum)); } +void +rt_ifmsg(struct ifnet *ifp, int if_flags_mask) +{ + rtsock_callback_p->ifmsg_f(ifp, if_flags_mask); + netlink_callback_p->ifmsg_f(ifp, if_flags_mask); +} + /* Netlink-related callbacks needed to glue rtsock, netlink and linuxolator */ static void ignore_route_event(uint32_t fibnum, const struct rib_cmd_info *rc) { } -static struct rtbridge ignore_cb = { .route_f = ignore_route_event }; + +static void +ignore_ifmsg_event(struct ifnet *ifp, int if_flags_mask) +{ +} + +static struct rtbridge ignore_cb = { + .route_f = ignore_route_event, + .ifmsg_f = ignore_ifmsg_event, +}; void *linux_netlink_p = NULL; /* Callback pointer for Linux translator functions */ struct rtbridge *rtsock_callback_p = &ignore_cb; struct rtbridge *netlink_callback_p = &ignore_cb; diff --git a/sys/net/route.h b/sys/net/route.h index 87cbbbf1d83d..34a7b0d5c795 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -1,448 +1,448 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)route.h 8.4 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _NET_ROUTE_H_ #define _NET_ROUTE_H_ #include /* * Kernel resident routing tables. * * The routing tables are initialized when interface addresses * are set by making entries for all directly connected interfaces. */ /* * Struct route consists of a destination address, * a route entry pointer, and a link-layer prepend data pointer along * with its length.
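 *
 * For example (an illustrative sketch; the container is hypothetical),
 * a connection-like object embeds the structure to cache its nexthop
 * and drops the cache when it goes stale:
 *
 *	struct my_flow {		// hypothetical container
 *		struct route f_ro;	// cached nexthop, ro_dst prefilled
 *	};
 *	...
 *	RO_INVALIDATE_CACHE(&flow->f_ro);	// on invalidation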
*/ struct route { struct nhop_object *ro_nh; struct llentry *ro_lle; /* * ro_prepend and ro_plen are only used for bpf to pass in a * preformed header. They are not cacheable. */ char *ro_prepend; uint16_t ro_plen; uint16_t ro_flags; uint16_t ro_mtu; /* saved ro_rt mtu */ uint16_t spare; struct sockaddr ro_dst; }; #define RT_L2_ME_BIT 2 /* dst L2 addr is our address */ #define RT_MAY_LOOP_BIT 3 /* dst may require loop copy */ #define RT_HAS_HEADER_BIT 4 /* mbuf already has its header prepended */ #define RT_L2_ME (1 << RT_L2_ME_BIT) /* 0x0004 */ #define RT_MAY_LOOP (1 << RT_MAY_LOOP_BIT) /* 0x0008 */ #define RT_HAS_HEADER (1 << RT_HAS_HEADER_BIT) /* 0x0010 */ #define RT_REJECT 0x0020 /* Destination is reject */ #define RT_BLACKHOLE 0x0040 /* Destination is blackhole */ #define RT_HAS_GW 0x0080 /* Destination has GW */ #define RT_LLE_CACHE 0x0100 /* Cache link layer */ struct rt_metrics { u_long rmx_locks; /* Kernel must leave these values alone */ u_long rmx_mtu; /* MTU for this path */ u_long rmx_hopcount; /* max hops expected */ u_long rmx_expire; /* lifetime for route, e.g. redirect */ u_long rmx_recvpipe; /* inbound delay-bandwidth product */ u_long rmx_sendpipe; /* outbound delay-bandwidth product */ u_long rmx_ssthresh; /* outbound gateway buffer limit */ u_long rmx_rtt; /* estimated round trip time */ u_long rmx_rttvar; /* estimated rtt variance */ u_long rmx_pksent; /* packets sent using this route */ u_long rmx_weight; /* route weight */ u_long rmx_nhidx; /* route nexthop index */ u_long rmx_filler[2]; /* will be used for T/TCP later */ }; /* * rmx_rtt and rmx_rttvar are stored as microseconds; */ #define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */ /* lle state is exported in rmx_state rt_metrics field */ #define rmx_state rmx_weight /* default route weight */ #define RT_DEFAULT_WEIGHT 1 #define RT_MAX_WEIGHT 16777215 /* 3 bytes */ /* * Keep a generation count of the routing table, incremented on route addition, * so we can invalidate caches. This is accessed without a lock, as precision * is not required.
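 *
 * For example (an illustrative sketch; 'fibnum', 'dst' and the stored
 * cookie are hypothetical), a consumer validates its cached nexthop by
 * comparing generations:
 *
 *	rt_gen_t cookie = RT_GEN(fibnum, dst->sa_family);
 *	if (cookie != cached_cookie) {
 *		// generation moved: drop the cached nexthop,
 *		// re-resolve, and remember the new cookie
 *	}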
*/ typedef volatile u_int rt_gen_t; /* tree generation (for adds) */ #define RT_GEN(fibnum, af) rt_tables_get_gen(fibnum, af) #define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */ #define RT_ALL_FIBS -1 /* Announce event for every fib */ #ifdef _KERNEL VNET_DECLARE(uint32_t, _rt_numfibs); /* number of existing route tables */ #define V_rt_numfibs VNET(_rt_numfibs) /* temporary compat arg */ #define rt_numfibs V_rt_numfibs VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */ #define V_rt_add_addr_allfibs VNET(rt_add_addr_allfibs) /* Calculate flowid for locally-originated packets */ #define V_fib_hash_outbound VNET(fib_hash_outbound) VNET_DECLARE(u_int, fib_hash_outbound); /* Outbound flowid generation rules */ #ifdef RSS #define fib4_calc_packet_hash xps_proto_software_hash_v4 #define fib6_calc_packet_hash xps_proto_software_hash_v6 #define CALC_FLOWID_OUTBOUND_SENDTO true #ifdef ROUTE_MPATH #define CALC_FLOWID_OUTBOUND V_fib_hash_outbound #else #define CALC_FLOWID_OUTBOUND false #endif #else /* !RSS */ #define fib4_calc_packet_hash fib4_calc_software_hash #define fib6_calc_packet_hash fib6_calc_software_hash #ifdef ROUTE_MPATH #define CALC_FLOWID_OUTBOUND_SENDTO V_fib_hash_outbound #define CALC_FLOWID_OUTBOUND V_fib_hash_outbound #else #define CALC_FLOWID_OUTBOUND_SENDTO false #define CALC_FLOWID_OUTBOUND false #endif #endif /* RSS */ #endif /* _KERNEL */ /* * We distinguish between routes to hosts and routes to networks, * preferring the former if available. For each route we infer * the interface to use from the gateway address supplied when * the route was entered. Routes that forward packets through * gateways are marked so that the output routines know to address the * gateway rather than the ultimate destination. */ #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ #define RTF_HOST 0x4 /* host entry (net otherwise) */ #define RTF_REJECT 0x8 /* host or net unreachable */ #define RTF_DYNAMIC 0x10 /* created dynamically (by redirect) */ #define RTF_MODIFIED 0x20 /* modified dynamically (by redirect) */ #define RTF_DONE 0x40 /* message confirmed */ /* 0x80 unused, was RTF_DELCLONE */ /* 0x100 unused, was RTF_CLONING */ #define RTF_XRESOLVE 0x200 /* external daemon resolves name */ #define RTF_LLINFO 0x400 /* DEPRECATED - exists ONLY for backward compatibility */ #define RTF_LLDATA 0x400 /* used by apps to add/del L2 entries */ #define RTF_STATIC 0x800 /* manually added */ #define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ #define RTF_PROTO2 0x4000 /* protocol specific routing flag */ #define RTF_PROTO1 0x8000 /* protocol specific routing flag */ /* 0x10000 unused, was RTF_PRCLONING */ /* 0x20000 unused, was RTF_WASCLONED */ #define RTF_PROTO3 0x40000 /* protocol specific routing flag */ #define RTF_FIXEDMTU 0x80000 /* MTU was explicitly specified */ #define RTF_PINNED 0x100000 /* route is immutable */ #define RTF_LOCAL 0x200000 /* route represents a local address */ #define RTF_BROADCAST 0x400000 /* route represents a bcast address */ #define RTF_MULTICAST 0x800000 /* route represents a mcast address */ /* 0x8000000 and up unassigned */ #define RTF_STICKY 0x10000000 /* always route dst->src */ /* 0x40000000 unused, was RTF_RNH_LOCKED */ #define RTF_GWFLAG_COMPAT 0x80000000 /* a compatibility bit for interacting with existing routing apps */ /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. 
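 *
 * For example (an illustrative sketch), a change request would be folded
 * into an existing route's flags as:
 *
 *	new_flags = (old_flags & ~RTF_FMASK) | (req_flags & RTF_FMASK);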
*/ #define RTF_FMASK \ (RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \ RTF_REJECT | RTF_STATIC | RTF_STICKY) /* * fib_ nexthop API flags. */ /* Consumer-visible nexthop info flags */ #define NHF_MULTIPATH 0x0008 /* Nexthop is a nexthop group */ #define NHF_REJECT 0x0010 /* RTF_REJECT */ #define NHF_BLACKHOLE 0x0020 /* RTF_BLACKHOLE */ #define NHF_REDIRECT 0x0040 /* RTF_DYNAMIC|RTF_MODIFIED */ #define NHF_DEFAULT 0x0080 /* Default route */ #define NHF_BROADCAST 0x0100 /* RTF_BROADCAST */ #define NHF_GATEWAY 0x0200 /* RTF_GATEWAY */ #define NHF_HOST 0x0400 /* RTF_HOST */ /* Nexthop request flags */ #define NHR_NONE 0x00 /* empty flags field */ #define NHR_REF 0x01 /* reference nexthop */ #define NHR_NODEFAULT 0x02 /* uRPF: do not consider default route */ /* Control plane route request flags */ #define NHR_COPY 0x100 /* Copy rte data */ #define NHR_UNLOCKED 0x200 /* Do not lock table */ /* * Routing statistics. */ struct rtstat { uint64_t rts_badredirect; /* bogus redirect calls */ uint64_t rts_dynamic; /* routes created by redirects */ uint64_t rts_newgateway; /* routes modified by redirects */ uint64_t rts_unreach; /* lookups which failed */ uint64_t rts_wildcard; /* lookups satisfied by a wildcard */ uint64_t rts_nh_idx_alloc_failure; /* nexthop index alloc failure*/ uint64_t rts_nh_alloc_failure; /* nexthop allocation failure*/ uint64_t rts_add_failure; /* # of route addition failures */ uint64_t rts_add_retry; /* # of route addition retries */ uint64_t rts_del_failure; /* # of route deletion failure */ uint64_t rts_del_retry; /* # of route deletion retries */ }; /* * Structures for routing messages. */ struct rt_msghdr { u_short rtm_msglen; /* to skip over non-understood messages */ u_char rtm_version; /* future binary compatibility */ u_char rtm_type; /* message type */ u_short rtm_index; /* index for associated ifp */ u_short _rtm_spare1; int rtm_flags; /* flags, incl. kern & message, e.g. DONE */ int rtm_addrs; /* bitmask identifying sockaddrs in msg */ pid_t rtm_pid; /* identify sender */ int rtm_seq; /* for sender to identify action */ int rtm_errno; /* why failed */ int rtm_fmask; /* bitmask used in RTM_CHANGE message */ u_long rtm_inits; /* which metrics we are initializing */ struct rt_metrics rtm_rmx; /* metrics themselves */ }; #define RTM_VERSION 5 /* Up the ante and ignore older versions */ /* * Message types. * * The format for each message is annotated below using the following * identifiers: * * (1) struct rt_msghdr * (2) struct ifa_msghdr * (3) struct if_msghdr * (4) struct ifma_msghdr * (5) struct if_announcemsghdr * */ #define RTM_ADD 0x1 /* (1) Add Route */ #define RTM_DELETE 0x2 /* (1) Delete Route */ #define RTM_CHANGE 0x3 /* (1) Change Metrics or flags */ #define RTM_GET 0x4 /* (1) Report Metrics */ #define RTM_LOSING 0x5 /* (1) Kernel Suspects Partitioning */ #define RTM_REDIRECT 0x6 /* (1) Told to use different route */ #define RTM_MISS 0x7 /* (1) Lookup failed on this address */ #define RTM_LOCK 0x8 /* (1) fix specified metrics */ /* 0x9 */ /* 0xa */ #define RTM_RESOLVE 0xb /* (1) req to resolve dst to LL addr */ #define RTM_NEWADDR 0xc /* (2) address being added to iface */ #define RTM_DELADDR 0xd /* (2) address being removed from iface */ #define RTM_IFINFO 0xe /* (3) iface going up/down etc.
*/ #define RTM_NEWMADDR 0xf /* (4) mcast group membership being added to if */ #define RTM_DELMADDR 0x10 /* (4) mcast group membership being deleted */ #define RTM_IFANNOUNCE 0x11 /* (5) iface arrival/departure */ #define RTM_IEEE80211 0x12 /* (5) IEEE80211 wireless event */ /* * Bitmask values for rtm_inits and rmx_locks. */ #define RTV_MTU 0x1 /* init or lock _mtu */ #define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */ #define RTV_EXPIRE 0x4 /* init or lock _expire */ #define RTV_RPIPE 0x8 /* init or lock _recvpipe */ #define RTV_SPIPE 0x10 /* init or lock _sendpipe */ #define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */ #define RTV_RTT 0x40 /* init or lock _rtt */ #define RTV_RTTVAR 0x80 /* init or lock _rttvar */ #define RTV_WEIGHT 0x100 /* init or lock _weight */ /* * Bitmask values for rtm_addrs. */ #define RTA_DST 0x1 /* destination sockaddr present */ #define RTA_GATEWAY 0x2 /* gateway sockaddr present */ #define RTA_NETMASK 0x4 /* netmask sockaddr present */ #define RTA_GENMASK 0x8 /* cloning mask sockaddr present */ #define RTA_IFP 0x10 /* interface name sockaddr present */ #define RTA_IFA 0x20 /* interface addr sockaddr present */ #define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */ #define RTA_BRD 0x80 /* for NEWADDR, broadcast or p-p dest addr */ /* * Index offsets for sockaddr array for alternate internal encoding. */ #define RTAX_DST 0 /* destination sockaddr present */ #define RTAX_GATEWAY 1 /* gateway sockaddr present */ #define RTAX_NETMASK 2 /* netmask sockaddr present */ #define RTAX_GENMASK 3 /* cloning mask sockaddr present */ #define RTAX_IFP 4 /* interface name sockaddr present */ #define RTAX_IFA 5 /* interface addr sockaddr present */ #define RTAX_AUTHOR 6 /* sockaddr for author of redirect */ #define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */ #define RTAX_MAX 8 /* size of array to allocate */ struct rtentry; struct nhop_object; typedef int rib_filter_f_t(const struct rtentry *, const struct nhop_object *, void *); struct rt_addrinfo { int rti_addrs; /* Sockaddr RTA_ bitmask */ int rti_flags; /* Route RTF_ flags */ struct sockaddr *rti_info[RTAX_MAX]; /* Sockaddr data */ struct ifaddr *rti_ifa; /* value of rt_ifa addr */ struct ifnet *rti_ifp; /* route interface */ rib_filter_f_t *rti_filter; /* filter function */ void *rti_filterdata; /* filter parameters */ u_long rti_mflags; /* metrics RTV_ flags */ u_long rti_spare; /* Will be used for fib */ struct rt_metrics *rti_rmx; /* Pointer to route metrics */ }; /* * This macro returns the size of a struct sockaddr when passed * through a routing socket. Basically we round up sa_len to * a multiple of sizeof(long), with a minimum of sizeof(long). * The case sa_len == 0 should only apply to empty structures. */ #define SA_SIZE(sa) \ ( (((struct sockaddr *)(sa))->sa_len == 0) ?
\ sizeof(long) : \ 1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(long) - 1) ) ) #define sa_equal(a, b) ( \ (((const struct sockaddr *)(a))->sa_len == ((const struct sockaddr *)(b))->sa_len) && \ (bcmp((a), (b), ((const struct sockaddr *)(b))->sa_len) == 0)) #ifdef _KERNEL #define RT_LINK_IS_UP(ifp) (!((ifp)->if_capabilities & IFCAP_LINKSTATE) \ || (ifp)->if_link_state == LINK_STATE_UP) #define RO_NHFREE(_ro) do { \ if ((_ro)->ro_nh) { \ NH_FREE((_ro)->ro_nh); \ (_ro)->ro_nh = NULL; \ } \ } while (0) #define RO_INVALIDATE_CACHE(ro) do { \ if ((ro)->ro_lle != NULL) { \ LLE_FREE((ro)->ro_lle); \ (ro)->ro_lle = NULL; \ } \ if ((ro)->ro_nh != NULL) { \ NH_FREE((ro)->ro_nh); \ (ro)->ro_nh = NULL; \ } \ } while (0) #define RO_GET_FAMILY(ro, dst) ((ro) != NULL && \ (ro)->ro_flags & RT_HAS_GW \ ? (ro)->ro_dst.sa_family : (dst)->sa_family) /* * Validate a cached route based on a supplied cookie. If there is an * out-of-date cache, simply free it. Update the generation number * for the new allocation. */ #define NH_VALIDATE(ro, cookiep, fibnum) do { \ rt_gen_t cookie = RT_GEN(fibnum, (ro)->ro_dst.sa_family); \ if (*(cookiep) != cookie) { \ RO_INVALIDATE_CACHE(ro); \ *(cookiep) = cookie; \ } \ } while (0) struct ifmultiaddr; struct rib_head; void rt_ieee80211msg(struct ifnet *, int, void *, size_t); -void rt_ifmsg(struct ifnet *); +void rt_ifmsg(struct ifnet *, int); void rt_missmsg(int, struct rt_addrinfo *, int, int); void rt_missmsg_fib(int, struct rt_addrinfo *, int, int, int); int rt_addrmsg(int, struct ifaddr *, int); int rt_routemsg(int, struct rtentry *, struct nhop_object *, int); int rt_routemsg_info(int, struct rt_addrinfo *, int); void rt_newmaddrmsg(int, struct ifmultiaddr *); void rt_maskedcopy(const struct sockaddr *, struct sockaddr *, const struct sockaddr *); struct rib_head *rt_table_init(int, int, u_int); void rt_table_destroy(struct rib_head *); u_int rt_tables_get_gen(uint32_t table, sa_family_t family); struct sockaddr *rtsock_fix_netmask(const struct sockaddr *dst, const struct sockaddr *smask, struct sockaddr_storage *dmask); void rt_updatemtu(struct ifnet *); void rt_flushifroutes(struct ifnet *ifp); /* XXX MRT NEW VERSIONS THAT USE FIBs * For now the protocol independent versions are the same as the AF_INET ones * but this will change. */ int rtioctl_fib(u_long, caddr_t, u_int); /* New API */ void rib_flush_routes_family(int family); struct nhop_object *rib_lookup(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags, uint32_t flowid); const char *rib_print_family(int family); #endif #endif diff --git a/sys/net/route/route_ctl.h b/sys/net/route/route_ctl.h index d150da6264d4..e8560e681ddb 100644 --- a/sys/net/route/route_ctl.h +++ b/sys/net/route/route_ctl.h @@ -1,199 +1,201 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2020 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * This header file contains public functions and structures used for * routing table manipulations. */ #ifndef _NET_ROUTE_ROUTE_CTL_H_ #define _NET_ROUTE_ROUTE_CTL_H_ struct rib_cmd_info { uint8_t rc_cmd; /* RTM_ADD|RTM_DEL|RTM_CHANGE */ uint8_t spare[3]; uint32_t rc_nh_weight; /* new nhop weight */ struct rtentry *rc_rt; /* Target entry */ struct nhop_object *rc_nh_old; /* Target nhop OR mpath */ struct nhop_object *rc_nh_new; /* Target nhop OR mpath */ }; struct route_nhop_data { union { struct nhop_object *rnd_nhop; struct nhgrp_object *rnd_nhgrp; }; uint32_t rnd_weight; }; int rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen, struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc); int rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen, rib_filter_f_t *filter_func, void *filter_arg, int op_flags, struct rib_cmd_info *rc); int rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen, const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc); /* operation flags */ #define RTM_F_CREATE 0x01 #define RTM_F_EXCL 0x02 #define RTM_F_REPLACE 0x04 #define RTM_F_APPEND 0x08 #define RTM_F_FORCE 0x10 int rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc); int rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc); int rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc); int rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info, struct rib_cmd_info *rc); int rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *_data); int rib_handle_ifaddr_info(uint32_t fibnum, int cmd, struct rt_addrinfo *info); int rib_add_default_route(uint32_t fibnum, int family, struct ifnet *ifp, struct sockaddr *gw, struct rib_cmd_info *rc); typedef void route_notification_t(const struct rib_cmd_info *rc, void *); void rib_decompose_notification(const struct rib_cmd_info *rc, route_notification_t *cb, void *cbdata); int rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *author, struct ifnet *ifp, int flags, int expire_sec); /* common flags for the functions below */ #define RIB_FLAG_WLOCK 0x01 /* Need exclusive rnh lock */ #define RIB_FLAG_LOCKED 0x02 /* Do not explicitly acquire rnh lock */ enum rib_walk_hook { RIB_WALK_HOOK_PRE, /* Hook is called before iteration */ RIB_WALK_HOOK_POST, /* Hook is called after iteration */ }; typedef int rib_walktree_f_t(struct rtentry *, void *); typedef void rib_walk_hook_f_t(struct rib_head *rnh, enum rib_walk_hook stage, void *arg); void rib_walk(uint32_t fibnum, int af, bool wlock, rib_walktree_f_t *wa_f, void *arg); void rib_walk_ext(uint32_t fibnum, int af, bool wlock, rib_walktree_f_t *wa_f, 
rib_walk_hook_f_t *hook_f, void *arg); void rib_walk_ext_internal(struct rib_head *rnh, bool wlock, rib_walktree_f_t *wa_f, rib_walk_hook_f_t *hook_f, void *arg); void rib_walk_ext_locked(struct rib_head *rnh, rib_walktree_f_t *wa_f, rib_walk_hook_f_t *hook_f, void *arg); void rib_walk_from(uint32_t fibnum, int family, uint32_t flags, struct sockaddr *prefix, struct sockaddr *mask, rib_walktree_f_t *wa_f, void *arg); void rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg, bool report); void rib_foreach_table_walk(int family, bool wlock, rib_walktree_f_t *wa_f, rib_walk_hook_f_t *hook_f, void *arg); void rib_foreach_table_walk_del(int family, rib_filter_f_t *filter_f, void *arg); struct nhop_object; struct nhgrp_object; const struct rtentry *rib_lookup_prefix(uint32_t fibnum, int family, const struct sockaddr *dst, const struct sockaddr *netmask, struct route_nhop_data *rnd); const struct rtentry *rib_lookup_lpm(uint32_t fibnum, int family, const struct sockaddr *dst, struct route_nhop_data *rnd); /* rtentry accessors */ bool rt_is_host(const struct rtentry *rt); sa_family_t rt_get_family(const struct rtentry *); struct nhop_object *rt_get_raw_nhop(const struct rtentry *rt); void rt_get_rnd(const struct rtentry *rt, struct route_nhop_data *rnd); #ifdef INET struct in_addr; void rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr, int *plen, uint32_t *pscopeid); void rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr, struct in_addr *pmask, uint32_t *pscopeid); struct rtentry *rt_get_inet_parent(uint32_t fibnum, struct in_addr addr, int plen); #endif #ifdef INET6 struct in6_addr; void rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr, int *plen, uint32_t *pscopeid); void rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr, struct in6_addr *pmask, uint32_t *pscopeid); struct rtentry *rt_get_inet6_parent(uint32_t fibnum, const struct in6_addr *paddr, int plen); struct in6_addr; void ip6_writemask(struct in6_addr *addr6, uint8_t mask); #endif /* Nexthops */ uint32_t nhops_get_count(struct rib_head *rh); /* Multipath */ struct weightened_nhop; const struct weightened_nhop *nhgrp_get_nhops(const struct nhgrp_object *nhg, uint32_t *pnum_nhops); uint32_t nhgrp_get_count(struct rib_head *rh); int nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops, uint32_t uidx, struct nhgrp_object **pnhg); /* Route subscriptions */ enum rib_subscription_type { RIB_NOTIFY_IMMEDIATE, RIB_NOTIFY_DELAYED }; struct rib_subscription; typedef void rib_subscription_cb_t(struct rib_head *rnh, struct rib_cmd_info *rc, void *arg); struct rib_subscription *rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg, enum rib_subscription_type type, bool waitok); struct rib_subscription *rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, enum rib_subscription_type type, bool waitok); struct rib_subscription *rib_subscribe_locked(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, enum rib_subscription_type type); void rib_unsubscribe(struct rib_subscription *rs); void rib_unsubscribe_locked(struct rib_subscription *rs); void rib_notify(struct rib_head *rnh, enum rib_subscription_type type, struct rib_cmd_info *rc); /* Event bridge */ typedef void route_event_f(uint32_t fibnum, const struct rib_cmd_info *rc); +typedef void ifmsg_event_f(struct ifnet *ifp, int if_flags_mask); struct rtbridge{ route_event_f *route_f; + 
ifmsg_event_f *ifmsg_f; }; extern struct rtbridge *rtsock_callback_p; extern struct rtbridge *netlink_callback_p; #endif diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 99d962c972cb..a67643e724db 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -1,2711 +1,2715 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)rtsock.c 8.7 (Berkeley) 10/12/95 * $FreeBSD$ */ #include "opt_ddb.h" #include "opt_route.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif #include #define DEBUG_MOD_NAME rtsock #define DEBUG_MAX_LEVEL LOG_DEBUG #include _DECLARE_DEBUG(LOG_INFO); #ifdef COMPAT_FREEBSD32 #include #include struct if_msghdr32 { uint16_t ifm_msglen; uint8_t ifm_version; uint8_t ifm_type; int32_t ifm_addrs; int32_t ifm_flags; uint16_t ifm_index; uint16_t _ifm_spare1; struct if_data ifm_data; }; struct if_msghdrl32 { uint16_t ifm_msglen; uint8_t ifm_version; uint8_t ifm_type; int32_t ifm_addrs; int32_t ifm_flags; uint16_t ifm_index; uint16_t _ifm_spare1; uint16_t ifm_len; uint16_t ifm_data_off; uint32_t _ifm_spare2; struct if_data ifm_data; }; struct ifa_msghdrl32 { uint16_t ifam_msglen; uint8_t ifam_version; uint8_t ifam_type; int32_t ifam_addrs; int32_t ifam_flags; uint16_t ifam_index; uint16_t _ifam_spare1; uint16_t ifam_len; uint16_t ifam_data_off; int32_t ifam_metric; struct if_data ifam_data; }; #define SA_SIZE32(sa) \ ( (((struct sockaddr *)(sa))->sa_len == 0) ? 
\ sizeof(int) : \ 1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(int) - 1) ) ) #endif /* COMPAT_FREEBSD32 */ struct linear_buffer { char *base; /* Base allocated memory pointer */ uint32_t offset; /* Currently used offset */ uint32_t size; /* Total buffer size */ }; #define SCRATCH_BUFFER_SIZE 1024 #define RTS_PID_LOG(_l, _fmt, ...) RT_LOG_##_l(_l, "PID %d: " _fmt, curproc ? curproc->p_pid : 0, ## __VA_ARGS__) MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); /* NB: these are not modified */ static struct sockaddr route_src = { 2, PF_ROUTE, }; static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, }; /* These are external hooks for CARP. */ int (*carp_get_vhid_p)(struct ifaddr *); /* * Used by rtsock callback code to decide whether to filter the update * notification to a socket bound to a particular FIB. */ #define RTS_FILTER_FIB M_PROTO8 /* * Used to store address family of the notification. */ #define m_rtsock_family m_pkthdr.PH_loc.eight[0] struct rcb { LIST_ENTRY(rcb) list; struct socket *rcb_socket; sa_family_t rcb_family; }; typedef struct { LIST_HEAD(, rcb) cblist; int ip_count; /* attached w/ AF_INET */ int ip6_count; /* attached w/ AF_INET6 */ int any_count; /* total attached */ } route_cb_t; VNET_DEFINE_STATIC(route_cb_t, route_cb); #define V_route_cb VNET(route_cb) struct mtx rtsock_mtx; MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF); #define RTSOCK_LOCK() mtx_lock(&rtsock_mtx) #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx) #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED) SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); struct walkarg { int family; int w_tmemsize; int w_op, w_arg; caddr_t w_tmem; struct sysctl_req *w_req; struct sockaddr *dst; struct sockaddr *mask; }; static void rts_input(struct mbuf *m); static struct mbuf *rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo); static int rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen); static int rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo); static int cleanup_xaddrs(struct rt_addrinfo *info, struct linear_buffer *lb); static int sysctl_dumpentry(struct rtentry *rt, void *vw); static int sysctl_dumpnhop(struct rtentry *rt, struct nhop_object *nh, uint32_t weight, struct walkarg *w); static int sysctl_iflist(int af, struct walkarg *w); static int sysctl_ifmalist(int af, struct walkarg *w); static void rt_getmetrics(const struct rtentry *rt, const struct nhop_object *nh, struct rt_metrics *out); static void rt_dispatch(struct mbuf *, sa_family_t); static void rt_ifannouncemsg(struct ifnet *ifp, int what); static int handle_rtm_get(struct rt_addrinfo *info, u_int fibnum, struct rt_msghdr *rtm, struct rib_cmd_info *rc); static int update_rtm_from_rc(struct rt_addrinfo *info, struct rt_msghdr **prtm, int alloc_len, struct rib_cmd_info *rc, struct nhop_object *nh); static void send_rtm_reply(struct socket *so, struct rt_msghdr *rtm, struct mbuf *m, sa_family_t saf, u_int fibnum, int rtm_errno); static bool can_export_rte(struct ucred *td_ucred, bool rt_is_host, const struct sockaddr *rt_dst); static void rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc); +static void rtsock_ifmsg(struct ifnet *ifp, int if_flags_mask); static struct netisr_handler rtsock_nh = { .nh_name = "rtsock", .nh_handler = rts_input, .nh_proto = NETISR_ROUTE, .nh_policy = NETISR_POLICY_SOURCE, }; static int sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&rtsock_nh, 
&qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&rtsock_nh, qlimit)); } SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_route_netisr_maxqlen, "I", "maximum routing socket dispatch queue length"); static void vnet_rts_init(void) { int tmp; if (IS_DEFAULT_VNET(curvnet)) { if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp)) rtsock_nh.nh_qlimit = tmp; netisr_register(&rtsock_nh); } #ifdef VIMAGE else netisr_register_vnet(&rtsock_nh); #endif } VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, vnet_rts_init, 0); #ifdef VIMAGE static void vnet_rts_uninit(void) { netisr_unregister_vnet(&rtsock_nh); } VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, vnet_rts_uninit, 0); #endif static void report_route_event(const struct rib_cmd_info *rc, void *_cbdata) { uint32_t fibnum = (uint32_t)(uintptr_t)_cbdata; struct nhop_object *nh; nh = rc->rc_cmd == RTM_DELETE ? rc->rc_nh_old : rc->rc_nh_new; rt_routemsg(rc->rc_cmd, rc->rc_rt, nh, fibnum); } static void rts_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc) { #ifdef ROUTE_MPATH if ((rc->rc_nh_new && NH_IS_NHGRP(rc->rc_nh_new)) || (rc->rc_nh_old && NH_IS_NHGRP(rc->rc_nh_old))) { rib_decompose_notification(rc, report_route_event, (void *)(uintptr_t)fibnum); } else #endif report_route_event(rc, (void *)(uintptr_t)fibnum); } -static struct rtbridge rtsbridge = { .route_f = rts_handle_route_event }; +static struct rtbridge rtsbridge = { + .route_f = rts_handle_route_event, + .ifmsg_f = rtsock_ifmsg, +}; static struct rtbridge *rtsbridge_orig_p; static void rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc) { netlink_callback_p->route_f(fibnum, rc); } static void rtsock_init(void) { rtsbridge_orig_p = rtsock_callback_p; rtsock_callback_p = &rtsbridge; } SYSINIT(rtsock_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtsock_init, NULL); static void rts_handle_ifnet_arrival(void *arg __unused, struct ifnet *ifp) { rt_ifannouncemsg(ifp, IFAN_ARRIVAL); } EVENTHANDLER_DEFINE(ifnet_arrival_event, rts_handle_ifnet_arrival, NULL, 0); static void rts_handle_ifnet_departure(void *arg __unused, struct ifnet *ifp) { rt_ifannouncemsg(ifp, IFAN_DEPARTURE); } EVENTHANDLER_DEFINE(ifnet_departure_event, rts_handle_ifnet_departure, NULL, 0); static void rts_append_data(struct socket *so, struct mbuf *m) { if (sbappendaddr(&so->so_rcv, &route_src, m, NULL) == 0) { soroverflow(so); m_freem(m); } else sorwakeup(so); } static void rts_input(struct mbuf *m) { struct rcb *rcb; struct socket *last; last = NULL; RTSOCK_LOCK(); LIST_FOREACH(rcb, &V_route_cb.cblist, list) { if (rcb->rcb_family != AF_UNSPEC && rcb->rcb_family != m->m_rtsock_family) continue; if ((m->m_flags & RTS_FILTER_FIB) && M_GETFIB(m) != rcb->rcb_socket->so_fibnum) continue; if (last != NULL) { struct mbuf *n; n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) rts_append_data(last, n); } last = rcb->rcb_socket; } if (last != NULL) rts_append_data(last, m); else m_freem(m); RTSOCK_UNLOCK(); } static void rts_close(struct socket *so) { soisdisconnected(so); } static SYSCTL_NODE(_net, OID_AUTO, rtsock, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Routing socket infrastructure"); static u_long rts_sendspace = 8192; SYSCTL_ULONG(_net_rtsock, OID_AUTO, sendspace, CTLFLAG_RW, &rts_sendspace, 0, "Default routing socket send space"); static u_long rts_recvspace = 8192; SYSCTL_ULONG(_net_rtsock, 
OID_AUTO, recvspace, CTLFLAG_RW, &rts_recvspace, 0, "Default routing socket receive space"); static int rts_attach(struct socket *so, int proto, struct thread *td) { struct rcb *rcb; int error; error = soreserve(so, rts_sendspace, rts_recvspace); if (error) return (error); rcb = malloc(sizeof(*rcb), M_PCB, M_WAITOK); rcb->rcb_socket = so; rcb->rcb_family = proto; so->so_pcb = rcb; so->so_fibnum = td->td_proc->p_fibnum; so->so_options |= SO_USELOOPBACK; RTSOCK_LOCK(); LIST_INSERT_HEAD(&V_route_cb.cblist, rcb, list); switch (proto) { case AF_INET: V_route_cb.ip_count++; break; case AF_INET6: V_route_cb.ip6_count++; break; } V_route_cb.any_count++; RTSOCK_UNLOCK(); soisconnected(so); return (0); } static void rts_detach(struct socket *so) { struct rcb *rcb = so->so_pcb; RTSOCK_LOCK(); LIST_REMOVE(rcb, list); switch(rcb->rcb_family) { case AF_INET: V_route_cb.ip_count--; break; case AF_INET6: V_route_cb.ip6_count--; break; } V_route_cb.any_count--; RTSOCK_UNLOCK(); free(rcb, M_PCB); so->so_pcb = NULL; } static int rts_shutdown(struct socket *so) { socantsendmore(so); return (0); } #ifndef _SOCKADDR_UNION_DEFINED #define _SOCKADDR_UNION_DEFINED /* * The union of all possible address formats we handle. */ union sockaddr_union { struct sockaddr sa; struct sockaddr_in sin; struct sockaddr_in6 sin6; }; #endif /* _SOCKADDR_UNION_DEFINED */ static int rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp, struct nhop_object *nh, union sockaddr_union *saun, struct ucred *cred) { #if defined(INET) || defined(INET6) struct epoch_tracker et; #endif /* First, see if the returned address is part of the jail. */ if (prison_if(cred, nh->nh_ifa->ifa_addr) == 0) { info->rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr; return (0); } switch (info->rti_info[RTAX_DST]->sa_family) { #ifdef INET case AF_INET: { struct in_addr ia; struct ifaddr *ifa; int found; found = 0; /* * Try to find an address on the given outgoing interface * that belongs to the jail. */ NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa; sa = ifa->ifa_addr; if (sa->sa_family != AF_INET) continue; ia = ((struct sockaddr_in *)sa)->sin_addr; if (prison_check_ip4(cred, &ia) == 0) { found = 1; break; } } NET_EPOCH_EXIT(et); if (!found) { /* * As a last resort return the 'default' jail address. */ ia = ((struct sockaddr_in *)nh->nh_ifa->ifa_addr)-> sin_addr; if (prison_get_ip4(cred, &ia) != 0) return (ESRCH); } bzero(&saun->sin, sizeof(struct sockaddr_in)); saun->sin.sin_len = sizeof(struct sockaddr_in); saun->sin.sin_family = AF_INET; saun->sin.sin_addr.s_addr = ia.s_addr; info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin; break; } #endif #ifdef INET6 case AF_INET6: { struct in6_addr ia6; struct ifaddr *ifa; int found; found = 0; /* * Try to find an address on the given outgoing interface * that belongs to the jail. */ NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa; sa = ifa->ifa_addr; if (sa->sa_family != AF_INET6) continue; bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr, &ia6, sizeof(struct in6_addr)); if (prison_check_ip6(cred, &ia6) == 0) { found = 1; break; } } NET_EPOCH_EXIT(et); if (!found) { /* * As a last resort return the 'default' jail address. 
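* For illustration (addresses hypothetical): a jail limited to 2001:db8::10 that queries a route out an interface carrying only other addresses is still answered with 2001:db8::10 as RTAX_IFA, provided prison_get_ip6() succeeds.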
*/ ia6 = ((struct sockaddr_in6 *)nh->nh_ifa->ifa_addr)-> sin6_addr; if (prison_get_ip6(cred, &ia6) != 0) return (ESRCH); } bzero(&saun->sin6, sizeof(struct sockaddr_in6)); saun->sin6.sin6_len = sizeof(struct sockaddr_in6); saun->sin6.sin6_family = AF_INET6; bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr)); if (sa6_recoverscope(&saun->sin6) != 0) return (ESRCH); info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6; break; } #endif default: return (ESRCH); } return (0); } static int fill_blackholeinfo(struct rt_addrinfo *info, union sockaddr_union *saun) { struct ifaddr *ifa; sa_family_t saf; if (V_loif == NULL) { RTS_PID_LOG(LOG_INFO, "Unable to add blackhole/reject nhop without loopback"); return (ENOTSUP); } info->rti_ifp = V_loif; saf = info->rti_info[RTAX_DST]->sa_family; CK_STAILQ_FOREACH(ifa, &info->rti_ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == saf) { info->rti_ifa = ifa; break; } } if (info->rti_ifa == NULL) { RTS_PID_LOG(LOG_INFO, "Unable to find ifa for blackhole/reject nhop"); return (ENOTSUP); } bzero(saun, sizeof(union sockaddr_union)); switch (saf) { #ifdef INET case AF_INET: saun->sin.sin_family = AF_INET; saun->sin.sin_len = sizeof(struct sockaddr_in); saun->sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); break; #endif #ifdef INET6 case AF_INET6: saun->sin6.sin6_family = AF_INET6; saun->sin6.sin6_len = sizeof(struct sockaddr_in6); saun->sin6.sin6_addr = in6addr_loopback; break; #endif default: RTS_PID_LOG(LOG_INFO, "unsupported family: %d", saf); return (ENOTSUP); } info->rti_info[RTAX_GATEWAY] = &saun->sa; info->rti_flags |= RTF_GATEWAY; return (0); } /* * Fills in @info based on userland-provided @rtm message. * * Returns 0 on success. */ static int fill_addrinfo(struct rt_msghdr *rtm, int len, struct linear_buffer *lb, u_int fibnum, struct rt_addrinfo *info) { int error; rtm->rtm_pid = curproc->p_pid; info->rti_addrs = rtm->rtm_addrs; info->rti_mflags = rtm->rtm_inits; info->rti_rmx = &rtm->rtm_rmx; /* * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6 * link-local address because rtrequest requires addresses with * embedded scope id. */ if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, info)) return (EINVAL); info->rti_flags = rtm->rtm_flags; error = cleanup_xaddrs(info, lb); if (error != 0) return (error); /* * Verify that the caller has the appropriate privilege; RTM_GET * is the only operation the non-superuser is allowed. */ if (rtm->rtm_type != RTM_GET) { error = priv_check(curthread, PRIV_NET_ROUTE); if (error != 0) return (error); } /* * The given gateway address may be an interface address. * For example, issuing a "route change" command on a route * entry that was created from a tunnel, and the gateway * address given is the local end point. In this case the * RTF_GATEWAY flag must be cleared or the destination will * not be reachable even though there is no error message. */ if (info->rti_info[RTAX_GATEWAY] != NULL && info->rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) { struct nhop_object *nh; /* * A host route through the loopback interface is * installed for each interface address. In pre-8.0 * releases the interface address of a PPP link type * is not reachable locally. This behavior is fixed as * part of the new L2/L3 redesign and rewrite work. The * signature of this interface address route is the * AF_LINK sa_family type of the gateway, and the * rt_ifp has the IFF_LOOPBACK flag set.
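* Concretely (addresses hypothetical): after "route change -net 10.0.0.0/24 10.0.1.1", where 10.0.1.1 is the tunnel's local address, the rib_lookup() below resolves 10.0.1.1 to that lo0-backed AF_LINK host route, so RTF_GATEWAY is dropped and RTF_GWFLAG_COMPAT recorded instead of returning an error.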
*/ nh = rib_lookup(fibnum, info->rti_info[RTAX_GATEWAY], NHR_NONE, 0); if (nh != NULL && nh->gw_sa.sa_family == AF_LINK && nh->nh_ifp->if_flags & IFF_LOOPBACK) { info->rti_flags &= ~RTF_GATEWAY; info->rti_flags |= RTF_GWFLAG_COMPAT; } } return (0); } static struct nhop_object * select_nhop(struct nhop_object *nh, const struct sockaddr *gw) { if (!NH_IS_NHGRP(nh)) return (nh); #ifdef ROUTE_MPATH const struct weightened_nhop *wn; uint32_t num_nhops; wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops); if (gw == NULL) return (wn[0].nh); for (int i = 0; i < num_nhops; i++) { if (match_nhop_gw(wn[i].nh, gw)) return (wn[i].nh); } #endif return (NULL); } /* * Handles RTM_GET message from routing socket, returning matching rt. * * Returns: * 0 on success, with locked and referenced matching rt in @rt_nrt * errno of failure */ static int handle_rtm_get(struct rt_addrinfo *info, u_int fibnum, struct rt_msghdr *rtm, struct rib_cmd_info *rc) { RIB_RLOCK_TRACKER; struct rib_head *rnh; struct nhop_object *nh; sa_family_t saf; saf = info->rti_info[RTAX_DST]->sa_family; rnh = rt_tables_get_rnh(fibnum, saf); if (rnh == NULL) return (EAFNOSUPPORT); RIB_RLOCK(rnh); /* * By (implicit) convention host route (one without netmask) * means longest-prefix-match request and the route with netmask * means exact-match lookup. * As cleanup_xaddrs() cleans up info flags&addrs for the /32,/128 * prefixes, use original data to check for the netmask presence. */ if ((rtm->rtm_addrs & RTA_NETMASK) == 0) { /* * Provide longest prefix match for * address lookup (no mask). * 'route -n get addr' */ rc->rc_rt = (struct rtentry *) rnh->rnh_matchaddr( info->rti_info[RTAX_DST], &rnh->head); } else rc->rc_rt = (struct rtentry *) rnh->rnh_lookup( info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], &rnh->head); if (rc->rc_rt == NULL) { RIB_RUNLOCK(rnh); return (ESRCH); } nh = select_nhop(rt_get_raw_nhop(rc->rc_rt), info->rti_info[RTAX_GATEWAY]); if (nh == NULL) { RIB_RUNLOCK(rnh); return (ESRCH); } /* * If performing proxied L2 entry insertion, and * the actual PPP host entry is found, perform * another search to retrieve the prefix route of * the local end point of the PPP link. * TODO: move this logic to userland. 
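* Example trigger (assumed): an RTM_GET carrying RTF_ANNOUNCE, as used for published (proxy-ARP style) entries, reaches the branch below once the host entry lookup above has succeeded.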
*/ if (rtm->rtm_flags & RTF_ANNOUNCE) { struct sockaddr_storage laddr; if (nh->nh_ifp != NULL && nh->nh_ifp->if_type == IFT_PROPVIRTUAL) { struct ifaddr *ifa; ifa = ifa_ifwithnet(info->rti_info[RTAX_DST], 1, RT_ALL_FIBS); if (ifa != NULL) rt_maskedcopy(ifa->ifa_addr, (struct sockaddr *)&laddr, ifa->ifa_netmask); } else rt_maskedcopy(nh->nh_ifa->ifa_addr, (struct sockaddr *)&laddr, nh->nh_ifa->ifa_netmask); /* * refactor rt and no lock operation necessary */ rc->rc_rt = (struct rtentry *)rnh->rnh_matchaddr( (struct sockaddr *)&laddr, &rnh->head); if (rc->rc_rt == NULL) { RIB_RUNLOCK(rnh); return (ESRCH); } nh = select_nhop(rt_get_raw_nhop(rc->rc_rt), info->rti_info[RTAX_GATEWAY]); if (nh == NULL) { RIB_RUNLOCK(rnh); return (ESRCH); } } rc->rc_nh_new = nh; rc->rc_nh_weight = rc->rc_rt->rt_weight; RIB_RUNLOCK(rnh); return (0); } static void init_sockaddrs_family(int family, struct sockaddr *dst, struct sockaddr *mask) { #ifdef INET if (family == AF_INET) { struct sockaddr_in *dst4 = (struct sockaddr_in *)dst; struct sockaddr_in *mask4 = (struct sockaddr_in *)mask; bzero(dst4, sizeof(struct sockaddr_in)); bzero(mask4, sizeof(struct sockaddr_in)); dst4->sin_family = AF_INET; dst4->sin_len = sizeof(struct sockaddr_in); mask4->sin_family = AF_INET; mask4->sin_len = sizeof(struct sockaddr_in); } #endif #ifdef INET6 if (family == AF_INET6) { struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)dst; struct sockaddr_in6 *mask6 = (struct sockaddr_in6 *)mask; bzero(dst6, sizeof(struct sockaddr_in6)); bzero(mask6, sizeof(struct sockaddr_in6)); dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(struct sockaddr_in6); mask6->sin6_family = AF_INET6; mask6->sin6_len = sizeof(struct sockaddr_in6); } #endif } static void export_rtaddrs(const struct rtentry *rt, struct sockaddr *dst, struct sockaddr *mask) { #ifdef INET if (dst->sa_family == AF_INET) { struct sockaddr_in *dst4 = (struct sockaddr_in *)dst; struct sockaddr_in *mask4 = (struct sockaddr_in *)mask; uint32_t scopeid = 0; rt_get_inet_prefix_pmask(rt, &dst4->sin_addr, &mask4->sin_addr, &scopeid); return; } #endif #ifdef INET6 if (dst->sa_family == AF_INET6) { struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)dst; struct sockaddr_in6 *mask6 = (struct sockaddr_in6 *)mask; uint32_t scopeid = 0; rt_get_inet6_prefix_pmask(rt, &dst6->sin6_addr, &mask6->sin6_addr, &scopeid); dst6->sin6_scope_id = scopeid; return; } #endif } static int update_rtm_from_info(struct rt_addrinfo *info, struct rt_msghdr **prtm, int alloc_len) { struct rt_msghdr *rtm, *orig_rtm = NULL; struct walkarg w; int len; rtm = *prtm; /* Check if we need to realloc storage */ rtsock_msg_buffer(rtm->rtm_type, info, NULL, &len); if (len > alloc_len) { struct rt_msghdr *tmp_rtm; tmp_rtm = malloc(len, M_TEMP, M_NOWAIT); if (tmp_rtm == NULL) return (ENOBUFS); bcopy(rtm, tmp_rtm, rtm->rtm_msglen); orig_rtm = rtm; rtm = tmp_rtm; alloc_len = len; /* * Delay freeing original rtm as info contains * data referencing it. */ } w.w_tmem = (caddr_t)rtm; w.w_tmemsize = alloc_len; rtsock_msg_buffer(rtm->rtm_type, info, &w, &len); rtm->rtm_addrs = info->rti_addrs; if (orig_rtm != NULL) free(orig_rtm, M_TEMP); *prtm = rtm; return (0); } /* * Update sockaddrs, flags, etc in @prtm based on @rc data. * rtm can be reallocated. * * Returns 0 on success, along with pointer to (potentially reallocated) * rtm. 
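* Expected caller pattern (sketch): error = update_rtm_from_rc(&info, &rtm, alloc_len, &rc, nh); on return the old rtm must not be referenced again, since update_rtm_from_info() may have freed and replaced it.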
* */ static int update_rtm_from_rc(struct rt_addrinfo *info, struct rt_msghdr **prtm, int alloc_len, struct rib_cmd_info *rc, struct nhop_object *nh) { union sockaddr_union saun; struct rt_msghdr *rtm; struct ifnet *ifp; int error; rtm = *prtm; union sockaddr_union sa_dst, sa_mask; int family = info->rti_info[RTAX_DST]->sa_family; init_sockaddrs_family(family, &sa_dst.sa, &sa_mask.sa); export_rtaddrs(rc->rc_rt, &sa_dst.sa, &sa_mask.sa); info->rti_info[RTAX_DST] = &sa_dst.sa; info->rti_info[RTAX_NETMASK] = rt_is_host(rc->rc_rt) ? NULL : &sa_mask.sa; info->rti_info[RTAX_GATEWAY] = &nh->gw_sa; info->rti_info[RTAX_GENMASK] = 0; ifp = nh->nh_ifp; if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { if (ifp) { info->rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; error = rtm_get_jailed(info, ifp, nh, &saun, curthread->td_ucred); if (error != 0) return (error); if (ifp->if_flags & IFF_POINTOPOINT) info->rti_info[RTAX_BRD] = nh->nh_ifa->ifa_dstaddr; rtm->rtm_index = ifp->if_index; } else { info->rti_info[RTAX_IFP] = NULL; info->rti_info[RTAX_IFA] = NULL; } } else if (ifp != NULL) rtm->rtm_index = ifp->if_index; if ((error = update_rtm_from_info(info, prtm, alloc_len)) != 0) return (error); rtm = *prtm; rtm->rtm_flags = rc->rc_rt->rte_flags | nhop_get_rtflags(nh); if (rtm->rtm_flags & RTF_GWFLAG_COMPAT) rtm->rtm_flags = RTF_GATEWAY | (rtm->rtm_flags & ~RTF_GWFLAG_COMPAT); rt_getmetrics(rc->rc_rt, nh, &rtm->rtm_rmx); rtm->rtm_rmx.rmx_weight = rc->rc_nh_weight; return (0); } #ifdef ROUTE_MPATH static void save_del_notification(const struct rib_cmd_info *rc, void *_cbdata) { struct rib_cmd_info *rc_new = (struct rib_cmd_info *)_cbdata; if (rc->rc_cmd == RTM_DELETE) *rc_new = *rc; } static void save_add_notification(const struct rib_cmd_info *rc, void *_cbdata) { struct rib_cmd_info *rc_new = (struct rib_cmd_info *)_cbdata; if (rc->rc_cmd == RTM_ADD) *rc_new = *rc; } #endif #if defined(INET6) || defined(INET) static struct sockaddr * alloc_sockaddr_aligned(struct linear_buffer *lb, int len) { len = roundup2(len, sizeof(uint64_t)); if (lb->offset + len > lb->size) return (NULL); struct sockaddr *sa = (struct sockaddr *)(lb->base + lb->offset); lb->offset += len; return (sa); } #endif static int rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct rt_msghdr *rtm = NULL; struct rt_addrinfo info; struct epoch_tracker et; #ifdef INET6 struct sockaddr_storage ss; struct sockaddr_in6 *sin6; int i, rti_need_deembed = 0; #endif int alloc_len = 0, len, error = 0, fibnum; sa_family_t saf = AF_UNSPEC; struct rib_cmd_info rc; struct nhop_object *nh; if ((flags & PRUS_OOB) || control != NULL) { m_freem(m); if (control != NULL) m_freem(control); return (EOPNOTSUPP); } fibnum = so->so_fibnum; #define senderr(e) { error = e; goto flush;} if (m == NULL || ((m->m_len < sizeof(long)) && (m = m_pullup(m, sizeof(long))) == NULL)) return (ENOBUFS); if ((m->m_flags & M_PKTHDR) == 0) panic("route_output"); NET_EPOCH_ENTER(et); len = m->m_pkthdr.len; if (len < sizeof(*rtm) || len != mtod(m, struct rt_msghdr *)->rtm_msglen) senderr(EINVAL); /* * Most current messages are in the 200-240 byte range; minimize * possible re-allocation on reply by using a larger buffer * aligned on a 1k boundary.
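* Worked example (numbers illustrative): len = 232 yields alloc_len = roundup2(232, 1024) = 1024 and total_len = 2048; the scratch linear_buffer then starts at (char *)rtm + 1024 and offers SCRATCH_BUFFER_SIZE (1024) bytes for the aligned sockaddr copies made by cleanup_xaddrs().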
*/ alloc_len = roundup2(len, 1024); int total_len = alloc_len + SCRATCH_BUFFER_SIZE; if ((rtm = malloc(total_len, M_TEMP, M_NOWAIT)) == NULL) senderr(ENOBUFS); m_copydata(m, 0, len, (caddr_t)rtm); bzero(&info, sizeof(info)); nh = NULL; struct linear_buffer lb = { .base = (char *)rtm + alloc_len, .size = SCRATCH_BUFFER_SIZE, }; if (rtm->rtm_version != RTM_VERSION) { /* Do not touch message since format is unknown */ free(rtm, M_TEMP); rtm = NULL; senderr(EPROTONOSUPPORT); } /* * Starting from here, it is possible * to alter original message and insert * caller PID and error value. */ if ((error = fill_addrinfo(rtm, len, &lb, fibnum, &info)) != 0) { senderr(error); } /* fill_addrinfo() embeds scope into IPv6 addresses */ #ifdef INET6 rti_need_deembed = 1; #endif saf = info.rti_info[RTAX_DST]->sa_family; /* support for new ARP code */ if (rtm->rtm_flags & RTF_LLDATA) { error = lla_rt_output(rtm, &info); goto flush; } union sockaddr_union gw_saun; int blackhole_flags = rtm->rtm_flags & (RTF_BLACKHOLE|RTF_REJECT); if (blackhole_flags != 0) { if (blackhole_flags != (RTF_BLACKHOLE | RTF_REJECT)) error = fill_blackholeinfo(&info, &gw_saun); else { RTS_PID_LOG(LOG_DEBUG, "both BLACKHOLE and REJECT flags specified"); error = EINVAL; } if (error != 0) senderr(error); } switch (rtm->rtm_type) { case RTM_ADD: case RTM_CHANGE: if (rtm->rtm_type == RTM_ADD) { if (info.rti_info[RTAX_GATEWAY] == NULL) { RTS_PID_LOG(LOG_DEBUG, "RTM_ADD w/o gateway"); senderr(EINVAL); } } error = rib_action(fibnum, rtm->rtm_type, &info, &rc); if (error == 0) { rtsock_notify_event(fibnum, &rc); #ifdef ROUTE_MPATH if (NH_IS_NHGRP(rc.rc_nh_new) || (rc.rc_nh_old && NH_IS_NHGRP(rc.rc_nh_old))) { struct rib_cmd_info rc_simple = {}; rib_decompose_notification(&rc, save_add_notification, (void *)&rc_simple); rc = rc_simple; } #endif /* nh MAY be empty if RTM_CHANGE request is no-op */ nh = rc.rc_nh_new; if (nh != NULL) { rtm->rtm_index = nh->nh_ifp->if_index; rtm->rtm_flags = rc.rc_rt->rte_flags | nhop_get_rtflags(nh); } } break; case RTM_DELETE: error = rib_action(fibnum, RTM_DELETE, &info, &rc); if (error == 0) { rtsock_notify_event(fibnum, &rc); #ifdef ROUTE_MPATH if (NH_IS_NHGRP(rc.rc_nh_old) || (rc.rc_nh_new && NH_IS_NHGRP(rc.rc_nh_new))) { struct rib_cmd_info rc_simple = {}; rib_decompose_notification(&rc, save_del_notification, (void *)&rc_simple); rc = rc_simple; } #endif nh = rc.rc_nh_old; } break; case RTM_GET: error = handle_rtm_get(&info, fibnum, rtm, &rc); if (error != 0) senderr(error); nh = rc.rc_nh_new; if (!can_export_rte(curthread->td_ucred, info.rti_info[RTAX_NETMASK] == NULL, info.rti_info[RTAX_DST])) { senderr(ESRCH); } break; default: senderr(EOPNOTSUPP); } if (error == 0 && nh != NULL) { error = update_rtm_from_rc(&info, &rtm, alloc_len, &rc, nh); /* * Note that some sockaddr pointers may have changed to * point to memory outside @rtm. Some may be pointing * to the on-stack variables. * Given that, any pointer in @info CANNOT BE USED. */ /* * scopeid deembedding has been performed while * writing updated rtm in rtsock_msg_buffer(). * With that in mind, skip deembedding procedure below. */ #ifdef INET6 rti_need_deembed = 0; #endif } flush: NET_EPOCH_EXIT(et); #ifdef INET6 if (rtm != NULL) { if (rti_need_deembed) { /* sin6_scope_id is recovered before sending rtm.
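* E.g. (KAME convention, matching the s6_addr[2] note in fill_addrinfo() above): a link-local fe80::1 with sin6_scope_id = 3 travels inside the kernel with the ifindex embedded in the address bytes and sin6_scope_id zeroed; sa6_recoverscope() below restores sin6_scope_id = 3 and clears the embedded word.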
*/ sin6 = (struct sockaddr_in6 *)&ss; for (i = 0; i < RTAX_MAX; i++) { if (info.rti_info[i] == NULL) continue; if (info.rti_info[i]->sa_family != AF_INET6) continue; bcopy(info.rti_info[i], sin6, sizeof(*sin6)); if (sa6_recoverscope(sin6) == 0) bcopy(sin6, info.rti_info[i], sizeof(*sin6)); } if (update_rtm_from_info(&info, &rtm, alloc_len) != 0) { if (error != 0) error = ENOBUFS; } } } #endif send_rtm_reply(so, rtm, m, saf, fibnum, error); return (error); } /* * Sends the prepared reply message in @rtm to all rtsock clients. * Frees @m and @rtm. * */ static void send_rtm_reply(struct socket *so, struct rt_msghdr *rtm, struct mbuf *m, sa_family_t saf, u_int fibnum, int rtm_errno) { struct rcb *rcb = NULL; /* * Check to see if we don't want our own messages. */ if ((so->so_options & SO_USELOOPBACK) == 0) { if (V_route_cb.any_count <= 1) { if (rtm != NULL) free(rtm, M_TEMP); m_freem(m); return; } /* There is another listener, so construct message */ rcb = so->so_pcb; } if (rtm != NULL) { if (rtm_errno != 0) rtm->rtm_errno = rtm_errno; else rtm->rtm_flags |= RTF_DONE; m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm); if (m->m_pkthdr.len < rtm->rtm_msglen) { m_freem(m); m = NULL; } else if (m->m_pkthdr.len > rtm->rtm_msglen) m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); free(rtm, M_TEMP); } if (m != NULL) { M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; if (rcb) { /* * XXX ensure we don't get a copy by * invalidating our protocol */ sa_family_t family = rcb->rcb_family; rcb->rcb_family = AF_UNSPEC; rt_dispatch(m, saf); rcb->rcb_family = family; } else rt_dispatch(m, saf); } } static void rt_getmetrics(const struct rtentry *rt, const struct nhop_object *nh, struct rt_metrics *out) { bzero(out, sizeof(*out)); out->rmx_mtu = nh->nh_mtu; out->rmx_weight = rt->rt_weight; out->rmx_nhidx = nhop_get_idx(nh); /* Kernel -> userland timebase conversion. */ out->rmx_expire = nhop_get_expire(nh) ? nhop_get_expire(nh) - time_uptime + time_second : 0; } /* * Extract the addresses of the passed sockaddrs. * Do a little sanity checking so as to avoid bad memory references. * This data is derived straight from userland. */ static int rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) { struct sockaddr *sa; int i; for (i = 0; i < RTAX_MAX && cp < cplim; i++) { if ((rtinfo->rti_addrs & (1 << i)) == 0) continue; sa = (struct sockaddr *)cp; /* * It won't fit. */ if (cp + sa->sa_len > cplim) { RTS_PID_LOG(LOG_DEBUG, "sa_len too big for sa type %d", i); return (EINVAL); } /* * There are no more.. quit now. * If there are more bits, they are in error. * I've seen this; route(1) can evidently generate these, * and they cause the kernel to core dump. * For compatibility, if we see this, point to a safe address.
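* Illustrative case (input assumed): rtm_addrs = RTA_DST | RTA_GATEWAY but the payload ends right after the destination sockaddr; the zero-length "gateway" makes rti_info[RTAX_GATEWAY] point at sa_zero and parsing stops, instead of walking past the buffer.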
*/ if (sa->sa_len == 0) { rtinfo->rti_info[i] = &sa_zero; return (0); /* should be EINVAL but for compat */ } /* accept it */ #ifdef INET6 if (sa->sa_family == AF_INET6) sa6_embedscope((struct sockaddr_in6 *)sa, V_ip6_use_defzone); #endif rtinfo->rti_info[i] = sa; cp += SA_SIZE(sa); } return (0); } #ifdef INET static inline void fill_sockaddr_inet(struct sockaddr_in *sin, struct in_addr addr) { const struct sockaddr_in nsin = { .sin_family = AF_INET, .sin_len = sizeof(struct sockaddr_in), .sin_addr = addr, }; *sin = nsin; } #endif #ifdef INET6 static inline void fill_sockaddr_inet6(struct sockaddr_in6 *sin6, const struct in6_addr *addr6, uint32_t scopeid) { const struct sockaddr_in6 nsin6 = { .sin6_family = AF_INET6, .sin6_len = sizeof(struct sockaddr_in6), .sin6_addr = *addr6, .sin6_scope_id = scopeid, }; *sin6 = nsin6; } #endif #if defined(INET6) || defined(INET) /* * Checks if gateway is suitable for lltable operations. * Lltable code requires AF_LINK gateway with ifindex * and mac address specified. * Returns 0 on success. */ static int cleanup_xaddrs_lladdr(struct rt_addrinfo *info) { struct sockaddr_dl *sdl = (struct sockaddr_dl *)info->rti_info[RTAX_GATEWAY]; if (sdl->sdl_family != AF_LINK) return (EINVAL); if (sdl->sdl_index == 0) { RTS_PID_LOG(LOG_DEBUG, "AF_LINK gateway w/o ifindex"); return (EINVAL); } if (offsetof(struct sockaddr_dl, sdl_data) + sdl->sdl_nlen + sdl->sdl_alen > sdl->sdl_len) { RTS_PID_LOG(LOG_DEBUG, "AF_LINK gw: sdl_nlen/sdl_alen too large"); return (EINVAL); } return (0); } static int cleanup_xaddrs_gateway(struct rt_addrinfo *info, struct linear_buffer *lb) { struct sockaddr *gw = info->rti_info[RTAX_GATEWAY]; struct sockaddr *sa; if (info->rti_flags & RTF_LLDATA) return (cleanup_xaddrs_lladdr(info)); switch (gw->sa_family) { #ifdef INET case AF_INET: { struct sockaddr_in *gw_sin = (struct sockaddr_in *)gw; /* Ensure reads do not go beyond SA boundary */ if (SA_SIZE(gw) < offsetof(struct sockaddr_in, sin_zero)) { RTS_PID_LOG(LOG_DEBUG, "gateway sin_len too small: %d", gw->sa_len); return (EINVAL); } sa = alloc_sockaddr_aligned(lb, sizeof(struct sockaddr_in)); if (sa == NULL) return (ENOBUFS); fill_sockaddr_inet((struct sockaddr_in *)sa, gw_sin->sin_addr); info->rti_info[RTAX_GATEWAY] = sa; } break; #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *gw_sin6 = (struct sockaddr_in6 *)gw; if (gw_sin6->sin6_len < sizeof(struct sockaddr_in6)) { RTS_PID_LOG(LOG_DEBUG, "gateway sin6_len too small: %d", gw->sa_len); return (EINVAL); } fill_sockaddr_inet6(gw_sin6, &gw_sin6->sin6_addr, 0); break; } #endif case AF_LINK: { struct sockaddr_dl *gw_sdl; size_t sdl_min_len = offsetof(struct sockaddr_dl, sdl_data); gw_sdl = (struct sockaddr_dl *)gw; if (gw_sdl->sdl_len < sdl_min_len) { RTS_PID_LOG(LOG_DEBUG, "gateway sdl_len too small: %d", gw_sdl->sdl_len); return (EINVAL); } sa = alloc_sockaddr_aligned(lb, sizeof(struct sockaddr_dl_short)); if (sa == NULL) return (ENOBUFS); const struct sockaddr_dl_short sdl = { .sdl_family = AF_LINK, .sdl_len = sizeof(struct sockaddr_dl_short), .sdl_index = gw_sdl->sdl_index, }; *((struct sockaddr_dl_short *)sa) = sdl; info->rti_info[RTAX_GATEWAY] = sa; break; } } return (0); } #endif static void remove_netmask(struct rt_addrinfo *info) { info->rti_info[RTAX_NETMASK] = NULL; info->rti_flags |= RTF_HOST; info->rti_addrs &= ~RTA_NETMASK; } #ifdef INET static int cleanup_xaddrs_inet(struct rt_addrinfo *info, struct linear_buffer *lb) { struct sockaddr_in *dst_sa, *mask_sa; const int sa_len = sizeof(struct sockaddr_in); struct in_addr
dst, mask; /* Check & fixup dst/netmask combination first */ dst_sa = (struct sockaddr_in *)info->rti_info[RTAX_DST]; mask_sa = (struct sockaddr_in *)info->rti_info[RTAX_NETMASK]; /* Ensure reads do not go beyond the buffer size */ if (SA_SIZE(dst_sa) < offsetof(struct sockaddr_in, sin_zero)) { RTS_PID_LOG(LOG_DEBUG, "prefix dst sin_len too small: %d", dst_sa->sin_len); return (EINVAL); } if ((mask_sa != NULL) && mask_sa->sin_len < sizeof(struct sockaddr_in)) { /* * Some older routing software encodes mask length into the * sin_len, thus resulting in "truncated" sockaddr. */ int len = mask_sa->sin_len - offsetof(struct sockaddr_in, sin_addr); if (len >= 0) { mask.s_addr = 0; if (len > sizeof(struct in_addr)) len = sizeof(struct in_addr); memcpy(&mask, &mask_sa->sin_addr, len); } else { RTS_PID_LOG(LOG_DEBUG, "prefix mask sin_len too small: %d", mask_sa->sin_len); return (EINVAL); } } else mask.s_addr = mask_sa ? mask_sa->sin_addr.s_addr : INADDR_BROADCAST; dst.s_addr = htonl(ntohl(dst_sa->sin_addr.s_addr) & ntohl(mask.s_addr)); /* Construct new "clean" dst/mask sockaddresses */ if ((dst_sa = (struct sockaddr_in *)alloc_sockaddr_aligned(lb, sa_len)) == NULL) return (ENOBUFS); fill_sockaddr_inet(dst_sa, dst); info->rti_info[RTAX_DST] = (struct sockaddr *)dst_sa; if (mask.s_addr != INADDR_BROADCAST) { if ((mask_sa = (struct sockaddr_in *)alloc_sockaddr_aligned(lb, sa_len)) == NULL) return (ENOBUFS); fill_sockaddr_inet(mask_sa, mask); info->rti_info[RTAX_NETMASK] = (struct sockaddr *)mask_sa; info->rti_flags &= ~RTF_HOST; } else remove_netmask(info); /* Check gateway */ if (info->rti_info[RTAX_GATEWAY] != NULL) return (cleanup_xaddrs_gateway(info, lb)); return (0); } #endif #ifdef INET6 static int cleanup_xaddrs_inet6(struct rt_addrinfo *info, struct linear_buffer *lb) { struct sockaddr *sa; struct sockaddr_in6 *dst_sa, *mask_sa; struct in6_addr mask, *dst; const int sa_len = sizeof(struct sockaddr_in6); /* Check & fixup dst/netmask combination first */ dst_sa = (struct sockaddr_in6 *)info->rti_info[RTAX_DST]; mask_sa = (struct sockaddr_in6 *)info->rti_info[RTAX_NETMASK]; if (dst_sa->sin6_len < sizeof(struct sockaddr_in6)) { RTS_PID_LOG(LOG_DEBUG, "prefix dst sin6_len too small: %d", dst_sa->sin6_len); return (EINVAL); } if (mask_sa && mask_sa->sin6_len < sizeof(struct sockaddr_in6)) { /* * Some older routing software encodes mask length into the * sin6_len, thus resulting in "truncated" sockaddr. */ int len = mask_sa->sin6_len - offsetof(struct sockaddr_in6, sin6_addr); if (len >= 0) { bzero(&mask, sizeof(mask)); if (len > sizeof(struct in6_addr)) len = sizeof(struct in6_addr); memcpy(&mask, &mask_sa->sin6_addr, len); } else { RTS_PID_LOG(LOG_DEBUG, "rtsock: prefix mask sin6_len too small: %d", mask_sa->sin6_len); return (EINVAL); } } else mask = mask_sa ?
mask_sa->sin6_addr : in6mask128; dst = &dst_sa->sin6_addr; IN6_MASK_ADDR(dst, &mask); if ((sa = alloc_sockaddr_aligned(lb, sa_len)) == NULL) return (ENOBUFS); fill_sockaddr_inet6((struct sockaddr_in6 *)sa, dst, 0); info->rti_info[RTAX_DST] = sa; if (!IN6_ARE_ADDR_EQUAL(&mask, &in6mask128)) { if ((sa = alloc_sockaddr_aligned(lb, sa_len)) == NULL) return (ENOBUFS); fill_sockaddr_inet6((struct sockaddr_in6 *)sa, &mask, 0); info->rti_info[RTAX_NETMASK] = sa; info->rti_flags &= ~RTF_HOST; } else remove_netmask(info); /* Check gateway */ if (info->rti_info[RTAX_GATEWAY] != NULL) return (cleanup_xaddrs_gateway(info, lb)); return (0); } #endif static int cleanup_xaddrs(struct rt_addrinfo *info, struct linear_buffer *lb) { int error = EAFNOSUPPORT; if (info->rti_info[RTAX_DST] == NULL) { RTS_PID_LOG(LOG_DEBUG, "prefix dst is not set"); return (EINVAL); } if (info->rti_flags & RTF_LLDATA) { /* * arp(8)/ndp(8) send RTA_NETMASK for the associated * prefix along with the actual address in RTA_DST. * Remove netmask to avoid unnecessary address masking. */ remove_netmask(info); } switch (info->rti_info[RTAX_DST]->sa_family) { #ifdef INET case AF_INET: error = cleanup_xaddrs_inet(info, lb); break; #endif #ifdef INET6 case AF_INET6: error = cleanup_xaddrs_inet6(info, lb); break; #endif } return (error); } /* * Fill in @dmask with valid netmask leaving original @smask * intact. Mostly used with radix netmasks. */ struct sockaddr * rtsock_fix_netmask(const struct sockaddr *dst, const struct sockaddr *smask, struct sockaddr_storage *dmask) { if (dst == NULL || smask == NULL) return (NULL); memset(dmask, 0, dst->sa_len); memcpy(dmask, smask, smask->sa_len); dmask->ss_len = dst->sa_len; dmask->ss_family = dst->sa_family; return ((struct sockaddr *)dmask); } /* * Writes information related to @rtinfo object to newly-allocated mbuf. * Assumes MCLBYTES is enough to construct any message. * Used for OS notifications of various events (if/ifa announces, etc.) * * Returns allocated mbuf or NULL on failure. */ static struct mbuf * rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo) { struct sockaddr_storage ss; struct rt_msghdr *rtm; struct mbuf *m; int i; struct sockaddr *sa; #ifdef INET6 struct sockaddr_in6 *sin6; #endif int len, dlen; switch (type) { case RTM_DELADDR: case RTM_NEWADDR: len = sizeof(struct ifa_msghdr); break; case RTM_DELMADDR: case RTM_NEWMADDR: len = sizeof(struct ifma_msghdr); break; case RTM_IFINFO: len = sizeof(struct if_msghdr); break; case RTM_IFANNOUNCE: case RTM_IEEE80211: len = sizeof(struct if_announcemsghdr); break; default: len = sizeof(struct rt_msghdr); } /* XXXGL: can we use MJUMPAGESIZE cluster here?
*/ KASSERT(len <= MCLBYTES, ("%s: message too big", __func__)); if (len > MHLEN) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (m); m->m_pkthdr.len = m->m_len = len; rtm = mtod(m, struct rt_msghdr *); bzero((caddr_t)rtm, len); for (i = 0; i < RTAX_MAX; i++) { if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = SA_SIZE(sa); KASSERT(dlen <= sizeof(ss), ("%s: sockaddr size overflow", __func__)); bzero(&ss, sizeof(ss)); bcopy(sa, &ss, sa->sa_len); sa = (struct sockaddr *)&ss; #ifdef INET6 if (sa->sa_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)sa; (void)sa6_recoverscope(sin6); } #endif m_copyback(m, len, dlen, (caddr_t)sa); len += dlen; } if (m->m_pkthdr.len != len) { m_freem(m); return (NULL); } rtm->rtm_msglen = len; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; return (m); } /* * Writes information related to @rtinfo object to preallocated buffer. * Stores needed size in @plen. If @w is NULL, calculates size without * writing. * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation. * * Returns 0 on success. * */ static int rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen) { struct sockaddr_storage ss; int len, buflen = 0, dlen, i; caddr_t cp = NULL; struct rt_msghdr *rtm = NULL; #ifdef INET6 struct sockaddr_in6 *sin6; #endif #ifdef COMPAT_FREEBSD32 bool compat32 = false; #endif switch (type) { case RTM_DELADDR: case RTM_NEWADDR: if (w != NULL && w->w_op == NET_RT_IFLISTL) { #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { len = sizeof(struct ifa_msghdrl32); compat32 = true; } else #endif len = sizeof(struct ifa_msghdrl); } else len = sizeof(struct ifa_msghdr); break; case RTM_IFINFO: #ifdef COMPAT_FREEBSD32 if (w != NULL && w->w_req->flags & SCTL_MASK32) { if (w->w_op == NET_RT_IFLISTL) len = sizeof(struct if_msghdrl32); else len = sizeof(struct if_msghdr32); compat32 = true; break; } #endif if (w != NULL && w->w_op == NET_RT_IFLISTL) len = sizeof(struct if_msghdrl); else len = sizeof(struct if_msghdr); break; case RTM_NEWMADDR: len = sizeof(struct ifma_msghdr); break; default: len = sizeof(struct rt_msghdr); } if (w != NULL) { rtm = (struct rt_msghdr *)w->w_tmem; buflen = w->w_tmemsize - len; cp = (caddr_t)w->w_tmem + len; } rtinfo->rti_addrs = 0; for (i = 0; i < RTAX_MAX; i++) { struct sockaddr *sa; if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); #ifdef COMPAT_FREEBSD32 if (compat32) dlen = SA_SIZE32(sa); else #endif dlen = SA_SIZE(sa); if (cp != NULL && buflen >= dlen) { KASSERT(dlen <= sizeof(ss), ("%s: sockaddr size overflow", __func__)); bzero(&ss, sizeof(ss)); bcopy(sa, &ss, sa->sa_len); sa = (struct sockaddr *)&ss; #ifdef INET6 if (sa->sa_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)sa; (void)sa6_recoverscope(sin6); } #endif bcopy((caddr_t)sa, cp, (unsigned)dlen); cp += dlen; buflen -= dlen; } else if (cp != NULL) { /* * Buffer too small. Count needed size * and return with error. 
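* (The sizing pass relies on this: callers such as update_rtm_from_info() first invoke rtsock_msg_buffer(type, info, NULL, &len) to learn the required length, then call again with a w_tmem buffer of at least that size.)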
*/ cp = NULL; } len += dlen; } if (cp != NULL) { dlen = ALIGN(len) - len; if (buflen < dlen) cp = NULL; else { bzero(cp, dlen); cp += dlen; buflen -= dlen; } } len = ALIGN(len); if (cp != NULL) { /* fill header iff buffer is large enough */ rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; rtm->rtm_msglen = len; } *plen = len; if (w != NULL && cp == NULL) return (ENOBUFS); return (0); } /* * This routine is called to generate a message from the routing * socket indicating that a redirect has occurred, a routing lookup * has failed, or that a protocol has detected timeouts to a particular * destination. */ void rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error, int fibnum) { struct rt_msghdr *rtm; struct mbuf *m; struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; if (V_route_cb.any_count == 0) return; m = rtsock_msg_mbuf(type, rtinfo); if (m == NULL) return; if (fibnum != RT_ALL_FIBS) { KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out " "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs)); M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; } rtm = mtod(m, struct rt_msghdr *); rtm->rtm_flags = RTF_DONE | flags; rtm->rtm_errno = error; rtm->rtm_addrs = rtinfo->rti_addrs; rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); } void rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error) { rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS); } /* * This routine is called to generate a message from the routing * socket indicating that the status of a network interface has changed. */ -void -rt_ifmsg(struct ifnet *ifp) +static void +rtsock_ifmsg(struct ifnet *ifp, int if_flags_mask __unused) { struct if_msghdr *ifm; struct mbuf *m; struct rt_addrinfo info; if (V_route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); m = rtsock_msg_mbuf(RTM_IFINFO, &info); if (m == NULL) return; ifm = mtod(m, struct if_msghdr *); ifm->ifm_index = ifp->if_index; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; if_data_copy(ifp, &ifm->ifm_data); ifm->ifm_addrs = 0; rt_dispatch(m, AF_UNSPEC); } /* * Announce interface address arrival/withdrawal. * Please do not call directly, use rt_addrmsg(). * Assume input data to be valid. * Returns 0 on success. */ int rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) { struct rt_addrinfo info; struct sockaddr *sa; int ncmd; struct mbuf *m; struct ifa_msghdr *ifam; struct ifnet *ifp = ifa->ifa_ifp; struct sockaddr_storage ss; if (V_route_cb.any_count == 0) return (0); ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr; info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask( info.rti_info[RTAX_IFA], ifa->ifa_netmask, &ss); info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; if ((m = rtsock_msg_mbuf(ncmd, &info)) == NULL) return (ENOBUFS); ifam = mtod(m, struct ifa_msghdr *); ifam->ifam_index = ifp->if_index; ifam->ifam_metric = ifa->ifa_ifp->if_metric; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_addrs = info.rti_addrs; if (fibnum != RT_ALL_FIBS) { M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; } rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); return (0); } /* * Announce route addition/removal to rtsock based on @rt data. * Callers are advised to use rt_routemsg() instead of using this * function directly. * Assume @rt data is consistent. * * Returns 0 on success.
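* Typical in-kernel invocation goes through the wrapper declared in route.h, e.g. (sketch): rt_routemsg(RTM_ADD, rt, nh, fibnum), as done by report_route_event() above.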
 */
int
rtsock_routemsg(int cmd, struct rtentry *rt, struct nhop_object *nh,
    int fibnum)
{
	union sockaddr_union dst, mask;
	struct rt_addrinfo info;

	if (V_route_cb.any_count == 0)
		return (0);

	int family = rt_get_family(rt);
	init_sockaddrs_family(family, &dst.sa, &mask.sa);
	export_rtaddrs(rt, &dst.sa, &mask.sa);

	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = &dst.sa;
	info.rti_info[RTAX_NETMASK] = &mask.sa;
	info.rti_info[RTAX_GATEWAY] = &nh->gw_sa;
	info.rti_flags = rt->rte_flags | nhop_get_rtflags(nh);
	info.rti_ifp = nh->nh_ifp;

	return (rtsock_routemsg_info(cmd, &info, fibnum));
}

int
rtsock_routemsg_info(int cmd, struct rt_addrinfo *info, int fibnum)
{
	struct rt_msghdr *rtm;
	struct sockaddr *sa;
	struct mbuf *m;

	if (V_route_cb.any_count == 0)
		return (0);

	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;

	m = rtsock_msg_mbuf(cmd, info);
	if (m == NULL)
		return (ENOBUFS);

	if (fibnum != RT_ALL_FIBS) {
		KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
		    "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
		M_SETFIB(m, fibnum);
		m->m_flags |= RTS_FILTER_FIB;
	}

	rtm = mtod(m, struct rt_msghdr *);
	rtm->rtm_addrs = info->rti_addrs;
	if (info->rti_ifp != NULL)
		rtm->rtm_index = info->rti_ifp->if_index;
	/* Add RTF_DONE to indicate command 'completion' required by API */
	info->rti_flags |= RTF_DONE;
	/* Reported routes have to be up */
	if (cmd == RTM_ADD || cmd == RTM_CHANGE)
		info->rti_flags |= RTF_UP;
	rtm->rtm_flags = info->rti_flags;

	sa = info->rti_info[RTAX_DST];
	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);

	return (0);
}

/*
 * This is the analogue of rt_newaddrmsg which performs the same
 * function but for multicast group memberships.  This is easier since
 * there is no route state to worry about.
 */
void
rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
{
	struct rt_addrinfo info;
	struct mbuf *m = NULL;
	struct ifnet *ifp = ifma->ifma_ifp;
	struct ifma_msghdr *ifmam;

	if (V_route_cb.any_count == 0)
		return;

	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
	if (ifp && ifp->if_addr)
		info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
	else
		info.rti_info[RTAX_IFP] = NULL;
	/*
	 * If a link-layer address is present, present it as a ``gateway''
	 * (similarly to how ARP entries, e.g., are presented).
	 */
	info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;

	m = rtsock_msg_mbuf(cmd, &info);
	if (m == NULL)
		return;

	ifmam = mtod(m, struct ifma_msghdr *);
	KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
	    __func__));
	ifmam->ifmam_index = ifp->if_index;
	ifmam->ifmam_addrs = info.rti_addrs;
	rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC);
}

static struct mbuf *
rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
    struct rt_addrinfo *info)
{
	struct if_announcemsghdr *ifan;
	struct mbuf *m;

	if (V_route_cb.any_count == 0)
		return NULL;
	bzero((caddr_t)info, sizeof(*info));
	m = rtsock_msg_mbuf(type, info);
	if (m != NULL) {
		ifan = mtod(m, struct if_announcemsghdr *);
		ifan->ifan_index = ifp->if_index;
		strlcpy(ifan->ifan_name, ifp->if_xname,
		    sizeof(ifan->ifan_name));
		ifan->ifan_what = what;
	}
	return m;
}

/*
 * This is called to generate routing socket messages indicating
 * IEEE80211 wireless events.
 * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
 */
void
rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
{
	struct mbuf *m;
	struct rt_addrinfo info;

	m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
	if (m != NULL) {
		/*
		 * Append the ieee80211 data.
Try to stick it in the * mbuf containing the ifannounce msg; otherwise allocate * a new mbuf and append. * * NB: we assume m is a single mbuf. */ if (data_len > M_TRAILINGSPACE(m)) { struct mbuf *n = m_get(M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return; } bcopy(data, mtod(n, void *), data_len); n->m_len = data_len; m->m_next = n; } else if (data_len > 0) { bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len); m->m_len += data_len; } if (m->m_flags & M_PKTHDR) m->m_pkthdr.len += data_len; mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len; rt_dispatch(m, AF_UNSPEC); } } /* * This is called to generate routing socket messages indicating * network interface arrival and departure. */ static void rt_ifannouncemsg(struct ifnet *ifp, int what) { struct mbuf *m; struct rt_addrinfo info; m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info); if (m != NULL) rt_dispatch(m, AF_UNSPEC); } static void rt_dispatch(struct mbuf *m, sa_family_t saf) { M_ASSERTPKTHDR(m); m->m_rtsock_family = saf; if (V_loif) m->m_pkthdr.rcvif = V_loif; else { m_freem(m); return; } netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */ } /* * Checks if rte can be exported w.r.t jails/vnets. * * Returns true if it can, false otherwise. */ static bool can_export_rte(struct ucred *td_ucred, bool rt_is_host, const struct sockaddr *rt_dst) { if ((!rt_is_host) ? jailed_without_vnet(td_ucred) : prison_if(td_ucred, rt_dst) != 0) return (false); return (true); } /* * This is used in dumping the kernel table via sysctl(). */ static int sysctl_dumpentry(struct rtentry *rt, void *vw) { struct walkarg *w = vw; struct nhop_object *nh; NET_EPOCH_ASSERT(); export_rtaddrs(rt, w->dst, w->mask); if (!can_export_rte(w->w_req->td->td_ucred, rt_is_host(rt), w->dst)) return (0); nh = rt_get_raw_nhop(rt); #ifdef ROUTE_MPATH if (NH_IS_NHGRP(nh)) { const struct weightened_nhop *wn; uint32_t num_nhops; int error; wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops); for (int i = 0; i < num_nhops; i++) { error = sysctl_dumpnhop(rt, wn[i].nh, wn[i].weight, w); if (error != 0) return (error); } } else #endif sysctl_dumpnhop(rt, nh, rt->rt_weight, w); return (0); } static int sysctl_dumpnhop(struct rtentry *rt, struct nhop_object *nh, uint32_t weight, struct walkarg *w) { struct rt_addrinfo info; int error = 0, size; uint32_t rtflags; rtflags = nhop_get_rtflags(nh); if (w->w_op == NET_RT_FLAGS && !(rtflags & w->w_arg)) return (0); bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = w->dst; info.rti_info[RTAX_GATEWAY] = &nh->gw_sa; info.rti_info[RTAX_NETMASK] = (rtflags & RTF_HOST) ? NULL : w->mask; info.rti_info[RTAX_GENMASK] = 0; if (nh->nh_ifp && !(nh->nh_ifp->if_flags & IFF_DYING)) { info.rti_info[RTAX_IFP] = nh->nh_ifp->if_addr->ifa_addr; info.rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr; if (nh->nh_ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = nh->nh_ifa->ifa_dstaddr; } if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0) return (error); if (w->w_req && w->w_tmem) { struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; bzero(&rtm->rtm_index, sizeof(*rtm) - offsetof(struct rt_msghdr, rtm_index)); /* * rte flags may consist of RTF_HOST (duplicated in nhop rtflags) * and RTF_UP (if entry is linked, which is always true here). * Given that, use nhop rtflags & add RTF_UP. 
*/ rtm->rtm_flags = rtflags | RTF_UP; if (rtm->rtm_flags & RTF_GWFLAG_COMPAT) rtm->rtm_flags = RTF_GATEWAY | (rtm->rtm_flags & ~RTF_GWFLAG_COMPAT); rt_getmetrics(rt, nh, &rtm->rtm_rmx); rtm->rtm_rmx.rmx_weight = weight; rtm->rtm_index = nh->nh_ifp->if_index; rtm->rtm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); return (error); } return (error); } static int sysctl_iflist_ifml(struct ifnet *ifp, const struct if_data *src_ifd, struct rt_addrinfo *info, struct walkarg *w, int len) { struct if_msghdrl *ifm; struct if_data *ifd; ifm = (struct if_msghdrl *)w->w_tmem; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct if_msghdrl32 *ifm32; ifm32 = (struct if_msghdrl32 *)ifm; ifm32->ifm_addrs = info->rti_addrs; ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm32->ifm_index = ifp->if_index; ifm32->_ifm_spare1 = 0; ifm32->ifm_len = sizeof(*ifm32); ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data); ifm32->_ifm_spare2 = 0; ifd = &ifm32->ifm_data; } else #endif { ifm->ifm_addrs = info->rti_addrs; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm->ifm_index = ifp->if_index; ifm->_ifm_spare1 = 0; ifm->ifm_len = sizeof(*ifm); ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data); ifm->_ifm_spare2 = 0; ifd = &ifm->ifm_data; } memcpy(ifd, src_ifd, sizeof(*ifd)); return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len)); } static int sysctl_iflist_ifm(struct ifnet *ifp, const struct if_data *src_ifd, struct rt_addrinfo *info, struct walkarg *w, int len) { struct if_msghdr *ifm; struct if_data *ifd; ifm = (struct if_msghdr *)w->w_tmem; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct if_msghdr32 *ifm32; ifm32 = (struct if_msghdr32 *)ifm; ifm32->ifm_addrs = info->rti_addrs; ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm32->ifm_index = ifp->if_index; ifm32->_ifm_spare1 = 0; ifd = &ifm32->ifm_data; } else #endif { ifm->ifm_addrs = info->rti_addrs; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm->ifm_index = ifp->if_index; ifm->_ifm_spare1 = 0; ifd = &ifm->ifm_data; } memcpy(ifd, src_ifd, sizeof(*ifd)); return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len)); } static int sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info, struct walkarg *w, int len) { struct ifa_msghdrl *ifam; struct if_data *ifd; ifam = (struct ifa_msghdrl *)w->w_tmem; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct ifa_msghdrl32 *ifam32; ifam32 = (struct ifa_msghdrl32 *)ifam; ifam32->ifam_addrs = info->rti_addrs; ifam32->ifam_flags = ifa->ifa_flags; ifam32->ifam_index = ifa->ifa_ifp->if_index; ifam32->_ifam_spare1 = 0; ifam32->ifam_len = sizeof(*ifam32); ifam32->ifam_data_off = offsetof(struct ifa_msghdrl32, ifam_data); ifam32->ifam_metric = ifa->ifa_ifp->if_metric; ifd = &ifam32->ifam_data; } else #endif { ifam->ifam_addrs = info->rti_addrs; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_index = ifa->ifa_ifp->if_index; ifam->_ifam_spare1 = 0; ifam->ifam_len = sizeof(*ifam); ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data); ifam->ifam_metric = ifa->ifa_ifp->if_metric; ifd = &ifam->ifam_data; } bzero(ifd, sizeof(*ifd)); ifd->ifi_datalen = sizeof(struct if_data); ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets); ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets); ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes); ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes); /* Fixup if_data carp(4) vhid. 
*/ if (carp_get_vhid_p != NULL) ifd->ifi_vhid = (*carp_get_vhid_p)(ifa); return (SYSCTL_OUT(w->w_req, w->w_tmem, len)); } static int sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info, struct walkarg *w, int len) { struct ifa_msghdr *ifam; ifam = (struct ifa_msghdr *)w->w_tmem; ifam->ifam_addrs = info->rti_addrs; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_index = ifa->ifa_ifp->if_index; ifam->_ifam_spare1 = 0; ifam->ifam_metric = ifa->ifa_ifp->if_metric; return (SYSCTL_OUT(w->w_req, w->w_tmem, len)); } static int sysctl_iflist(int af, struct walkarg *w) { struct ifnet *ifp; struct ifaddr *ifa; struct if_data ifd; struct rt_addrinfo info; int len, error = 0; struct sockaddr_storage ss; bzero((caddr_t)&info, sizeof(info)); bzero(&ifd, sizeof(ifd)); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; if_data_copy(ifp, &ifd); ifa = ifp->if_addr; info.rti_info[RTAX_IFP] = ifa->ifa_addr; error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len); if (error != 0) goto done; info.rti_info[RTAX_IFP] = NULL; if (w->w_req && w->w_tmem) { if (w->w_op == NET_RT_IFLISTL) error = sysctl_iflist_ifml(ifp, &ifd, &info, w, len); else error = sysctl_iflist_ifm(ifp, &ifd, &info, w, len); if (error) goto done; } while ((ifa = CK_STAILQ_NEXT(ifa, ifa_link)) != NULL) { if (af && af != ifa->ifa_addr->sa_family) continue; if (prison_if(w->w_req->td->td_ucred, ifa->ifa_addr) != 0) continue; info.rti_info[RTAX_IFA] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask( ifa->ifa_addr, ifa->ifa_netmask, &ss); info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len); if (error != 0) goto done; if (w->w_req && w->w_tmem) { if (w->w_op == NET_RT_IFLISTL) error = sysctl_iflist_ifaml(ifa, &info, w, len); else error = sysctl_iflist_ifam(ifa, &info, w, len); if (error) goto done; } } info.rti_info[RTAX_IFA] = NULL; info.rti_info[RTAX_NETMASK] = NULL; info.rti_info[RTAX_BRD] = NULL; } done: return (error); } static int sysctl_ifmalist(int af, struct walkarg *w) { struct rt_addrinfo info; struct ifaddr *ifa; struct ifmultiaddr *ifma; struct ifnet *ifp; int error, len; NET_EPOCH_ASSERT(); error = 0; bzero((caddr_t)&info, sizeof(info)); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; ifa = ifp->if_addr; info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL; CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (af && af != ifma->ifma_addr->sa_family) continue; if (prison_if(w->w_req->td->td_ucred, ifma->ifma_addr) != 0) continue; info.rti_info[RTAX_IFA] = ifma->ifma_addr; info.rti_info[RTAX_GATEWAY] = (ifma->ifma_addr->sa_family != AF_LINK) ? 
ifma->ifma_lladdr : NULL;
			error = rtsock_msg_buffer(RTM_NEWMADDR, &info, w, &len);
			if (error != 0)
				break;
			if (w->w_req && w->w_tmem) {
				struct ifma_msghdr *ifmam;

				ifmam = (struct ifma_msghdr *)w->w_tmem;
				ifmam->ifmam_index = ifma->ifma_ifp->if_index;
				ifmam->ifmam_flags = 0;
				ifmam->ifmam_addrs = info.rti_addrs;
				ifmam->_ifmam_spare1 = 0;
				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
				if (error != 0)
					break;
			}
		}
		if (error != 0)
			break;
	}

	return (error);
}

static void
rtable_sysctl_dump(uint32_t fibnum, int family, struct walkarg *w)
{
	union sockaddr_union sa_dst, sa_mask;

	w->family = family;
	w->dst = (struct sockaddr *)&sa_dst;
	w->mask = (struct sockaddr *)&sa_mask;

	init_sockaddrs_family(family, w->dst, w->mask);

	rib_walk(fibnum, family, false, sysctl_dumpentry, w);
}

static int
sysctl_rtsock(SYSCTL_HANDLER_ARGS)
{
	struct epoch_tracker et;
	int *name = (int *)arg1;
	u_int namelen = arg2;
	struct rib_head *rnh = NULL; /* silence compiler. */
	int i, lim, error = EINVAL;
	int fib = 0;
	u_char af;
	struct walkarg w;

	if (namelen < 3)
		return (EINVAL);

	name++;
	namelen--;
	if (req->newptr)
		return (EPERM);
	if (name[1] == NET_RT_DUMP || name[1] == NET_RT_NHOP ||
	    name[1] == NET_RT_NHGRP) {
		if (namelen == 3)
			fib = req->td->td_proc->p_fibnum;
		else if (namelen == 4)
			fib = (name[3] == RT_ALL_FIBS) ?
			    req->td->td_proc->p_fibnum : name[3];
		else
			return ((namelen < 3) ? EISDIR : ENOTDIR);
		if (fib < 0 || fib >= rt_numfibs)
			return (EINVAL);
	} else if (namelen != 3)
		return ((namelen < 3) ? EISDIR : ENOTDIR);
	af = name[0];
	if (af > AF_MAX)
		return (EINVAL);
	bzero(&w, sizeof(w));
	w.w_op = name[1];
	w.w_arg = name[2];
	w.w_req = req;

	error = sysctl_wire_old_buffer(req, 0);
	if (error)
		return (error);

	/*
	 * Allocate the reply buffer in advance.
	 * All rtsock messages have a maximum length of u_short.
	 */
	w.w_tmemsize = 65536;
	w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK);

	NET_EPOCH_ENTER(et);
	switch (w.w_op) {
	case NET_RT_DUMP:
	case NET_RT_FLAGS:
		if (af == 0) {			/* dump all tables */
			i = 1;
			lim = AF_MAX;
		} else				/* dump only one table */
			i = lim = af;

		/*
		 * Take care of llinfo entries; the caller must
		 * specify an AF.
		 */
		if (w.w_op == NET_RT_FLAGS &&
		    (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) {
			if (af != 0)
				error = lltable_sysctl_dumparp(af, w.w_req);
			else
				error = EINVAL;
			break;
		}
		/*
		 * Take care of routing entries.
		 */
		for (error = 0; error == 0 && i <= lim; i++) {
			rnh = rt_tables_get_rnh(fib, i);
			if (rnh != NULL) {
				rtable_sysctl_dump(fib, i, &w);
			} else if (af != 0)
				error = EAFNOSUPPORT;
		}
		break;
	case NET_RT_NHOP:
	case NET_RT_NHGRP:
		/* Allow dumping one specific af/fib at a time */
		if (namelen < 4) {
			error = EINVAL;
			break;
		}
		fib = name[3];
		if (fib < 0 || fib > rt_numfibs) {
			error = EINVAL;
			break;
		}
		rnh = rt_tables_get_rnh(fib, af);
		if (rnh == NULL) {
			error = EAFNOSUPPORT;
			break;
		}
		if (w.w_op == NET_RT_NHOP)
			error = nhops_dump_sysctl(rnh, w.w_req);
		else
#ifdef ROUTE_MPATH
			error = nhgrp_dump_sysctl(rnh, w.w_req);
#else
			error = ENOTSUP;
#endif
		break;
	case NET_RT_IFLIST:
	case NET_RT_IFLISTL:
		error = sysctl_iflist(af, &w);
		break;
	case NET_RT_IFMALIST:
		error = sysctl_ifmalist(af, &w);
		break;
	}
	NET_EPOCH_EXIT(et);
	free(w.w_tmem, M_TEMP);
	return (error);
}
static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_MPSAFE,
    sysctl_rtsock, "Return route tables and interface/address lists");

/*
 * Definitions of protocols supported in the ROUTE domain.
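 *
 * For illustration only (not part of this change), a minimal userland
 * consumer of this domain opens a raw routing socket and reads
 * messages framed by struct rt_msghdr:
 *
 *	int s = socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC);
 *	char buf[2048];
 *	ssize_t n = read(s, buf, sizeof(buf));
 *	struct rt_msghdr *rtm = (struct rt_msghdr *)buf;
 *
 *	if (n >= (ssize_t)sizeof(*rtm) && rtm->rtm_version == RTM_VERSION)
 *		printf("rtsock message type %d, len %u\n",
 *		    rtm->rtm_type, rtm->rtm_msglen);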
*/ static struct domain routedomain; /* or at least forward */ static struct protosw routesw = { .pr_type = SOCK_RAW, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_abort = rts_close, .pr_attach = rts_attach, .pr_detach = rts_detach, .pr_send = rts_send, .pr_shutdown = rts_shutdown, .pr_close = rts_close, }; static struct domain routedomain = { .dom_family = PF_ROUTE, .dom_name = "route", .dom_nprotosw = 1, .dom_protosw = { &routesw }, }; DOMAIN_SET(route); diff --git a/sys/net80211/ieee80211_freebsd.c b/sys/net80211/ieee80211_freebsd.c index ec970e217dfd..7158ada291ab 100644 --- a/sys/net80211/ieee80211_freebsd.c +++ b/sys/net80211/ieee80211_freebsd.c @@ -1,1194 +1,1194 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * IEEE 802.11 support (FreeBSD-specific code) */ #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include DEBUGNET_DEFINE(ieee80211); SYSCTL_NODE(_net, OID_AUTO, wlan, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "IEEE 80211 parameters"); #ifdef IEEE80211_DEBUG static int ieee80211_debug = 0; SYSCTL_INT(_net_wlan, OID_AUTO, debug, CTLFLAG_RW, &ieee80211_debug, 0, "debugging printfs"); #endif static const char wlanname[] = "wlan"; static struct if_clone *wlan_cloner; /* * priv(9) NET80211 checks. * Return 0 if operation is allowed, E* (usually EPERM) otherwise. 
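 *
 * An illustrative call site (not part of this change), sketching how
 * an ioctl path might gate a privileged operation on one of these
 * checks; the surrounding handler is assumed:
 *
 *	error = ieee80211_priv_check_vap_manage(cmd, vap, ifp);
 *	if (error != 0)
 *		return (error);
 *
 * where error is typically EPERM for unprivileged callers.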
 */
int
ieee80211_priv_check_vap_getkey(u_long cmd __unused,
    struct ieee80211vap *vap __unused, struct ifnet *ifp __unused)
{
	return (priv_check(curthread, PRIV_NET80211_VAP_GETKEY));
}

int
ieee80211_priv_check_vap_manage(u_long cmd __unused,
    struct ieee80211vap *vap __unused, struct ifnet *ifp __unused)
{
	return (priv_check(curthread, PRIV_NET80211_VAP_MANAGE));
}

int
ieee80211_priv_check_vap_setmac(u_long cmd __unused,
    struct ieee80211vap *vap __unused, struct ifnet *ifp __unused)
{
	return (priv_check(curthread, PRIV_NET80211_VAP_SETMAC));
}

int
ieee80211_priv_check_create_vap(u_long cmd __unused,
    struct ieee80211vap *vap __unused, struct ifnet *ifp __unused)
{
	return (priv_check(curthread, PRIV_NET80211_CREATE_VAP));
}

static int
wlan_clone_create(struct if_clone *ifc, char *name, size_t len,
    struct ifc_data *ifd, struct ifnet **ifpp)
{
	struct ieee80211_clone_params cp;
	struct ieee80211vap *vap;
	struct ieee80211com *ic;
	int error;

	error = ieee80211_priv_check_create_vap(0, NULL, NULL);
	if (error)
		return error;

	error = ifc_copyin(ifd, &cp, sizeof(cp));
	if (error)
		return error;
	ic = ieee80211_find_com(cp.icp_parent);
	if (ic == NULL)
		return ENXIO;
	if (cp.icp_opmode >= IEEE80211_OPMODE_MAX) {
		ic_printf(ic, "%s: invalid opmode %d\n", __func__,
		    cp.icp_opmode);
		return EINVAL;
	}
	if ((ic->ic_caps & ieee80211_opcap[cp.icp_opmode]) == 0) {
		ic_printf(ic, "%s mode not supported\n",
		    ieee80211_opmode_name[cp.icp_opmode]);
		return EOPNOTSUPP;
	}
	if ((cp.icp_flags & IEEE80211_CLONE_TDMA) &&
#ifdef IEEE80211_SUPPORT_TDMA
	    (ic->ic_caps & IEEE80211_C_TDMA) == 0
#else
	    (1)
#endif
	) {
		ic_printf(ic, "TDMA not supported\n");
		return EOPNOTSUPP;
	}
	vap = ic->ic_vap_create(ic, wlanname, ifd->unit,
	    cp.icp_opmode, cp.icp_flags, cp.icp_bssid,
	    cp.icp_flags & IEEE80211_CLONE_MACADDR ?
	    cp.icp_macaddr : ic->ic_macaddr);
	if (vap == NULL)
		return (EIO);

#ifdef DEBUGNET
	if (ic->ic_debugnet_meth != NULL)
		DEBUGNET_SET(vap->iv_ifp, ieee80211);
#endif
	*ifpp = vap->iv_ifp;

	return (0);
}

static int
wlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
{
	struct ieee80211vap *vap = ifp->if_softc;
	struct ieee80211com *ic = vap->iv_ic;

	ic->ic_vap_delete(vap);

	return (0);
}

void
ieee80211_vap_destroy(struct ieee80211vap *vap)
{
	CURVNET_SET(vap->iv_ifp->if_vnet);
	if_clone_destroyif(wlan_cloner, vap->iv_ifp);
	CURVNET_RESTORE();
}

int
ieee80211_sysctl_msecs_ticks(SYSCTL_HANDLER_ARGS)
{
	int msecs = ticks_to_msecs(*(int *)arg1);
	int error;

	error = sysctl_handle_int(oidp, &msecs, 0, req);
	if (error || !req->newptr)
		return error;
	*(int *)arg1 = msecs_to_ticks(msecs);
	return 0;
}

static int
ieee80211_sysctl_inact(SYSCTL_HANDLER_ARGS)
{
	int inact = (*(int *)arg1) * IEEE80211_INACT_WAIT;
	int error;

	error = sysctl_handle_int(oidp, &inact, 0, req);
	if (error || !req->newptr)
		return error;
	*(int *)arg1 = inact / IEEE80211_INACT_WAIT;
	return 0;
}

static int
ieee80211_sysctl_parent(SYSCTL_HANDLER_ARGS)
{
	struct ieee80211com *ic = arg1;

	return SYSCTL_OUT_STR(req, ic->ic_name);
}

static int
ieee80211_sysctl_radar(SYSCTL_HANDLER_ARGS)
{
	struct ieee80211com *ic = arg1;
	int t = 0, error;

	error = sysctl_handle_int(oidp, &t, 0, req);
	if (error || !req->newptr)
		return error;
	IEEE80211_LOCK(ic);
	ieee80211_dfs_notify_radar(ic, ic->ic_curchan);
	IEEE80211_UNLOCK(ic);
	return 0;
}

/*
 * For now, just restart everything.
 *
 * Later on, it'd be nice to have a separate VAP restart that is
 * distinct from a full-device restart.
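 *
 * Illustration only (not part of this change): the handler below is
 * reachable through the per-vap sysctl tree, so userland can force a
 * restart of, e.g., wlan0's device by writing any value to it:
 *
 *	int one = 1;
 *
 *	sysctlbyname("net.wlan.0.force_restart", NULL, NULL,
 *	    &one, sizeof(one));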
*/ static int ieee80211_sysctl_vap_restart(SYSCTL_HANDLER_ARGS) { struct ieee80211vap *vap = arg1; int t = 0, error; error = sysctl_handle_int(oidp, &t, 0, req); if (error || !req->newptr) return error; ieee80211_restart_all(vap->iv_ic); return 0; } void ieee80211_sysctl_attach(struct ieee80211com *ic) { } void ieee80211_sysctl_detach(struct ieee80211com *ic) { } void ieee80211_sysctl_vattach(struct ieee80211vap *vap) { struct ifnet *ifp = vap->iv_ifp; struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; char num[14]; /* sufficient for 32 bits */ ctx = (struct sysctl_ctx_list *) IEEE80211_MALLOC(sizeof(struct sysctl_ctx_list), M_DEVBUF, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (ctx == NULL) { if_printf(ifp, "%s: cannot allocate sysctl context!\n", __func__); return; } sysctl_ctx_init(ctx); snprintf(num, sizeof(num), "%u", ifp->if_dunit); oid = SYSCTL_ADD_NODE(ctx, &SYSCTL_NODE_CHILDREN(_net, wlan), OID_AUTO, num, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "%parent", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, vap->iv_ic, 0, ieee80211_sysctl_parent, "A", "parent device"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "driver_caps", CTLFLAG_RW, &vap->iv_caps, 0, "driver capabilities"); #ifdef IEEE80211_DEBUG vap->iv_debug = ieee80211_debug; SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "debug", CTLFLAG_RW, &vap->iv_debug, 0, "control debugging printfs"); #endif SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "bmiss_max", CTLFLAG_RW, &vap->iv_bmiss_max, 0, "consecutive beacon misses before scanning"); /* XXX inherit from tunables */ SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "inact_run", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &vap->iv_inact_run, 0, ieee80211_sysctl_inact, "I", "station inactivity timeout (sec)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "inact_probe", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &vap->iv_inact_probe, 0, ieee80211_sysctl_inact, "I", "station inactivity probe timeout (sec)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "inact_auth", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &vap->iv_inact_auth, 0, ieee80211_sysctl_inact, "I", "station authentication timeout (sec)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "inact_init", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &vap->iv_inact_init, 0, ieee80211_sysctl_inact, "I", "station initial state timeout (sec)"); if (vap->iv_htcaps & IEEE80211_HTC_HT) { SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "ampdu_mintraffic_bk", CTLFLAG_RW, &vap->iv_ampdu_mintraffic[WME_AC_BK], 0, "BK traffic tx aggr threshold (pps)"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "ampdu_mintraffic_be", CTLFLAG_RW, &vap->iv_ampdu_mintraffic[WME_AC_BE], 0, "BE traffic tx aggr threshold (pps)"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "ampdu_mintraffic_vo", CTLFLAG_RW, &vap->iv_ampdu_mintraffic[WME_AC_VO], 0, "VO traffic tx aggr threshold (pps)"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "ampdu_mintraffic_vi", CTLFLAG_RW, &vap->iv_ampdu_mintraffic[WME_AC_VI], 0, "VI traffic tx aggr threshold (pps)"); } SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "force_restart", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, vap, 0, ieee80211_sysctl_vap_restart, "I", "force a VAP restart"); if (vap->iv_caps & IEEE80211_C_DFS) { SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "radar", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, vap->iv_ic, 0, ieee80211_sysctl_radar, "I", "simulate 
radar event"); } vap->iv_sysctl = ctx; vap->iv_oid = oid; } void ieee80211_sysctl_vdetach(struct ieee80211vap *vap) { if (vap->iv_sysctl != NULL) { sysctl_ctx_free(vap->iv_sysctl); IEEE80211_FREE(vap->iv_sysctl, M_DEVBUF); vap->iv_sysctl = NULL; } } int ieee80211_com_vincref(struct ieee80211vap *vap) { uint32_t ostate; ostate = atomic_fetchadd_32(&vap->iv_com_state, IEEE80211_COM_REF_ADD); if (ostate & IEEE80211_COM_DETACHED) { atomic_subtract_32(&vap->iv_com_state, IEEE80211_COM_REF_ADD); return (ENETDOWN); } if (_IEEE80211_MASKSHIFT(ostate, IEEE80211_COM_REF) == IEEE80211_COM_REF_MAX) { atomic_subtract_32(&vap->iv_com_state, IEEE80211_COM_REF_ADD); return (EOVERFLOW); } return (0); } void ieee80211_com_vdecref(struct ieee80211vap *vap) { uint32_t ostate; ostate = atomic_fetchadd_32(&vap->iv_com_state, -IEEE80211_COM_REF_ADD); KASSERT(_IEEE80211_MASKSHIFT(ostate, IEEE80211_COM_REF) != 0, ("com reference counter underflow")); (void) ostate; } void ieee80211_com_vdetach(struct ieee80211vap *vap) { int sleep_time; sleep_time = msecs_to_ticks(250); atomic_set_32(&vap->iv_com_state, IEEE80211_COM_DETACHED); while (_IEEE80211_MASKSHIFT(atomic_load_32(&vap->iv_com_state), IEEE80211_COM_REF) != 0) pause("comref", sleep_time); } int ieee80211_node_dectestref(struct ieee80211_node *ni) { /* XXX need equivalent of atomic_dec_and_test */ atomic_subtract_int(&ni->ni_refcnt, 1); return atomic_cmpset_int(&ni->ni_refcnt, 0, 1); } void ieee80211_drain_ifq(struct ifqueue *ifq) { struct ieee80211_node *ni; struct mbuf *m; for (;;) { IF_DEQUEUE(ifq, m); if (m == NULL) break; ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; KASSERT(ni != NULL, ("frame w/o node")); ieee80211_free_node(ni); m->m_pkthdr.rcvif = NULL; m_freem(m); } } void ieee80211_flush_ifq(struct ifqueue *ifq, struct ieee80211vap *vap) { struct ieee80211_node *ni; struct mbuf *m, **mprev; IF_LOCK(ifq); mprev = &ifq->ifq_head; while ((m = *mprev) != NULL) { ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; if (ni != NULL && ni->ni_vap == vap) { *mprev = m->m_nextpkt; /* remove from list */ ifq->ifq_len--; m_freem(m); ieee80211_free_node(ni); /* reclaim ref */ } else mprev = &m->m_nextpkt; } /* recalculate tail ptr */ m = ifq->ifq_head; for (; m != NULL && m->m_nextpkt != NULL; m = m->m_nextpkt) ; ifq->ifq_tail = m; IF_UNLOCK(ifq); } /* * As above, for mbufs allocated with m_gethdr/MGETHDR * or initialized by M_COPY_PKTHDR. */ #define MC_ALIGN(m, len) \ do { \ (m)->m_data += rounddown2(MCLBYTES - (len), sizeof(long)); \ } while (/* CONSTCOND */ 0) /* * Allocate and setup a management frame of the specified * size. We return the mbuf and a pointer to the start * of the contiguous data area that's been reserved based * on the packet length. The data area is forced to 32-bit * alignment and the buffer length to a multiple of 4 bytes. * This is done mainly so beacon frames (that require this) * can use this interface too. */ struct mbuf * ieee80211_getmgtframe(uint8_t **frm, int headroom, int pktlen) { struct mbuf *m; u_int len; /* * NB: we know the mbuf routines will align the data area * so we don't need to do anything special. */ len = roundup2(headroom + pktlen, 4); KASSERT(len <= MCLBYTES, ("802.11 mgt frame too large: %u", len)); if (len < MINCLSIZE) { m = m_gethdr(IEEE80211_M_NOWAIT, MT_DATA); /* * Align the data in case additional headers are added. * This should only happen when a WEP header is added * which only happens for shared key authentication mgt * frames which all fit in MHLEN. 
*/ if (m != NULL) M_ALIGN(m, len); } else { m = m_getcl(IEEE80211_M_NOWAIT, MT_DATA, M_PKTHDR); if (m != NULL) MC_ALIGN(m, len); } if (m != NULL) { m->m_data += headroom; *frm = m->m_data; } return m; } #ifndef __NO_STRICT_ALIGNMENT /* * Re-align the payload in the mbuf. This is mainly used (right now) * to handle IP header alignment requirements on certain architectures. */ struct mbuf * ieee80211_realign(struct ieee80211vap *vap, struct mbuf *m, size_t align) { int pktlen, space; struct mbuf *n; pktlen = m->m_pkthdr.len; space = pktlen + align; if (space < MINCLSIZE) n = m_gethdr(IEEE80211_M_NOWAIT, MT_DATA); else { n = m_getjcl(IEEE80211_M_NOWAIT, MT_DATA, M_PKTHDR, space <= MCLBYTES ? MCLBYTES : #if MJUMPAGESIZE != MCLBYTES space <= MJUMPAGESIZE ? MJUMPAGESIZE : #endif space <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES); } if (__predict_true(n != NULL)) { m_move_pkthdr(n, m); n->m_data = (caddr_t)(ALIGN(n->m_data + align) - align); m_copydata(m, 0, pktlen, mtod(n, caddr_t)); n->m_len = pktlen; } else { IEEE80211_DISCARD(vap, IEEE80211_MSG_ANY, mtod(m, const struct ieee80211_frame *), NULL, "%s", "no mbuf to realign"); vap->iv_stats.is_rx_badalign++; } m_freem(m); return n; } #endif /* !__NO_STRICT_ALIGNMENT */ int ieee80211_add_callback(struct mbuf *m, void (*func)(struct ieee80211_node *, void *, int), void *arg) { struct m_tag *mtag; struct ieee80211_cb *cb; mtag = m_tag_alloc(MTAG_ABI_NET80211, NET80211_TAG_CALLBACK, sizeof(struct ieee80211_cb), IEEE80211_M_NOWAIT); if (mtag == NULL) return 0; cb = (struct ieee80211_cb *)(mtag+1); cb->func = func; cb->arg = arg; m_tag_prepend(m, mtag); m->m_flags |= M_TXCB; return 1; } int ieee80211_add_xmit_params(struct mbuf *m, const struct ieee80211_bpf_params *params) { struct m_tag *mtag; struct ieee80211_tx_params *tx; mtag = m_tag_alloc(MTAG_ABI_NET80211, NET80211_TAG_XMIT_PARAMS, sizeof(struct ieee80211_tx_params), IEEE80211_M_NOWAIT); if (mtag == NULL) return (0); tx = (struct ieee80211_tx_params *)(mtag+1); memcpy(&tx->params, params, sizeof(struct ieee80211_bpf_params)); m_tag_prepend(m, mtag); return (1); } int ieee80211_get_xmit_params(struct mbuf *m, struct ieee80211_bpf_params *params) { struct m_tag *mtag; struct ieee80211_tx_params *tx; mtag = m_tag_locate(m, MTAG_ABI_NET80211, NET80211_TAG_XMIT_PARAMS, NULL); if (mtag == NULL) return (-1); tx = (struct ieee80211_tx_params *)(mtag + 1); memcpy(params, &tx->params, sizeof(struct ieee80211_bpf_params)); return (0); } void ieee80211_process_callback(struct ieee80211_node *ni, struct mbuf *m, int status) { struct m_tag *mtag; mtag = m_tag_locate(m, MTAG_ABI_NET80211, NET80211_TAG_CALLBACK, NULL); if (mtag != NULL) { struct ieee80211_cb *cb = (struct ieee80211_cb *)(mtag+1); cb->func(ni, cb->arg, status); } } /* * Add RX parameters to the given mbuf. * * Returns 1 if OK, 0 on error. 
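 *
 * An illustrative driver RX-path sketch (not part of this change; the
 * field values are invented and the mbuf m is assumed to hold a
 * received frame):
 *
 *	struct ieee80211_rx_stats rxs;
 *
 *	memset(&rxs, 0, sizeof(rxs));
 *	rxs.r_flags |= IEEE80211_R_NF;
 *	rxs.c_nf = -96;
 *	if (ieee80211_add_rx_params(m, &rxs) == 0) {
 *		m_freem(m);
 *		return;
 *	}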
*/ int ieee80211_add_rx_params(struct mbuf *m, const struct ieee80211_rx_stats *rxs) { struct m_tag *mtag; struct ieee80211_rx_params *rx; mtag = m_tag_alloc(MTAG_ABI_NET80211, NET80211_TAG_RECV_PARAMS, sizeof(struct ieee80211_rx_stats), IEEE80211_M_NOWAIT); if (mtag == NULL) return (0); rx = (struct ieee80211_rx_params *)(mtag + 1); memcpy(&rx->params, rxs, sizeof(*rxs)); m_tag_prepend(m, mtag); return (1); } int ieee80211_get_rx_params(struct mbuf *m, struct ieee80211_rx_stats *rxs) { struct m_tag *mtag; struct ieee80211_rx_params *rx; mtag = m_tag_locate(m, MTAG_ABI_NET80211, NET80211_TAG_RECV_PARAMS, NULL); if (mtag == NULL) return (-1); rx = (struct ieee80211_rx_params *)(mtag + 1); memcpy(rxs, &rx->params, sizeof(*rxs)); return (0); } const struct ieee80211_rx_stats * ieee80211_get_rx_params_ptr(struct mbuf *m) { struct m_tag *mtag; struct ieee80211_rx_params *rx; mtag = m_tag_locate(m, MTAG_ABI_NET80211, NET80211_TAG_RECV_PARAMS, NULL); if (mtag == NULL) return (NULL); rx = (struct ieee80211_rx_params *)(mtag + 1); return (&rx->params); } /* * Add TOA parameters to the given mbuf. */ int ieee80211_add_toa_params(struct mbuf *m, const struct ieee80211_toa_params *p) { struct m_tag *mtag; struct ieee80211_toa_params *rp; mtag = m_tag_alloc(MTAG_ABI_NET80211, NET80211_TAG_TOA_PARAMS, sizeof(struct ieee80211_toa_params), IEEE80211_M_NOWAIT); if (mtag == NULL) return (0); rp = (struct ieee80211_toa_params *)(mtag + 1); memcpy(rp, p, sizeof(*rp)); m_tag_prepend(m, mtag); return (1); } int ieee80211_get_toa_params(struct mbuf *m, struct ieee80211_toa_params *p) { struct m_tag *mtag; struct ieee80211_toa_params *rp; mtag = m_tag_locate(m, MTAG_ABI_NET80211, NET80211_TAG_TOA_PARAMS, NULL); if (mtag == NULL) return (0); rp = (struct ieee80211_toa_params *)(mtag + 1); if (p != NULL) memcpy(p, rp, sizeof(*p)); return (1); } /* * Transmit a frame to the parent interface. */ int ieee80211_parent_xmitpkt(struct ieee80211com *ic, struct mbuf *m) { int error; /* * Assert the IC TX lock is held - this enforces the * processing -> queuing order is maintained */ IEEE80211_TX_LOCK_ASSERT(ic); error = ic->ic_transmit(ic, m); if (error) { struct ieee80211_node *ni; ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; /* XXX number of fragments */ if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); ieee80211_free_node(ni); ieee80211_free_mbuf(m); } return (error); } /* * Transmit a frame to the VAP interface. */ int ieee80211_vap_xmitpkt(struct ieee80211vap *vap, struct mbuf *m) { struct ifnet *ifp = vap->iv_ifp; /* * When transmitting via the VAP, we shouldn't hold * any IC TX lock as the VAP TX path will acquire it. */ IEEE80211_TX_UNLOCK_ASSERT(vap->iv_ic); return (ifp->if_transmit(ifp, m)); } #include void net80211_get_random_bytes(void *p, size_t n) { uint8_t *dp = p; while (n > 0) { uint32_t v = arc4random(); size_t nb = n > sizeof(uint32_t) ? sizeof(uint32_t) : n; bcopy(&v, dp, n > sizeof(uint32_t) ? sizeof(uint32_t) : n); dp += sizeof(uint32_t), n -= nb; } } /* * Helper function for events that pass just a single mac address. 
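 *
 * These events are delivered via rt_ieee80211msg(), piggybacked on the
 * RTM_IFANNOUNCE format.  Purely for illustration (not part of this
 * change), a userland listener holding a routing-socket buffer buf
 * could decode a join event as:
 *
 *	struct if_announcemsghdr *ifan = (struct if_announcemsghdr *)buf;
 *
 *	if (ifan->ifan_what == RTM_IEEE80211_JOIN) {
 *		struct ieee80211_join_event *jev =
 *		    (struct ieee80211_join_event *)(ifan + 1);
 *		printf("%s: station joined\n", ether_ntoa(
 *		    (const struct ether_addr *)jev->iev_addr));
 *	}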
 */
static void
notify_macaddr(struct ifnet *ifp, int op,
    const uint8_t mac[IEEE80211_ADDR_LEN])
{
	struct ieee80211_join_event iev;

	CURVNET_SET(ifp->if_vnet);
	memset(&iev, 0, sizeof(iev));
	IEEE80211_ADDR_COPY(iev.iev_addr, mac);
	rt_ieee80211msg(ifp, op, &iev, sizeof(iev));
	CURVNET_RESTORE();
}

void
ieee80211_notify_node_join(struct ieee80211_node *ni, int newassoc)
{
	struct ieee80211vap *vap = ni->ni_vap;
	struct ifnet *ifp = vap->iv_ifp;

	CURVNET_SET_QUIET(ifp->if_vnet);
	IEEE80211_NOTE(vap, IEEE80211_MSG_NODE, ni, "%snode join",
	    (ni == vap->iv_bss) ? "bss " : "");

	if (ni == vap->iv_bss) {
		notify_macaddr(ifp, newassoc ?
		    RTM_IEEE80211_ASSOC : RTM_IEEE80211_REASSOC, ni->ni_bssid);
		if_link_state_change(ifp, LINK_STATE_UP);
	} else {
		notify_macaddr(ifp, newassoc ?
		    RTM_IEEE80211_JOIN : RTM_IEEE80211_REJOIN, ni->ni_macaddr);
	}
	CURVNET_RESTORE();
}

void
ieee80211_notify_node_leave(struct ieee80211_node *ni)
{
	struct ieee80211vap *vap = ni->ni_vap;
	struct ifnet *ifp = vap->iv_ifp;

	CURVNET_SET_QUIET(ifp->if_vnet);
	IEEE80211_NOTE(vap, IEEE80211_MSG_NODE, ni, "%snode leave",
	    (ni == vap->iv_bss) ? "bss " : "");

	if (ni == vap->iv_bss) {
		rt_ieee80211msg(ifp, RTM_IEEE80211_DISASSOC, NULL, 0);
		if_link_state_change(ifp, LINK_STATE_DOWN);
	} else {
		/* fire off wireless event station leaving */
		notify_macaddr(ifp, RTM_IEEE80211_LEAVE, ni->ni_macaddr);
	}
	CURVNET_RESTORE();
}

void
ieee80211_notify_scan_done(struct ieee80211vap *vap)
{
	struct ifnet *ifp = vap->iv_ifp;

	IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s\n", "notify scan done");

	/* dispatch wireless event indicating scan completed */
	CURVNET_SET(ifp->if_vnet);
	rt_ieee80211msg(ifp, RTM_IEEE80211_SCAN, NULL, 0);
	CURVNET_RESTORE();
}

void
ieee80211_notify_replay_failure(struct ieee80211vap *vap,
	const struct ieee80211_frame *wh, const struct ieee80211_key *k,
	u_int64_t rsc, int tid)
{
	struct ifnet *ifp = vap->iv_ifp;

	/*
	 * NB: the bracketed portion of this format string was lost to
	 * extraction damage; it is reconstructed here to match the
	 * argument list (each counter printed in decimal and hex).
	 */
	IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_CRYPTO, wh->i_addr2,
	    "%s replay detected tid %d <rsc %ju (%#jx), csc %ju (%#jx), "
	    "keyix %u rxkeyix %u>", k->wk_cipher->ic_name, tid,
	    (intmax_t) rsc, (intmax_t) rsc,
	    (intmax_t) k->wk_keyrsc[tid], (intmax_t) k->wk_keyrsc[tid],
	    k->wk_keyix, k->wk_rxkeyix);

	if (ifp != NULL) {		/* NB: for cipher test modules */
		struct ieee80211_replay_event iev;

		IEEE80211_ADDR_COPY(iev.iev_dst, wh->i_addr1);
		IEEE80211_ADDR_COPY(iev.iev_src, wh->i_addr2);
		iev.iev_cipher = k->wk_cipher->ic_cipher;
		if (k->wk_rxkeyix != IEEE80211_KEYIX_NONE)
			iev.iev_keyix = k->wk_rxkeyix;
		else
			iev.iev_keyix = k->wk_keyix;
		iev.iev_keyrsc = k->wk_keyrsc[tid];
		iev.iev_rsc = rsc;
		CURVNET_SET(ifp->if_vnet);
		rt_ieee80211msg(ifp, RTM_IEEE80211_REPLAY, &iev, sizeof(iev));
		CURVNET_RESTORE();
	}
}

void
ieee80211_notify_michael_failure(struct ieee80211vap *vap,
	const struct ieee80211_frame *wh, u_int keyix)
{
	struct ifnet *ifp = vap->iv_ifp;

	IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_CRYPTO, wh->i_addr2,
	    "michael MIC verification failed <keyix %u>", keyix);
	vap->iv_stats.is_rx_tkipmic++;

	if (ifp != NULL) {		/* NB: for cipher test modules */
		struct ieee80211_michael_event iev;

		IEEE80211_ADDR_COPY(iev.iev_dst, wh->i_addr1);
		IEEE80211_ADDR_COPY(iev.iev_src, wh->i_addr2);
		iev.iev_cipher = IEEE80211_CIPHER_TKIP;
		iev.iev_keyix = keyix;
		CURVNET_SET(ifp->if_vnet);
		rt_ieee80211msg(ifp, RTM_IEEE80211_MICHAEL, &iev, sizeof(iev));
		CURVNET_RESTORE();
	}
}

void
ieee80211_notify_wds_discover(struct ieee80211_node *ni)
{
	struct ieee80211vap *vap = ni->ni_vap;
	struct ifnet *ifp = vap->iv_ifp;

	notify_macaddr(ifp, RTM_IEEE80211_WDS, ni->ni_macaddr);
}

void
ieee80211_notify_csa(struct ieee80211com *ic,
	const struct ieee80211_channel *c, int mode, int
count) { struct ieee80211_csa_event iev; struct ieee80211vap *vap; struct ifnet *ifp; memset(&iev, 0, sizeof(iev)); iev.iev_flags = c->ic_flags; iev.iev_freq = c->ic_freq; iev.iev_ieee = c->ic_ieee; iev.iev_mode = mode; iev.iev_count = count; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { ifp = vap->iv_ifp; CURVNET_SET(ifp->if_vnet); rt_ieee80211msg(ifp, RTM_IEEE80211_CSA, &iev, sizeof(iev)); CURVNET_RESTORE(); } } void ieee80211_notify_radar(struct ieee80211com *ic, const struct ieee80211_channel *c) { struct ieee80211_radar_event iev; struct ieee80211vap *vap; struct ifnet *ifp; memset(&iev, 0, sizeof(iev)); iev.iev_flags = c->ic_flags; iev.iev_freq = c->ic_freq; iev.iev_ieee = c->ic_ieee; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { ifp = vap->iv_ifp; CURVNET_SET(ifp->if_vnet); rt_ieee80211msg(ifp, RTM_IEEE80211_RADAR, &iev, sizeof(iev)); CURVNET_RESTORE(); } } void ieee80211_notify_cac(struct ieee80211com *ic, const struct ieee80211_channel *c, enum ieee80211_notify_cac_event type) { struct ieee80211_cac_event iev; struct ieee80211vap *vap; struct ifnet *ifp; memset(&iev, 0, sizeof(iev)); iev.iev_flags = c->ic_flags; iev.iev_freq = c->ic_freq; iev.iev_ieee = c->ic_ieee; iev.iev_type = type; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { ifp = vap->iv_ifp; CURVNET_SET(ifp->if_vnet); rt_ieee80211msg(ifp, RTM_IEEE80211_CAC, &iev, sizeof(iev)); CURVNET_RESTORE(); } } void ieee80211_notify_node_deauth(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ifnet *ifp = vap->iv_ifp; IEEE80211_NOTE(vap, IEEE80211_MSG_NODE, ni, "%s", "node deauth"); notify_macaddr(ifp, RTM_IEEE80211_DEAUTH, ni->ni_macaddr); } void ieee80211_notify_node_auth(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ifnet *ifp = vap->iv_ifp; IEEE80211_NOTE(vap, IEEE80211_MSG_NODE, ni, "%s", "node auth"); notify_macaddr(ifp, RTM_IEEE80211_AUTH, ni->ni_macaddr); } void ieee80211_notify_country(struct ieee80211vap *vap, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t cc[2]) { struct ifnet *ifp = vap->iv_ifp; struct ieee80211_country_event iev; memset(&iev, 0, sizeof(iev)); IEEE80211_ADDR_COPY(iev.iev_addr, bssid); iev.iev_cc[0] = cc[0]; iev.iev_cc[1] = cc[1]; CURVNET_SET(ifp->if_vnet); rt_ieee80211msg(ifp, RTM_IEEE80211_COUNTRY, &iev, sizeof(iev)); CURVNET_RESTORE(); } void ieee80211_notify_radio(struct ieee80211com *ic, int state) { struct ieee80211_radio_event iev; struct ieee80211vap *vap; struct ifnet *ifp; memset(&iev, 0, sizeof(iev)); iev.iev_state = state; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { ifp = vap->iv_ifp; CURVNET_SET(ifp->if_vnet); rt_ieee80211msg(ifp, RTM_IEEE80211_RADIO, &iev, sizeof(iev)); CURVNET_RESTORE(); } } void -ieee80211_notify_ifnet_change(struct ieee80211vap *vap) +ieee80211_notify_ifnet_change(struct ieee80211vap *vap, int if_flags_mask) { struct ifnet *ifp = vap->iv_ifp; IEEE80211_DPRINTF(vap, IEEE80211_MSG_DEBUG, "%s\n", "interface state change"); CURVNET_SET(ifp->if_vnet); - rt_ifmsg(ifp); + rt_ifmsg(ifp, if_flags_mask); CURVNET_RESTORE(); } void ieee80211_load_module(const char *modname) { #ifdef notyet (void)kern_kldload(curthread, modname, NULL); #else printf("%s: load the %s module by hand for now.\n", __func__, modname); #endif } static eventhandler_tag wlan_bpfevent; static eventhandler_tag wlan_ifllevent; static void bpf_track(void *arg, struct ifnet *ifp, int dlt, int attach) { /* NB: identify vap's by if_init */ if (dlt == DLT_IEEE802_11_RADIO && ifp->if_init == ieee80211_init) { struct ieee80211vap *vap = ifp->if_softc; /* * Track bpf 
radiotap listener state.  We mark the vap
		 * to indicate if any listener is present and the com
		 * to indicate if any listener exists on any associated
		 * vap.  This flag is used by drivers to prepare radiotap
		 * state only when needed.
		 */
		if (attach) {
			ieee80211_syncflag_ext(vap, IEEE80211_FEXT_BPF);
			if (vap->iv_opmode == IEEE80211_M_MONITOR)
				atomic_add_int(&vap->iv_ic->ic_montaps, 1);
		} else if (!bpf_peers_present(vap->iv_rawbpf)) {
			ieee80211_syncflag_ext(vap, -IEEE80211_FEXT_BPF);
			if (vap->iv_opmode == IEEE80211_M_MONITOR)
				atomic_subtract_int(&vap->iv_ic->ic_montaps, 1);
		}
	}
}

/*
 * Change the MAC address on the vap (if it was not started).
 */
static void
wlan_iflladdr(void *arg __unused, struct ifnet *ifp)
{
	/* NB: identify vaps by if_init */
	if (ifp->if_init == ieee80211_init && (ifp->if_flags & IFF_UP) == 0) {
		struct ieee80211vap *vap = ifp->if_softc;

		IEEE80211_ADDR_COPY(vap->iv_myaddr, IF_LLADDR(ifp));
	}
}

/*
 * Fetch the VAP name.
 *
 * This returns a const char pointer suitable for debugging,
 * but don't expect it to stick around for much longer.
 */
const char *
ieee80211_get_vap_ifname(struct ieee80211vap *vap)
{
	if (vap->iv_ifp == NULL)
		return "(none)";
	return vap->iv_ifp->if_xname;
}

#ifdef DEBUGNET
static void
ieee80211_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize)
{
	struct ieee80211vap *vap;
	struct ieee80211com *ic;

	vap = if_getsoftc(ifp);
	ic = vap->iv_ic;

	IEEE80211_LOCK(ic);
	ic->ic_debugnet_meth->dn8_init(ic, nrxr, ncl, clsize);
	IEEE80211_UNLOCK(ic);
}

static void
ieee80211_debugnet_event(struct ifnet *ifp, enum debugnet_ev ev)
{
	struct ieee80211vap *vap;
	struct ieee80211com *ic;

	vap = if_getsoftc(ifp);
	ic = vap->iv_ic;

	IEEE80211_LOCK(ic);
	ic->ic_debugnet_meth->dn8_event(ic, ev);
	IEEE80211_UNLOCK(ic);
}

static int
ieee80211_debugnet_transmit(struct ifnet *ifp, struct mbuf *m)
{
	return (ieee80211_vap_transmit(ifp, m));
}

static int
ieee80211_debugnet_poll(struct ifnet *ifp, int count)
{
	struct ieee80211vap *vap;
	struct ieee80211com *ic;

	vap = if_getsoftc(ifp);
	ic = vap->iv_ic;

	return (ic->ic_debugnet_meth->dn8_poll(ic, count));
}
#endif

/*
 * Module glue.
 *
 * NB: the module name is "wlan" for compatibility with NetBSD.
*/ static int wlan_modevent(module_t mod, int type, void *unused) { switch (type) { case MOD_LOAD: if (bootverbose) printf("wlan: <802.11 Link Layer>\n"); wlan_bpfevent = EVENTHANDLER_REGISTER(bpf_track, bpf_track, 0, EVENTHANDLER_PRI_ANY); wlan_ifllevent = EVENTHANDLER_REGISTER(iflladdr_event, wlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY); struct if_clone_addreq req = { .create_f = wlan_clone_create, .destroy_f = wlan_clone_destroy, .flags = IFC_F_AUTOUNIT, }; wlan_cloner = ifc_attach_cloner(wlanname, &req); return 0; case MOD_UNLOAD: ifc_detach_cloner(wlan_cloner); EVENTHANDLER_DEREGISTER(bpf_track, wlan_bpfevent); EVENTHANDLER_DEREGISTER(iflladdr_event, wlan_ifllevent); return 0; } return EINVAL; } static moduledata_t wlan_mod = { wlanname, wlan_modevent, 0 }; DECLARE_MODULE(wlan, wlan_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); MODULE_VERSION(wlan, 1); MODULE_DEPEND(wlan, ether, 1, 1, 1); #ifdef IEEE80211_ALQ MODULE_DEPEND(wlan, alq, 1, 1, 1); #endif /* IEEE80211_ALQ */ diff --git a/sys/net80211/ieee80211_proto.c b/sys/net80211/ieee80211_proto.c index 87fed05a281c..7e76a3ae9226 100644 --- a/sys/net80211/ieee80211_proto.c +++ b/sys/net80211/ieee80211_proto.c @@ -1,2825 +1,2825 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2001 Atsushi Onoe * Copyright (c) 2002-2008 Sam Leffler, Errno Consulting * Copyright (c) 2012 IEEE * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * IEEE 802.11 protocol support. 
*/ #include "opt_inet.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include /* XXX for ether_sprintf */ #include #include #include #include #include #ifdef IEEE80211_SUPPORT_MESH #include #endif #include #include /* XXX tunables */ #define AGGRESSIVE_MODE_SWITCH_HYSTERESIS 3 /* pkts / 100ms */ #define HIGH_PRI_SWITCH_THRESH 10 /* pkts / 100ms */ const char *mgt_subtype_name[] = { "assoc_req", "assoc_resp", "reassoc_req", "reassoc_resp", "probe_req", "probe_resp", "timing_adv", "reserved#7", "beacon", "atim", "disassoc", "auth", "deauth", "action", "action_noack", "reserved#15" }; const char *ctl_subtype_name[] = { "reserved#0", "reserved#1", "reserved#2", "reserved#3", "reserved#4", "reserved#5", "reserved#6", "control_wrap", "bar", "ba", "ps_poll", "rts", "cts", "ack", "cf_end", "cf_end_ack" }; const char *ieee80211_opmode_name[IEEE80211_OPMODE_MAX] = { "IBSS", /* IEEE80211_M_IBSS */ "STA", /* IEEE80211_M_STA */ "WDS", /* IEEE80211_M_WDS */ "AHDEMO", /* IEEE80211_M_AHDEMO */ "HOSTAP", /* IEEE80211_M_HOSTAP */ "MONITOR", /* IEEE80211_M_MONITOR */ "MBSS" /* IEEE80211_M_MBSS */ }; const char *ieee80211_state_name[IEEE80211_S_MAX] = { "INIT", /* IEEE80211_S_INIT */ "SCAN", /* IEEE80211_S_SCAN */ "AUTH", /* IEEE80211_S_AUTH */ "ASSOC", /* IEEE80211_S_ASSOC */ "CAC", /* IEEE80211_S_CAC */ "RUN", /* IEEE80211_S_RUN */ "CSA", /* IEEE80211_S_CSA */ "SLEEP", /* IEEE80211_S_SLEEP */ }; const char *ieee80211_wme_acnames[] = { "WME_AC_BE", "WME_AC_BK", "WME_AC_VI", "WME_AC_VO", "WME_UPSD", }; /* * Reason code descriptions were (mostly) obtained from * IEEE Std 802.11-2012, pp. 442-445 Table 8-36. */ const char * ieee80211_reason_to_string(uint16_t reason) { switch (reason) { case IEEE80211_REASON_UNSPECIFIED: return ("unspecified"); case IEEE80211_REASON_AUTH_EXPIRE: return ("previous authentication is expired"); case IEEE80211_REASON_AUTH_LEAVE: return ("sending STA is leaving/has left IBSS or ESS"); case IEEE80211_REASON_ASSOC_EXPIRE: return ("disassociated due to inactivity"); case IEEE80211_REASON_ASSOC_TOOMANY: return ("too many associated STAs"); case IEEE80211_REASON_NOT_AUTHED: return ("class 2 frame received from nonauthenticated STA"); case IEEE80211_REASON_NOT_ASSOCED: return ("class 3 frame received from nonassociated STA"); case IEEE80211_REASON_ASSOC_LEAVE: return ("sending STA is leaving/has left BSS"); case IEEE80211_REASON_ASSOC_NOT_AUTHED: return ("STA requesting (re)association is not authenticated"); case IEEE80211_REASON_DISASSOC_PWRCAP_BAD: return ("information in the Power Capability element is " "unacceptable"); case IEEE80211_REASON_DISASSOC_SUPCHAN_BAD: return ("information in the Supported Channels element is " "unacceptable"); case IEEE80211_REASON_IE_INVALID: return ("invalid element"); case IEEE80211_REASON_MIC_FAILURE: return ("MIC failure"); case IEEE80211_REASON_4WAY_HANDSHAKE_TIMEOUT: return ("4-Way handshake timeout"); case IEEE80211_REASON_GROUP_KEY_UPDATE_TIMEOUT: return ("group key update timeout"); case IEEE80211_REASON_IE_IN_4WAY_DIFFERS: return ("element in 4-Way handshake different from " "(re)association request/probe response/beacon frame"); case IEEE80211_REASON_GROUP_CIPHER_INVALID: return ("invalid group cipher"); case IEEE80211_REASON_PAIRWISE_CIPHER_INVALID: return ("invalid pairwise cipher"); case IEEE80211_REASON_AKMP_INVALID: return ("invalid AKMP"); case IEEE80211_REASON_UNSUPP_RSN_IE_VERSION: return ("unsupported version in RSN IE"); case IEEE80211_REASON_INVALID_RSN_IE_CAP: return 
("invalid capabilities in RSN IE"); case IEEE80211_REASON_802_1X_AUTH_FAILED: return ("IEEE 802.1X authentication failed"); case IEEE80211_REASON_CIPHER_SUITE_REJECTED: return ("cipher suite rejected because of the security " "policy"); case IEEE80211_REASON_UNSPECIFIED_QOS: return ("unspecified (QoS-related)"); case IEEE80211_REASON_INSUFFICIENT_BW: return ("QoS AP lacks sufficient bandwidth for this QoS STA"); case IEEE80211_REASON_TOOMANY_FRAMES: return ("too many frames need to be acknowledged"); case IEEE80211_REASON_OUTSIDE_TXOP: return ("STA is transmitting outside the limits of its TXOPs"); case IEEE80211_REASON_LEAVING_QBSS: return ("requested from peer STA (the STA is " "resetting/leaving the BSS)"); case IEEE80211_REASON_BAD_MECHANISM: return ("requested from peer STA (it does not want to use " "the mechanism)"); case IEEE80211_REASON_SETUP_NEEDED: return ("requested from peer STA (setup is required for the " "used mechanism)"); case IEEE80211_REASON_TIMEOUT: return ("requested from peer STA (timeout)"); case IEEE80211_REASON_PEER_LINK_CANCELED: return ("SME cancels the mesh peering instance (not related " "to the maximum number of peer mesh STAs)"); case IEEE80211_REASON_MESH_MAX_PEERS: return ("maximum number of peer mesh STAs was reached"); case IEEE80211_REASON_MESH_CPVIOLATION: return ("the received information violates the Mesh " "Configuration policy configured in the mesh STA " "profile"); case IEEE80211_REASON_MESH_CLOSE_RCVD: return ("the mesh STA has received a Mesh Peering Close " "message requesting to close the mesh peering"); case IEEE80211_REASON_MESH_MAX_RETRIES: return ("the mesh STA has resent dot11MeshMaxRetries Mesh " "Peering Open messages, without receiving a Mesh " "Peering Confirm message"); case IEEE80211_REASON_MESH_CONFIRM_TIMEOUT: return ("the confirmTimer for the mesh peering instance times " "out"); case IEEE80211_REASON_MESH_INVALID_GTK: return ("the mesh STA fails to unwrap the GTK or the values " "in the wrapped contents do not match"); case IEEE80211_REASON_MESH_INCONS_PARAMS: return ("the mesh STA receives inconsistent information about " "the mesh parameters between Mesh Peering Management " "frames"); case IEEE80211_REASON_MESH_INVALID_SECURITY: return ("the mesh STA fails the authenticated mesh peering " "exchange because due to failure in selecting " "pairwise/group ciphersuite"); case IEEE80211_REASON_MESH_PERR_NO_PROXY: return ("the mesh STA does not have proxy information for " "this external destination"); case IEEE80211_REASON_MESH_PERR_NO_FI: return ("the mesh STA does not have forwarding information " "for this destination"); case IEEE80211_REASON_MESH_PERR_DEST_UNREACH: return ("the mesh STA determines that the link to the next " "hop of an active path in its forwarding information " "is no longer usable"); case IEEE80211_REASON_MESH_MAC_ALRDY_EXISTS_MBSS: return ("the MAC address of the STA already exists in the " "mesh BSS"); case IEEE80211_REASON_MESH_CHAN_SWITCH_REG: return ("the mesh STA performs channel switch to meet " "regulatory requirements"); case IEEE80211_REASON_MESH_CHAN_SWITCH_UNSPEC: return ("the mesh STA performs channel switch with " "unspecified reason"); default: return ("reserved/unknown"); } } static void beacon_miss(void *, int); static void beacon_swmiss(void *, int); static void parent_updown(void *, int); static void update_mcast(void *, int); static void update_promisc(void *, int); static void update_channel(void *, int); static void update_chw(void *, int); static void vap_update_wme(void *, int); 
static void vap_update_slot(void *, int); static void restart_vaps(void *, int); static void vap_update_erp_protmode(void *, int); static void vap_update_preamble(void *, int); static void vap_update_ht_protmode(void *, int); static void ieee80211_newstate_cb(void *, int); static struct ieee80211_node *vap_update_bss(struct ieee80211vap *, struct ieee80211_node *); static int null_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { ic_printf(ni->ni_ic, "missing ic_raw_xmit callback, drop frame\n"); m_freem(m); return ENETDOWN; } void ieee80211_proto_attach(struct ieee80211com *ic) { uint8_t hdrlen; /* override the 802.3 setting */ hdrlen = ic->ic_headroom + sizeof(struct ieee80211_qosframe_addr4) + IEEE80211_WEP_IVLEN + IEEE80211_WEP_KIDLEN + IEEE80211_WEP_EXTIVLEN; /* XXX no way to recalculate on ifdetach */ max_linkhdr_grow(ALIGN(hdrlen)); //ic->ic_protmode = IEEE80211_PROT_CTSONLY; TASK_INIT(&ic->ic_parent_task, 0, parent_updown, ic); TASK_INIT(&ic->ic_mcast_task, 0, update_mcast, ic); TASK_INIT(&ic->ic_promisc_task, 0, update_promisc, ic); TASK_INIT(&ic->ic_chan_task, 0, update_channel, ic); TASK_INIT(&ic->ic_bmiss_task, 0, beacon_miss, ic); TASK_INIT(&ic->ic_chw_task, 0, update_chw, ic); TASK_INIT(&ic->ic_restart_task, 0, restart_vaps, ic); ic->ic_wme.wme_hipri_switch_hysteresis = AGGRESSIVE_MODE_SWITCH_HYSTERESIS; /* initialize management frame handlers */ ic->ic_send_mgmt = ieee80211_send_mgmt; ic->ic_raw_xmit = null_raw_xmit; ieee80211_adhoc_attach(ic); ieee80211_sta_attach(ic); ieee80211_wds_attach(ic); ieee80211_hostap_attach(ic); #ifdef IEEE80211_SUPPORT_MESH ieee80211_mesh_attach(ic); #endif ieee80211_monitor_attach(ic); } void ieee80211_proto_detach(struct ieee80211com *ic) { ieee80211_monitor_detach(ic); #ifdef IEEE80211_SUPPORT_MESH ieee80211_mesh_detach(ic); #endif ieee80211_hostap_detach(ic); ieee80211_wds_detach(ic); ieee80211_adhoc_detach(ic); ieee80211_sta_detach(ic); } static void null_update_beacon(struct ieee80211vap *vap, int item) { } void ieee80211_proto_vattach(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ifnet *ifp = vap->iv_ifp; int i; /* override the 802.3 setting */ ifp->if_hdrlen = ic->ic_headroom + sizeof(struct ieee80211_qosframe_addr4) + IEEE80211_WEP_IVLEN + IEEE80211_WEP_KIDLEN + IEEE80211_WEP_EXTIVLEN; vap->iv_rtsthreshold = IEEE80211_RTS_DEFAULT; vap->iv_fragthreshold = IEEE80211_FRAG_DEFAULT; vap->iv_bmiss_max = IEEE80211_BMISS_MAX; callout_init_mtx(&vap->iv_swbmiss, IEEE80211_LOCK_OBJ(ic), 0); callout_init(&vap->iv_mgtsend, 1); TASK_INIT(&vap->iv_nstate_task, 0, ieee80211_newstate_cb, vap); TASK_INIT(&vap->iv_swbmiss_task, 0, beacon_swmiss, vap); TASK_INIT(&vap->iv_wme_task, 0, vap_update_wme, vap); TASK_INIT(&vap->iv_slot_task, 0, vap_update_slot, vap); TASK_INIT(&vap->iv_erp_protmode_task, 0, vap_update_erp_protmode, vap); TASK_INIT(&vap->iv_ht_protmode_task, 0, vap_update_ht_protmode, vap); TASK_INIT(&vap->iv_preamble_task, 0, vap_update_preamble, vap); /* * Install default tx rate handling: no fixed rate, lowest * supported rate for mgmt and multicast frames. Default * max retry count. These settings can be changed by the * driver and/or user applications. 
	 */
	for (i = IEEE80211_MODE_11A; i < IEEE80211_MODE_MAX; i++) {
		if (isclr(ic->ic_modecaps, i))
			continue;

		const struct ieee80211_rateset *rs = &ic->ic_sup_rates[i];

		vap->iv_txparms[i].ucastrate = IEEE80211_FIXED_RATE_NONE;

		/*
		 * Setting the management rate to MCS 0 assumes that the
		 * BSS Basic rate set is empty and the BSS Basic MCS set
		 * is not.
		 *
		 * Since we're not checking this, default to the lowest
		 * defined rate for this mode.
		 *
		 * At least one 11n AP (DLINK DIR-825) is reported to drop
		 * some MCS management traffic (eg BA response frames.)
		 *
		 * See also: 9.6.0 of the 802.11n-2009 specification.
		 */
#ifdef	NOTYET
		if (i == IEEE80211_MODE_11NA || i == IEEE80211_MODE_11NG) {
			vap->iv_txparms[i].mgmtrate = 0 | IEEE80211_RATE_MCS;
			vap->iv_txparms[i].mcastrate = 0 | IEEE80211_RATE_MCS;
		} else {
			vap->iv_txparms[i].mgmtrate =
			    rs->rs_rates[0] & IEEE80211_RATE_VAL;
			vap->iv_txparms[i].mcastrate =
			    rs->rs_rates[0] & IEEE80211_RATE_VAL;
		}
#endif
		vap->iv_txparms[i].mgmtrate = rs->rs_rates[0] & IEEE80211_RATE_VAL;
		vap->iv_txparms[i].mcastrate = rs->rs_rates[0] & IEEE80211_RATE_VAL;
		vap->iv_txparms[i].maxretry = IEEE80211_TXMAX_DEFAULT;
	}
	vap->iv_roaming = IEEE80211_ROAMING_AUTO;
	vap->iv_update_beacon = null_update_beacon;
	vap->iv_deliver_data = ieee80211_deliver_data;
	vap->iv_protmode = IEEE80211_PROT_CTSONLY;
	vap->iv_update_bss = vap_update_bss;

	/* attach support for operating mode */
	ic->ic_vattach[vap->iv_opmode](vap);
}

void
ieee80211_proto_vdetach(struct ieee80211vap *vap)
{
#define	FREEAPPIE(ie) do { \
	if (ie != NULL) \
		IEEE80211_FREE(ie, M_80211_NODE_IE); \
} while (0)
	/*
	 * Detach operating mode module.
	 */
	if (vap->iv_opdetach != NULL)
		vap->iv_opdetach(vap);
	/*
	 * This should not be needed as we detach when resetting
	 * the state but be conservative here since the
	 * authenticator may do things like spawn kernel threads.
	 */
	if (vap->iv_auth->ia_detach != NULL)
		vap->iv_auth->ia_detach(vap);
	/*
	 * Detach any ACL'ator.
	 */
	if (vap->iv_acl != NULL)
		vap->iv_acl->iac_detach(vap);

	FREEAPPIE(vap->iv_appie_beacon);
	FREEAPPIE(vap->iv_appie_probereq);
	FREEAPPIE(vap->iv_appie_proberesp);
	FREEAPPIE(vap->iv_appie_assocreq);
	FREEAPPIE(vap->iv_appie_assocresp);
	FREEAPPIE(vap->iv_appie_wpa);

#undef FREEAPPIE
}

/*
 * Simple-minded authenticator module support.
 */

#define	IEEE80211_AUTH_MAX	(IEEE80211_AUTH_WPA+1)
/* XXX well-known names */
static const char *auth_modnames[IEEE80211_AUTH_MAX] = {
	"wlan_internal",	/* IEEE80211_AUTH_NONE */
	"wlan_internal",	/* IEEE80211_AUTH_OPEN */
	"wlan_internal",	/* IEEE80211_AUTH_SHARED */
	"wlan_xauth",		/* IEEE80211_AUTH_8021X */
	"wlan_internal",	/* IEEE80211_AUTH_AUTO */
	"wlan_xauth",		/* IEEE80211_AUTH_WPA */
};
static const struct ieee80211_authenticator *authenticators[IEEE80211_AUTH_MAX];

static const struct ieee80211_authenticator auth_internal = {
	.ia_name		= "wlan_internal",
	.ia_attach		= NULL,
	.ia_detach		= NULL,
	.ia_node_join		= NULL,
	.ia_node_leave		= NULL,
};

/*
 * Setup internal authenticators once; they are never unregistered.
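 *
 * A loadable authenticator module is expected to register itself from
 * its module event handler; a minimal sketch (illustrative only, the
 * callback names here are hypothetical):
 *
 *	static const struct ieee80211_authenticator xauth = {
 *		.ia_name	= "wlan_xauth",
 *		.ia_attach	= xauth_attach,
 *		.ia_detach	= xauth_detach,
 *	};
 *	ieee80211_authenticator_register(IEEE80211_AUTH_8021X, &xauth);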
*/ static void ieee80211_auth_setup(void) { ieee80211_authenticator_register(IEEE80211_AUTH_OPEN, &auth_internal); ieee80211_authenticator_register(IEEE80211_AUTH_SHARED, &auth_internal); ieee80211_authenticator_register(IEEE80211_AUTH_AUTO, &auth_internal); } SYSINIT(wlan_auth, SI_SUB_DRIVERS, SI_ORDER_FIRST, ieee80211_auth_setup, NULL); const struct ieee80211_authenticator * ieee80211_authenticator_get(int auth) { if (auth >= IEEE80211_AUTH_MAX) return NULL; if (authenticators[auth] == NULL) ieee80211_load_module(auth_modnames[auth]); return authenticators[auth]; } void ieee80211_authenticator_register(int type, const struct ieee80211_authenticator *auth) { if (type >= IEEE80211_AUTH_MAX) return; authenticators[type] = auth; } void ieee80211_authenticator_unregister(int type) { if (type >= IEEE80211_AUTH_MAX) return; authenticators[type] = NULL; } /* * Very simple-minded ACL module support. */ /* XXX just one for now */ static const struct ieee80211_aclator *acl = NULL; void ieee80211_aclator_register(const struct ieee80211_aclator *iac) { printf("wlan: %s acl policy registered\n", iac->iac_name); acl = iac; } void ieee80211_aclator_unregister(const struct ieee80211_aclator *iac) { if (acl == iac) acl = NULL; printf("wlan: %s acl policy unregistered\n", iac->iac_name); } const struct ieee80211_aclator * ieee80211_aclator_get(const char *name) { if (acl == NULL) ieee80211_load_module("wlan_acl"); return acl != NULL && strcmp(acl->iac_name, name) == 0 ? acl : NULL; } void ieee80211_print_essid(const uint8_t *essid, int len) { const uint8_t *p; int i; if (len > IEEE80211_NWID_LEN) len = IEEE80211_NWID_LEN; /* determine printable or not */ for (i = 0, p = essid; i < len; i++, p++) { if (*p < ' ' || *p > 0x7e) break; } if (i == len) { printf("\""); for (i = 0, p = essid; i < len; i++, p++) printf("%c", *p); printf("\""); } else { printf("0x"); for (i = 0, p = essid; i < len; i++, p++) printf("%02x", *p); } } void ieee80211_dump_pkt(struct ieee80211com *ic, const uint8_t *buf, int len, int rate, int rssi) { const struct ieee80211_frame *wh; int i; wh = (const struct ieee80211_frame *)buf; switch (wh->i_fc[1] & IEEE80211_FC1_DIR_MASK) { case IEEE80211_FC1_DIR_NODS: printf("NODS %s", ether_sprintf(wh->i_addr2)); printf("->%s", ether_sprintf(wh->i_addr1)); printf("(%s)", ether_sprintf(wh->i_addr3)); break; case IEEE80211_FC1_DIR_TODS: printf("TODS %s", ether_sprintf(wh->i_addr2)); printf("->%s", ether_sprintf(wh->i_addr3)); printf("(%s)", ether_sprintf(wh->i_addr1)); break; case IEEE80211_FC1_DIR_FROMDS: printf("FRDS %s", ether_sprintf(wh->i_addr3)); printf("->%s", ether_sprintf(wh->i_addr1)); printf("(%s)", ether_sprintf(wh->i_addr2)); break; case IEEE80211_FC1_DIR_DSTODS: printf("DSDS %s", ether_sprintf((const uint8_t *)&wh[1])); printf("->%s", ether_sprintf(wh->i_addr3)); printf("(%s", ether_sprintf(wh->i_addr2)); printf("->%s)", ether_sprintf(wh->i_addr1)); break; } switch (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) { case IEEE80211_FC0_TYPE_DATA: printf(" data"); break; case IEEE80211_FC0_TYPE_MGT: printf(" %s", ieee80211_mgt_subtype_name(wh->i_fc[0])); break; default: printf(" type#%d", wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK); break; } if (IEEE80211_QOS_HAS_SEQ(wh)) { const struct ieee80211_qosframe *qwh = (const struct ieee80211_qosframe *)buf; printf(" QoS [TID %u%s]", qwh->i_qos[0] & IEEE80211_QOS_TID, qwh->i_qos[0] & IEEE80211_QOS_ACKPOLICY ? 
" ACM" : ""); } if (IEEE80211_IS_PROTECTED(wh)) { int off; off = ieee80211_anyhdrspace(ic, wh); printf(" WEP [IV %.02x %.02x %.02x", buf[off+0], buf[off+1], buf[off+2]); if (buf[off+IEEE80211_WEP_IVLEN] & IEEE80211_WEP_EXTIV) printf(" %.02x %.02x %.02x", buf[off+4], buf[off+5], buf[off+6]); printf(" KID %u]", buf[off+IEEE80211_WEP_IVLEN] >> 6); } if (rate >= 0) printf(" %dM", rate / 2); if (rssi >= 0) printf(" +%d", rssi); printf("\n"); if (len > 0) { for (i = 0; i < len; i++) { if ((i & 1) == 0) printf(" "); printf("%02x", buf[i]); } printf("\n"); } } static __inline int findrix(const struct ieee80211_rateset *rs, int r) { int i; for (i = 0; i < rs->rs_nrates; i++) if ((rs->rs_rates[i] & IEEE80211_RATE_VAL) == r) return i; return -1; } int ieee80211_fix_rate(struct ieee80211_node *ni, struct ieee80211_rateset *nrs, int flags) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; int i, j, rix, error; int okrate, badrate, fixedrate, ucastrate; const struct ieee80211_rateset *srs; uint8_t r; error = 0; okrate = badrate = 0; ucastrate = vap->iv_txparms[ieee80211_chan2mode(ni->ni_chan)].ucastrate; if (ucastrate != IEEE80211_FIXED_RATE_NONE) { /* * Workaround awkwardness with fixed rate. We are called * to check both the legacy rate set and the HT rate set * but we must apply any legacy fixed rate check only to the * legacy rate set and vice versa. We cannot tell what type * of rate set we've been given (legacy or HT) but we can * distinguish the fixed rate type (MCS have 0x80 set). * So to deal with this the caller communicates whether to * check MCS or legacy rate using the flags and we use the * type of any fixed rate to avoid applying an MCS to a * legacy rate and vice versa. */ if (ucastrate & 0x80) { if (flags & IEEE80211_F_DOFRATE) flags &= ~IEEE80211_F_DOFRATE; } else if ((ucastrate & 0x80) == 0) { if (flags & IEEE80211_F_DOFMCS) flags &= ~IEEE80211_F_DOFMCS; } /* NB: required to make MCS match below work */ ucastrate &= IEEE80211_RATE_VAL; } fixedrate = IEEE80211_FIXED_RATE_NONE; /* * XXX we are called to process both MCS and legacy rates; * we must use the appropriate basic rate set or chaos will * ensue; for now callers that want MCS must supply * IEEE80211_F_DOBRS; at some point we'll need to split this * function so there are two variants, one for MCS and one * for legacy rates. */ if (flags & IEEE80211_F_DOBRS) srs = (const struct ieee80211_rateset *) ieee80211_get_suphtrates(ic, ni->ni_chan); else srs = ieee80211_get_suprates(ic, ni->ni_chan); for (i = 0; i < nrs->rs_nrates; ) { if (flags & IEEE80211_F_DOSORT) { /* * Sort rates. */ for (j = i + 1; j < nrs->rs_nrates; j++) { if (IEEE80211_RV(nrs->rs_rates[i]) > IEEE80211_RV(nrs->rs_rates[j])) { r = nrs->rs_rates[i]; nrs->rs_rates[i] = nrs->rs_rates[j]; nrs->rs_rates[j] = r; } } } r = nrs->rs_rates[i] & IEEE80211_RATE_VAL; badrate = r; /* * Check for fixed rate. */ if (r == ucastrate) fixedrate = r; /* * Check against supported rates. */ rix = findrix(srs, r); if (flags & IEEE80211_F_DONEGO) { if (rix < 0) { /* * A rate in the node's rate set is not * supported. If this is a basic rate and we * are operating as a STA then this is an error. * Otherwise we just discard/ignore the rate. */ if ((flags & IEEE80211_F_JOIN) && (nrs->rs_rates[i] & IEEE80211_RATE_BASIC)) error++; } else if ((flags & IEEE80211_F_JOIN) == 0) { /* * Overwrite with the supported rate * value so any basic rate bit is set. 
			 */
				nrs->rs_rates[i] = srs->rs_rates[rix];
			}
		}
		if ((flags & IEEE80211_F_DODEL) && rix < 0) {
			/*
			 * Delete unacceptable rates.
			 */
			nrs->rs_nrates--;
			for (j = i; j < nrs->rs_nrates; j++)
				nrs->rs_rates[j] = nrs->rs_rates[j + 1];
			nrs->rs_rates[j] = 0;
			continue;
		}
		if (rix >= 0)
			okrate = nrs->rs_rates[i];
		i++;
	}
	if (okrate == 0 || error != 0 ||
	    ((flags & (IEEE80211_F_DOFRATE|IEEE80211_F_DOFMCS)) &&
	     fixedrate != ucastrate)) {
		IEEE80211_NOTE(vap, IEEE80211_MSG_XRATE | IEEE80211_MSG_11N, ni,
		    "%s: flags 0x%x okrate %d error %d fixedrate 0x%x "
		    "ucastrate %x\n", __func__, flags, okrate, error,
		    fixedrate, ucastrate);
		return badrate | IEEE80211_RATE_BASIC;
	} else
		return IEEE80211_RV(okrate);
}

/*
 * Reset 11g-related state.
 *
 * This is for per-VAP ERP/11g state.
 *
 * Eventually everything in ieee80211_reset_erp() will be
 * per-VAP and in here.
 */
void
ieee80211_vap_reset_erp(struct ieee80211vap *vap)
{
	struct ieee80211com *ic = vap->iv_ic;

	vap->iv_nonerpsta = 0;
	vap->iv_longslotsta = 0;

	vap->iv_flags &= ~IEEE80211_F_USEPROT;
	/*
	 * Set short preamble and ERP barker-preamble flags.
	 */
	if (IEEE80211_IS_CHAN_A(ic->ic_curchan) ||
	    (vap->iv_caps & IEEE80211_C_SHPREAMBLE)) {
		vap->iv_flags |= IEEE80211_F_SHPREAMBLE;
		vap->iv_flags &= ~IEEE80211_F_USEBARKER;
	} else {
		vap->iv_flags &= ~IEEE80211_F_SHPREAMBLE;
		vap->iv_flags |= IEEE80211_F_USEBARKER;
	}

	/*
	 * Short slot time is enabled only when operating in 11g
	 * and not in an IBSS.  We must also honor whether or not
	 * the driver is capable of doing it.
	 */
	ieee80211_vap_set_shortslottime(vap,
	    IEEE80211_IS_CHAN_A(ic->ic_curchan) ||
	    IEEE80211_IS_CHAN_HT(ic->ic_curchan) ||
	    (IEEE80211_IS_CHAN_ANYG(ic->ic_curchan) &&
	    vap->iv_opmode == IEEE80211_M_HOSTAP &&
	    (ic->ic_caps & IEEE80211_C_SHSLOT)));
}

/*
 * Reset 11g-related state.
 *
 * Note this resets the global state and a caller should schedule
 * a re-check of all the VAPs after setup to update said state.
 */
void
ieee80211_reset_erp(struct ieee80211com *ic)
{
#if 0
	ic->ic_flags &= ~IEEE80211_F_USEPROT;
	/*
	 * Set short preamble and ERP barker-preamble flags.
	 */
	if (IEEE80211_IS_CHAN_A(ic->ic_curchan) ||
	    (ic->ic_caps & IEEE80211_C_SHPREAMBLE)) {
		ic->ic_flags |= IEEE80211_F_SHPREAMBLE;
		ic->ic_flags &= ~IEEE80211_F_USEBARKER;
	} else {
		ic->ic_flags &= ~IEEE80211_F_SHPREAMBLE;
		ic->ic_flags |= IEEE80211_F_USEBARKER;
	}
#endif
	/* XXX TODO: schedule a new per-VAP ERP calculation */
}

static struct ieee80211_node *
vap_update_bss(struct ieee80211vap *vap, struct ieee80211_node *ni)
{
	struct ieee80211_node *obss;

	obss = vap->iv_bss;
	vap->iv_bss = ni;

	return (obss);
}

/*
 * Deferred slot time update.
 *
 * For per-VAP slot time configuration, call the VAP
 * method if the VAP requires it.  Otherwise, just call the
 * older global method.
 *
 * If the per-VAP method is called then it's expected that
 * the driver/firmware will take care of turning the per-VAP
 * flags into slot time configuration.
 *
 * If the per-VAP method is not called then the global flags will be
 * flipped into sync with the VAPs; ic_flags IEEE80211_F_SHSLOT will
 * be set only if all of the vaps will have it set.
 *
 * Look at the comments for vap_update_erp_protmode() for more
 * background; this assumes all VAPs are on the same channel.
 */
static void
vap_update_slot(void *arg, int npending)
{
	struct ieee80211vap *vap = arg;
	struct ieee80211com *ic = vap->iv_ic;
	struct ieee80211vap *iv;
	int num_shslot = 0, num_lgslot = 0;

	/*
	 * Per-VAP path - we've already had the flags updated;
	 * so just notify the driver and move on.
	 */
	if (vap->iv_updateslot != NULL) {
		vap->iv_updateslot(vap);
		return;
	}

	/*
	 * Iterate over all of the VAP flags to update the
	 * global flag.
	 *
	 * If all vaps have short slot enabled then flip on
	 * short slot.  If any vap has it disabled then
	 * we leave it globally disabled.  This should provide
	 * correct behaviour in a multi-BSS scenario where
	 * at least one VAP has short slot disabled for some
	 * reason.
	 */
	IEEE80211_LOCK(ic);
	TAILQ_FOREACH(iv, &ic->ic_vaps, iv_next) {
		if (iv->iv_flags & IEEE80211_F_SHSLOT)
			num_shslot++;
		else
			num_lgslot++;
	}

	/*
	 * It looks backwards but - if the number of short slot VAPs
	 * is zero then we're not short slot.  Else, we have one
	 * or more short slot VAPs and we're checking to see if ANY
	 * of them have short slot disabled.
	 */
	if (num_shslot == 0)
		ic->ic_flags &= ~IEEE80211_F_SHSLOT;
	else if (num_lgslot == 0)
		ic->ic_flags |= IEEE80211_F_SHSLOT;
	IEEE80211_UNLOCK(ic);

	/*
	 * Call the driver with our new global slot time flags.
	 */
	if (ic->ic_updateslot != NULL)
		ic->ic_updateslot(ic);
}

/*
 * Deferred ERP protmode update.
 *
 * This currently calculates the global ERP protection mode flag
 * based on each of the VAPs.  Any VAP with it enabled is enough
 * for the global flag to be enabled; it is disabled only when
 * all VAPs have it disabled.
 *
 * This may make sense right now for the supported hardware where
 * net80211 is controlling the single channel configuration, but
 * offload firmware that's doing channel changes (eg off-channel
 * TDLS, off-channel STA, off-channel P2P STA/AP) may get some
 * silly looking flag updates.
 *
 * Ideally the protection mode calculation is done based on the
 * channel, and all VAPs using that channel will inherit it.
 * But until that's what net80211 does, this will have to do.
 */
static void
vap_update_erp_protmode(void *arg, int npending)
{
	struct ieee80211vap *vap = arg;
	struct ieee80211com *ic = vap->iv_ic;
	struct ieee80211vap *iv;
	int enable_protmode = 0;
	int non_erp_present = 0;

	/*
	 * Iterate over all of the VAPs to calculate the overlapping
	 * ERP protection mode configuration and ERP present math.
	 *
	 * For now we assume that if a driver can handle this per-VAP
	 * then it'll ignore the ic->ic_protmode variant and instead
	 * will look at the vap related flags.
	 */
	IEEE80211_LOCK(ic);
	TAILQ_FOREACH(iv, &ic->ic_vaps, iv_next) {
		if (iv->iv_flags & IEEE80211_F_USEPROT)
			enable_protmode = 1;
		if (iv->iv_flags_ext & IEEE80211_FEXT_NONERP_PR)
			non_erp_present = 1;
	}

	if (enable_protmode)
		ic->ic_flags |= IEEE80211_F_USEPROT;
	else
		ic->ic_flags &= ~IEEE80211_F_USEPROT;

	if (non_erp_present)
		ic->ic_flags_ext |= IEEE80211_FEXT_NONERP_PR;
	else
		ic->ic_flags_ext &= ~IEEE80211_FEXT_NONERP_PR;

	/* Beacon update on all VAPs */
	ieee80211_notify_erp_locked(ic);

	IEEE80211_UNLOCK(ic);

	IEEE80211_DPRINTF(vap, IEEE80211_MSG_DEBUG,
	    "%s: called; enable_protmode=%d, non_erp_present=%d\n",
	    __func__, enable_protmode, non_erp_present);

	/*
	 * Now that the global configuration flags are calculated,
	 * notify the VAP about its configuration.
	 *
	 * The global flags will be used when assembling ERP IEs
	 * for multi-VAP operation, even if it's on a different
	 * channel.  Yes, that's going to need fixing in the
	 * future.
	 */
	if (vap->iv_erp_protmode_update != NULL)
		vap->iv_erp_protmode_update(vap);
}

/*
 * Deferred ERP short preamble/barker update.
 *
 * All VAPs need to use short preamble for it to be enabled
 * globally.
 *
 * Look at the comments for vap_update_erp_protmode() for more
 * background; this assumes all VAPs are on the same channel.
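 *
 * Worked example (sketch): with two running vaps where one has
 * IEEE80211_F_SHPREAMBLE set and the other has IEEE80211_F_USEBARKER
 * set, barker_count below is non-zero, so the global flags fall back
 * to long preamble + barker and legacy stations in either BSS keep
 * interoperating.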
 */
static void
vap_update_preamble(void *arg, int npending)
{
	struct ieee80211vap *vap = arg;
	struct ieee80211com *ic = vap->iv_ic;
	struct ieee80211vap *iv;
	int barker_count = 0, short_preamble_count = 0, count = 0;

	/*
	 * Iterate over all of the VAPs to calculate the overlapping
	 * short or long preamble configuration.
	 *
	 * For now we assume that if a driver can handle this per-VAP
	 * then it'll ignore the ic->ic_flags variant and instead
	 * will look at the vap related flags.
	 */
	IEEE80211_LOCK(ic);
	TAILQ_FOREACH(iv, &ic->ic_vaps, iv_next) {
		if (iv->iv_flags & IEEE80211_F_USEBARKER)
			barker_count++;
		if (iv->iv_flags & IEEE80211_F_SHPREAMBLE)
			short_preamble_count++;
		count++;
	}

	/*
	 * As with vap_update_erp_protmode(), the global flags are
	 * currently used for beacon IEs.
	 */
	IEEE80211_DPRINTF(vap, IEEE80211_MSG_DEBUG,
	    "%s: called; barker_count=%d, short_preamble_count=%d\n",
	    __func__, barker_count, short_preamble_count);

	/*
	 * Only flip on short preamble if all of the VAPs support
	 * it.
	 */
	if (barker_count == 0 && short_preamble_count == count) {
		ic->ic_flags |= IEEE80211_F_SHPREAMBLE;
		ic->ic_flags &= ~IEEE80211_F_USEBARKER;
	} else {
		ic->ic_flags &= ~IEEE80211_F_SHPREAMBLE;
		ic->ic_flags |= IEEE80211_F_USEBARKER;
	}

	IEEE80211_DPRINTF(vap, IEEE80211_MSG_DEBUG,
	    "%s: global barker=%d preamble=%d\n",
	    __func__,
	    !! (ic->ic_flags & IEEE80211_F_USEBARKER),
	    !! (ic->ic_flags & IEEE80211_F_SHPREAMBLE));

	/* Beacon update on all VAPs */
	ieee80211_notify_erp_locked(ic);

	IEEE80211_UNLOCK(ic);

	/* Driver notification */
	if (vap->iv_preamble_update != NULL)
		vap->iv_preamble_update(vap);
}

/*
 * Deferred HT protmode update and beacon update.
 *
 * Look at the comments for vap_update_erp_protmode() for more
 * background; this assumes all VAPs are on the same channel.
 */
static void
vap_update_ht_protmode(void *arg, int npending)
{
	struct ieee80211vap *vap = arg;
	struct ieee80211vap *iv;
	struct ieee80211com *ic = vap->iv_ic;
	int num_vaps = 0, num_pure = 0;
	int num_optional = 0, num_ht2040 = 0, num_nonht = 0;
	int num_ht_sta = 0, num_ht40_sta = 0, num_sta = 0;
	int num_nonhtpr = 0;

	/*
	 * Iterate over all of the VAPs to calculate everything.
	 *
	 * There are a few different flags to calculate:
	 *
	 * + whether there's HT only or HT+legacy stations;
	 * + whether there's HT20, HT40, or HT20+HT40 stations;
	 * + whether the desired protection mode is mixed, pure or
	 *   one of the two above.
	 *
	 * For now we assume that if a driver can handle this per-VAP
	 * then it'll ignore the ic->ic_htprotmode / ic->ic_curhtprotmode
	 * variant and instead will look at the vap related variables.
	 *
	 * XXX TODO: non-greenfield STAs present (IEEE80211_HTINFO_NONGF_PRESENT) !
	 */
	IEEE80211_LOCK(ic);
	TAILQ_FOREACH(iv, &ic->ic_vaps, iv_next) {
		num_vaps++;
		/* overlapping BSSes advertising non-HT status present */
		if (iv->iv_flags_ht & IEEE80211_FHT_NONHT_PR)
			num_nonht++;
		/* Operating mode flags */
		if (iv->iv_curhtprotmode & IEEE80211_HTINFO_NONHT_PRESENT)
			num_nonhtpr++;
		switch (iv->iv_curhtprotmode & IEEE80211_HTINFO_OPMODE) {
		case IEEE80211_HTINFO_OPMODE_PURE:
			num_pure++;
			break;
		case IEEE80211_HTINFO_OPMODE_PROTOPT:
			num_optional++;
			break;
		case IEEE80211_HTINFO_OPMODE_HT20PR:
			num_ht2040++;
			break;
		}

		IEEE80211_DPRINTF(vap, IEEE80211_MSG_11N,
		    "%s: vap %s: nonht_pr=%d, curhtprotmode=0x%02x\n",
		    __func__,
		    ieee80211_get_vap_ifname(iv),
		    !! (iv->iv_flags_ht & IEEE80211_FHT_NONHT_PR),
		    iv->iv_curhtprotmode);

		num_ht_sta += iv->iv_ht_sta_assoc;
		num_ht40_sta += iv->iv_ht40_sta_assoc;
		num_sta += iv->iv_sta_assoc;
	}

	/*
	 * Step 1 - if any VAPs indicate NONHT_PR set (overlapping BSS
	 * non-HT present), set it here.  This shouldn't be used by
	 * anything but the old overlapping BSS logic so if any drivers
	 * consume it, it's up to date.
	 */
	if (num_nonht > 0)
		ic->ic_flags_ht |= IEEE80211_FHT_NONHT_PR;
	else
		ic->ic_flags_ht &= ~IEEE80211_FHT_NONHT_PR;

	/*
	 * Step 2 - default HT protection mode to MIXED (802.11-2016 10.26.3.1.)
	 *
	 * + If all VAPs are PURE, we can stay PURE.
	 * + If all VAPs are PROTOPT, we can go to PROTOPT.
	 * + If any VAP has HT20PR then it sees at least a HT40+HT20 station.
	 *   Note that we may have a VAP with one HT20 and a VAP with one HT40;
	 *   So we look at the sum ht and sum ht40 sta counts; if we have a
	 *   HT station and the HT20 != HT40 count, we have to do HT20PR here.
	 *   Note all stations need to be HT for this to be an option.
	 * + The fall-through is MIXED, because it means we have some odd
	 *   non HT40-involved combination of opmode and this is the most
	 *   sensible default.
	 */
	ic->ic_curhtprotmode = IEEE80211_HTINFO_OPMODE_MIXED;

	if (num_pure == num_vaps)
		ic->ic_curhtprotmode = IEEE80211_HTINFO_OPMODE_PURE;

	if (num_optional == num_vaps)
		ic->ic_curhtprotmode = IEEE80211_HTINFO_OPMODE_PROTOPT;

	/*
	 * Note: we need /a/ HT40 station somewhere for this to
	 * be a possibility.
	 */
	if ((num_ht2040 > 0) ||
	    ((num_ht_sta > 0) && (num_ht40_sta > 0) &&
	     (num_ht_sta != num_ht40_sta)))
		ic->ic_curhtprotmode = IEEE80211_HTINFO_OPMODE_HT20PR;

	/*
	 * Step 3 - if any of the stations across the VAPs are
	 * non-HT then this needs to be flipped back to MIXED.
	 */
	if (num_ht_sta != num_sta)
		ic->ic_curhtprotmode = IEEE80211_HTINFO_OPMODE_MIXED;

	/*
	 * Step 4 - If we see any overlapping BSS non-HT stations
	 * via beacons then flip on NONHT_PRESENT.
	 */
	if (num_nonhtpr > 0)
		ic->ic_curhtprotmode |= IEEE80211_HTINFO_NONHT_PRESENT;

	/* Notify all VAPs to potentially update their beacons */
	TAILQ_FOREACH(iv, &ic->ic_vaps, iv_next)
		ieee80211_htinfo_notify(iv);

	IEEE80211_UNLOCK(ic);

	IEEE80211_DPRINTF(vap, IEEE80211_MSG_11N,
	    "%s: global: nonht_pr=%d ht_opmode=0x%02x\n",
	    __func__,
	    !! (ic->ic_flags_ht & IEEE80211_FHT_NONHT_PR),
	    ic->ic_curhtprotmode);

	/* Driver update */
	if (vap->iv_ht_protmode_update != NULL)
		vap->iv_ht_protmode_update(vap);
}

/*
 * Set the short slot time state and notify the driver.
 *
 * This is the per-VAP slot time state.
 */
void
ieee80211_vap_set_shortslottime(struct ieee80211vap *vap, int onoff)
{
	struct ieee80211com *ic = vap->iv_ic;

	/* XXX lock? */

	/*
	 * Only modify the per-VAP slot time.
	 */
	if (onoff)
		vap->iv_flags |= IEEE80211_F_SHSLOT;
	else
		vap->iv_flags &= ~IEEE80211_F_SHSLOT;

	IEEE80211_DPRINTF(vap, IEEE80211_MSG_DEBUG,
	    "%s: called; onoff=%d\n", __func__, onoff);
	/* schedule the deferred slot flag update and update */
	ieee80211_runtask(ic, &vap->iv_slot_task);
}

/*
 * Update the VAP short/long/barker preamble state and
 * update beacon state if needed.
 *
 * For now it simply copies the global flags into the per-vap
 * flags and schedules the callback.  Later this will support
 * both global and per-VAP flags, especially useful for
 * STA+STA multi-channel operation (eg p2p).
 */
void
ieee80211_vap_update_preamble(struct ieee80211vap *vap)
{
	struct ieee80211com *ic = vap->iv_ic;

	/* XXX lock?
	 */

	IEEE80211_DPRINTF(vap, IEEE80211_MSG_DEBUG,
	    "%s: called\n", __func__);

	/* schedule the deferred preamble flag update */
	ieee80211_runtask(ic, &vap->iv_preamble_task);
}

/*
 * Update the VAP 11g protection mode and update beacon state
 * if needed.
 */
void
ieee80211_vap_update_erp_protmode(struct ieee80211vap *vap)
{
	struct ieee80211com *ic = vap->iv_ic;

	/* XXX lock? */

	IEEE80211_DPRINTF(vap, IEEE80211_MSG_DEBUG,
	    "%s: called\n", __func__);

	/* schedule the deferred ERP protmode update */
	ieee80211_runtask(ic, &vap->iv_erp_protmode_task);
}

/*
 * Update the VAP 11n protection mode and update beacon state
 * if needed.
 */
void
ieee80211_vap_update_ht_protmode(struct ieee80211vap *vap)
{
	struct ieee80211com *ic = vap->iv_ic;

	/* XXX lock? */

	IEEE80211_DPRINTF(vap, IEEE80211_MSG_DEBUG,
	    "%s: called\n", __func__);

	/* schedule the deferred protmode update */
	ieee80211_runtask(ic, &vap->iv_ht_protmode_task);
}

/*
 * Check if the specified rate set supports ERP.
 * NB: the rate set is assumed to be sorted.
 */
int
ieee80211_iserp_rateset(const struct ieee80211_rateset *rs)
{
	static const int rates[] = { 2, 4, 11, 22, 12, 24, 48 };
	int i, j;

	if (rs->rs_nrates < nitems(rates))
		return 0;
	for (i = 0; i < nitems(rates); i++) {
		for (j = 0; j < rs->rs_nrates; j++) {
			int r = rs->rs_rates[j] & IEEE80211_RATE_VAL;
			if (rates[i] == r)
				goto next;
			if (r > rates[i])
				return 0;
		}
		return 0;
	next:
		;
	}
	return 1;
}

/*
 * Mark the basic rates for the rate table based on the
 * operating mode.  For real 11g we mark all the 11b rates
 * and 6, 12, and 24 OFDM.  For 11b compatibility we mark only
 * 11b rates.  There's also a pseudo 11a-mode used to mark only
 * the basic OFDM rates.
 */
static void
setbasicrates(struct ieee80211_rateset *rs,
    enum ieee80211_phymode mode, int add)
{
	static const struct ieee80211_rateset basic[IEEE80211_MODE_MAX] = {
	    [IEEE80211_MODE_11A]	= { 3, { 12, 24, 48 } },
	    [IEEE80211_MODE_11B]	= { 2, { 2, 4 } },
					    /* NB: mixed b/g */
	    [IEEE80211_MODE_11G]	= { 4, { 2, 4, 11, 22 } },
	    [IEEE80211_MODE_TURBO_A]	= { 3, { 12, 24, 48 } },
	    [IEEE80211_MODE_TURBO_G]	= { 4, { 2, 4, 11, 22 } },
	    [IEEE80211_MODE_STURBO_A]	= { 3, { 12, 24, 48 } },
	    [IEEE80211_MODE_HALF]	= { 3, { 6, 12, 24 } },
	    [IEEE80211_MODE_QUARTER]	= { 3, { 3, 6, 12 } },
	    [IEEE80211_MODE_11NA]	= { 3, { 12, 24, 48 } },
					    /* NB: mixed b/g */
	    [IEEE80211_MODE_11NG]	= { 4, { 2, 4, 11, 22 } },
					    /* NB: mixed b/g */
	    [IEEE80211_MODE_VHT_2GHZ]	= { 4, { 2, 4, 11, 22 } },
	    [IEEE80211_MODE_VHT_5GHZ]	= { 3, { 12, 24, 48 } },
	};
	int i, j;

	for (i = 0; i < rs->rs_nrates; i++) {
		if (!add)
			rs->rs_rates[i] &= IEEE80211_RATE_VAL;
		for (j = 0; j < basic[mode].rs_nrates; j++)
			if (basic[mode].rs_rates[j] == rs->rs_rates[i]) {
				rs->rs_rates[i] |= IEEE80211_RATE_BASIC;
				break;
			}
	}
}

/*
 * Set the basic rates in a rate set.
 */
void
ieee80211_setbasicrates(struct ieee80211_rateset *rs,
    enum ieee80211_phymode mode)
{
	setbasicrates(rs, mode, 0);
}

/*
 * Add basic rates to a rate set.
 */
void
ieee80211_addbasicrates(struct ieee80211_rateset *rs,
    enum ieee80211_phymode mode)
{
	setbasicrates(rs, mode, 1);
}

/*
 * WME protocol support.
 *
 * The default 11a/b/g/n parameters come from the WiFi Alliance WMM
 * System Interoperability Test Plan (v1.4, Appendix F) and the 802.11n
 * Draft 2.0 Test Plan (Appendix D).
 *
 * Static/Dynamic Turbo mode settings come from Atheros.
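 *
 * Each table row below is { aifsn, logcwmin, logcwmax, txopLimit, acm };
 * e.g. the 11a AC_VO row { 1, 2, 3, 47, 0 } means AIFSN 1,
 * CWmin 2^2 - 1 = 3, CWmax 2^3 - 1 = 7, a TXOP limit of 47 units
 * of 32us (about 1.5ms) and admission control disabled.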
*/ typedef struct phyParamType { uint8_t aifsn; uint8_t logcwmin; uint8_t logcwmax; uint16_t txopLimit; uint8_t acm; } paramType; static const struct phyParamType phyParamForAC_BE[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = { 3, 4, 6, 0, 0 }, [IEEE80211_MODE_11A] = { 3, 4, 6, 0, 0 }, [IEEE80211_MODE_11B] = { 3, 4, 6, 0, 0 }, [IEEE80211_MODE_11G] = { 3, 4, 6, 0, 0 }, [IEEE80211_MODE_FH] = { 3, 4, 6, 0, 0 }, [IEEE80211_MODE_TURBO_A]= { 2, 3, 5, 0, 0 }, [IEEE80211_MODE_TURBO_G]= { 2, 3, 5, 0, 0 }, [IEEE80211_MODE_STURBO_A]={ 2, 3, 5, 0, 0 }, [IEEE80211_MODE_HALF] = { 3, 4, 6, 0, 0 }, [IEEE80211_MODE_QUARTER]= { 3, 4, 6, 0, 0 }, [IEEE80211_MODE_11NA] = { 3, 4, 6, 0, 0 }, [IEEE80211_MODE_11NG] = { 3, 4, 6, 0, 0 }, [IEEE80211_MODE_VHT_2GHZ] = { 3, 4, 6, 0, 0 }, [IEEE80211_MODE_VHT_5GHZ] = { 3, 4, 6, 0, 0 }, }; static const struct phyParamType phyParamForAC_BK[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = { 7, 4, 10, 0, 0 }, [IEEE80211_MODE_11A] = { 7, 4, 10, 0, 0 }, [IEEE80211_MODE_11B] = { 7, 4, 10, 0, 0 }, [IEEE80211_MODE_11G] = { 7, 4, 10, 0, 0 }, [IEEE80211_MODE_FH] = { 7, 4, 10, 0, 0 }, [IEEE80211_MODE_TURBO_A]= { 7, 3, 10, 0, 0 }, [IEEE80211_MODE_TURBO_G]= { 7, 3, 10, 0, 0 }, [IEEE80211_MODE_STURBO_A]={ 7, 3, 10, 0, 0 }, [IEEE80211_MODE_HALF] = { 7, 4, 10, 0, 0 }, [IEEE80211_MODE_QUARTER]= { 7, 4, 10, 0, 0 }, [IEEE80211_MODE_11NA] = { 7, 4, 10, 0, 0 }, [IEEE80211_MODE_11NG] = { 7, 4, 10, 0, 0 }, [IEEE80211_MODE_VHT_2GHZ] = { 7, 4, 10, 0, 0 }, [IEEE80211_MODE_VHT_5GHZ] = { 7, 4, 10, 0, 0 }, }; static const struct phyParamType phyParamForAC_VI[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = { 1, 3, 4, 94, 0 }, [IEEE80211_MODE_11A] = { 1, 3, 4, 94, 0 }, [IEEE80211_MODE_11B] = { 1, 3, 4, 188, 0 }, [IEEE80211_MODE_11G] = { 1, 3, 4, 94, 0 }, [IEEE80211_MODE_FH] = { 1, 3, 4, 188, 0 }, [IEEE80211_MODE_TURBO_A]= { 1, 2, 3, 94, 0 }, [IEEE80211_MODE_TURBO_G]= { 1, 2, 3, 94, 0 }, [IEEE80211_MODE_STURBO_A]={ 1, 2, 3, 94, 0 }, [IEEE80211_MODE_HALF] = { 1, 3, 4, 94, 0 }, [IEEE80211_MODE_QUARTER]= { 1, 3, 4, 94, 0 }, [IEEE80211_MODE_11NA] = { 1, 3, 4, 94, 0 }, [IEEE80211_MODE_11NG] = { 1, 3, 4, 94, 0 }, [IEEE80211_MODE_VHT_2GHZ] = { 1, 3, 4, 94, 0 }, [IEEE80211_MODE_VHT_5GHZ] = { 1, 3, 4, 94, 0 }, }; static const struct phyParamType phyParamForAC_VO[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = { 1, 2, 3, 47, 0 }, [IEEE80211_MODE_11A] = { 1, 2, 3, 47, 0 }, [IEEE80211_MODE_11B] = { 1, 2, 3, 102, 0 }, [IEEE80211_MODE_11G] = { 1, 2, 3, 47, 0 }, [IEEE80211_MODE_FH] = { 1, 2, 3, 102, 0 }, [IEEE80211_MODE_TURBO_A]= { 1, 2, 2, 47, 0 }, [IEEE80211_MODE_TURBO_G]= { 1, 2, 2, 47, 0 }, [IEEE80211_MODE_STURBO_A]={ 1, 2, 2, 47, 0 }, [IEEE80211_MODE_HALF] = { 1, 2, 3, 47, 0 }, [IEEE80211_MODE_QUARTER]= { 1, 2, 3, 47, 0 }, [IEEE80211_MODE_11NA] = { 1, 2, 3, 47, 0 }, [IEEE80211_MODE_11NG] = { 1, 2, 3, 47, 0 }, [IEEE80211_MODE_VHT_2GHZ] = { 1, 2, 3, 47, 0 }, [IEEE80211_MODE_VHT_5GHZ] = { 1, 2, 3, 47, 0 }, }; static const struct phyParamType bssPhyParamForAC_BE[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = { 3, 4, 10, 0, 0 }, [IEEE80211_MODE_11A] = { 3, 4, 10, 0, 0 }, [IEEE80211_MODE_11B] = { 3, 4, 10, 0, 0 }, [IEEE80211_MODE_11G] = { 3, 4, 10, 0, 0 }, [IEEE80211_MODE_FH] = { 3, 4, 10, 0, 0 }, [IEEE80211_MODE_TURBO_A]= { 2, 3, 10, 0, 0 }, [IEEE80211_MODE_TURBO_G]= { 2, 3, 10, 0, 0 }, [IEEE80211_MODE_STURBO_A]={ 2, 3, 10, 0, 0 }, [IEEE80211_MODE_HALF] = { 3, 4, 10, 0, 0 }, [IEEE80211_MODE_QUARTER]= { 3, 4, 10, 0, 0 }, [IEEE80211_MODE_11NA] = { 3, 4, 10, 0, 0 }, [IEEE80211_MODE_11NG] = { 3, 4, 10, 0, 0 }, }; static 
const struct phyParamType bssPhyParamForAC_VI[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = { 2, 3, 4, 94, 0 }, [IEEE80211_MODE_11A] = { 2, 3, 4, 94, 0 }, [IEEE80211_MODE_11B] = { 2, 3, 4, 188, 0 }, [IEEE80211_MODE_11G] = { 2, 3, 4, 94, 0 }, [IEEE80211_MODE_FH] = { 2, 3, 4, 188, 0 }, [IEEE80211_MODE_TURBO_A]= { 2, 2, 3, 94, 0 }, [IEEE80211_MODE_TURBO_G]= { 2, 2, 3, 94, 0 }, [IEEE80211_MODE_STURBO_A]={ 2, 2, 3, 94, 0 }, [IEEE80211_MODE_HALF] = { 2, 3, 4, 94, 0 }, [IEEE80211_MODE_QUARTER]= { 2, 3, 4, 94, 0 }, [IEEE80211_MODE_11NA] = { 2, 3, 4, 94, 0 }, [IEEE80211_MODE_11NG] = { 2, 3, 4, 94, 0 }, }; static const struct phyParamType bssPhyParamForAC_VO[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = { 2, 2, 3, 47, 0 }, [IEEE80211_MODE_11A] = { 2, 2, 3, 47, 0 }, [IEEE80211_MODE_11B] = { 2, 2, 3, 102, 0 }, [IEEE80211_MODE_11G] = { 2, 2, 3, 47, 0 }, [IEEE80211_MODE_FH] = { 2, 2, 3, 102, 0 }, [IEEE80211_MODE_TURBO_A]= { 1, 2, 2, 47, 0 }, [IEEE80211_MODE_TURBO_G]= { 1, 2, 2, 47, 0 }, [IEEE80211_MODE_STURBO_A]={ 1, 2, 2, 47, 0 }, [IEEE80211_MODE_HALF] = { 2, 2, 3, 47, 0 }, [IEEE80211_MODE_QUARTER]= { 2, 2, 3, 47, 0 }, [IEEE80211_MODE_11NA] = { 2, 2, 3, 47, 0 }, [IEEE80211_MODE_11NG] = { 2, 2, 3, 47, 0 }, }; static void _setifsparams(struct wmeParams *wmep, const paramType *phy) { wmep->wmep_aifsn = phy->aifsn; wmep->wmep_logcwmin = phy->logcwmin; wmep->wmep_logcwmax = phy->logcwmax; wmep->wmep_txopLimit = phy->txopLimit; } static void setwmeparams(struct ieee80211vap *vap, const char *type, int ac, struct wmeParams *wmep, const paramType *phy) { wmep->wmep_acm = phy->acm; _setifsparams(wmep, phy); IEEE80211_DPRINTF(vap, IEEE80211_MSG_WME, "set %s (%s) [acm %u aifsn %u logcwmin %u logcwmax %u txop %u]\n", ieee80211_wme_acnames[ac], type, wmep->wmep_acm, wmep->wmep_aifsn, wmep->wmep_logcwmin, wmep->wmep_logcwmax, wmep->wmep_txopLimit); } static void ieee80211_wme_initparams_locked(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_wme_state *wme = &ic->ic_wme; const paramType *pPhyParam, *pBssPhyParam; struct wmeParams *wmep; enum ieee80211_phymode mode; int i; IEEE80211_LOCK_ASSERT(ic); if ((ic->ic_caps & IEEE80211_C_WME) == 0 || ic->ic_nrunning > 1) return; /* * Clear the wme cap_info field so a qoscount from a previous * vap doesn't confuse later code which only parses the beacon * field and updates hardware when said field changes. * Otherwise the hardware is programmed with defaults, not what * the beacon actually announces. * * Note that we can't ever have 0xff as an actual value; * the only valid values are 0..15. */ wme->wme_wmeChanParams.cap_info = 0xfe; /* * Select mode; we can be called early in which case we * always use auto mode. 
We know we'll be called when * entering the RUN state with bsschan setup properly * so state will eventually get set correctly */ if (ic->ic_bsschan != IEEE80211_CHAN_ANYC) mode = ieee80211_chan2mode(ic->ic_bsschan); else mode = IEEE80211_MODE_AUTO; for (i = 0; i < WME_NUM_AC; i++) { switch (i) { case WME_AC_BK: pPhyParam = &phyParamForAC_BK[mode]; pBssPhyParam = &phyParamForAC_BK[mode]; break; case WME_AC_VI: pPhyParam = &phyParamForAC_VI[mode]; pBssPhyParam = &bssPhyParamForAC_VI[mode]; break; case WME_AC_VO: pPhyParam = &phyParamForAC_VO[mode]; pBssPhyParam = &bssPhyParamForAC_VO[mode]; break; case WME_AC_BE: default: pPhyParam = &phyParamForAC_BE[mode]; pBssPhyParam = &bssPhyParamForAC_BE[mode]; break; } wmep = &wme->wme_wmeChanParams.cap_wmeParams[i]; if (ic->ic_opmode == IEEE80211_M_HOSTAP) { setwmeparams(vap, "chan", i, wmep, pPhyParam); } else { setwmeparams(vap, "chan", i, wmep, pBssPhyParam); } wmep = &wme->wme_wmeBssChanParams.cap_wmeParams[i]; setwmeparams(vap, "bss ", i, wmep, pBssPhyParam); } /* NB: check ic_bss to avoid NULL deref on initial attach */ if (vap->iv_bss != NULL) { /* * Calculate aggressive mode switching threshold based * on beacon interval. This doesn't need locking since * we're only called before entering the RUN state at * which point we start sending beacon frames. */ wme->wme_hipri_switch_thresh = (HIGH_PRI_SWITCH_THRESH * vap->iv_bss->ni_intval) / 100; wme->wme_flags &= ~WME_F_AGGRMODE; ieee80211_wme_updateparams(vap); } } void ieee80211_wme_initparams(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; IEEE80211_LOCK(ic); ieee80211_wme_initparams_locked(vap); IEEE80211_UNLOCK(ic); } /* * Update WME parameters for ourself and the BSS. */ void ieee80211_wme_updateparams_locked(struct ieee80211vap *vap) { static const paramType aggrParam[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = { 2, 4, 10, 64, 0 }, [IEEE80211_MODE_11A] = { 2, 4, 10, 64, 0 }, [IEEE80211_MODE_11B] = { 2, 5, 10, 64, 0 }, [IEEE80211_MODE_11G] = { 2, 4, 10, 64, 0 }, [IEEE80211_MODE_FH] = { 2, 5, 10, 64, 0 }, [IEEE80211_MODE_TURBO_A] = { 1, 3, 10, 64, 0 }, [IEEE80211_MODE_TURBO_G] = { 1, 3, 10, 64, 0 }, [IEEE80211_MODE_STURBO_A] = { 1, 3, 10, 64, 0 }, [IEEE80211_MODE_HALF] = { 2, 4, 10, 64, 0 }, [IEEE80211_MODE_QUARTER] = { 2, 4, 10, 64, 0 }, [IEEE80211_MODE_11NA] = { 2, 4, 10, 64, 0 }, /* XXXcheck*/ [IEEE80211_MODE_11NG] = { 2, 4, 10, 64, 0 }, /* XXXcheck*/ [IEEE80211_MODE_VHT_2GHZ] = { 2, 4, 10, 64, 0 }, /* XXXcheck*/ [IEEE80211_MODE_VHT_5GHZ] = { 2, 4, 10, 64, 0 }, /* XXXcheck*/ }; struct ieee80211com *ic = vap->iv_ic; struct ieee80211_wme_state *wme = &ic->ic_wme; const struct wmeParams *wmep; struct wmeParams *chanp, *bssp; enum ieee80211_phymode mode; int i; int do_aggrmode = 0; /* * Set up the channel access parameters for the physical * device. First populate the configured settings. */ for (i = 0; i < WME_NUM_AC; i++) { chanp = &wme->wme_chanParams.cap_wmeParams[i]; wmep = &wme->wme_wmeChanParams.cap_wmeParams[i]; chanp->wmep_aifsn = wmep->wmep_aifsn; chanp->wmep_logcwmin = wmep->wmep_logcwmin; chanp->wmep_logcwmax = wmep->wmep_logcwmax; chanp->wmep_txopLimit = wmep->wmep_txopLimit; chanp = &wme->wme_bssChanParams.cap_wmeParams[i]; wmep = &wme->wme_wmeBssChanParams.cap_wmeParams[i]; chanp->wmep_aifsn = wmep->wmep_aifsn; chanp->wmep_logcwmin = wmep->wmep_logcwmin; chanp->wmep_logcwmax = wmep->wmep_logcwmax; chanp->wmep_txopLimit = wmep->wmep_txopLimit; } /* * Select mode; we can be called early in which case we * always use auto mode. 
We know we'll be called when
	 * entering the RUN state with bsschan setup properly
	 * so state will eventually get set correctly.
	 */
	if (ic->ic_bsschan != IEEE80211_CHAN_ANYC)
		mode = ieee80211_chan2mode(ic->ic_bsschan);
	else
		mode = IEEE80211_MODE_AUTO;

	/*
	 * This implements aggressive mode as found in certain
	 * vendors' APs.  When there is significant high
	 * priority (VI/VO) traffic in the BSS throttle back BE
	 * traffic by using conservative parameters.  Otherwise
	 * BE uses aggressive params to optimize performance of
	 * legacy/non-QoS traffic.
	 */

	/* Hostap?  Only if aggressive mode is enabled */
	if (vap->iv_opmode == IEEE80211_M_HOSTAP &&
	    (wme->wme_flags & WME_F_AGGRMODE) != 0)
		do_aggrmode = 1;

	/*
	 * Station?  Only if we're in a non-QoS BSS.
	 */
	else if ((vap->iv_opmode == IEEE80211_M_STA &&
	    (vap->iv_bss->ni_flags & IEEE80211_NODE_QOS) == 0))
		do_aggrmode = 1;

	/*
	 * IBSS?  Only if we have WME enabled.
	 */
	else if ((vap->iv_opmode == IEEE80211_M_IBSS) &&
	    (vap->iv_flags & IEEE80211_F_WME))
		do_aggrmode = 1;

	/*
	 * If WME is disabled on this VAP, default to aggressive mode
	 * regardless of the configuration.
	 */
	if ((vap->iv_flags & IEEE80211_F_WME) == 0)
		do_aggrmode = 1;

	/* XXX WDS? */

	/* XXX MBSS? */

	if (do_aggrmode) {
		chanp = &wme->wme_chanParams.cap_wmeParams[WME_AC_BE];
		bssp = &wme->wme_bssChanParams.cap_wmeParams[WME_AC_BE];

		chanp->wmep_aifsn = bssp->wmep_aifsn = aggrParam[mode].aifsn;
		chanp->wmep_logcwmin = bssp->wmep_logcwmin =
		    aggrParam[mode].logcwmin;
		chanp->wmep_logcwmax = bssp->wmep_logcwmax =
		    aggrParam[mode].logcwmax;
		chanp->wmep_txopLimit = bssp->wmep_txopLimit =
		    (vap->iv_flags & IEEE80211_F_BURST) ?
		    aggrParam[mode].txopLimit : 0;

		IEEE80211_DPRINTF(vap, IEEE80211_MSG_WME,
		    "update %s (chan+bss) [acm %u aifsn %u logcwmin %u "
		    "logcwmax %u txop %u]\n", ieee80211_wme_acnames[WME_AC_BE],
		    chanp->wmep_acm, chanp->wmep_aifsn, chanp->wmep_logcwmin,
		    chanp->wmep_logcwmax, chanp->wmep_txopLimit);
	}

	/*
	 * Change the contention window based on the number of associated
	 * stations.  If the number of associated stations is 1 and
	 * aggressive mode is enabled, lower the contention window even
	 * further.
	 */
	if (vap->iv_opmode == IEEE80211_M_HOSTAP &&
	    vap->iv_sta_assoc < 2 && (wme->wme_flags & WME_F_AGGRMODE) != 0) {
		static const uint8_t logCwMin[IEEE80211_MODE_MAX] = {
		    [IEEE80211_MODE_AUTO]	= 3,
		    [IEEE80211_MODE_11A]	= 3,
		    [IEEE80211_MODE_11B]	= 4,
		    [IEEE80211_MODE_11G]	= 3,
		    [IEEE80211_MODE_FH]		= 4,
		    [IEEE80211_MODE_TURBO_A]	= 3,
		    [IEEE80211_MODE_TURBO_G]	= 3,
		    [IEEE80211_MODE_STURBO_A]	= 3,
		    [IEEE80211_MODE_HALF]	= 3,
		    [IEEE80211_MODE_QUARTER]	= 3,
		    [IEEE80211_MODE_11NA]	= 3,
		    [IEEE80211_MODE_11NG]	= 3,
		    [IEEE80211_MODE_VHT_2GHZ]	= 3,
		    [IEEE80211_MODE_VHT_5GHZ]	= 3,
		};

		chanp = &wme->wme_chanParams.cap_wmeParams[WME_AC_BE];
		bssp = &wme->wme_bssChanParams.cap_wmeParams[WME_AC_BE];

		chanp->wmep_logcwmin = bssp->wmep_logcwmin = logCwMin[mode];
		IEEE80211_DPRINTF(vap, IEEE80211_MSG_WME,
		    "update %s (chan+bss) logcwmin %u\n",
		    ieee80211_wme_acnames[WME_AC_BE], chanp->wmep_logcwmin);
	}

	/* schedule the deferred WME update */
	ieee80211_runtask(ic, &vap->iv_wme_task);

	IEEE80211_DPRINTF(vap, IEEE80211_MSG_WME,
	    "%s: WME params updated, cap_info 0x%x\n", __func__,
	    vap->iv_opmode == IEEE80211_M_STA ?
	    wme->wme_wmeChanParams.cap_info :
	    wme->wme_bssChanParams.cap_info);
}

void
ieee80211_wme_updateparams(struct ieee80211vap *vap)
{
	struct ieee80211com *ic = vap->iv_ic;

	if (ic->ic_caps & IEEE80211_C_WME) {
		IEEE80211_LOCK(ic);
		ieee80211_wme_updateparams_locked(vap);
		IEEE80211_UNLOCK(ic);
	}
}

/*
 * Fetch the WME parameters for the given VAP.
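 *
 * Typical driver usage (sketch):
 *
 *	struct chanAccParams wp;
 *
 *	ieee80211_wme_vap_getparams(vap, &wp);
 *
 * after which the driver programs its EDCA parameters from
 * wp.cap_wmeParams[].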
* * When net80211 grows p2p, etc support, this may return different * parameters for each VAP. */ void ieee80211_wme_vap_getparams(struct ieee80211vap *vap, struct chanAccParams *wp) { memcpy(wp, &vap->iv_ic->ic_wme.wme_chanParams, sizeof(*wp)); } /* * For NICs which only support one set of WME parameters (ie, softmac NICs) * there may be different VAP WME parameters but only one is "active". * This returns the "NIC" WME parameters for the currently active * context. */ void ieee80211_wme_ic_getparams(struct ieee80211com *ic, struct chanAccParams *wp) { memcpy(wp, &ic->ic_wme.wme_chanParams, sizeof(*wp)); } /* * Return whether to use QoS on a given WME queue. * * This is intended to be called from the transmit path of softmac drivers * which are setting NoAck bits in transmit descriptors. * * Ideally this would be set in some transmit field before the packet is * queued to the driver but net80211 isn't quite there yet. */ int ieee80211_wme_vap_ac_is_noack(struct ieee80211vap *vap, int ac) { /* Bounds/sanity check */ if (ac < 0 || ac >= WME_NUM_AC) return (0); /* Again, there's only one global context for now */ return (!! vap->iv_ic->ic_wme.wme_chanParams.cap_wmeParams[ac].wmep_noackPolicy); } static void parent_updown(void *arg, int npending) { struct ieee80211com *ic = arg; ic->ic_parent(ic); } static void update_mcast(void *arg, int npending) { struct ieee80211com *ic = arg; ic->ic_update_mcast(ic); } static void update_promisc(void *arg, int npending) { struct ieee80211com *ic = arg; ic->ic_update_promisc(ic); } static void update_channel(void *arg, int npending) { struct ieee80211com *ic = arg; ic->ic_set_channel(ic); ieee80211_radiotap_chan_change(ic); } static void update_chw(void *arg, int npending) { struct ieee80211com *ic = arg; /* * XXX should we defer the channel width _config_ update until now? */ ic->ic_update_chw(ic); } /* * Deferred WME parameter and beacon update. * * In preparation for per-VAP WME configuration, call the VAP * method if the VAP requires it. Otherwise, just call the * older global method. There isn't a per-VAP WME configuration * just yet so for now just use the global configuration. */ static void vap_update_wme(void *arg, int npending) { struct ieee80211vap *vap = arg; struct ieee80211com *ic = vap->iv_ic; struct ieee80211_wme_state *wme = &ic->ic_wme; /* Driver update */ if (vap->iv_wme_update != NULL) vap->iv_wme_update(vap, ic->ic_wme.wme_chanParams.cap_wmeParams); else ic->ic_wme.wme_update(ic); IEEE80211_LOCK(ic); /* * Arrange for the beacon update. * * XXX what about MBSS, WDS? */ if (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_IBSS) { /* * Arrange for a beacon update and bump the parameter * set number so associated stations load the new values. */ wme->wme_bssChanParams.cap_info = (wme->wme_bssChanParams.cap_info+1) & WME_QOSINFO_COUNT; ieee80211_beacon_notify(vap, IEEE80211_BEACON_WME); } IEEE80211_UNLOCK(ic); } static void restart_vaps(void *arg, int npending) { struct ieee80211com *ic = arg; ieee80211_suspend_all(ic); ieee80211_resume_all(ic); } /* * Block until the parent is in a known state. This is * used after any operations that dispatch a task (e.g. * to auto-configure the parent device up/down). 
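 *
 * For example (sketch), ieee80211_stop_all() queues the parent task
 * to bring the device down and then calls this function, so a driver
 * detach path using it may safely tear down the parent afterwards.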
 */
void
ieee80211_waitfor_parent(struct ieee80211com *ic)
{
	taskqueue_block(ic->ic_tq);
	ieee80211_draintask(ic, &ic->ic_parent_task);
	ieee80211_draintask(ic, &ic->ic_mcast_task);
	ieee80211_draintask(ic, &ic->ic_promisc_task);
	ieee80211_draintask(ic, &ic->ic_chan_task);
	ieee80211_draintask(ic, &ic->ic_bmiss_task);
	ieee80211_draintask(ic, &ic->ic_chw_task);
	taskqueue_unblock(ic->ic_tq);
}

/*
 * Check to see whether the current channel needs reset.
 *
 * Some devices don't handle being given an invalid channel
 * in their operating mode very well (eg wpi(4) will throw a
 * firmware exception.)
 *
 * Return 0 if we're ok, 1 if the channel needs to be reset.
 *
 * See PR kern/202502.
 */
static int
ieee80211_start_check_reset_chan(struct ieee80211vap *vap)
{
	struct ieee80211com *ic = vap->iv_ic;

	if ((vap->iv_opmode == IEEE80211_M_IBSS &&
	     IEEE80211_IS_CHAN_NOADHOC(ic->ic_curchan)) ||
	    (vap->iv_opmode == IEEE80211_M_HOSTAP &&
	     IEEE80211_IS_CHAN_NOHOSTAP(ic->ic_curchan)))
		return (1);
	return (0);
}

/*
 * Reset the curchan to a known good state.
 */
static void
ieee80211_start_reset_chan(struct ieee80211vap *vap)
{
	struct ieee80211com *ic = vap->iv_ic;

	ic->ic_curchan = &ic->ic_channels[0];
}

/*
 * Start a vap running.  If this is the first vap to be
 * set running on the underlying device then we
 * automatically bring the device up.
 */
void
ieee80211_start_locked(struct ieee80211vap *vap)
{
	struct ifnet *ifp = vap->iv_ifp;
	struct ieee80211com *ic = vap->iv_ic;

	IEEE80211_LOCK_ASSERT(ic);

	IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE | IEEE80211_MSG_DEBUG,
	    "start running, %d vaps running\n", ic->ic_nrunning);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		/*
		 * Mark us running.  Note that it's ok to do this first;
		 * if we need to bring the parent device up we defer that
		 * to avoid dropping the com lock.  We expect the device
		 * to respond to being marked up by calling back into us
		 * through ieee80211_start_all at which point we'll come
		 * back in here and complete the work.
		 */
		ifp->if_drv_flags |= IFF_DRV_RUNNING;
-		ieee80211_notify_ifnet_change(vap);
+		ieee80211_notify_ifnet_change(vap, IFF_DRV_RUNNING);

		/*
		 * We are not running; if we are the first vap to be
		 * brought up, auto-up the parent if necessary.
		 */
		if (ic->ic_nrunning++ == 0) {
			/* reset the channel to a known good channel */
			if (ieee80211_start_check_reset_chan(vap))
				ieee80211_start_reset_chan(vap);

			IEEE80211_DPRINTF(vap,
			    IEEE80211_MSG_STATE | IEEE80211_MSG_DEBUG,
			    "%s: up parent %s\n", __func__, ic->ic_name);
			ieee80211_runtask(ic, &ic->ic_parent_task);
			return;
		}
	}

	/*
	 * If the parent is up and running, then kick the
	 * 802.11 state machine as appropriate.
	 */
	if (vap->iv_roaming != IEEE80211_ROAMING_MANUAL) {
		if (vap->iv_opmode == IEEE80211_M_STA) {
#if 0
			/* XXX bypasses scan too easily; disable for now */
			/*
			 * Try to be intelligent about clocking the state
			 * machine.  If we're currently in RUN state then
			 * we should be able to apply any new state/parameters
			 * simply by re-associating.  Otherwise we need to
			 * re-scan to select an appropriate ap.
			 */
			if (vap->iv_state >= IEEE80211_S_RUN)
				ieee80211_new_state_locked(vap,
				    IEEE80211_S_ASSOC, 1);
			else
#endif
			ieee80211_new_state_locked(vap, IEEE80211_S_SCAN, 0);
		} else {
			/*
			 * For monitor+wds mode there's nothing to do but
			 * start running.  Otherwise if this is the first
			 * vap to be brought up, start a scan which may be
			 * preempted if the station is locked to a particular
			 * channel.
			 */
			vap->iv_flags_ext |= IEEE80211_FEXT_REINIT;
			if (vap->iv_opmode == IEEE80211_M_MONITOR ||
			    vap->iv_opmode == IEEE80211_M_WDS)
				ieee80211_new_state_locked(vap,
				    IEEE80211_S_RUN, -1);
			else
				ieee80211_new_state_locked(vap,
				    IEEE80211_S_SCAN, 0);
		}
	}
}

/*
 * Start a single vap.
 */
void
ieee80211_init(void *arg)
{
	struct ieee80211vap *vap = arg;

	IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE | IEEE80211_MSG_DEBUG,
	    "%s\n", __func__);

	IEEE80211_LOCK(vap->iv_ic);
	ieee80211_start_locked(vap);
	IEEE80211_UNLOCK(vap->iv_ic);
}

/*
 * Start all runnable vap's on a device.
 */
void
ieee80211_start_all(struct ieee80211com *ic)
{
	struct ieee80211vap *vap;

	IEEE80211_LOCK(ic);
	TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) {
		struct ifnet *ifp = vap->iv_ifp;
		if (IFNET_IS_UP_RUNNING(ifp))	/* NB: avoid recursion */
			ieee80211_start_locked(vap);
	}
	IEEE80211_UNLOCK(ic);
}

/*
 * Stop a vap.  We force it down using the state machine
 * then mark its ifnet not running.  If this is the last
 * vap running on the underlying device then we close it
 * too to ensure it will be properly initialized when the
 * next vap is brought up.
 */
void
ieee80211_stop_locked(struct ieee80211vap *vap)
{
	struct ieee80211com *ic = vap->iv_ic;
	struct ifnet *ifp = vap->iv_ifp;

	IEEE80211_LOCK_ASSERT(ic);

	IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE | IEEE80211_MSG_DEBUG,
	    "stop running, %d vaps running\n", ic->ic_nrunning);

	ieee80211_new_state_locked(vap, IEEE80211_S_INIT, -1);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;	/* mark us stopped */
-		ieee80211_notify_ifnet_change(vap);
+		ieee80211_notify_ifnet_change(vap, IFF_DRV_RUNNING);
		if (--ic->ic_nrunning == 0) {
			IEEE80211_DPRINTF(vap,
			    IEEE80211_MSG_STATE | IEEE80211_MSG_DEBUG,
			    "down parent %s\n", ic->ic_name);
			ieee80211_runtask(ic, &ic->ic_parent_task);
		}
	}
}

void
ieee80211_stop(struct ieee80211vap *vap)
{
	struct ieee80211com *ic = vap->iv_ic;

	IEEE80211_LOCK(ic);
	ieee80211_stop_locked(vap);
	IEEE80211_UNLOCK(ic);
}

/*
 * Stop all vap's running on a device.
 */
void
ieee80211_stop_all(struct ieee80211com *ic)
{
	struct ieee80211vap *vap;

	IEEE80211_LOCK(ic);
	TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) {
		struct ifnet *ifp = vap->iv_ifp;
		if (IFNET_IS_UP_RUNNING(ifp))	/* NB: avoid recursion */
			ieee80211_stop_locked(vap);
	}
	IEEE80211_UNLOCK(ic);

	ieee80211_waitfor_parent(ic);
}

/*
 * Stop all vap's running on a device and arrange
 * for those that were running to be resumed.
 */
void
ieee80211_suspend_all(struct ieee80211com *ic)
{
	struct ieee80211vap *vap;

	IEEE80211_LOCK(ic);
	TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) {
		struct ifnet *ifp = vap->iv_ifp;
		if (IFNET_IS_UP_RUNNING(ifp)) {	/* NB: avoid recursion */
			vap->iv_flags_ext |= IEEE80211_FEXT_RESUME;
			ieee80211_stop_locked(vap);
		}
	}
	IEEE80211_UNLOCK(ic);

	ieee80211_waitfor_parent(ic);
}

/*
 * Start all vap's marked for resume.
 */
void
ieee80211_resume_all(struct ieee80211com *ic)
{
	struct ieee80211vap *vap;

	IEEE80211_LOCK(ic);
	TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) {
		struct ifnet *ifp = vap->iv_ifp;
		if (!IFNET_IS_UP_RUNNING(ifp) &&
		    (vap->iv_flags_ext & IEEE80211_FEXT_RESUME)) {
			vap->iv_flags_ext &= ~IEEE80211_FEXT_RESUME;
			ieee80211_start_locked(vap);
		}
	}
	IEEE80211_UNLOCK(ic);
}

/*
 * Restart all vap's running on a device.
 */
void
ieee80211_restart_all(struct ieee80211com *ic)
{
	/*
	 * NB: do not use ieee80211_runtask here, we will
	 * block & drain net80211 taskqueue.
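	 *
	 * (restart_vaps() runs ieee80211_suspend_all()/resume_all(),
	 * which end with ieee80211_waitfor_parent() and thus block on
	 * ic_tq; queueing the restart task on ic_tq itself would
	 * therefore deadlock, hence taskqueue_thread below.)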
	 */
	taskqueue_enqueue(taskqueue_thread, &ic->ic_restart_task);
}

void
ieee80211_beacon_miss(struct ieee80211com *ic)
{
	IEEE80211_LOCK(ic);
	if ((ic->ic_flags & IEEE80211_F_SCAN) == 0) {
		/* Process in a taskq, the handler may reenter the driver */
		ieee80211_runtask(ic, &ic->ic_bmiss_task);
	}
	IEEE80211_UNLOCK(ic);
}

static void
beacon_miss(void *arg, int npending)
{
	struct ieee80211com *ic = arg;
	struct ieee80211vap *vap;

	IEEE80211_LOCK(ic);
	TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) {
		/*
		 * We only pass events through for sta vap's in RUN+ state;
		 * may be too restrictive but for now this saves all the
		 * handlers duplicating these checks.
		 */
		if (vap->iv_opmode == IEEE80211_M_STA &&
		    vap->iv_state >= IEEE80211_S_RUN &&
		    vap->iv_bmiss != NULL)
			vap->iv_bmiss(vap);
	}
	IEEE80211_UNLOCK(ic);
}

static void
beacon_swmiss(void *arg, int npending)
{
	struct ieee80211vap *vap = arg;
	struct ieee80211com *ic = vap->iv_ic;

	IEEE80211_LOCK(ic);
	if (vap->iv_state >= IEEE80211_S_RUN) {
		/* XXX Call multiple times if npending > zero? */
		vap->iv_bmiss(vap);
	}
	IEEE80211_UNLOCK(ic);
}

/*
 * Software beacon miss handling.  Check if any beacons
 * were received in the last period.  If not, post a
 * beacon miss; otherwise reset the counter.
 */
void
ieee80211_swbmiss(void *arg)
{
	struct ieee80211vap *vap = arg;
	struct ieee80211com *ic = vap->iv_ic;

	IEEE80211_LOCK_ASSERT(ic);

	KASSERT(vap->iv_state >= IEEE80211_S_RUN,
	    ("wrong state %d", vap->iv_state));

	if (ic->ic_flags & IEEE80211_F_SCAN) {
		/*
		 * If scanning just ignore and reset state.  If we get a
		 * bmiss after coming out of scan because we haven't had
		 * time to receive a beacon then we should probe the AP
		 * before posting a real bmiss (unless iv_bmiss_max has
		 * been artificially lowered).  A cleaner solution might
		 * be to disable the timer on scan start/end but to handle
		 * case of multiple sta vap's we'd need to disable the
		 * timers of all affected vap's.
		 */
		vap->iv_swbmiss_count = 0;
	} else if (vap->iv_swbmiss_count == 0) {
		if (vap->iv_bmiss != NULL)
			ieee80211_runtask(ic, &vap->iv_swbmiss_task);
	} else
		vap->iv_swbmiss_count = 0;
	callout_reset(&vap->iv_swbmiss, vap->iv_swbmiss_period,
	    ieee80211_swbmiss, vap);
}

/*
 * Start an 802.11h channel switch.  We record the parameters,
 * mark the operation pending, notify each vap through the
 * beacon update mechanism so it can update the beacon frame
 * contents, and then switch vap's to CSA state to block outbound
 * traffic.  Devices that handle CSA directly can use the state
 * switch to do the right thing so long as they call
 * ieee80211_csa_completeswitch when it's time to complete the
 * channel change.  Devices that depend on the net80211 layer can
 * use ieee80211_beacon_update to handle the countdown and the
 * channel switch.
 */
void
ieee80211_csa_startswitch(struct ieee80211com *ic,
    struct ieee80211_channel *c, int mode, int count)
{
	struct ieee80211vap *vap;

	IEEE80211_LOCK_ASSERT(ic);

	ic->ic_csa_newchan = c;
	ic->ic_csa_mode = mode;
	ic->ic_csa_count = count;
	ic->ic_flags |= IEEE80211_F_CSAPENDING;
	TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) {
		if (vap->iv_opmode == IEEE80211_M_HOSTAP ||
		    vap->iv_opmode == IEEE80211_M_IBSS ||
		    vap->iv_opmode == IEEE80211_M_MBSS)
			ieee80211_beacon_notify(vap, IEEE80211_BEACON_CSA);
		/* switch to CSA state to block outbound traffic */
		if (vap->iv_state == IEEE80211_S_RUN)
			ieee80211_new_state_locked(vap, IEEE80211_S_CSA, 0);
	}
	ieee80211_notify_csa(ic, c, mode, count);
}

/*
 * Complete the channel switch by transitioning all CSA VAPs to RUN.
* This is called by both the completion and cancellation functions * so each VAP is placed back in the RUN state and can thus transmit. */ static void csa_completeswitch(struct ieee80211com *ic) { struct ieee80211vap *vap; ic->ic_csa_newchan = NULL; ic->ic_flags &= ~IEEE80211_F_CSAPENDING; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) if (vap->iv_state == IEEE80211_S_CSA) ieee80211_new_state_locked(vap, IEEE80211_S_RUN, 0); } /* * Complete an 802.11h channel switch started by ieee80211_csa_startswitch. * We clear state and move all vap's in CSA state to RUN state * so they can again transmit. * * Although this may not be completely correct, update the BSS channel * for each VAP to the newly configured channel. The setcurchan sets * the current operating channel for the interface (so the radio does * switch over) but the VAP BSS isn't updated, leading to incorrectly * reported information via ioctl. */ void ieee80211_csa_completeswitch(struct ieee80211com *ic) { struct ieee80211vap *vap; IEEE80211_LOCK_ASSERT(ic); KASSERT(ic->ic_flags & IEEE80211_F_CSAPENDING, ("csa not pending")); ieee80211_setcurchan(ic, ic->ic_csa_newchan); TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) if (vap->iv_state == IEEE80211_S_CSA) vap->iv_bss->ni_chan = ic->ic_curchan; csa_completeswitch(ic); } /* * Cancel an 802.11h channel switch started by ieee80211_csa_startswitch. * We clear state and move all vap's in CSA state to RUN state * so they can again transmit. */ void ieee80211_csa_cancelswitch(struct ieee80211com *ic) { IEEE80211_LOCK_ASSERT(ic); csa_completeswitch(ic); } /* * Complete a DFS CAC started by ieee80211_dfs_cac_start. * We clear state and move all vap's in CAC state to RUN state. */ void ieee80211_cac_completeswitch(struct ieee80211vap *vap0) { struct ieee80211com *ic = vap0->iv_ic; struct ieee80211vap *vap; IEEE80211_LOCK(ic); /* * Complete CAC state change for lead vap first; then * clock all the other vap's waiting. */ KASSERT(vap0->iv_state == IEEE80211_S_CAC, ("wrong state %d", vap0->iv_state)); ieee80211_new_state_locked(vap0, IEEE80211_S_RUN, 0); TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) if (vap->iv_state == IEEE80211_S_CAC && vap != vap0) ieee80211_new_state_locked(vap, IEEE80211_S_RUN, 0); IEEE80211_UNLOCK(ic); } /* * Force all vap's other than the specified vap to the INIT state * and mark them as waiting for a scan to complete. These vaps * will be brought up when the scan completes and the scanning vap * reaches RUN state by wakeupwaiting. */ static void markwaiting(struct ieee80211vap *vap0) { struct ieee80211com *ic = vap0->iv_ic; struct ieee80211vap *vap; IEEE80211_LOCK_ASSERT(ic); /* * A vap list entry can not disappear since we are running on the * taskqueue and a vap destroy will queue and drain another state * change task. */ TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { if (vap == vap0) continue; if (vap->iv_state != IEEE80211_S_INIT) { /* NB: iv_newstate may drop the lock */ vap->iv_newstate(vap, IEEE80211_S_INIT, 0); IEEE80211_LOCK_ASSERT(ic); vap->iv_flags_ext |= IEEE80211_FEXT_SCANWAIT; } } } /* * Wakeup all vap's waiting for a scan to complete. This is the * companion to markwaiting (above) and is used to coordinate * multiple vaps scanning. * This is called from the state taskqueue. */ static void wakeupwaiting(struct ieee80211vap *vap0) { struct ieee80211com *ic = vap0->iv_ic; struct ieee80211vap *vap; IEEE80211_LOCK_ASSERT(ic); /* * A vap list entry can not disappear since we are running on the * taskqueue and a vap destroy will queue and drain another state * change task. 
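 *
 * Example (sketch): with an AP vap and a STA vap sharing a radio, a
 * forced scan on the STA vap drops the AP vap to INIT via
 * markwaiting(); once the STA vap reaches RUN, this routine clocks
 * the AP vap straight back to RUN.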
*/ TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { if (vap == vap0) continue; if (vap->iv_flags_ext & IEEE80211_FEXT_SCANWAIT) { vap->iv_flags_ext &= ~IEEE80211_FEXT_SCANWAIT; /* NB: sta's cannot go INIT->RUN */ /* NB: iv_newstate may drop the lock */ /* * This is problematic if the interface has OACTIVE * set. Only the deferred ieee80211_newstate_cb() * will end up actually /clearing/ the OACTIVE * flag on a state transition to RUN from a non-RUN * state. * * But, we're not actually deferring this callback; * and when the deferred call occurs it shows up as * a RUN->RUN transition! So the flag isn't/wasn't * cleared! * * I'm also not sure if it's correct to actually * do the transitions here fully through the deferred * paths either as other things can be invoked as * part of that state machine. * * So just keep this in mind when looking at what * the markwaiting/wakeupwaiting routines are doing * and how they invoke vap state changes. */ vap->iv_newstate(vap, vap->iv_opmode == IEEE80211_M_STA ? IEEE80211_S_SCAN : IEEE80211_S_RUN, 0); IEEE80211_LOCK_ASSERT(ic); } } } /* * Handle post state change work common to all operating modes. */ static void ieee80211_newstate_cb(void *xvap, int npending) { struct ieee80211vap *vap = xvap; struct ieee80211com *ic = vap->iv_ic; enum ieee80211_state nstate, ostate; int arg, rc; IEEE80211_LOCK(ic); nstate = vap->iv_nstate; arg = vap->iv_nstate_arg; if (vap->iv_flags_ext & IEEE80211_FEXT_REINIT) { /* * We have been requested to drop back to INIT before * proceeding to the new state. */ /* Deny any state changes while we are here. */ vap->iv_nstate = IEEE80211_S_INIT; IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE, "%s: %s -> %s arg %d\n", __func__, ieee80211_state_name[vap->iv_state], ieee80211_state_name[vap->iv_nstate], arg); vap->iv_newstate(vap, vap->iv_nstate, 0); IEEE80211_LOCK_ASSERT(ic); vap->iv_flags_ext &= ~(IEEE80211_FEXT_REINIT | IEEE80211_FEXT_STATEWAIT); /* enqueue new state transition after cancel_scan() task */ ieee80211_new_state_locked(vap, nstate, arg); goto done; } ostate = vap->iv_state; if (nstate == IEEE80211_S_SCAN && ostate != IEEE80211_S_INIT) { /* * SCAN was forced; e.g. on beacon miss. Force other running * vap's to INIT state and mark them as waiting for the scan to * complete. This ensures they don't interfere with our * scanning. Since we are single threaded the vaps can not * transition again while we are executing. * * XXX not always right, assumes ap follows sta */ markwaiting(vap); } IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE, "%s: %s -> %s arg %d\n", __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate], arg); rc = vap->iv_newstate(vap, nstate, arg); IEEE80211_LOCK_ASSERT(ic); vap->iv_flags_ext &= ~IEEE80211_FEXT_STATEWAIT; if (rc != 0) { /* State transition failed */ KASSERT(rc != EINPROGRESS, ("iv_newstate was deferred")); KASSERT(nstate != IEEE80211_S_INIT, ("INIT state change failed")); IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE, "%s: %s returned error %d\n", __func__, ieee80211_state_name[nstate], rc); goto done; } /* * Handle the case of a RUN->RUN transition occurring when STA + AP * VAPs occur on the same radio. * * The mark and wakeup waiting routines call iv_newstate() directly, * but they do not end up deferring state changes here. * Thus, although the VAP newstate method sees a transition * of RUN->INIT->RUN, the deferred path here only sees a RUN->RUN * transition. If OACTIVE is set then it is never cleared. * * So, if we're here and the state is RUN, just clear OACTIVE.
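 *
 * (Concrete example: a STA vap starts a forced scan, markwaiting()
 * pushes a co-located AP vap to INIT via a direct iv_newstate() call,
 * and wakeupwaiting() later calls iv_newstate() directly again to
 * return it to RUN.  The deferred callback then sees ostate == nstate
 * == RUN, which is exactly the case handled here.)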
* At some point if the markwaiting/wakeupwaiting paths end up * also invoking the deferred state updates then this will * be no-op code - and also if OACTIVE is finally retired, it'll * also be no-op code. */ if (nstate == IEEE80211_S_RUN) { /* * OACTIVE may be set on the vap if the upper layer * tried to transmit (e.g. IPv6 NDP) before we reach * RUN state. Clear it and restart xmit. * * Note this can also happen as a result of SLEEP->RUN * (i.e. coming out of power save mode). * * Historically this was done only for a state change * but is needed earlier; see next comment. The 2nd half * of the work is still only done in case of an actual * state change below. */ /* * Unblock the VAP queue; a RUN->RUN state can happen * on a STA+AP setup on the AP vap. See wakeupwaiting(). */ vap->iv_ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; /* * XXX TODO Kick-start a VAP queue - this should be a method! */ } /* No actual transition, skip post processing */ if (ostate == nstate) goto done; if (nstate == IEEE80211_S_RUN) { /* bring up any vaps waiting on us */ wakeupwaiting(vap); } else if (nstate == IEEE80211_S_INIT) { /* * Flush the scan cache if we did the last scan (XXX?) * and flush any frames on send queues from this vap. * Note the mgt q is used only for legacy drivers and * will go away shortly. */ ieee80211_scan_flush(vap); /* * XXX TODO: ic/vap queue flush */ } done: IEEE80211_UNLOCK(ic); } /* * Public interface for initiating a state machine change. * This routine single-threads the request and coordinates * the scheduling of multiple vaps for the purpose of selecting * an operating channel. Specifically the following scenarios * are handled: * o only one vap can be selecting a channel so on transition to * SCAN state if another vap is already scanning then * mark the caller for later processing and return without * doing anything (XXX? expectations by caller of synchronous operation) * o only one vap can be doing CAC of a channel so on transition to * CAC state if another vap is already scanning for radar then * mark the caller for later processing and return without * doing anything (XXX? expectations by caller of synchronous operation) * o if another vap is already running when a request is made * to SCAN then an operating channel has been chosen; bypass * the scan and just join the channel * * Note that the state change call is done through the iv_newstate * method pointer so any driver routine gets invoked. The driver * will normally call back into operating mode-specific * ieee80211_newstate routines (below) unless it needs to completely * bypass the state machine (e.g. because the firmware has its * own idea how things should work). Bypassing the net80211 layer * is usually a mistake and indicates lack of proper integration * with the net80211 layer. */ int ieee80211_new_state_locked(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211vap *vp; enum ieee80211_state ostate; int nrunning, nscanning; IEEE80211_LOCK_ASSERT(ic); if (vap->iv_flags_ext & IEEE80211_FEXT_STATEWAIT) { if (vap->iv_nstate == IEEE80211_S_INIT || ((vap->iv_state == IEEE80211_S_INIT || (vap->iv_flags_ext & IEEE80211_FEXT_REINIT)) && vap->iv_nstate == IEEE80211_S_SCAN && nstate > IEEE80211_S_SCAN)) { /* * XXX The vap is being stopped/started, * do not allow any other state changes * until this is completed.
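 * (This is the IEEE80211_FEXT_REINIT path above: the vap is first
 * forced to INIT and the original target state is re-queued, so
 * accepting another request here would interleave with that
 * two-step sequence.)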
*/ IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE, "%s: %s -> %s (%s) transition discarded\n", __func__, ieee80211_state_name[vap->iv_state], ieee80211_state_name[nstate], ieee80211_state_name[vap->iv_nstate]); return -1; } else if (vap->iv_state != vap->iv_nstate) { #if 0 /* Warn if the previous state hasn't completed. */ IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE, "%s: pending %s -> %s transition lost\n", __func__, ieee80211_state_name[vap->iv_state], ieee80211_state_name[vap->iv_nstate]); #else /* XXX temporarily enable to identify issues */ if_printf(vap->iv_ifp, "%s: pending %s -> %s transition lost\n", __func__, ieee80211_state_name[vap->iv_state], ieee80211_state_name[vap->iv_nstate]); #endif } } nrunning = nscanning = 0; /* XXX can track this state instead of calculating */ TAILQ_FOREACH(vp, &ic->ic_vaps, iv_next) { if (vp != vap) { if (vp->iv_state >= IEEE80211_S_RUN) nrunning++; /* XXX doesn't handle bg scan */ /* NB: CAC+AUTH+ASSOC treated like SCAN */ else if (vp->iv_state > IEEE80211_S_INIT) nscanning++; } } ostate = vap->iv_state; IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE, "%s: %s -> %s (arg %d) (nrunning %d nscanning %d)\n", __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate], arg, nrunning, nscanning); switch (nstate) { case IEEE80211_S_SCAN: if (ostate == IEEE80211_S_INIT) { /* * INIT -> SCAN happens on initial bringup. */ KASSERT(!(nscanning && nrunning), ("%d scanning and %d running", nscanning, nrunning)); if (nscanning) { /* * Someone is scanning, defer our state * change until the work has completed. */ IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE, "%s: defer %s -> %s\n", __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate]); vap->iv_flags_ext |= IEEE80211_FEXT_SCANWAIT; return 0; } if (nrunning) { /* * Someone is operating; just join the channel * they have chosen. */ /* XXX kill arg? */ /* XXX check each opmode, adhoc? */ if (vap->iv_opmode == IEEE80211_M_STA) nstate = IEEE80211_S_SCAN; else nstate = IEEE80211_S_RUN; #ifdef IEEE80211_DEBUG if (nstate != IEEE80211_S_SCAN) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE, "%s: override, now %s -> %s\n", __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate]); } #endif } } break; case IEEE80211_S_RUN: if (vap->iv_opmode == IEEE80211_M_WDS && (vap->iv_flags_ext & IEEE80211_FEXT_WDSLEGACY) && nscanning) { /* * Legacy WDS with someone else scanning; don't * go online until that completes as we should * follow the other vap to the channel they choose. */ IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE, "%s: defer %s -> %s (legacy WDS)\n", __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate]); vap->iv_flags_ext |= IEEE80211_FEXT_SCANWAIT; return 0; } if (vap->iv_opmode == IEEE80211_M_HOSTAP && IEEE80211_IS_CHAN_DFS(ic->ic_bsschan) && (vap->iv_flags_ext & IEEE80211_FEXT_DFS) && !IEEE80211_IS_CHAN_CACDONE(ic->ic_bsschan)) { /* * This is a DFS channel, transition to CAC state * instead of RUN. This allows us to initiate * Channel Availability Check (CAC) as specified * by 11h/DFS. */ nstate = IEEE80211_S_CAC; IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE, "%s: override %s -> %s (DFS)\n", __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate]); } break; case IEEE80211_S_INIT: /* cancel any scan in progress */ ieee80211_cancel_scan(vap); if (ostate == IEEE80211_S_INIT ) { /* XXX don't believe this */ /* INIT -> INIT. nothing to do */ vap->iv_flags_ext &= ~IEEE80211_FEXT_SCANWAIT; } /* fall thru... 
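 * into the default case: INIT is handed to the common deferral
 * path below just like every other target state.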
*/ default: break; } /* defer the state change to a thread */ vap->iv_nstate = nstate; vap->iv_nstate_arg = arg; vap->iv_flags_ext |= IEEE80211_FEXT_STATEWAIT; ieee80211_runtask(ic, &vap->iv_nstate_task); return EINPROGRESS; } int ieee80211_new_state(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct ieee80211com *ic = vap->iv_ic; int rc; IEEE80211_LOCK(ic); rc = ieee80211_new_state_locked(vap, nstate, arg); IEEE80211_UNLOCK(ic); return rc; } diff --git a/sys/net80211/ieee80211_proto.h b/sys/net80211/ieee80211_proto.h index e9b5157716d4..3bbeccfd0547 100644 --- a/sys/net80211/ieee80211_proto.h +++ b/sys/net80211/ieee80211_proto.h @@ -1,466 +1,466 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2001 Atsushi Onoe * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NET80211_IEEE80211_PROTO_H_ #define _NET80211_IEEE80211_PROTO_H_ /* * 802.11 protocol implementation definitions. */ enum ieee80211_state { IEEE80211_S_INIT = 0, /* default state */ IEEE80211_S_SCAN = 1, /* scanning */ IEEE80211_S_AUTH = 2, /* try to authenticate */ IEEE80211_S_ASSOC = 3, /* try to assoc */ IEEE80211_S_CAC = 4, /* doing channel availability check */ IEEE80211_S_RUN = 5, /* operational (e.g. 
associated) */ IEEE80211_S_CSA = 6, /* channel switch announce pending */ IEEE80211_S_SLEEP = 7, /* power save */ }; #define IEEE80211_S_MAX (IEEE80211_S_SLEEP+1) #define IEEE80211_SEND_MGMT(_ni,_type,_arg) \ ((*(_ni)->ni_ic->ic_send_mgmt)(_ni, _type, _arg)) extern const char *mgt_subtype_name[]; extern const char *ctl_subtype_name[]; extern const char *ieee80211_phymode_name[IEEE80211_MODE_MAX]; extern const int ieee80211_opcap[IEEE80211_OPMODE_MAX]; static __inline const char * ieee80211_mgt_subtype_name(uint8_t subtype) { return mgt_subtype_name[(subtype & IEEE80211_FC0_SUBTYPE_MASK) >> IEEE80211_FC0_SUBTYPE_SHIFT]; } static __inline const char * ieee80211_ctl_subtype_name(uint8_t subtype) { return ctl_subtype_name[(subtype & IEEE80211_FC0_SUBTYPE_MASK) >> IEEE80211_FC0_SUBTYPE_SHIFT]; } const char *ieee80211_reason_to_string(uint16_t); void ieee80211_proto_attach(struct ieee80211com *); void ieee80211_proto_detach(struct ieee80211com *); void ieee80211_proto_vattach(struct ieee80211vap *); void ieee80211_proto_vdetach(struct ieee80211vap *); void ieee80211_promisc(struct ieee80211vap *, bool); void ieee80211_allmulti(struct ieee80211vap *, bool); void ieee80211_syncflag(struct ieee80211vap *, int flag); void ieee80211_syncflag_ht(struct ieee80211vap *, int flag); void ieee80211_syncflag_vht(struct ieee80211vap *, int flag); void ieee80211_syncflag_ext(struct ieee80211vap *, int flag); #define ieee80211_input(ni, m, rssi, nf) \ ((ni)->ni_vap->iv_input(ni, m, NULL, rssi, nf)) int ieee80211_input_all(struct ieee80211com *, struct mbuf *, int, int); int ieee80211_input_mimo(struct ieee80211_node *, struct mbuf *); int ieee80211_input_mimo_all(struct ieee80211com *, struct mbuf *); struct ieee80211_bpf_params; int ieee80211_mgmt_output(struct ieee80211_node *, struct mbuf *, int, struct ieee80211_bpf_params *); int ieee80211_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); int ieee80211_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *ro); int ieee80211_vap_pkt_send_dest(struct ieee80211vap *, struct mbuf *, struct ieee80211_node *); int ieee80211_raw_output(struct ieee80211vap *, struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); void ieee80211_send_setup(struct ieee80211_node *, struct mbuf *, int, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); int ieee80211_vap_transmit(struct ifnet *ifp, struct mbuf *m); void ieee80211_vap_qflush(struct ifnet *ifp); int ieee80211_send_nulldata(struct ieee80211_node *); int ieee80211_classify(struct ieee80211_node *, struct mbuf *m); struct mbuf *ieee80211_mbuf_adjust(struct ieee80211vap *, int, struct ieee80211_key *, struct mbuf *); struct mbuf *ieee80211_encap(struct ieee80211vap *, struct ieee80211_node *, struct mbuf *); void ieee80211_free_mbuf(struct mbuf *); int ieee80211_send_mgmt(struct ieee80211_node *, int, int); struct ieee80211_appie; int ieee80211_probereq_ie(struct ieee80211vap *, struct ieee80211com *, uint8_t **, uint32_t *, const uint8_t *, size_t, bool); int ieee80211_send_probereq(struct ieee80211_node *ni, const uint8_t sa[IEEE80211_ADDR_LEN], const uint8_t da[IEEE80211_ADDR_LEN], const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t *ssid, size_t ssidlen); struct mbuf * ieee80211_ff_encap1(struct ieee80211vap *, struct mbuf *, const struct ether_header *); void ieee80211_tx_complete(struct ieee80211_node *, struct mbuf *, int); /* * The formation of ProbeResponse frames 
requires guidance to * deal with legacy clients. When the client is identified as * "legacy 11b" ieee80211_send_proberesp is passed this token. */ #define IEEE80211_SEND_LEGACY_11B 0x1 /* legacy 11b client */ #define IEEE80211_SEND_LEGACY_11 0x2 /* other legacy client */ #define IEEE80211_SEND_LEGACY 0x3 /* any legacy client */ struct mbuf *ieee80211_alloc_proberesp(struct ieee80211_node *, int); int ieee80211_send_proberesp(struct ieee80211vap *, const uint8_t da[IEEE80211_ADDR_LEN], int); struct mbuf *ieee80211_alloc_rts(struct ieee80211com *ic, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN], uint16_t); struct mbuf *ieee80211_alloc_cts(struct ieee80211com *, const uint8_t [IEEE80211_ADDR_LEN], uint16_t); struct mbuf *ieee80211_alloc_prot(struct ieee80211_node *, const struct mbuf *, uint8_t, int); uint8_t *ieee80211_add_rates(uint8_t *, const struct ieee80211_rateset *); uint8_t *ieee80211_add_xrates(uint8_t *, const struct ieee80211_rateset *); uint8_t *ieee80211_add_ssid(uint8_t *, const uint8_t *, u_int); uint8_t *ieee80211_add_wpa(uint8_t *, const struct ieee80211vap *); uint8_t *ieee80211_add_rsn(uint8_t *, const struct ieee80211vap *); uint8_t *ieee80211_add_qos(uint8_t *, const struct ieee80211_node *); uint16_t ieee80211_getcapinfo(struct ieee80211vap *, struct ieee80211_channel *); struct ieee80211_wme_state; uint8_t * ieee80211_add_wme_info(uint8_t *frm, struct ieee80211_wme_state *wme, struct ieee80211_node *ni); void ieee80211_vap_reset_erp(struct ieee80211vap *); void ieee80211_reset_erp(struct ieee80211com *); void ieee80211_vap_set_shortslottime(struct ieee80211vap *, int onoff); int ieee80211_iserp_rateset(const struct ieee80211_rateset *); void ieee80211_setbasicrates(struct ieee80211_rateset *, enum ieee80211_phymode); void ieee80211_addbasicrates(struct ieee80211_rateset *, enum ieee80211_phymode); /* * Return the size of the 802.11 header for a management or data frame. */ static __inline int ieee80211_hdrsize(const void *data) { const struct ieee80211_frame *wh = data; int size = sizeof(struct ieee80211_frame); /* NB: we don't handle control frames */ KASSERT((wh->i_fc[0]&IEEE80211_FC0_TYPE_MASK) != IEEE80211_FC0_TYPE_CTL, ("%s: control frame", __func__)); if (IEEE80211_IS_DSTODS(wh)) size += IEEE80211_ADDR_LEN; if (IEEE80211_QOS_HAS_SEQ(wh)) size += sizeof(uint16_t); return size; } /* * Like ieee80211_hdrsize, but handles any type of frame. */ static __inline int ieee80211_anyhdrsize(const void *data) { const struct ieee80211_frame *wh = data; if ((wh->i_fc[0]&IEEE80211_FC0_TYPE_MASK) == IEEE80211_FC0_TYPE_CTL) { switch (wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) { case IEEE80211_FC0_SUBTYPE_CTS: case IEEE80211_FC0_SUBTYPE_ACK: return sizeof(struct ieee80211_frame_ack); case IEEE80211_FC0_SUBTYPE_BAR: return sizeof(struct ieee80211_frame_bar); } return sizeof(struct ieee80211_frame_min); } else return ieee80211_hdrsize(data); } /* * Template for an in-kernel authenticator. Authenticators * register with the protocol code and are typically loaded * as separate modules as needed. One special authenticator * is xauth; it intercepts requests so that protocols like * WPA can be handled in user space. 
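 *
 * A loadable authenticator typically registers itself from its module
 * event handler (sketch; "xauth_ia" is a hypothetical instance):
 *
 *	ieee80211_authenticator_register(IEEE80211_AUTH_8021X, &xauth_ia);
 *
 * and calls ieee80211_authenticator_unregister() on unload.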
*/ struct ieee80211_authenticator { const char *ia_name; /* printable name */ int (*ia_attach)(struct ieee80211vap *); void (*ia_detach)(struct ieee80211vap *); void (*ia_node_join)(struct ieee80211_node *); void (*ia_node_leave)(struct ieee80211_node *); }; void ieee80211_authenticator_register(int type, const struct ieee80211_authenticator *); void ieee80211_authenticator_unregister(int type); const struct ieee80211_authenticator *ieee80211_authenticator_get(int auth); struct ieee80211req; /* * Template for a MAC ACL policy module. Such modules * register with the protocol code and are passed the sender's * address of each received auth frame for validation. */ struct ieee80211_aclator { const char *iac_name; /* printable name */ int (*iac_attach)(struct ieee80211vap *); void (*iac_detach)(struct ieee80211vap *); int (*iac_check)(struct ieee80211vap *, const struct ieee80211_frame *wh); int (*iac_add)(struct ieee80211vap *, const uint8_t mac[IEEE80211_ADDR_LEN]); int (*iac_remove)(struct ieee80211vap *, const uint8_t mac[IEEE80211_ADDR_LEN]); int (*iac_flush)(struct ieee80211vap *); int (*iac_setpolicy)(struct ieee80211vap *, int); int (*iac_getpolicy)(struct ieee80211vap *); int (*iac_setioctl)(struct ieee80211vap *, struct ieee80211req *); int (*iac_getioctl)(struct ieee80211vap *, struct ieee80211req *); }; void ieee80211_aclator_register(const struct ieee80211_aclator *); void ieee80211_aclator_unregister(const struct ieee80211_aclator *); const struct ieee80211_aclator *ieee80211_aclator_get(const char *name); /* flags for ieee80211_fix_rate() */ #define IEEE80211_F_DOSORT 0x00000001 /* sort rate list */ #define IEEE80211_F_DOFRATE 0x00000002 /* use fixed legacy rate */ #define IEEE80211_F_DONEGO 0x00000004 /* calc negotiated rate */ #define IEEE80211_F_DODEL 0x00000008 /* delete ignore rate */ #define IEEE80211_F_DOBRS 0x00000010 /* check basic rate set */ #define IEEE80211_F_JOIN 0x00000020 /* sta joining our bss */ #define IEEE80211_F_DOFMCS 0x00000040 /* use fixed HT rate */ int ieee80211_fix_rate(struct ieee80211_node *, struct ieee80211_rateset *, int); /* * WME/WMM support.
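 *
 * wmep_txopLimit below is stored in units of 32 microseconds and is
 * converted by the IEEE80211_TXOP_TO_US()/IEEE80211_US_TO_TXOP()
 * macros (a shift by 5); e.g. the standard WMM TXOP limit of 94 units
 * for the video AC corresponds to 94 << 5 == 3008 us.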
*/ struct wmeParams { uint8_t wmep_acm; uint8_t wmep_aifsn; uint8_t wmep_logcwmin; /* log2(cwmin) */ uint8_t wmep_logcwmax; /* log2(cwmax) */ uint8_t wmep_txopLimit; uint8_t wmep_noackPolicy; /* 0 (ack), 1 (no ack) */ }; #define IEEE80211_TXOP_TO_US(_txop) ((_txop)<<5) #define IEEE80211_US_TO_TXOP(_us) ((_us)>>5) struct chanAccParams { uint8_t cap_info; /* version of the current set */ struct wmeParams cap_wmeParams[WME_NUM_AC]; }; struct ieee80211_wme_state { u_int wme_flags; #define WME_F_AGGRMODE 0x00000001 /* STATUS: WME aggressive mode */ u_int wme_hipri_traffic; /* VI/VO frames in beacon interval */ u_int wme_hipri_switch_thresh;/* aggressive mode switch thresh */ u_int wme_hipri_switch_hysteresis;/* aggressive mode switch hysteresis */ struct wmeParams wme_params[WME_NUM_AC]; /* from assoc resp for each AC */ struct chanAccParams wme_wmeChanParams; /* WME params applied to self */ struct chanAccParams wme_wmeBssChanParams;/* WME params bcast to stations */ struct chanAccParams wme_chanParams; /* params applied to self */ struct chanAccParams wme_bssChanParams; /* params bcast to stations */ int (*wme_update)(struct ieee80211com *); }; void ieee80211_wme_initparams(struct ieee80211vap *); void ieee80211_wme_updateparams(struct ieee80211vap *); void ieee80211_wme_updateparams_locked(struct ieee80211vap *); void ieee80211_wme_vap_getparams(struct ieee80211vap *vap, struct chanAccParams *); void ieee80211_wme_ic_getparams(struct ieee80211com *ic, struct chanAccParams *); int ieee80211_wme_vap_ac_is_noack(struct ieee80211vap *vap, int ac); void ieee80211_vap_update_preamble(struct ieee80211vap *vap); void ieee80211_vap_update_erp_protmode(struct ieee80211vap *vap); void ieee80211_vap_update_ht_protmode(struct ieee80211vap *vap); /* * Return pointer to the QoS field from a Qos frame. */ static __inline uint8_t * ieee80211_getqos(void *data) { struct ieee80211_frame *wh = data; KASSERT(IEEE80211_QOS_HAS_SEQ(wh), ("QoS field is absent!")); if (IEEE80211_IS_DSTODS(wh)) return (((struct ieee80211_qosframe_addr4 *)wh)->i_qos); else return (((struct ieee80211_qosframe *)wh)->i_qos); } /* * Return the WME TID from a QoS frame. If no TID * is present return the index for the "non-QoS" entry. 
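 * For example, a QoS data frame whose first QoS Control octet is 0x05
 * yields TID 5 (0x05 & IEEE80211_QOS_TID), while any non-QoS frame
 * maps to the IEEE80211_NONQOS_TID slot.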
*/ static __inline uint8_t ieee80211_gettid(const struct ieee80211_frame *wh) { uint8_t tid; if (IEEE80211_QOS_HAS_SEQ(wh)) { if (IEEE80211_IS_DSTODS(wh)) tid = ((const struct ieee80211_qosframe_addr4 *)wh)-> i_qos[0]; else tid = ((const struct ieee80211_qosframe *)wh)->i_qos[0]; tid &= IEEE80211_QOS_TID; } else tid = IEEE80211_NONQOS_TID; return tid; } void ieee80211_waitfor_parent(struct ieee80211com *); void ieee80211_start_locked(struct ieee80211vap *); void ieee80211_init(void *); void ieee80211_start_all(struct ieee80211com *); void ieee80211_stop_locked(struct ieee80211vap *); void ieee80211_stop(struct ieee80211vap *); void ieee80211_stop_all(struct ieee80211com *); void ieee80211_suspend_all(struct ieee80211com *); void ieee80211_resume_all(struct ieee80211com *); void ieee80211_restart_all(struct ieee80211com *); void ieee80211_dturbo_switch(struct ieee80211vap *, int newflags); void ieee80211_swbmiss(void *arg); void ieee80211_beacon_miss(struct ieee80211com *); int ieee80211_new_state(struct ieee80211vap *, enum ieee80211_state, int); int ieee80211_new_state_locked(struct ieee80211vap *, enum ieee80211_state, int); void ieee80211_print_essid(const uint8_t *, int); void ieee80211_dump_pkt(struct ieee80211com *, const uint8_t *, int, int, int); extern const char *ieee80211_opmode_name[]; extern const char *ieee80211_state_name[IEEE80211_S_MAX]; extern const char *ieee80211_wme_acnames[]; /* * Beacon frames constructed by ieee80211_beacon_alloc * have the following structure filled in so drivers * can update the frame later w/ minimal overhead. */ struct ieee80211_beacon_offsets { uint8_t bo_flags[4]; /* update/state flags */ uint16_t *bo_caps; /* capabilities */ uint8_t *bo_cfp; /* start of CFParms element */ uint8_t *bo_tim; /* start of atim/dtim */ uint8_t *bo_wme; /* start of WME parameters */ uint8_t *bo_tdma; /* start of TDMA parameters */ uint8_t *bo_tim_trailer;/* start of fixed-size trailer */ uint16_t bo_tim_len; /* atim/dtim length in bytes */ uint16_t bo_tim_trailer_len;/* tim trailer length in bytes */ uint8_t *bo_erp; /* start of ERP element */ uint8_t *bo_htinfo; /* start of HT info element */ uint8_t *bo_ath; /* start of ATH parameters */ uint8_t *bo_appie; /* start of AppIE element */ uint16_t bo_appie_len; /* AppIE length in bytes */ uint16_t bo_csa_trailer_len; uint8_t *bo_csa; /* start of CSA element */ uint8_t *bo_quiet; /* start of Quiet element */ uint8_t *bo_meshconf; /* start of MESHCONF element */ uint8_t *bo_vhtinfo; /* start of VHT info element (XXX VHTCAP?) */ uint8_t *bo_spare[2]; }; struct mbuf *ieee80211_beacon_alloc(struct ieee80211_node *); /* * Beacon frame updates are signaled through calls to iv_update_beacon * with one of the IEEE80211_BEACON_* tokens defined below. For devices * that construct beacon frames on the host this can trigger a rebuild * or defer the processing. For devices that offload beacon frame * handling this callback can be used to signal a rebuild. The bo_flags * array in the ieee80211_beacon_offsets structure is intended to record * deferred processing requirements; ieee80211_beacon_update uses the * state to optimize work. Since this structure is owned by the driver * and not visible to the 802.11 layer drivers must supply an iv_update_beacon * callback that marks the flag bits and schedules (as necessary) an update. 
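 *
 * A minimal host-based callback might look like this (sketch; the
 * softc, taskqueue and beacon task wiring are hypothetical driver
 * details):
 *
 *	static void
 *	mydrv_update_beacon(struct ieee80211vap *vap, int item)
 *	{
 *		setbit(vap->iv_bcn_off.bo_flags, item);
 *		taskqueue_enqueue(sc->sc_tq, &sc->sc_beacon_task);
 *	}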
*/ enum { IEEE80211_BEACON_CAPS = 0, /* capabilities */ IEEE80211_BEACON_TIM = 1, /* DTIM/ATIM */ IEEE80211_BEACON_WME = 2, IEEE80211_BEACON_ERP = 3, /* Extended Rate Phy */ IEEE80211_BEACON_HTINFO = 4, /* HT Information */ IEEE80211_BEACON_APPIE = 5, /* Application IE's */ IEEE80211_BEACON_CFP = 6, /* CFParms */ IEEE80211_BEACON_CSA = 7, /* Channel Switch Announcement */ IEEE80211_BEACON_TDMA = 9, /* TDMA Info */ IEEE80211_BEACON_ATH = 10, /* ATH parameters */ IEEE80211_BEACON_MESHCONF = 11, /* Mesh Configuration */ IEEE80211_BEACON_QUIET = 12, /* Quiet time IE */ IEEE80211_BEACON_VHTINFO = 13, /* VHT information */ }; int ieee80211_beacon_update(struct ieee80211_node *, struct mbuf *, int mcast); void ieee80211_csa_startswitch(struct ieee80211com *, struct ieee80211_channel *, int mode, int count); void ieee80211_csa_completeswitch(struct ieee80211com *); void ieee80211_csa_cancelswitch(struct ieee80211com *); void ieee80211_cac_completeswitch(struct ieee80211vap *); /* * Notification methods called from the 802.11 state machine. * Note that while these are defined here, their implementation * is OS-specific. */ void ieee80211_notify_node_join(struct ieee80211_node *, int newassoc); void ieee80211_notify_node_leave(struct ieee80211_node *); void ieee80211_notify_scan_done(struct ieee80211vap *); void ieee80211_notify_wds_discover(struct ieee80211_node *); void ieee80211_notify_csa(struct ieee80211com *, const struct ieee80211_channel *, int mode, int count); void ieee80211_notify_radar(struct ieee80211com *, const struct ieee80211_channel *); enum ieee80211_notify_cac_event { IEEE80211_NOTIFY_CAC_START = 0, /* CAC timer started */ IEEE80211_NOTIFY_CAC_STOP = 1, /* CAC intentionally stopped */ IEEE80211_NOTIFY_CAC_RADAR = 2, /* CAC stopped due to radar detection */ IEEE80211_NOTIFY_CAC_EXPIRE = 3, /* CAC expired w/o radar */ }; void ieee80211_notify_cac(struct ieee80211com *, const struct ieee80211_channel *, enum ieee80211_notify_cac_event); void ieee80211_notify_node_deauth(struct ieee80211_node *); void ieee80211_notify_node_auth(struct ieee80211_node *); void ieee80211_notify_country(struct ieee80211vap *, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t cc[2]); void ieee80211_notify_radio(struct ieee80211com *, int); -void ieee80211_notify_ifnet_change(struct ieee80211vap *); +void ieee80211_notify_ifnet_change(struct ieee80211vap *, int); #endif /* _NET80211_IEEE80211_PROTO_H_ */ diff --git a/sys/netlink/netlink_route.c b/sys/netlink/netlink_route.c index cc1a0cc6db8d..0622656715c4 100644 --- a/sys/netlink/netlink_route.c +++ b/sys/netlink/netlink_route.c @@ -1,136 +1,139 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_route_core #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_DEBUG); #define HANDLER_MAX_NUM (NL_RTM_MAX + 10) static const struct rtnl_cmd_handler *rtnl_handler[HANDLER_MAX_NUM] = {}; bool rtnl_register_messages(const struct rtnl_cmd_handler *handlers, int count) { for (int i = 0; i < count; i++) { if (handlers[i].cmd >= HANDLER_MAX_NUM) return (false); MPASS(rtnl_handler[handlers[i].cmd] == NULL); } for (int i = 0; i < count; i++) rtnl_handler[handlers[i].cmd] = &handlers[i]; return (true); } /* * Handler called by netlink subsystem when matching netlink message is received */ static int rtnl_handle_message(struct nlmsghdr *hdr, struct nl_pstate *npt) { const struct rtnl_cmd_handler *cmd; struct epoch_tracker et; struct nlpcb *nlp = npt->nlp; int error = 0; if (__predict_false(hdr->nlmsg_type >= HANDLER_MAX_NUM)) { NLMSG_REPORT_ERR_MSG(npt, "unknown message type: %d", hdr->nlmsg_type); return (ENOTSUP); } cmd = rtnl_handler[hdr->nlmsg_type]; if (__predict_false(cmd == NULL)) { NLMSG_REPORT_ERR_MSG(npt, "unknown message type: %d", hdr->nlmsg_type); return (ENOTSUP); } NLP_LOG(LOG_DEBUG2, nlp, "received msg %s(%d) len %d", cmd->name, hdr->nlmsg_type, hdr->nlmsg_len); if (cmd->priv != 0 && !nlp_has_priv(nlp, cmd->priv)) { NLP_LOG(LOG_DEBUG2, nlp, "priv %d check failed for msg %s", cmd->priv, cmd->name); return (EPERM); } else if (cmd->priv != 0) NLP_LOG(LOG_DEBUG3, nlp, "priv %d check passed for msg %s", cmd->priv, cmd->name); bool need_epoch = !(cmd->flags & RTNL_F_NOEPOCH); if (need_epoch) NET_EPOCH_ENTER(et); error = cmd->cb(hdr, nlp, npt); if (need_epoch) NET_EPOCH_EXIT(et); NLP_LOG(LOG_DEBUG3, nlp, "message %s -> error %d", cmd->name, error); return (error); } -static struct rtbridge nlbridge = { .route_f = rtnl_handle_route_event }; +static struct rtbridge nlbridge = { + .route_f = rtnl_handle_route_event, + .ifmsg_f = rtnl_handle_ifnet_event, +}; static struct rtbridge *nlbridge_orig_p; static void rtnl_load(void *u __unused) { NL_LOG(LOG_NOTICE, "rtnl loading"); nlbridge_orig_p = netlink_callback_p; netlink_callback_p = &nlbridge; rtnl_neighs_init(); rtnl_ifaces_init(); rtnl_nexthops_init(); rtnl_routes_init(); netlink_register_proto(NETLINK_ROUTE, "NETLINK_ROUTE", rtnl_handle_message); } SYSINIT(rtnl_load, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtnl_load, NULL); static void rtnl_unload(void *u __unused) { netlink_callback_p = nlbridge_orig_p; rtnl_ifaces_destroy(); rtnl_neighs_destroy(); /* Wait till all consumers read nlbridge data */ epoch_wait_preempt(net_epoch_preempt); } SYSUNINIT(rtnl_unload, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtnl_unload, NULL); diff --git a/sys/netlink/route/iface.c b/sys/netlink/route/iface.c index 5ffe11cc7e80..579869e9662c 100644 --- a/sys/netlink/route/iface.c +++ b/sys/netlink/route/iface.c @@ -1,881 +1,900 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * 
Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* scope deembedding */ #define DEBUG_MOD_NAME nl_iface #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_DEBUG); struct netlink_walkargs { struct nl_writer *nw; struct nlmsghdr hdr; struct nlpcb *so; uint32_t fibnum; int family; int error; int count; int dumped; }; -static eventhandler_tag ifdetach_event, ifattach_event, ifaddr_event; +static eventhandler_tag ifdetach_event, ifattach_event, iflink_event, ifaddr_event; static SLIST_HEAD(, nl_cloner) nl_cloners = SLIST_HEAD_INITIALIZER(nl_cloners); static struct sx rtnl_cloner_lock; SX_SYSINIT(rtnl_cloner_lock, &rtnl_cloner_lock, "rtnl cloner lock"); /* * RTM_GETLINK request * sendto(3, {{len=32, type=RTM_GETLINK, flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0}, * {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32 * * Reply: * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0}, {{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"} [ {{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"}, {{nla_len=8, nla_type=IFLA_TXQLEN}, 1000}, {{nla_len=5, nla_type=IFLA_OPERSTATE}, 6}, {{nla_len=5, nla_type=IFLA_LINKMODE}, 0}, {{nla_len=8, nla_type=IFLA_MTU}, 1500}, {{nla_len=8, nla_type=IFLA_MIN_MTU}, 68}, {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000}, {{nla_len=8, nla_type=IFLA_GROUP}, 0}, {{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0}, {{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1}, {{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535}, {{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536}, {{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1}, {{nla_len=5, nla_type=IFLA_CARRIER}, 1}, {{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"}, {{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2}, {{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0}, {{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1}, {{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1}, */ struct if_state { 
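	/* Cached link state, reported as IFLA_OPERSTATE / IFLA_CARRIER */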
uint8_t ifla_operstate; uint8_t ifla_carrier; }; static void get_operstate_ether(struct ifnet *ifp, struct if_state *pstate) { struct ifmediareq ifmr = {}; int error; error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (void *)&ifmr); if (error != 0) { NL_LOG(LOG_DEBUG, "error calling SIOCGIFMEDIA on %s: %d", if_name(ifp), error); return; } switch (IFM_TYPE(ifmr.ifm_active)) { case IFM_ETHER: if (ifmr.ifm_status & IFM_ACTIVE) { pstate->ifla_carrier = 1; if (ifp->if_flags & IFF_MONITOR) pstate->ifla_operstate = IF_OPER_DORMANT; else pstate->ifla_operstate = IF_OPER_UP; } else pstate->ifla_operstate = IF_OPER_DOWN; } } static bool get_stats(struct nl_writer *nw, struct ifnet *ifp) { struct rtnl_link_stats64 *stats; int nla_len = sizeof(struct nlattr) + sizeof(*stats); struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); if (nla == NULL) return (false); nla->nla_type = IFLA_STATS64; nla->nla_len = nla_len; stats = (struct rtnl_link_stats64 *)(nla + 1); stats->rx_packets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); stats->tx_packets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); stats->rx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES); stats->tx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES); stats->rx_errors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS); stats->tx_errors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS); stats->rx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS); stats->tx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS); stats->multicast = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS); stats->rx_nohandler = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO); return (true); } static void get_operstate(struct ifnet *ifp, struct if_state *pstate) { pstate->ifla_operstate = IF_OPER_UNKNOWN; pstate->ifla_carrier = 0; /* no carrier */ switch (ifp->if_type) { case IFT_ETHER: get_operstate_ether(ifp, pstate); break; case IFT_LOOP: if (ifp->if_flags & IFF_UP) { pstate->ifla_operstate = IF_OPER_UP; pstate->ifla_carrier = 1; } else pstate->ifla_operstate = IF_OPER_DOWN; break; } } static unsigned ifp_flags_to_netlink(const struct ifnet *ifp) { return (ifp->if_flags | ifp->if_drv_flags); } #define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen)) static bool dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa) { uint32_t addr_len = 0; const void *addr_data = NULL; #ifdef INET6 struct in6_addr addr6; #endif if (sa == NULL) return (true); switch (sa->sa_family) { #ifdef INET case AF_INET: addr_len = sizeof(struct in_addr); addr_data = &((const struct sockaddr_in *)sa)->sin_addr; break; #endif #ifdef INET6 case AF_INET6: in6_splitscope(&((const struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len); addr_len = sizeof(struct in6_addr); addr_data = &addr6; break; #endif case AF_LINK: addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen; addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa); break; default: NL_LOG(LOG_DEBUG, "unsupported family: %d, skipping", sa->sa_family); return (true); } return (nlattr_add(nw, attr, addr_len, addr_data)); } /* * Dumps interface state, properties and metrics. * @nw: message writer * @ifp: target interface * @hdr: template header + * @if_flags_mask: changed if_[drv]_flags bitmask * * This function is called without epoch and MAY sleep. 
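 * @if_flags_mask is reported verbatim in ifi_change: dumps pass 0
 * ("nothing specific changed"), while event notifications pass the
 * changed IFF_* bits so that listeners can filter on them.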
*/ static bool -dump_iface(struct nl_writer *nw, struct ifnet *ifp, const struct nlmsghdr *hdr) +dump_iface(struct nl_writer *nw, struct ifnet *ifp, const struct nlmsghdr *hdr, + int if_flags_mask) { struct ifinfomsg *ifinfo; NL_LOG(LOG_DEBUG3, "dumping interface %s data", if_name(ifp)); if (!nlmsg_reply(nw, hdr, sizeof(struct ifinfomsg))) goto enomem; ifinfo = nlmsg_reserve_object(nw, struct ifinfomsg); ifinfo->ifi_family = AF_UNSPEC; ifinfo->__ifi_pad = 0; ifinfo->ifi_type = ifp->if_type; ifinfo->ifi_index = ifp->if_index; ifinfo->ifi_flags = ifp_flags_to_netlink(ifp); - ifinfo->ifi_change = 0; - - nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp)); + ifinfo->ifi_change = if_flags_mask; struct if_state ifs = {}; get_operstate(ifp, &ifs); + if (ifs.ifla_operstate == IF_OPER_UP) + ifinfo->ifi_flags |= IFF_LOWER_UP; + + nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp)); nlattr_add_u8(nw, IFLA_OPERSTATE, ifs.ifla_operstate); nlattr_add_u8(nw, IFLA_CARRIER, ifs.ifla_carrier); /* nlattr_add_u8(nw, IFLA_PROTO_DOWN, val); nlattr_add_u8(nw, IFLA_LINKMODE, val); */ if ((ifp->if_addr != NULL)) { dump_sa(nw, IFLA_ADDRESS, ifp->if_addr->ifa_addr); } if ((ifp->if_broadcastaddr != NULL)) { nlattr_add(nw, IFLA_BROADCAST, ifp->if_addrlen, ifp->if_broadcastaddr); } nlattr_add_u32(nw, IFLA_MTU, ifp->if_mtu); /* nlattr_add_u32(nw, IFLA_MIN_MTU, 60); nlattr_add_u32(nw, IFLA_MAX_MTU, 9000); nlattr_add_u32(nw, IFLA_GROUP, 0); */ get_stats(nw, ifp); uint32_t val = (ifp->if_flags & IFF_PROMISC) != 0; nlattr_add_u32(nw, IFLA_PROMISCUITY, val); if (nlmsg_end(nw)) return (true); enomem: NL_LOG(LOG_DEBUG, "unable to dump interface %s state (ENOMEM)", if_name(ifp)); nlmsg_abort(nw); return (false); } static bool check_ifmsg(void *hdr, struct nl_pstate *npt) { struct ifinfomsg *ifm = hdr; if (ifm->__ifi_pad != 0 || ifm->ifi_type != 0 || ifm->ifi_flags != 0 || ifm->ifi_change != 0) { nlmsg_report_err_msg(npt, "strict checking: non-zero values in ifinfomsg header"); return (false); } return (true); } #define _IN(_field) offsetof(struct ifinfomsg, _field) #define _OUT(_field) offsetof(struct nl_parsed_link, _field) static const struct nlfield_parser nlf_p_if[] = { { .off_in = _IN(ifi_type), .off_out = _OUT(ifi_type), .cb = nlf_get_u16 }, { .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = nlf_get_u32 }, }; static const struct nlattr_parser nla_p_linfo[] = { { .type = IFLA_INFO_KIND, .off = _OUT(ifla_cloner), .cb = nlattr_get_stringn }, { .type = IFLA_INFO_DATA, .off = _OUT(ifla_idata), .cb = nlattr_get_nla }, }; NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo); static const struct nlattr_parser nla_p_if[] = { { .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string }, { .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 }, { .type = IFLA_LINK, .off = _OUT(ifi_index), .cb = nlattr_get_uint32 }, { .type = IFLA_LINKINFO, .arg = &linfo_parser, .cb = nlattr_get_nested }, { .type = IFLA_GROUP, .off = _OUT(ifla_group), .cb = nlattr_get_string }, { .type = IFLA_ALT_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string }, }; #undef _IN #undef _OUT NL_DECLARE_STRICT_PARSER(ifmsg_parser, struct ifinfomsg, check_ifmsg, nlf_p_if, nla_p_if); static bool match_iface(struct nl_parsed_link *attrs, struct ifnet *ifp) { if (attrs->ifi_index != 0 && attrs->ifi_index != ifp->if_index) return (false); if (attrs->ifi_type != 0 && attrs->ifi_type != ifp->if_type) return (false); if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp))) return (false); /* TODO: add group match */
return (true); } /* * {nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1662842818, nlmsg_pid=0}, * {ifi_family=AF_PACKET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, * [ * [{nla_len=10, nla_type=IFLA_IFNAME}, "vnet9"], * [{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF] * ] */ static int rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct epoch_tracker et; struct ifnet *ifp; int error = 0; struct nl_parsed_link attrs = {}; error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs); if (error != 0) return (error); struct netlink_walkargs wa = { .so = nlp, .nw = npt->nw, .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI, .hdr.nlmsg_type = NL_RTM_NEWLINK, }; /* Fast track for an interface w/ explicit index match */ if (attrs.ifi_index != 0) { NET_EPOCH_ENTER(et); ifp = ifnet_byindex_ref(attrs.ifi_index); NET_EPOCH_EXIT(et); NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u", attrs.ifi_index); if (ifp != NULL) { if (match_iface(&attrs, ifp)) { - if (!dump_iface(wa.nw, ifp, &wa.hdr)) + if (!dump_iface(wa.nw, ifp, &wa.hdr, 0)) error = ENOMEM; } else error = ESRCH; if_rele(ifp); } else error = ESRCH; return (error); } /* * Fetching some link properties requires performing ioctl's that may be blocking. * Address it by saving referenced pointers of the matching links, * exiting from epoch and going through the list one-by-one. */ NL_LOG(LOG_DEBUG2, "Start dump"); struct ifnet **match_array; int offset = 0, base_count = 16; /* start with 128 bytes */ match_array = malloc(base_count * sizeof(void *), M_TEMP, M_NOWAIT); if (match_array == NULL) return (ENOMEM); NLP_LOG(LOG_DEBUG3, nlp, "MATCHING: index=%u type=%d name=%s", attrs.ifi_index, attrs.ifi_type, attrs.ifla_ifname); NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { wa.count++; if (match_iface(&attrs, ifp)) { if (offset < base_count) { if (!if_try_ref(ifp)) continue; match_array[offset++] = ifp; continue; } /* Too many matches, need to reallocate */ struct ifnet **new_array; int sz = base_count * sizeof(void *); base_count *= 2; new_array = malloc(sz * 2, M_TEMP, M_NOWAIT); if (new_array == NULL) { error = ENOMEM; break; } memcpy(new_array, match_array, sz); free(match_array, M_TEMP); match_array = new_array; /* Do not lose the interface that forced the resize */ if (!if_try_ref(ifp)) continue; match_array[offset++] = ifp; } } NET_EPOCH_EXIT(et); NL_LOG(LOG_DEBUG2, "Matched %d interface(s), dumping", offset); for (int i = 0; error == 0 && i < offset; i++) { - if (!dump_iface(wa.nw, match_array[i], &wa.hdr)) + if (!dump_iface(wa.nw, match_array[i], &wa.hdr, 0)) error = ENOMEM; } for (int i = 0; i < offset; i++) if_rele(match_array[i]); free(match_array, M_TEMP); NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped); if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) { NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); return (ENOMEM); } return (error); } /* * sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[ * {nlmsg_len=60, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1662715618, nlmsg_pid=0}, * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, * {nla_len=11, nla_type=IFLA_IFNAME}, "dummy0"], * [ * {nla_len=16, nla_type=IFLA_LINKINFO}, * [ * {nla_len=9, nla_type=IFLA_INFO_KIND}, "dummy"...
* ] * ] */ static int rtnl_handle_dellink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct epoch_tracker et; struct ifnet *ifp; int error; struct nl_parsed_link attrs = {}; error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs); if (error != 0) return (error); NET_EPOCH_ENTER(et); ifp = ifnet_byindex_ref(attrs.ifi_index); NET_EPOCH_EXIT(et); if (ifp == NULL) { NLP_LOG(LOG_DEBUG, nlp, "unable to find interface %u", attrs.ifi_index); return (ENOENT); } NLP_LOG(LOG_DEBUG3, nlp, "mapped ifindex %u to %s", attrs.ifi_index, if_name(ifp)); sx_xlock(&ifnet_detach_sxlock); error = if_clone_destroy(if_name(ifp)); sx_xunlock(&ifnet_detach_sxlock); NLP_LOG(LOG_DEBUG2, nlp, "deleting interface %s returned %d", if_name(ifp), error); if_rele(ifp); return (error); } static int rtnl_handle_newlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct nl_cloner *cloner; int error; struct nl_parsed_link attrs = {}; error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs); if (error != 0) return (error); if (attrs.ifla_ifname == NULL || strlen(attrs.ifla_ifname) == 0) { /* Applications like ip(8) verify RTM_NEWLINK existence * by calling it with empty arguments. Always return an "innocent" * error. */ NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_IFNAME attribute"); return (EPERM); } if (attrs.ifla_cloner == NULL || strlen(attrs.ifla_cloner) == 0) { NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_INFO_KIND attribute"); return (EINVAL); } sx_slock(&rtnl_cloner_lock); SLIST_FOREACH(cloner, &nl_cloners, next) { if (!strcmp(attrs.ifla_cloner, cloner->name)) { error = cloner->create_f(&attrs, nlp, npt); sx_sunlock(&rtnl_cloner_lock); return (error); } } sx_sunlock(&rtnl_cloner_lock); /* TODO: load the cloner module if it is missing & privilege permits */ NLMSG_REPORT_ERR_MSG(npt, "interface type %s not supported", attrs.ifla_cloner); return (ENOTSUP); } /* {ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")}, [ {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")}, {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")}, {{nla_len=7, nla_type=IFA_LABEL}, "lo"}, {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}, {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]}, --- {{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735}, {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")}, [ {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")}, {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}}, {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]}, */ static uint8_t ifa_get_scope(const struct ifaddr *ifa) { const struct sockaddr *sa; uint8_t addr_scope = RT_SCOPE_UNIVERSE; sa = ifa->ifa_addr; switch (sa->sa_family) { #ifdef INET case AF_INET: { struct in_addr addr; addr = ((const struct sockaddr_in *)sa)->sin_addr; if (IN_LOOPBACK(addr.s_addr)) addr_scope = RT_SCOPE_HOST; else if (IN_LINKLOCAL(addr.s_addr)) addr_scope = RT_SCOPE_LINK; break; } #endif #ifdef INET6 case AF_INET6: { const struct in6_addr *addr; addr = &((const struct sockaddr_in6 *)sa)->sin6_addr; if (IN6_IS_ADDR_LOOPBACK(addr)) addr_scope = RT_SCOPE_HOST; else if (IN6_IS_ADDR_LINKLOCAL(addr)) addr_scope = RT_SCOPE_LINK; break; } #endif } return (addr_scope); } static uint8_t inet6_get_plen(const struct
in6_addr *addr) { return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) + bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3])); } static uint8_t get_sa_plen(const struct sockaddr *sa) { #ifdef INET const struct in_addr *paddr; #endif #ifdef INET6 const struct in6_addr *paddr6; #endif if (sa == NULL) return (0); /* no netmask: family unknown, report a zero plen */ switch (sa->sa_family) { #ifdef INET case AF_INET: paddr = &(((const struct sockaddr_in *)sa)->sin_addr); return bitcount32(paddr->s_addr); #endif #ifdef INET6 case AF_INET6: paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr); return inet6_get_plen(paddr6); #endif } return (0); } /* * {'attrs': [('IFA_ADDRESS', '12.0.0.1'), ('IFA_LOCAL', '12.0.0.1'), ('IFA_LABEL', 'eth10'), ('IFA_FLAGS', 128), ('IFA_CACHEINFO', {'ifa_preferred': 4294967295, 'ifa_valid': 4294967295, 'cstamp': 63745746, 'tstamp': 63745746})], */ static bool dump_iface_addr(struct nl_writer *nw, struct ifnet *ifp, struct ifaddr *ifa, const struct nlmsghdr *hdr) { struct ifaddrmsg *ifamsg; struct sockaddr *sa = ifa->ifa_addr; NL_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s", ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp)); if (!nlmsg_reply(nw, hdr, sizeof(struct ifaddrmsg))) goto enomem; ifamsg = nlmsg_reserve_object(nw, struct ifaddrmsg); ifamsg->ifa_family = sa->sa_family; ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask); ifamsg->ifa_flags = 0; // ifa_flags is useless ifamsg->ifa_scope = ifa_get_scope(ifa); ifamsg->ifa_index = ifp->if_index; struct sockaddr *dst_sa = ifa->ifa_dstaddr; if ((dst_sa == NULL) || (dst_sa->sa_family != sa->sa_family)) dst_sa = sa; dump_sa(nw, IFA_ADDRESS, dst_sa); dump_sa(nw, IFA_LOCAL, sa); nlattr_add_string(nw, IFA_LABEL, if_name(ifp)); uint32_t val = 0; // ifa->ifa_flags; nlattr_add_u32(nw, IFA_FLAGS, val); if (nlmsg_end(nw)) return (true); enomem: NL_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s", rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp)); nlmsg_abort(nw); return (false); } static int rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct ifaddr *ifa; struct ifnet *ifp; int error = 0; struct netlink_walkargs wa = { .so = nlp, .nw = npt->nw, .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI, .hdr.nlmsg_type = NL_RTM_NEWADDR, }; NL_LOG(LOG_DEBUG2, "Start dump"); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (wa.family != 0 && wa.family != ifa->ifa_addr->sa_family) continue; if (ifa->ifa_addr->sa_family == AF_LINK) continue; wa.count++; if (!dump_iface_addr(wa.nw, ifp, ifa, &wa.hdr)) { error = ENOMEM; break; } wa.dumped++; } if (error != 0) break; } NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped); if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) { NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); return (ENOMEM); } return (error); } static void rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd) { struct nlmsghdr hdr = {}; struct nl_writer nw = {}; uint32_t group = 0; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: group = RTNLGRP_IPV4_IFADDR; break; #endif #ifdef INET6 case AF_INET6: group = RTNLGRP_IPV6_IFADDR; break; #endif default: NL_LOG(LOG_DEBUG2, "ifa notification for unknown AF: %d", ifa->ifa_addr->sa_family); return; } if (!nl_has_listeners(NETLINK_ROUTE, group)) return; if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE,
NETLINK_ROUTE, group)) { NL_LOG(LOG_DEBUG, "error allocating group writer"); return; } hdr.nlmsg_type = (cmd == RTM_DELETE) ? NL_RTM_DELADDR : NL_RTM_NEWADDR; dump_iface_addr(&nw, ifa->ifa_ifp, ifa, &hdr); nlmsg_flush(&nw); } static void -rtnl_handle_ifattach(void *arg, struct ifnet *ifp) +rtnl_handle_ifevent(struct ifnet *ifp, int nlmsg_type, int if_flags_mask) { - struct nlmsghdr hdr = { .nlmsg_type = NL_RTM_NEWLINK }; + struct nlmsghdr hdr = { .nlmsg_type = nlmsg_type }; struct nl_writer nw = {}; if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK)) return; if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) { NL_LOG(LOG_DEBUG, "error allocating mbuf"); return; } - dump_iface(&nw, ifp, &hdr); + dump_iface(&nw, ifp, &hdr, if_flags_mask); nlmsg_flush(&nw); } +static void +rtnl_handle_ifattach(void *arg, struct ifnet *ifp) +{ + NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp)); + rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0); +} + static void rtnl_handle_ifdetach(void *arg, struct ifnet *ifp) { - struct nlmsghdr hdr = { .nlmsg_type = NL_RTM_DELLINK }; - struct nl_writer nw = {}; + NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp)); + rtnl_handle_ifevent(ifp, NL_RTM_DELLINK, 0); +} - if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK)) - return; +static void +rtnl_handle_iflink(void *arg, struct ifnet *ifp) +{ + NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp)); + rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0); +} - if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) { - NL_LOG(LOG_DEBUG, "error allocating mbuf"); - return; - } - dump_iface(&nw, ifp, &hdr); - nlmsg_flush(&nw); +void +rtnl_handle_ifnet_event(struct ifnet *ifp, int if_flags_mask) +{ + NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp)); + rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, if_flags_mask); } static const struct rtnl_cmd_handler cmd_handlers[] = { { .cmd = NL_RTM_GETLINK, .name = "RTM_GETLINK", .cb = &rtnl_handle_getlink, .flags = RTNL_F_NOEPOCH, }, { .cmd = NL_RTM_DELLINK, .name = "RTM_DELLINK", .cb = &rtnl_handle_dellink, .priv = PRIV_NET_IFDESTROY, .flags = RTNL_F_NOEPOCH, }, { .cmd = NL_RTM_NEWLINK, .name = "RTM_NEWLINK", .cb = &rtnl_handle_newlink, .priv = PRIV_NET_IFCREATE, .flags = RTNL_F_NOEPOCH, }, { .cmd = NL_RTM_GETADDR, .name = "RTM_GETADDR", .cb = &rtnl_handle_getaddr, }, { .cmd = NL_RTM_NEWADDR, .name = "RTM_NEWADDR", .cb = &rtnl_handle_getaddr, }, { .cmd = NL_RTM_DELADDR, .name = "RTM_DELADDR", .cb = &rtnl_handle_getaddr, }, }; static const struct nlhdr_parser *all_parsers[] = { &ifmsg_parser }; void rtnl_iface_add_cloner(struct nl_cloner *cloner) { sx_xlock(&rtnl_cloner_lock); SLIST_INSERT_HEAD(&nl_cloners, cloner, next); sx_xunlock(&rtnl_cloner_lock); } void rtnl_iface_del_cloner(struct nl_cloner *cloner) { sx_xlock(&rtnl_cloner_lock); SLIST_REMOVE(&nl_cloners, cloner, nl_cloner, next); sx_xunlock(&rtnl_cloner_lock); } void rtnl_ifaces_init(void) { ifattach_event = EVENTHANDLER_REGISTER( ifnet_arrival_event, rtnl_handle_ifattach, NULL, EVENTHANDLER_PRI_ANY); ifdetach_event = EVENTHANDLER_REGISTER( ifnet_departure_event, rtnl_handle_ifdetach, NULL, EVENTHANDLER_PRI_ANY); ifaddr_event = EVENTHANDLER_REGISTER( rt_addrmsg, rtnl_handle_ifaddr, NULL, EVENTHANDLER_PRI_ANY); + iflink_event = EVENTHANDLER_REGISTER( + ifnet_link_event, rtnl_handle_iflink, NULL, + EVENTHANDLER_PRI_ANY); NL_VERIFY_PARSERS(all_parsers); rtnl_iface_drivers_register(); rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers)); } void rtnl_ifaces_destroy(void) { EVENTHANDLER_DEREGISTER(ifnet_arrival_event, 
void
rtnl_ifaces_destroy(void)
{
	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event);
	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event);
	EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event);
+	EVENTHANDLER_DEREGISTER(ifnet_link_event, iflink_event);
}
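/*
 * Editor's note: a minimal userland sketch (not part of this diff) that
 * joins RTNLGRP_LINK and prints the RTM_NEWLINK/RTM_DELLINK notifications
 * produced by the handlers above, including the ones now generated on
 * link-state transitions.  Error handling is omitted; the legacy
 * nl_groups bitmask is assumed to work for this low-numbered group.
 */
#include <sys/socket.h>
#include <netlink/netlink.h>
#include <netlink/netlink_route.h>
#include <stdio.h>

int
main(void)
{
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	struct sockaddr_nl snl = {
		.nl_family = AF_NETLINK,
		.nl_groups = 1U << (RTNLGRP_LINK - 1),
	};
	char buf[8192];
	int len;

	bind(fd, (struct sockaddr *)&snl, sizeof(snl));
	while ((len = (int)recv(fd, buf, sizeof(buf), 0)) > 0) {
		for (struct nlmsghdr *h = (struct nlmsghdr *)buf;
		    NLMSG_OK(h, len); h = NLMSG_NEXT(h, len)) {
			struct ifinfomsg *ifm = NLMSG_DATA(h);

			printf("%s: ifindex %d flags %#x change %#x\n",
			    (h->nlmsg_type == RTM_DELLINK) ? "DELLINK" :
			    "NEWLINK", ifm->ifi_index, ifm->ifi_flags,
			    ifm->ifi_change);
		}
	}
	return (0);
}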
diff --git a/sys/netlink/route/interface.h b/sys/netlink/route/interface.h
index cae763cc4a58..1b8f1cf7b53d 100644
--- a/sys/netlink/route/interface.h
+++ b/sys/netlink/route/interface.h
@@ -1,245 +1,248 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2022 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Interface-related (RTM_LINK) message header and attributes.
 */
#ifndef _NETLINK_ROUTE_INTERFACE_H_
#define	_NETLINK_ROUTE_INTERFACE_H_

/* Base header for all of the relevant messages */
struct ifinfomsg {
	unsigned char	ifi_family;	/* not used */
	unsigned char	__ifi_pad;
	unsigned short	ifi_type;	/* ARPHRD_* */
	int		ifi_index;	/* Interface index */
	unsigned	ifi_flags;	/* IFF_* flags */
	unsigned	ifi_change;	/* IFF_* change mask */
};

+/* Linux-specific link-level state flag */
+#define	IFF_LOWER_UP	IFF_NETLINK_1
+
#ifndef _KERNEL
/* Compatibility helpers */
#define	_IFINFO_HDRLEN		((int)sizeof(struct ifinfomsg))
#define	IFLA_RTA(_ifi)		((struct rtattr *)NL_ITEM_DATA(_ifi, _IFINFO_HDRLEN))
#define	IFLA_PAYLOAD(_ifi)	NLMSG_PAYLOAD(_ifi, _IFINFO_HDRLEN)
#endif

enum {
	IFLA_UNSPEC	= 0,
	IFLA_ADDRESS	= 1,	/* binary: Link-level address (MAC) */
#define	IFLA_ADDRESS IFLA_ADDRESS
	IFLA_BROADCAST	= 2,	/* binary: link-level broadcast address */
#define	IFLA_BROADCAST IFLA_BROADCAST
	IFLA_IFNAME	= 3,	/* string: Interface name */
#define	IFLA_IFNAME IFLA_IFNAME
	IFLA_MTU	= 4,	/* u32: Current interface L3 mtu */
#define	IFLA_MTU IFLA_MTU
	IFLA_LINK	= 5,	/* u32: interface index */
#define	IFLA_LINK IFLA_LINK
	IFLA_QDISC	= 6,	/* string: Queueing policy (not supported) */
#define	IFLA_QDISC IFLA_QDISC
	IFLA_STATS	= 7,	/* Interface counters */
#define	IFLA_STATS IFLA_STATS
	IFLA_COST	= 8,	/* not supported */
#define	IFLA_COST IFLA_COST
	IFLA_PRIORITY	= 9,	/* not supported */
#define	IFLA_PRIORITY IFLA_PRIORITY
	IFLA_MASTER	= 10,	/* u32: parent interface ifindex */
#define	IFLA_MASTER IFLA_MASTER
	IFLA_WIRELESS	= 11,	/* not supported */
#define	IFLA_WIRELESS IFLA_WIRELESS
	IFLA_PROTINFO	= 12,	/* protocol-specific data */
#define	IFLA_PROTINFO IFLA_PROTINFO
	IFLA_TXQLEN	= 13,	/* u32: transmit queue length */
#define	IFLA_TXQLEN IFLA_TXQLEN
	IFLA_MAP	= 14,	/* not supported */
#define	IFLA_MAP IFLA_MAP
	IFLA_WEIGHT	= 15,	/* not supported */
#define	IFLA_WEIGHT IFLA_WEIGHT
	IFLA_OPERSTATE	= 16,	/* u8: ifOperStatus per RFC 2863 */
#define	IFLA_OPERSTATE IFLA_OPERSTATE
	IFLA_LINKMODE	= 17,	/* u8: ifmedia (not supported) */
#define	IFLA_LINKMODE IFLA_LINKMODE
	IFLA_LINKINFO	= 18,	/* nested: IFLA_INFO_ */
#define	IFLA_LINKINFO IFLA_LINKINFO
	IFLA_NET_NS_PID	= 19,	/* u32: vnet id (not supported) */
#define	IFLA_NET_NS_PID IFLA_NET_NS_PID
	IFLA_IFALIAS	= 20,	/* not supported */
#define	IFLA_IFALIAS IFLA_IFALIAS
	IFLA_NUM_VF	= 21,	/* not supported */
#define	IFLA_NUM_VF IFLA_NUM_VF
	IFLA_VFINFO_LIST = 22,	/* not supported */
#define	IFLA_VFINFO_LIST IFLA_VFINFO_LIST
	IFLA_STATS64	= 23,	/* rtnl_link_stats64: iface stats */
#define	IFLA_STATS64 IFLA_STATS64
	IFLA_VF_PORTS,
	IFLA_PORT_SELF,
	IFLA_AF_SPEC,
	IFLA_GROUP,		/* Group the device belongs to */
	IFLA_NET_NS_FD,
	IFLA_EXT_MASK,		/* Extended info mask, VFs, etc */
	IFLA_PROMISCUITY,	/* Promiscuity count: > 0 means acts PROMISC */
#define	IFLA_PROMISCUITY IFLA_PROMISCUITY
	IFLA_NUM_TX_QUEUES,
	IFLA_NUM_RX_QUEUES,
	IFLA_CARRIER,
	IFLA_PHYS_PORT_ID,
	IFLA_CARRIER_CHANGES,
	IFLA_PHYS_SWITCH_ID,
	IFLA_LINK_NETNSID,
	IFLA_PHYS_PORT_NAME,
	IFLA_PROTO_DOWN,
	IFLA_GSO_MAX_SEGS,
	IFLA_GSO_MAX_SIZE,
	IFLA_PAD,
	IFLA_XDP,
	IFLA_EVENT,
	IFLA_NEW_NETNSID,
	IFLA_IF_NETNSID,
	IFLA_TARGET_NETNSID = IFLA_IF_NETNSID,	/* new alias */
	IFLA_CARRIER_UP_COUNT,
	IFLA_CARRIER_DOWN_COUNT,
	IFLA_NEW_IFINDEX,
	IFLA_MIN_MTU,
	IFLA_MAX_MTU,
	IFLA_PROP_LIST,
	IFLA_ALT_IFNAME,	/* Alternative ifname */
	IFLA_PERM_ADDRESS,
	IFLA_PROTO_DOWN_REASON,
	__IFLA_MAX
};
#define	IFLA_MAX	(__IFLA_MAX - 1)
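/*
 * Editor's note: a hedged userland sketch (not part of this diff) of how
 * the IFLA_* attributes above sit after the ifinfomsg header, walked here
 * with plain pointer arithmetic over struct nlattr.  EX_ALIGN and
 * example_walk_ifla are invented for illustration.
 */
#include <netlink/netlink.h>
#include <netlink/netlink_route.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define	EX_ALIGN(x)	(((x) + 3) & ~3)	/* attributes are 4-byte aligned */

static void
example_walk_ifla(const struct nlmsghdr *h)
{
	const char *data = (const char *)(h + 1) + sizeof(struct ifinfomsg);
	const char *end = (const char *)h + h->nlmsg_len;

	while (data + sizeof(struct nlattr) <= end) {
		const struct nlattr *nla = (const struct nlattr *)data;

		if (nla->nla_len < sizeof(struct nlattr))
			break;	/* malformed attribute */
		switch (nla->nla_type & 0x3fff) {	/* strip NLA_F_* bits */
		case IFLA_IFNAME:
			printf("ifname: %s\n", (const char *)(nla + 1));
			break;
		case IFLA_MTU: {
			uint32_t mtu;

			memcpy(&mtu, nla + 1, sizeof(mtu));
			printf("mtu: %u\n", mtu);
			break;
		}
		}
		data += EX_ALIGN(nla->nla_len);
	}
}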
/*
 * Attributes that can be used as filters:
 *  IFLA_IFNAME, IFLA_GROUP, IFLA_ALT_IFNAME
 * Headers that can be used as filters:
 *  ifi_index, ifi_type
 */

/*
 * IFLA_OPERSTATE.
 * The values below represent the possible
 * states of ifOperStatus defined by RFC 2863
 */
enum {
	IF_OPER_UNKNOWN		= 0,	/* status cannot be determined */
	IF_OPER_NOTPRESENT	= 1,	/* some (hardware) component not present */
	IF_OPER_DOWN		= 2,	/* down */
	IF_OPER_LOWERLAYERDOWN	= 3,	/* some lower-level interface is down */
	IF_OPER_TESTING		= 4,	/* in some test mode */
	IF_OPER_DORMANT		= 5,	/* "up" but waiting for some condition (802.1X) */
	IF_OPER_UP		= 6,	/* ready to pass packets */
};

/* IFLA_STATS */
struct rtnl_link_stats {
	uint32_t rx_packets;	/* total RX packets (IFCOUNTER_IPACKETS) */
	uint32_t tx_packets;	/* total TX packets (IFCOUNTER_OPACKETS) */
	uint32_t rx_bytes;	/* total RX bytes (IFCOUNTER_IBYTES) */
	uint32_t tx_bytes;	/* total TX bytes (IFCOUNTER_OBYTES) */
	uint32_t rx_errors;	/* RX errors (IFCOUNTER_IERRORS) */
	uint32_t tx_errors;	/* TX errors (IFCOUNTER_OERRORS) */
	uint32_t rx_dropped;	/* RX drop (no space in ring/no bufs) (IFCOUNTER_IQDROPS) */
	uint32_t tx_dropped;	/* TX drop (IFCOUNTER_OQDROPS) */
	uint32_t multicast;	/* RX multicast packets (IFCOUNTER_IMCASTS) */
	uint32_t collisions;		/* not supported */
	uint32_t rx_length_errors;	/* not supported */
	uint32_t rx_over_errors;	/* not supported */
	uint32_t rx_crc_errors;		/* not supported */
	uint32_t rx_frame_errors;	/* not supported */
	uint32_t rx_fifo_errors;	/* not supported */
	uint32_t rx_missed_errors;	/* not supported */
	uint32_t tx_aborted_errors;	/* not supported */
	uint32_t tx_carrier_errors;	/* not supported */
	uint32_t tx_fifo_errors;	/* not supported */
	uint32_t tx_heartbeat_errors;	/* not supported */
	uint32_t tx_window_errors;	/* not supported */
	uint32_t rx_compressed;		/* not supported */
	uint32_t tx_compressed;		/* not supported */
	uint32_t rx_nohandler;	/* dropped due to no proto handler (IFCOUNTER_NOPROTO) */
};

/* IFLA_STATS64 */
struct rtnl_link_stats64 {
	uint64_t rx_packets;	/* total RX packets (IFCOUNTER_IPACKETS) */
	uint64_t tx_packets;	/* total TX packets (IFCOUNTER_OPACKETS) */
	uint64_t rx_bytes;	/* total RX bytes (IFCOUNTER_IBYTES) */
	uint64_t tx_bytes;	/* total TX bytes (IFCOUNTER_OBYTES) */
	uint64_t rx_errors;	/* RX errors (IFCOUNTER_IERRORS) */
	uint64_t tx_errors;	/* TX errors (IFCOUNTER_OERRORS) */
	uint64_t rx_dropped;	/* RX drop (no space in ring/no bufs) (IFCOUNTER_IQDROPS) */
	uint64_t tx_dropped;	/* TX drop (IFCOUNTER_OQDROPS) */
	uint64_t multicast;	/* RX multicast packets (IFCOUNTER_IMCASTS) */
	uint64_t collisions;		/* not supported */
	uint64_t rx_length_errors;	/* not supported */
	uint64_t rx_over_errors;	/* not supported */
	uint64_t rx_crc_errors;		/* not supported */
	uint64_t rx_frame_errors;	/* not supported */
	uint64_t rx_fifo_errors;	/* not supported */
	uint64_t rx_missed_errors;	/* not supported */
	uint64_t tx_aborted_errors;	/* not supported */
	uint64_t tx_carrier_errors;	/* not supported */
	uint64_t tx_fifo_errors;	/* not supported */
	uint64_t tx_heartbeat_errors;	/* not supported */
	uint64_t tx_window_errors;	/* not supported */
	uint64_t rx_compressed;		/* not supported */
	uint64_t tx_compressed;		/* not supported */
	uint64_t rx_nohandler;	/* dropped due to no proto handler (IFCOUNTER_NOPROTO) */
};

/* IFLA_LINKINFO child nlattr types */
enum {
	IFLA_INFO_UNSPEC,
	IFLA_INFO_KIND		= 1,	/* string, link type ("vlan") */
	IFLA_INFO_DATA		= 2,	/* Per-link-type custom data */
	IFLA_INFO_XSTATS	= 3,
	IFLA_INFO_SLAVE_KIND	= 4,
	IFLA_INFO_SLAVE_DATA	= 5,
	__IFLA_INFO_MAX,
};
#define	IFLA_INFO_MAX	(__IFLA_INFO_MAX - 1)
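/*
 * Editor's note: a hedged kernel-side sketch (not part of this diff) of
 * how the IFCOUNTER_* mappings noted in the comments above translate into
 * an rtnl_link_stats64 fill when a dumper emits IFLA_STATS64;
 * example_fill_stats64 is invented for illustration.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>

static void
example_fill_stats64(struct ifnet *ifp, struct rtnl_link_stats64 *st)
{
	st->rx_packets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
	st->tx_packets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
	st->rx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
	st->tx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
	st->rx_errors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
	st->tx_errors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
	st->rx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
	st->tx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
	st->multicast = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
	st->rx_nohandler = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO);
}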
/* IFLA_INFO_DATA vlan attributes */
enum {
	IFLA_VLAN_UNSPEC,
	IFLA_VLAN_ID,
	IFLA_VLAN_FLAGS,
	IFLA_VLAN_EGRESS_QOS,
	IFLA_VLAN_INGRESS_QOS,
	IFLA_VLAN_PROTOCOL,
	__IFLA_VLAN_MAX,
};
#define	IFLA_VLAN_MAX	(__IFLA_VLAN_MAX - 1)

struct ifla_vlan_flags {
	uint32_t flags;
	uint32_t mask;
};

#endif
diff --git a/sys/netlink/route/route_var.h b/sys/netlink/route/route_var.h
index 7a31a8c896a5..0bcfcc962020 100644
--- a/sys/netlink/route/route_var.h
+++ b/sys/netlink/route/route_var.h
@@ -1,101 +1,102 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2022 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This file contains definitions shared among NETLINK_ROUTE family
 */
#ifndef _NETLINK_ROUTE_ROUTE_VAR_H_
#define	_NETLINK_ROUTE_ROUTE_VAR_H_

#include <sys/priv.h>	/* values for priv_check */

struct nlmsghdr;
struct nlpcb;
struct nl_pstate;

typedef int rtnl_msg_cb_f(struct nlmsghdr *hdr, struct nlpcb *nlp,
    struct nl_pstate *npt);

struct rtnl_cmd_handler {
	int		cmd;
	const char	*name;
	rtnl_msg_cb_f	*cb;
	int		priv;
	int		flags;
};
#define	RTNL_F_NOEPOCH	0x01

bool rtnl_register_messages(const struct rtnl_cmd_handler *handlers, int count);

/* route.c */
struct rib_cmd_info;
void rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc);
void rtnl_routes_init(void);

/* neigh.c */
void rtnl_neighs_init(void);
void rtnl_neighs_destroy(void);

/* iface.c */
struct nl_parsed_link {
	char		*ifla_group;
	char		*ifla_ifname;
	char		*ifla_cloner;
	struct nlattr	*ifla_idata;
	unsigned short	ifi_type;
	int		ifi_index;
	uint32_t	ifla_mtu;
};

typedef int rtnl_iface_create_f(struct nl_parsed_link *lattrs,
    struct nlpcb *nlp, struct nl_pstate *npt);
typedef int rtnl_iface_modify_f(struct nl_parsed_link *lattrs,
    struct nlpcb *nlp, struct nl_pstate *npt);

struct nl_cloner {
	const char		*name;
	rtnl_iface_create_f	*create_f;
	rtnl_iface_modify_f	*modify_f;
	SLIST_ENTRY(nl_cloner)	next;
};

void rtnl_ifaces_init(void);
void rtnl_ifaces_destroy(void);
void rtnl_iface_add_cloner(struct nl_cloner *cloner);
void rtnl_iface_del_cloner(struct nl_cloner *cloner);
+void rtnl_handle_ifnet_event(struct ifnet *ifp, int if_change_mask);

/* iface_drivers.c */
void rtnl_iface_drivers_register(void);

/* nexthop.c */
void rtnl_nexthops_init(void);
struct nhop_object *nl_find_nhop(uint32_t fibnum, int family,
    uint32_t uidx, int nh_flags, int *perror);

#endif
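/*
 * Editor's note: a hedged sketch (not part of this diff) of how an
 * interface driver would plug into the nl_cloner API declared above so
 * that RTM_NEWLINK with IFLA_INFO_KIND "myif" reaches its create callback;
 * "myif" and both functions are invented for illustration.
 */
#include <netlink/route/route_var.h>

static int
myif_create(struct nl_parsed_link *lattrs, struct nlpcb *nlp,
    struct nl_pstate *npt)
{
	/* validate lattrs (ifla_ifname, ifla_idata) and create the ifnet */
	return (0);
}

static struct nl_cloner myif_cloner = {
	.name = "myif",
	.create_f = myif_create,
};

static void
myif_nl_register(void)
{
	rtnl_iface_add_cloner(&myif_cloner);
}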