Index: stable/6/sys/net/if.c =================================================================== --- stable/6/sys/net/if.c (revision 149441) +++ stable/6/sys/net/if.c (revision 149442) @@ -1,2255 +1,2269 @@ /*- * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)if.c 8.5 (Berkeley) 1/9/95 * $FreeBSD$ */ #include "opt_compat.h" #include "opt_inet6.h" #include "opt_inet.h" #include "opt_mac.h" #include "opt_carp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) /*XXX*/ #include #include #ifdef INET6 #include #include #endif #endif #ifdef INET #include #endif #ifdef DEV_CARP #include #endif SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); /* Log link state change events */ static int log_link_state_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW, &log_link_state_change, 0, "log interface link state change events"); void (*bstp_linkstate_p)(struct ifnet *ifp, int state); void (*ng_ether_link_state_p)(struct ifnet *ifp, int state); struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL; static void if_attachdomain(void *); static void if_attachdomain1(struct ifnet *); static int ifconf(u_long, caddr_t); static void if_grow(void); static void if_init(void *); static void if_check(void *); static void if_qflush(struct ifaltq *); static void if_route(struct ifnet *, int flag, int fam); static int if_setflag(struct ifnet *, int, int, int *, int); static void if_slowtimo(void *); static void if_unroute(struct ifnet *, int flag, int fam); static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static int if_rtdel(struct radix_node *, void *); static int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *); static void if_start_deferred(void *context, int pending); static void do_link_state_change(void *, int); #ifdef INET6 /* * XXX: declare here to avoid to include many inet6 related files.. * should be more generalized? 
*/
extern void	nd6_setmtu(struct ifnet *);
#endif

/*
 * File-scope interface bookkeeping.
 *
 * if_index is raised by if_alloc() when a new highest index is handed out
 * and trimmed by if_free_type(); ifindex_table is (re)allocated by if_grow().
 */
int	if_index = 0;
struct	ifindex_entry *ifindex_table = NULL;
int	ifqmaxlen = IFQ_MAXLEN;
struct	ifnethead ifnet;	/* depend on static init XXX */
struct	mtx ifnet_lock;
/* Per-interface-type hooks, indexed by the u_char type passed to if_alloc(). */
static	if_com_alloc_t *if_com_alloc[256];
static	if_com_free_t *if_com_free[256];

/* Doubled by if_grow() before the table is allocated. */
static int	if_indexlim = 8;
/* knote list used for minor 0 of the network control device. */
static struct	knlist ifklist;

static void	filt_netdetach(struct knote *kn);
static int	filt_netdev(struct knote *kn, long hint);

/*
 * Filter operations installed by netkqfilter() for EVFILT_NETDEV.
 * NOTE(review): positional initializer; field meanings come from
 * struct filterops, which is declared elsewhere -- confirm there.
 */
static struct filterops netdev_filtops =
    { 1, NULL, filt_netdetach, filt_netdev };

/*
 * System initialization
 */
SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)

MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");

static d_open_t		netopen;
static d_close_t	netclose;
static d_ioctl_t	netioctl;
static d_kqfilter_t	netkqfilter;

/* Character-device switch for the "net" devices created in this file. */
static struct cdevsw net_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_NEEDGIANT,
	.d_open =	netopen,
	.d_close =	netclose,
	.d_ioctl =	netioctl,
	.d_name =	"net",
	.d_kqfilter =	netkqfilter,
};

/*
 * Open handler for the network devices: nothing to set up, always
 * returns 0.
 */
static int
netopen(struct cdev *dev, int flag, int mode, struct thread *td)
{
	return (0);
}

/*
 * Close handler for the network devices: nothing to tear down, always
 * returns 0.
 */
static int
netclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	return (0);
}

/*
 * ioctl handler for the network devices.
 *
 * Only commands in ioctl group 'i' are accepted.  Minor 0 is the
 * interface-independent device and supports SIOCGIFCONF only; any other
 * minor is used as an interface index and the request is passed to
 * ifhwioctl().  ENOIOCTL from ifhwioctl() is reported to the caller as
 * EOPNOTSUPP.
 *
 * Returns 0 or an errno value (EOPNOTSUPP, ENXIO, or whatever
 * ifconf()/ifhwioctl() returned).
 */
static int
netioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
	struct ifnet *ifp;
	int error, idx;

	/* only support interface specific ioctls */
	if (IOCGROUP(cmd) != 'i')
		return (EOPNOTSUPP);
	idx = minor(dev);
	if (idx == 0) {
		/*
		 * special network device, not interface.
		 */
		if (cmd == SIOCGIFCONF)
			return (ifconf(cmd, data));	/* XXX remove cmd */
		return (EOPNOTSUPP);
	}

	ifp = ifnet_byindex(idx);
	if (ifp == NULL)
		return (ENXIO);

	error = ifhwioctl(cmd, ifp, data, td);
	if (error == ENOIOCTL)
		error = EOPNOTSUPP;
	return (error);
}

/*
 * kqueue attach handler for the network devices.
 *
 * Accepts EVFILT_NETDEV only.  The knote is added to the global ifklist
 * for minor 0, otherwise to the if_klist of the interface whose index
 * equals the minor number.  The chosen list is remembered in kn_hook so
 * the filter routines can find it again.
 *
 * Returns 0 on success, 1 if the filter is unsupported or no interface
 * has that index.
 */
static int
netkqfilter(struct cdev *dev, struct knote *kn)
{
	struct knlist *klist;
	struct ifnet *ifp;
	int idx;

	switch (kn->kn_filter) {
	case EVFILT_NETDEV:
		kn->kn_fop = &netdev_filtops;
		break;
	default:
		return (1);
	}

	idx = minor(dev);
	if (idx == 0) {
		klist = &ifklist;
	} else {
		ifp = ifnet_byindex(idx);
		if (ifp == NULL)
			return (1);
		klist = &ifp->if_klist;
	}

	kn->kn_hook = (caddr_t)klist;

	knlist_add(klist, kn, 0);

	return (0);
}

/*
 * Detach a knote from the list recorded in kn_hook by netkqfilter().
 */
static void
filt_netdetach(struct knote *kn)
{
	struct knlist *klist = (struct knlist *)kn->kn_hook;

	knlist_remove(klist, kn, 0);
}

/*
 * Event filter for EVFILT_NETDEV.
 *
 * A hint of NOTE_EXIT marks the knote as EOF/one-shot with data
 * NOTE_LINKINV, removes it from its list and reports it active.  Any
 * other non-zero hint is stored as the current status, and is latched
 * into kn_fflags when the subscriber asked for it via kn_sfflags.
 *
 * Returns non-zero when the knote should be reported.
 */
static int
filt_netdev(struct knote *kn, long hint)
{
	struct knlist *klist = (struct knlist *)kn->kn_hook;

	/*
	 * Currently NOTE_EXIT is abused to indicate device detach.
	 */
	if (hint == NOTE_EXIT) {
		kn->kn_data = NOTE_LINKINV;
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		knlist_remove_inevent(klist, kn);
		return (1);
	}
	if (hint != 0)
		kn->kn_data = hint;			/* current status */
	if (kn->kn_sfflags & hint)
		kn->kn_fflags |= hint;
	return (kn->kn_fflags != 0);
}

/*
 * Network interface utility routines.
 *
 * Routines with ifa_ifwith* names take sockaddr *'s as
 * parameters.
*/ /* ARGSUSED*/ static void if_init(void *dummy __unused) { IFNET_LOCK_INIT(); TAILQ_INIT(&ifnet); knlist_init(&ifklist, NULL, NULL, NULL, NULL); if_grow(); /* create initial table */ ifdev_byindex(0) = make_dev(&net_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "network"); if_clone_init(); } static void if_grow(void) { u_int n; struct ifindex_entry *e; if_indexlim <<= 1; n = if_indexlim * sizeof(*e); e = malloc(n, M_IFNET, M_WAITOK | M_ZERO); if (ifindex_table != NULL) { memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2); free((caddr_t)ifindex_table, M_IFNET); } ifindex_table = e; } /* ARGSUSED*/ static void if_check(void *dummy __unused) { struct ifnet *ifp; int s; s = splimp(); IFNET_RLOCK(); /* could sleep on rare error; mostly okay XXX */ TAILQ_FOREACH(ifp, &ifnet, if_link) { if (ifp->if_snd.ifq_maxlen == 0) { if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n"); ifp->if_snd.ifq_maxlen = ifqmaxlen; } if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) { if_printf(ifp, "XXX: driver didn't initialize queue mtx\n"); mtx_init(&ifp->if_snd.ifq_mtx, "unknown", MTX_NETWORK_LOCK, MTX_DEF); } } IFNET_RUNLOCK(); splx(s); if_slowtimo(0); } /* * Allocate a struct ifnet and in index for an interface. */ struct ifnet* if_alloc(u_char type) { struct ifnet *ifp; ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO); /* * Try to find an empty slot below if_index. If we fail, take * the next slot. * * XXX: should be locked! */ for (ifp->if_index = 1; ifp->if_index <= if_index; ifp->if_index++) { if (ifnet_byindex(ifp->if_index) == NULL) break; } /* Catch if_index overflow. 
*/ if (ifp->if_index < 1) { free(ifp, M_IFNET); return (NULL); } if (ifp->if_index > if_index) if_index = ifp->if_index; if (if_index >= if_indexlim) if_grow(); ifnet_byindex(ifp->if_index) = ifp; ifp->if_type = type; if (if_com_alloc[type] != NULL) { ifp->if_l2com = if_com_alloc[type](type, ifp); if (ifp->if_l2com == NULL) { free(ifp, M_IFNET); return (NULL); } } IF_ADDR_LOCK_INIT(ifp); return (ifp); } void if_free(struct ifnet *ifp) { /* Do not add code to this function! Add it to if_free_type(). */ if_free_type(ifp, ifp->if_type); } void if_free_type(struct ifnet *ifp, u_char type) { if (ifp != ifnet_byindex(ifp->if_index)) { if_printf(ifp, "%s: value was not if_alloced, skipping\n", __func__); return; } IF_ADDR_LOCK_DESTROY(ifp); ifnet_byindex(ifp->if_index) = NULL; /* XXX: should be locked with if_findindex() */ while (if_index > 0 && ifnet_byindex(if_index) == NULL) if_index--; if (if_com_free[type] != NULL) if_com_free[type](ifp->if_l2com, type); free(ifp, M_IFNET); }; /* * Attach an interface to the * list of "active" interfaces. */ void if_attach(struct ifnet *ifp) { unsigned socksize, ifasize; int namelen, masklen; struct sockaddr_dl *sdl; struct ifaddr *ifa; if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index)) panic ("%s: BUG: if_attach called without if_alloc'd input()\n", ifp->if_xname); TASK_INIT(&ifp->if_starttask, 0, if_start_deferred, ifp); TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp); IF_AFDATA_LOCK_INIT(ifp); ifp->if_afdata_initialized = 0; IFNET_WLOCK(); TAILQ_INSERT_TAIL(&ifnet, ifp, if_link); IFNET_WUNLOCK(); /* * XXX - * The old code would work if the interface passed a pre-existing * chain of ifaddrs to this code. We don't trust our callers to * properly initialize the tailq, however, so we no longer allow * this unlikely case. 
*/ TAILQ_INIT(&ifp->if_addrhead); TAILQ_INIT(&ifp->if_prefixhead); TAILQ_INIT(&ifp->if_multiaddrs); knlist_init(&ifp->if_klist, NULL, NULL, NULL, NULL); getmicrotime(&ifp->if_lastchange); ifp->if_data.ifi_epoch = time_uptime; ifp->if_data.ifi_datalen = sizeof(struct if_data); #ifdef MAC mac_init_ifnet(ifp); mac_create_ifnet(ifp); #endif ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw, unit2minor(ifp->if_index), UID_ROOT, GID_WHEEL, 0600, "%s/%s", net_cdevsw.d_name, ifp->if_xname); make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d", net_cdevsw.d_name, ifp->if_index); mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF); /* * create a Link Level name for this device */ namelen = strlen(ifp->if_xname); /* * Always save enough space for any possiable name so we can do * a rename in place later. */ masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ; socksize = masklen + ifp->if_addrlen; if (socksize < sizeof(*sdl)) socksize = sizeof(*sdl); socksize = roundup2(socksize, sizeof(long)); ifasize = sizeof(*ifa) + 2 * socksize; ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO); IFA_LOCK_INIT(ifa); sdl = (struct sockaddr_dl *)(ifa + 1); sdl->sdl_len = socksize; sdl->sdl_family = AF_LINK; bcopy(ifp->if_xname, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; ifaddr_byindex(ifp->if_index) = ifa; ifa->ifa_ifp = ifp; ifa->ifa_rtrequest = link_rtrequest; ifa->ifa_addr = (struct sockaddr *)sdl; sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); ifa->ifa_netmask = (struct sockaddr *)sdl; sdl->sdl_len = masklen; while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; ifa->ifa_refcnt = 1; TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); ifp->if_broadcastaddr = NULL; /* reliably crash if used uninitialized */ ifp->if_snd.altq_type = 0; ifp->if_snd.altq_disc = NULL; ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE; ifp->if_snd.altq_tbr = NULL; ifp->if_snd.altq_ifp = ifp; if 
(domain_init_status >= 2) if_attachdomain1(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); /* Announce the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); } static void if_attachdomain(void *dummy) { struct ifnet *ifp; int s; s = splnet(); TAILQ_FOREACH(ifp, &ifnet, if_link) if_attachdomain1(ifp); splx(s); } SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND, if_attachdomain, NULL); static void if_attachdomain1(struct ifnet *ifp) { struct domain *dp; int s; s = splnet(); /* * Since dp->dom_ifattach calls malloc() with M_WAITOK, we * cannot lock ifp->if_afdata initialization, entirely. */ if (IF_AFDATA_TRYLOCK(ifp) == 0) { splx(s); return; } if (ifp->if_afdata_initialized >= domain_init_status) { IF_AFDATA_UNLOCK(ifp); splx(s); printf("if_attachdomain called more than once on %s\n", ifp->if_xname); return; } ifp->if_afdata_initialized = domain_init_status; IF_AFDATA_UNLOCK(ifp); /* address family dependent data region */ bzero(ifp->if_afdata, sizeof(ifp->if_afdata)); for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_ifattach) ifp->if_afdata[dp->dom_family] = (*dp->dom_ifattach)(ifp); } splx(s); } /* * Remove any network addresses from an interface. */ void if_purgeaddrs(struct ifnet *ifp) { struct ifaddr *ifa, *next; TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { if (ifa->ifa_addr->sa_family == AF_LINK) continue; #ifdef INET /* XXX: Ugly!! 
ad hoc just for INET */ if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) { struct ifaliasreq ifr; bzero(&ifr, sizeof(ifr)); ifr.ifra_addr = *ifa->ifa_addr; if (ifa->ifa_dstaddr) ifr.ifra_broadaddr = *ifa->ifa_dstaddr; if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp, NULL) == 0) continue; } #endif /* INET */ #ifdef INET6 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) { in6_purgeaddr(ifa); /* ifp_addrhead is already updated */ continue; } #endif /* INET6 */ TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); IFAFREE(ifa); } } /* * Detach an interface, removing it from the * list of "active" interfaces and freeing the struct ifnet. */ void if_detach(struct ifnet *ifp) { struct ifaddr *ifa; struct radix_node_head *rnh; int s; int i; struct domain *dp; struct ifnet *iter; int found; /* * Remove/wait for pending events. */ taskqueue_drain(taskqueue_swi, &ifp->if_linktask); #ifdef DEV_CARP /* Maybe hook to the generalized departure handler above?!? */ if (ifp->if_carp) carp_ifdetach(ifp); #endif /* * Remove routes and flush queues. */ s = splnet(); if_down(ifp); #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) altq_disable(&ifp->if_snd); if (ALTQ_IS_ATTACHED(&ifp->if_snd)) altq_detach(&ifp->if_snd); #endif if_purgeaddrs(ifp); #ifdef INET6 /* * Remove all IPv6 kernel structs related to ifp. This should be done * before removing routing entries below, since IPv6 interface direct * routes are expected to be removed by the IPv6-specific kernel API. * Otherwise, the kernel will detect some inconsistency and bark it. */ in6_ifdetach(ifp); #endif /* * Remove address from ifindex_table[] and maybe decrement if_index. * Clean up all addresses. */ ifaddr_byindex(ifp->if_index) = NULL; destroy_dev(ifdev_byindex(ifp->if_index)); ifdev_byindex(ifp->if_index) = NULL; /* We can now free link ifaddr. 
*/ if (!TAILQ_EMPTY(&ifp->if_addrhead)) { ifa = TAILQ_FIRST(&ifp->if_addrhead); TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); IFAFREE(ifa); } /* * Delete all remaining routes using this interface * Unfortuneatly the only way to do this is to slog through * the entire routing table looking for routes which point * to this interface...oh well... */ for (i = 1; i <= AF_MAX; i++) { if ((rnh = rt_tables[i]) == NULL) continue; RADIX_NODE_HEAD_LOCK(rnh); (void) rnh->rnh_walktree(rnh, if_rtdel, ifp); RADIX_NODE_HEAD_UNLOCK(rnh); } /* Announce that the interface is gone. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); IF_AFDATA_LOCK(ifp); for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) (*dp->dom_ifdetach)(ifp, ifp->if_afdata[dp->dom_family]); } IF_AFDATA_UNLOCK(ifp); #ifdef MAC mac_destroy_ifnet(ifp); #endif /* MAC */ KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT); knlist_clear(&ifp->if_klist, 0); knlist_destroy(&ifp->if_klist); IFNET_WLOCK(); found = 0; TAILQ_FOREACH(iter, &ifnet, if_link) if (iter == ifp) { found = 1; break; } if (found) TAILQ_REMOVE(&ifnet, ifp, if_link); IFNET_WUNLOCK(); mtx_destroy(&ifp->if_snd.ifq_mtx); IF_AFDATA_DESTROY(ifp); splx(s); } /* * Delete Routes for a Network Interface * * Called for each routing entry via the rnh->rnh_walktree() call above * to delete all route entries referencing a detaching network interface. 
*
 * Arguments:
 *	rn	pointer to node in the routing table
 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
 *
 * Returns:
 *	0	successful
 *	errno	failed - reason indicated
 *
 * NOTE(review): as written, a failing rtrequest() is only logged and the
 * function still returns 0, so the tree walk is never aborted from here.
 *
 */
static int
if_rtdel(struct radix_node *rn, void *arg)
{
	struct rtentry	*rt = (struct rtentry *)rn;
	struct ifnet	*ifp = arg;
	int		err;

	if (rt->rt_ifp == ifp) {

		/*
		 * Protect (sorta) against walktree recursion problems
		 * with cloned routes
		 */
		if ((rt->rt_flags & RTF_UP) == 0)
			return (0);

		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
				rt_mask(rt), rt->rt_flags,
				(struct rtentry **) NULL);
		if (err) {
			log(LOG_WARNING, "if_rtdel: error %d\n", err);
		}
	}

	return (0);
}

/*
 * Byte-wise sockaddr comparison; the length compared is a1's sa_len, so
 * the first argument decides how many bytes of the second are examined.
 */
#define	sa_equal(a1, a2)	(bcmp((a1), (a2), ((a1))->sa_len) == 0)

/*
 * Locate an interface based on a complete address.
 *
 * Walks every address of every interface under the ifnet read lock and
 * returns the first ifaddr in addr's family whose address equals addr,
 * or -- on IFF_BROADCAST interfaces -- whose non-empty broadcast address
 * equals addr.  Returns NULL when nothing matches.
 */
/*ARGSUSED*/
struct ifaddr *
ifa_ifwithaddr(struct sockaddr *addr)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_link)
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (sa_equal(addr, ifa->ifa_addr))
				goto done;
			/* IP6 doesn't have broadcast */
			if ((ifp->if_flags & IFF_BROADCAST) &&
			    ifa->ifa_broadaddr &&
			    ifa->ifa_broadaddr->sa_len != 0 &&
			    sa_equal(ifa->ifa_broadaddr, addr))
				goto done;
		}
	/* Both loops ran to completion: no match. */
	ifa = NULL;
done:
	IFNET_RUNLOCK();
	return (ifa);
}

/*
 * Locate the point to point interface with a given destination address.
 *
 * Only IFF_POINTOPOINT interfaces are considered.  Returns the first
 * ifaddr in addr's family whose destination address equals addr, or NULL.
 */
/*ARGSUSED*/
struct ifaddr *
ifa_ifwithdstaddr(struct sockaddr *addr)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
			continue;
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (ifa->ifa_dstaddr &&
			    sa_equal(addr, ifa->ifa_dstaddr))
				goto done;
		}
	}
	ifa = NULL;
done:
	IFNET_RUNLOCK();
	return (ifa);
}

/*
 * Find an interface on a specific network.  If many, choice
 * is most specific found.
*/
/*
 * Returns the matching ifaddr, or NULL (ifa_maybe's initial value) when
 * no address in addr's family covers it.  AF_LINK requests carrying a
 * valid interface index are answered straight from the index table
 * without taking the ifnet lock.
 */
struct ifaddr *
ifa_ifwithnet(struct sockaddr *addr)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
	u_int af = addr->sa_family;
	char *addr_data = addr->sa_data, *cplim;

	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
		return (ifaddr_byindex(sdl->sdl_index));
	}

	/*
	 * Scan through each interface, looking for ones that have
	 * addresses in this address family.
	 */
	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			char *cp, *cp2, *cp3;

			/*
			 * The "next" label sits on this continue so the
			 * mask-compare loop further down can jump here to
			 * move on to the next address.
			 */
			if (ifa->ifa_addr->sa_family != af)
next:				continue;
			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != 0 &&
				    sa_equal(addr, ifa->ifa_dstaddr))
					goto done;
			} else {
				/*
				 * if we have a special address handler,
				 * then use it instead of the generic one.
				 */
				if (ifa->ifa_claim_addr) {
					if ((*ifa->ifa_claim_addr)(ifa, addr))
						goto done;
					continue;
				}

				/*
				 * Scan all the bits in the ifa's address.
				 * If a bit disagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == 0)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				cplim = ifa->ifa_netmask->sa_len
					+ (char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one) then remember the new one
				 * before continuing to search
				 * for an even better one.
				 */
				if (ifa_maybe == 0 ||
				    rn_refines((caddr_t)ifa->ifa_netmask,
				    (caddr_t)ifa_maybe->ifa_netmask))
					ifa_maybe = ifa;
			}
		}
	}
	ifa = ifa_maybe;
done:
	IFNET_RUNLOCK();
	return (ifa);
}

/*
 * Find an interface address specific to an interface best matching
 * a given address.
 *
 * Only addresses on ifp in addr's family are considered.  An exact match
 * (address, destination address, or address under the netmask) is
 * returned immediately; otherwise the first address of the right family
 * is returned as a fallback.  Returns 0 when the family is >= AF_MAX or
 * the interface has no address in that family.
 *
 * NOTE(review): on IFF_POINTOPOINT interfaces with a netmask,
 * ifa_dstaddr is passed to sa_equal() without a NULL check, unlike the
 * netmask-less branch just above it -- confirm it cannot be NULL there.
 */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = 0;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (0);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		/* Remember the first same-family address as the fallback. */
		if (ifa_maybe == 0)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == 0) {
			if (sa_equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr &&
			    sa_equal(addr, ifa->ifa_dstaddr)))
				goto done;
			continue;
		}
		if (ifp->if_flags & IFF_POINTOPOINT) {
			if (sa_equal(addr, ifa->ifa_dstaddr))
				goto done;
		} else {
			cp = addr->sa_data;
			cp2 = ifa->ifa_addr->sa_data;
			cp3 = ifa->ifa_netmask->sa_data;
			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
			for (; cp3 < cplim; cp3++)
				if ((*cp++ ^ *cp2++) & *cp3)
					break;
			/* Loop ran to the end: every masked byte matched. */
			if (cp3 == cplim)
				goto done;
		}
	}
	ifa = ifa_maybe;
done:
	return (ifa);
}

/* NOTE(review): the header name of this #include is missing in this text. */
#include

/*
 * Default action when installing a route with a Link Level gateway.
 * Lookup an appropriate real ifa to point to.
 * This should be moved to /sys/net/link.c eventually.
 *
 * Acts on RTM_ADD only, and only when the route has an ifa, that ifa has
 * an interface, and the route has a key.  The route's ifa is replaced by
 * the best match from ifaof_ifpforaddr() (taking a reference on the new
 * one and dropping the old), and the new ifa's own ifa_rtrequest hook is
 * invoked unless it is this function.
 */
static void
link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr *dst;
	struct ifnet *ifp;

	RT_LOCK_ASSERT(rt);

	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
		return;
	ifa = ifaof_ifpforaddr(dst, ifp);
	if (ifa) {
		IFAREF(ifa);		/* XXX */
		oifa = rt->rt_ifa;
		rt->rt_ifa = ifa;
		IFAFREE(oifa);
		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
			ifa->ifa_rtrequest(cmd, rt, info);
	}
}

/*
 * Mark an interface down and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or equivalent.
*/
/*
 * NOTE(review): a '+' in the first column of a line below is a
 * unified-diff marker carried over from the r149441 -> r149442 change
 * this text was taken from; it is not C.
 *
 * if_unroute() clears 'flag' in if_flags, timestamps the change, sends
 * PRC_IFDOWN to the protocols for each address in family 'fam' (all
 * addresses when fam is PF_UNSPEC), flushes the send queue, updates CARP
 * state and emits a routing-socket interface message.
 */
static void
if_unroute(struct ifnet *ifp, int flag, int fam)
{
	struct ifaddr *ifa;

+	KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
+
	ifp->if_flags &= ~flag;
	getmicrotime(&ifp->if_lastchange);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
	if_qflush(&ifp->if_snd);
#ifdef DEV_CARP
	if (ifp->if_carp)
		carp_carpdev_state(ifp->if_carp);
#endif
	rt_ifmsg(ifp);
}

/*
 * Mark an interface up and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or equivalent.
 *
 * Mirror image of if_unroute(): sets 'flag', sends PRC_IFUP for the
 * selected addresses, updates CARP state, emits a routing-socket message
 * and, with INET6, calls in6_if_up().  The send queue is not touched.
 */
static void
if_route(struct ifnet *ifp, int flag, int fam)
{
	struct ifaddr *ifa;

+	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
+
	ifp->if_flags |= flag;
	getmicrotime(&ifp->if_lastchange);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
			pfctlinput(PRC_IFUP, ifa->ifa_addr);
#ifdef DEV_CARP
	if (ifp->if_carp)
		carp_carpdev_state(ifp->if_carp);
#endif
	rt_ifmsg(ifp);
#ifdef INET6
	in6_if_up(ifp);
#endif
}

void	(*vlan_link_state_p)(struct ifnet *, int);	/* XXX: private from if_vlan */

/*
 * Handle a change in the interface link state. To avoid LORs
 * between driver lock and upper layer locks, as well as possible
 * recursions, we post event to taskqueue, and all job
 * is done in static do_link_state_change().
 *
 * Does nothing when link_state equals the state already recorded.
 */
void
if_link_state_change(struct ifnet *ifp, int link_state)
{
	/* Return if state hasn't changed. */
	if (ifp->if_link_state == link_state)
		return;

	ifp->if_link_state = link_state;

	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
}

/*
 * Task handler for a link-state change on the interface passed in 'arg'.
 *
 * Reads the state currently stored in the ifnet (not the value that was
 * current when the task was queued) and notifies, in order: the routing
 * socket, kqueue listeners, vlan, netgraph ether, CARP, the bridge
 * spanning-tree hook and devctl; it then optionally logs the change.
 * 'pending' is the number of times the task was queued before it ran;
 * values above 1 are reported as coalesced changes.
 *
 * NOTE(review): the vlan hook is passed 'link' (the NOTE_LINK* value)
 * while the netgraph and bridge hooks are passed 'link_state' -- confirm
 * which one the vlan handler expects.
 * Any state other than LINK_STATE_UP is reported as "LINK_DOWN"/"DOWN"
 * by the devctl and log calls.
 */
static void
do_link_state_change(void *arg, int pending)
{
	struct ifnet *ifp = (struct ifnet *)arg;
	int link_state = ifp->if_link_state;
	int link;

	/* Notify that the link state has changed. */
	rt_ifmsg(ifp);
	if (link_state == LINK_STATE_UP)
		link = NOTE_LINKUP;
	else if (link_state == LINK_STATE_DOWN)
		link = NOTE_LINKDOWN;
	else
		link = NOTE_LINKINV;
	KNOTE_UNLOCKED(&ifp->if_klist, link);
	if (ifp->if_nvlans != 0)
		(*vlan_link_state_p)(ifp, link);

	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
	    IFP2AC(ifp)->ac_netgraph != NULL)
		(*ng_ether_link_state_p)(ifp, link_state);
#ifdef DEV_CARP
	if (ifp->if_carp)
		carp_carpdev_state(ifp->if_carp);
#endif
	if (ifp->if_bridge) {
		KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
		(*bstp_linkstate_p)(ifp, link_state);
	}

	devctl_notify("IFNET", ifp->if_xname,
	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
	if (pending > 1)
		if_printf(ifp, "%d link states coalesced\n", pending);
	if (log_link_state_change)
		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
}

/*
 * Mark an interface down and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or equivalent.
 */
void
if_down(struct ifnet *ifp)
{

	if_unroute(ifp, IFF_UP, AF_UNSPEC);
}

/*
 * Mark an interface up and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or equivalent.
 */
void
if_up(struct ifnet *ifp)
{

	if_route(ifp, IFF_UP, AF_UNSPEC);
}

/*
 * Flush an interface queue.
 *
 * Under the queue lock: purges ALTQ state when ALTQ is enabled on the
 * queue, frees every mbuf chain linked through m_act from ifq_head, and
 * resets head, tail and length to empty.
 */
static void
if_qflush(struct ifaltq *ifq)
{
	struct mbuf *m, *n;

	IFQ_LOCK(ifq);
#ifdef ALTQ
	if (ALTQ_IS_ENABLED(ifq))
		ALTQ_PURGE(ifq);
#endif
	n = ifq->ifq_head;
	while ((m = n) != 0) {
		/* Fetch the successor before the packet is freed. */
		n = m->m_act;
		m_freem(m);
	}
	ifq->ifq_head = 0;
	ifq->ifq_tail = 0;
	ifq->ifq_len = 0;
	IFQ_UNLOCK(ifq);
}

/*
 * Handle interface watchdog timer routines.  Called
 * from softclock, we decrement timers (if set) and
 * call the appropriate interface routine on expiration.
 *
 * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
 * holding Giant.  If we switch to an MPSAFE callout, we likely need to grab
 * Giant before entering if_watchdog() on an IFF_NEEDSGIANT interface.
*/
/*
 * NOTE(review): a '+' or '-' in the first column of a line below is a
 * unified-diff marker carried over from the r149441 -> r149442 change
 * this text was taken from; it is not C.
 *
 * if_slowtimo() skips interfaces whose if_timer is 0, decrements the
 * others, and calls if_watchdog (when set) for those that just reached 0.
 * It re-arms itself with timeout() every hz / IFNET_SLOWHZ ticks.
 */
static void
if_slowtimo(void *arg)
{
	struct ifnet *ifp;
	int s = splimp();

	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if (ifp->if_timer == 0 || --ifp->if_timer)
			continue;
		if (ifp->if_watchdog)
			(*ifp->if_watchdog)(ifp);
	}
	IFNET_RUNLOCK();
	splx(s);
	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
}

/*
 * Map interface name to
 * interface structure pointer.
 *
 * Compares at most IFNAMSIZ bytes of 'name' against each if_xname under
 * the ifnet read lock and returns the first match.  The not-found result
 * is whatever TAILQ_FOREACH leaves in ifp after a complete pass
 * (presumably NULL -- callers in this file test for NULL/0).
 */
struct ifnet *
ifunit(const char *name)
{
	struct ifnet *ifp;

	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
			break;
	}
	IFNET_RUNLOCK();
	return (ifp);
}

/*
 * Hardware specific interface ioctls.
 *
 * Handles the interface-level requests that do not depend on a protocol.
 * 'data' is interpreted as a struct ifreq (struct ifstat for
 * SIOCGIFSTATUS).  "Set" requests require suser(td) to succeed; requests
 * that must be carried out by the driver are forwarded to ifp->if_ioctl
 * between IFF_LOCKGIANT/IFF_UNLOCKGIANT and fail with EOPNOTSUPP when the
 * driver has no ioctl routine.
 *
 * Returns 0 or an errno value; ENOIOCTL means the command is not handled
 * here and the caller should try elsewhere.
 */
static int
ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
{
	struct ifreq *ifr;
	struct ifstat *ifs;
	int error = 0;
-	int new_flags;
+	int new_flags, temp_flags;
	size_t namelen, onamelen;
	char new_name[IFNAMSIZ];
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCGIFINDEX:
		ifr->ifr_index = ifp->if_index;
		break;

	case SIOCGIFFLAGS:
		/* The flag word is split into low and high 16-bit halves. */
-		ifr->ifr_flags = ifp->if_flags & 0xffff;
-		ifr->ifr_flagshigh = ifp->if_flags >> 16;
+		temp_flags = ifp->if_flags | ifp->if_drv_flags;
+		ifr->ifr_flags = temp_flags & 0xffff;
+		ifr->ifr_flagshigh = temp_flags >> 16;
		break;

	case SIOCGIFCAP:
		ifr->ifr_reqcap = ifp->if_capabilities;
		ifr->ifr_curcap = ifp->if_capenable;
		break;

#ifdef MAC
	case SIOCGIFMAC:
		error = mac_ioctl_ifnet_get(td->td_ucred, ifr, ifp);
		break;
#endif

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFPHYS:
		ifr->ifr_phys = ifp->if_physical;
		break;

	case SIOCSIFFLAGS:
		error = suser(td);
		if (error)
			return (error);
+		/*
+		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
+		 * check, so we don't need special handling here yet.
+		 */
		new_flags = (ifr->ifr_flags & 0xffff) |
		    (ifr->ifr_flagshigh << 16);
		if (ifp->if_flags & IFF_SMART) {
			/* Smart drivers twiddle their own routes */
		} else if (ifp->if_flags & IFF_UP &&
		    (new_flags & IFF_UP) == 0) {
			int s = splimp();
			if_down(ifp);
			splx(s);
		} else if (new_flags & IFF_UP &&
		    (ifp->if_flags & IFF_UP) == 0) {
			int s = splimp();
			if_up(ifp);
			splx(s);
		}
		/* Keep the IFF_CANTCHANGE bits, take the rest from the request. */
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(new_flags &~ IFF_CANTCHANGE);
		if (new_flags & IFF_PPROMISC) {
			/* Permanently promiscuous mode requested */
			ifp->if_flags |= IFF_PROMISC;
		} else if (ifp->if_pcount == 0) {
			ifp->if_flags &= ~IFF_PROMISC;
		}
		/* The driver's result is deliberately ignored here. */
		if (ifp->if_ioctl != NULL) {
			IFF_LOCKGIANT(ifp);
			(void) (*ifp->if_ioctl)(ifp, cmd, data);
			IFF_UNLOCKGIANT(ifp);
		}
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFCAP:
		error = suser(td);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		/* Reject capabilities the interface does not advertise. */
		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
			return (EINVAL);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

#ifdef MAC
	case SIOCSIFMAC:
		error = mac_ioctl_ifnet_set(td->td_ucred, ifr, ifp);
		break;
#endif

	case SIOCSIFNAME:
		error = suser(td);
		if (error != 0)
			return (error);
		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
		if (error != 0)
			return (error);
		if (new_name[0] == '\0')
			return (EINVAL);
		if (ifunit(new_name) != NULL)
			return (EEXIST);

		/* Announce the departure of the interface. */
		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);

		log(LOG_INFO, "%s: changing name to '%s'\n",
		    ifp->if_xname, new_name);

		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
		ifa = ifaddr_byindex(ifp->if_index);
		IFA_LOCK(ifa);
		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
		namelen = strlen(new_name);
		onamelen = sdl->sdl_nlen;
		/*
		 * Move the address if needed.  This is safe because we
		 * allocate space for a name of length IFNAMSIZ when we
		 * create this in if_attach().
		 */
		if (namelen != onamelen) {
			bcopy(sdl->sdl_data + onamelen,
			    sdl->sdl_data + namelen, sdl->sdl_alen);
		}
		bcopy(new_name, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		/* Rebuild the netmask so it covers the new name's bytes. */
		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
		bzero(sdl->sdl_data, onamelen);
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		IFA_UNLOCK(ifa);

		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
		/* Announce the return of the interface. */
		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
		break;

	case SIOCSIFMETRIC:
		error = suser(td);
		if (error)
			return (error);
		ifp->if_metric = ifr->ifr_metric;
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYS:
		error = suser(td);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFMTU:
	{
		u_long oldmtu = ifp->if_mtu;

		error = suser(td);
		if (error)
			return (error);
		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
			return (EINVAL);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		if (error == 0) {
			getmicrotime(&ifp->if_lastchange);
			rt_ifmsg(ifp);
		}
		/*
		 * If the link MTU changed, do network layer specific procedure.
		 */
		if (ifp->if_mtu != oldmtu) {
#ifdef INET6
			nd6_setmtu(ifp);
#endif
		}
		break;
	}

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		error = suser(td);
		if (error)
			return (error);

		/* Don't allow group membership on non-multicast interfaces. */
		if ((ifp->if_flags & IFF_MULTICAST) == 0)
			return (EOPNOTSUPP);

		/* Don't let users screw up protocols' entries. */
		if (ifr->ifr_addr.sa_family != AF_LINK)
			return (EINVAL);

		if (cmd == SIOCADDMULTI) {
			struct ifmultiaddr *ifma;
			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
		} else {
			error = if_delmulti(ifp, &ifr->ifr_addr);
		}
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYADDR:
	case SIOCDIFPHYADDR:
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif
	case SIOCSLIFPHYADDR:
	case SIOCSIFMEDIA:
	case SIOCSIFGENERIC:
		error = suser(td);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCGIFSTATUS:
		ifs = (struct ifstat *)data;
		ifs->ascii[0] = '\0';
		/* No break: continues into the shared driver "get" path. */

	case SIOCGIFPSRCADDR:
	case SIOCGIFPDSTADDR:
	case SIOCGLIFPHYADDR:
	case SIOCGIFMEDIA:
	case SIOCGIFGENERIC:
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		IFF_UNLOCKGIANT(ifp);
		break;

	case SIOCSIFLLADDR:
		error = suser(td);
		if (error)
			return (error);
		error = if_setlladdr(ifp,
		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
		break;

	default:
		error = ENOIOCTL;
		break;
	}
	return (error);
}

/*
 * Interface ioctls.
 *
 * Socket-level entry point.  SIOCGIFCONF/OSIOCGIFCONF and the clone
 * requests are handled before any interface lookup.  Otherwise the
 * interface named in the ifreq is looked up (ENXIO if absent) and the
 * request is offered to ifhwioctl(); only when that returns ENOIOCTL is
 * it passed to the socket protocol's pru_control routine.  With
 * COMPAT_43 the old-style commands are translated before the call and
 * the old sockaddr layout is restored in the reply afterwards.
 *
 * If the protocol call changed IFF_UP and the interface is now up,
 * in6_if_up() is run (INET6 only).
 *
 * Returns 0 or an errno value.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
{
	struct ifnet *ifp;
	struct ifreq *ifr;
	int error;
	int oif_flags;

	switch (cmd) {
	case SIOCGIFCONF:
	case OSIOCGIFCONF:
		return (ifconf(cmd, data));
	}
	ifr = (struct ifreq *)data;

	switch (cmd) {
	case SIOCIFCREATE:
	case SIOCIFDESTROY:
		if ((error = suser(td)) != 0)
			return (error);
		return ((cmd == SIOCIFCREATE) ?
			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
			if_clone_destroy(ifr->ifr_name));

	case SIOCIFGCLONERS:
		return (if_clone_list((struct if_clonereq *)data));
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == 0)
		return (ENXIO);

	error = ifhwioctl(cmd, ifp, data, td);
	if (error != ENOIOCTL)
		return (error);

	/* Snapshot the flags so an IFF_UP change can be detected below. */
	oif_flags = ifp->if_flags;
	if (so->so_proto == 0)
		return (EOPNOTSUPP);
#ifndef COMPAT_43
	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
								 data,
								 ifp, td));
#else
	{
		int ocmd = cmd;

		switch (cmd) {

		case SIOCSIFDSTADDR:
		case SIOCSIFADDR:
		case SIOCSIFBRDADDR:
		case SIOCSIFNETMASK:
#if BYTE_ORDER != BIG_ENDIAN
			if (ifr->ifr_addr.sa_family == 0 &&
			    ifr->ifr_addr.sa_len < 16) {
				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
				ifr->ifr_addr.sa_len = 16;
			}
#else
			if (ifr->ifr_addr.sa_len == 0)
				ifr->ifr_addr.sa_len = 16;
#endif
			break;

		case OSIOCGIFADDR:
			cmd = SIOCGIFADDR;
			break;

		case OSIOCGIFDSTADDR:
			cmd = SIOCGIFDSTADDR;
			break;

		case OSIOCGIFBRDADDR:
			cmd = SIOCGIFBRDADDR;
			break;

		case OSIOCGIFNETMASK:
			cmd = SIOCGIFNETMASK;
		}
		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
								   cmd,
								   data,
								   ifp, td));
		switch (ocmd) {

		case OSIOCGIFADDR:
		case OSIOCGIFDSTADDR:
		case OSIOCGIFBRDADDR:
		case OSIOCGIFNETMASK:
			/* Old layout: a 16-bit family where len+family now sit. */
			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;

		}
	}
#endif /* COMPAT_43 */

	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
#ifdef INET6
		DELAY(100);/* XXX: temporary workaround for fxp issue*/
		if (ifp->if_flags & IFF_UP) {
			int s = splimp();
			in6_if_up(ifp);
			splx(s);
		}
#endif
	}
	return (error);
}

/*
- * The code common to hadling reference counted flags,
+ * The code common to handling reference counted flags,
 * e.g., in ifpromisc() and if_allmulti().
 * The "pflag" argument can specify a permanent mode flag,
 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
+ *
+ * Only to be used on stack-owned flags, not driver-owned flags.
*/
/*
 * Arguments:
 *	ifp      - interface whose flags are being changed
 *	flag     - the IFF_* flag to reference count (must not be one of
 *	           the driver-owned IFF_DRV_* flags)
 *	pflag    - permanent-mode flag, or 0; while it is set in if_flags
 *	           only the reference count is adjusted
 *	refcount - pointer to the counter associated with 'flag'
 *	onswitch - non-zero to take a reference, zero to drop one
 *
 * Returns 0 on success, EINVAL if the reference count fails the sanity
 * checks, EOPNOTSUPP if the interface has no if_ioctl routine, or the
 * error returned by the driver's SIOCSIFFLAGS handler.  When the driver
 * call cannot be made or fails, the reference count and if_flags are
 * restored to the values they had on entry.
 */
static int
if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
{
	struct ifreq ifr;
	int error;
	int oldflags, oldcount;

	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
	    ("if_setflag: setting driver-owned flag %d", flag));

	/* Sanity checks to catch programming errors */
	if (onswitch) {
		if (*refcount < 0) {
			if_printf(ifp,
			    "refusing to increment negative refcount %d "
			    "for interface flag %d\n", *refcount, flag);
			return (EINVAL);
		}
	} else {
		if (*refcount <= 0) {
			if_printf(ifp,
			    "refusing to decrement non-positive refcount %d "
			    "for interface flag %d\n", *refcount, flag);
			return (EINVAL);
		}
	}

	/* In case this mode is permanent, just touch refcount */
	if (ifp->if_flags & pflag) {
		*refcount += onswitch ? 1 : -1;
		return (0);
	}

	/* Save ifnet parameters for if_ioctl() may fail */
	oldcount = *refcount;
	oldflags = ifp->if_flags;

	/*
	 * See if we aren't the only and touching refcount is enough.
	 * Actually toggle interface flag if we are the first or last.
	 */
	if (onswitch) {
		if ((*refcount)++)
			return (0);
		ifp->if_flags |= flag;
	} else {
		if (--(*refcount))
			return (0);
		ifp->if_flags &= ~flag;
	}

	/* Call down the driver since we've changed interface flags */
	if (ifp->if_ioctl == NULL) {
		error = EOPNOTSUPP;
		goto recover;
	}
	/* The 32-bit flag word travels as two 16-bit halves in the ifreq. */
	ifr.ifr_flags = ifp->if_flags & 0xffff;
	ifr.ifr_flagshigh = ifp->if_flags >> 16;
	IFF_LOCKGIANT(ifp);
	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
	IFF_UNLOCKGIANT(ifp);
	if (error)
		goto recover;
	/* Notify userland that interface flags have changed */
	rt_ifmsg(ifp);
	return (0);

recover:
	/* Recover after driver error */
	*refcount = oldcount;
	ifp->if_flags = oldflags;
	return (error);
}

/*
 * Set/clear promiscuous mode on interface ifp based on the truth value
 * of pswitch.  The calls are reference counted so that only the first
 * "on" request actually has an effect, as does the final "off" request.
 * Results are undefined if the "off" and "on" requests are not matched.
*/ int ifpromisc(struct ifnet *ifp, int pswitch) { int error; int oldflags = ifp->if_flags; error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC, &ifp->if_pcount, pswitch); /* If promiscuous mode status has changed, log a message */ if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC)) log(LOG_INFO, "%s: promiscuous mode %s\n", ifp->if_xname, (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled"); return (error); } /* * Return interface configuration * of system. List may be used * in later ioctl's (above) to get * other information. */ /*ARGSUSED*/ static int ifconf(u_long cmd, caddr_t data) { struct ifconf *ifc = (struct ifconf *)data; struct ifnet *ifp; struct ifaddr *ifa; struct ifreq ifr; struct sbuf *sb; int error, full = 0, valid_len, max_len; /* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */ max_len = MAXPHYS - 1; /* Prevent hostile input from being able to crash the system */ if (ifc->ifc_len <= 0) return (EINVAL); again: if (ifc->ifc_len <= max_len) { max_len = ifc->ifc_len; full = 1; } sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN); max_len = 0; valid_len = 0; IFNET_RLOCK(); /* could sleep XXX */ TAILQ_FOREACH(ifp, &ifnet, if_link) { int addrs; /* * Zero the ifr_name buffer to make sure we don't * disclose the contents of the stack. 
*/ memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name)); if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) >= sizeof(ifr.ifr_name)) return (ENAMETOOLONG); addrs = 0; TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa = ifa->ifa_addr; if (jailed(curthread->td_ucred) && prison_if(curthread->td_ucred, sa)) continue; addrs++; #ifdef COMPAT_43 if (cmd == OSIOCGIFCONF) { struct osockaddr *osa = (struct osockaddr *)&ifr.ifr_addr; ifr.ifr_addr = *sa; osa->sa_family = sa->sa_family; sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); } else #endif if (sa->sa_len <= sizeof(*sa)) { ifr.ifr_addr = *sa; sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); } else { sbuf_bcat(sb, &ifr, offsetof(struct ifreq, ifr_addr)); max_len += offsetof(struct ifreq, ifr_addr); sbuf_bcat(sb, sa, sa->sa_len); max_len += sa->sa_len; } if (!sbuf_overflowed(sb)) valid_len = sbuf_len(sb); } if (addrs == 0) { bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr)); sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); if (!sbuf_overflowed(sb)) valid_len = sbuf_len(sb); } } IFNET_RUNLOCK(); /* * If we didn't allocate enough space (uncommon), try again. If * we have already allocated as much space as we are allowed, * return what we've got. */ if (valid_len != max_len && !full) { sbuf_delete(sb); goto again; } ifc->ifc_len = valid_len; sbuf_finish(sb); error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len); sbuf_delete(sb); return (error); } /* * Just like ifpromisc(), but for all-multicast-reception mode. */ int if_allmulti(struct ifnet *ifp, int onswitch) { return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch)); } static struct ifmultiaddr * if_findmulti(struct ifnet *ifp, struct sockaddr *sa) { struct ifmultiaddr *ifma; IF_ADDR_LOCK_ASSERT(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (sa_equal(ifma->ifma_addr, sa)) break; } return ifma; } /* * Allocate a new ifmultiaddr and initialize based on passed arguments. 
We * make copies of passed sockaddrs. The ifmultiaddr will not be added to * the ifnet multicast address list here, so the caller must do that and * other setup work (such as notifying the device driver). The reference * count is initialized to 1. */ static struct ifmultiaddr * if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa, int mflags) { struct ifmultiaddr *ifma; struct sockaddr *dupsa; MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, mflags | M_ZERO); if (ifma == NULL) return (NULL); MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { FREE(ifma, M_IFMADDR); return (NULL); } bcopy(sa, dupsa, sa->sa_len); ifma->ifma_addr = dupsa; ifma->ifma_ifp = ifp; ifma->ifma_refcount = 1; ifma->ifma_protospec = NULL; if (llsa == NULL) { ifma->ifma_lladdr = NULL; return (ifma); } MALLOC(dupsa, struct sockaddr *, llsa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { FREE(ifma->ifma_addr, M_IFMADDR); FREE(ifma, M_IFMADDR); return (NULL); } bcopy(llsa, dupsa, llsa->sa_len); ifma->ifma_lladdr = dupsa; return (ifma); } /* * if_freemulti: free ifmultiaddr structure and possibly attached related * addresses. The caller is responsible for implementing reference * counting, notifying the driver, handling routing messages, and releasing * any dependent link layer state. */ static void if_freemulti(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 1, ("if_freemulti: refcount %d", ifma->ifma_refcount)); KASSERT(ifma->ifma_protospec == NULL, ("if_freemulti: protospec not NULL")); if (ifma->ifma_lladdr != NULL) FREE(ifma->ifma_lladdr, M_IFMADDR); FREE(ifma->ifma_addr, M_IFMADDR); FREE(ifma, M_IFMADDR); } /* * Register an additional multicast address with a network interface. * * - If the address is already present, bump the reference count on the * address and return. * - If the address is not link-layer, look up a link layer address. 
* - Allocate address structures for one or both addresses, and attach to the
 *   multicast address list on the interface.  If automatically adding a link
 *   layer address, the protocol address will own a reference to the link
 *   layer address, to be freed when it is freed.
 * - Notify the network device driver of an addition to the multicast address
 *   list.
 *
 * 'sa' points to caller-owned memory with the desired multicast address.
 *
 * 'retifma' will be used to return a pointer to the resulting multicast
 * address reference, if desired.
 *
 * Returns 0 on success, the error from the interface's if_resolvemulti
 * routine, or ENOMEM if an address structure cannot be allocated.
 */
int
if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
    struct ifmultiaddr **retifma)
{
	struct ifmultiaddr *ifma, *ll_ifma;
	struct sockaddr *llsa;
	int error;

	/*
	 * If the address is already present, return a new reference to it;
	 * otherwise, allocate storage and set up a new address.
	 */
	IF_ADDR_LOCK(ifp);
	ifma = if_findmulti(ifp, sa);
	if (ifma != NULL) {
		ifma->ifma_refcount++;
		if (retifma != NULL)
			*retifma = ifma;
		IF_ADDR_UNLOCK(ifp);
		return (0);
	}

	/*
	 * The address isn't already present; resolve the protocol address
	 * into a link layer address, and then look that up, bump its
	 * refcount or allocate an ifma for that also.  If 'llsa' was
	 * returned, we will need to free it later.
	 */
	llsa = NULL;
	ll_ifma = NULL;
	if (ifp->if_resolvemulti != NULL) {
		error = ifp->if_resolvemulti(ifp, &llsa, sa);
		if (error)
			goto unlock_out;
	}

	/*
	 * Allocate the new address.  Don't hook it up yet, as we may also
	 * need to allocate a link layer multicast address.
	 */
	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
	if (ifma == NULL) {
		error = ENOMEM;
		goto free_llsa_out;
	}

	/*
	 * If a link layer address is found, we'll need to see if it's
	 * already present in the address list, or allocate it as well.
	 * When this block finishes, the link layer address will be on the
	 * list.
	 */
	if (llsa != NULL) {
		ll_ifma = if_findmulti(ifp, llsa);
		if (ll_ifma == NULL) {
			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
			if (ll_ifma == NULL) {
				/* ifma was never linked in, so just free it. */
				if_freemulti(ifma);
				error = ENOMEM;
				goto free_llsa_out;
			}
			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
			    ifma_link);
		} else
			ll_ifma->ifma_refcount++;
	}

	/*
	 * We now have a new multicast address, ifma, and possibly a new or
	 * referenced link layer address.  Add the primary address to the
	 * ifnet address list.
	 */
	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);

	if (retifma != NULL)
		*retifma = ifma;

	/*
	 * Must generate the message while holding the lock so that 'ifma'
	 * pointer is still valid.
	 *
	 * XXXRW: How come we don't announce ll_ifma?
	 */
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
	IF_ADDR_UNLOCK(ifp);

	/*
	 * We are certain we have added something, so call down to the
	 * interface to let them know about it.  The driver's return value
	 * is deliberately ignored.
	 */
	if (ifp->if_ioctl != NULL) {
		IFF_LOCKGIANT(ifp);
		(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
		IFF_UNLOCKGIANT(ifp);
	}

	/* if_allocmulti() made its own copies, so llsa is no longer needed. */
	if (llsa != NULL)
		FREE(llsa, M_IFMADDR);

	return (0);

free_llsa_out:
	if (llsa != NULL)
		FREE(llsa, M_IFMADDR);

unlock_out:
	IF_ADDR_UNLOCK(ifp);
	return (error);
}

/*
 * Remove a reference to a multicast address on this interface.  Yell
 * if the request does not match an existing membership.
 *
 * Returns ENOENT if 'sa' is not on the interface's multicast list and 0
 * otherwise.  The driver is only notified (SIOCDELMULTI) when the last
 * reference went away and the entry was actually removed.
 */
int
if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
{
	struct ifmultiaddr *ifma, *ll_ifma;

	IF_ADDR_LOCK(ifp);
	ifma = if_findmulti(ifp, sa);
	if (ifma == NULL) {
		IF_ADDR_UNLOCK(ifp);
		return ENOENT;
	}

	/* Other references remain: just drop ours. */
	if (ifma->ifma_refcount > 1) {
		ifma->ifma_refcount--;
		IF_ADDR_UNLOCK(ifp);
		return 0;
	}

	/* Find the link layer entry this address references, if any. */
	sa = ifma->ifma_lladdr;
	if (sa != NULL)
		ll_ifma = if_findmulti(ifp, sa);
	else
		ll_ifma = NULL;

	/*
	 * XXXRW: How come we don't announce ll_ifma?
	 */
	rt_newmaddrmsg(RTM_DELMADDR, ifma);

	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	if_freemulti(ifma);

	/* Drop the reference held on the link layer entry as well. */
	if (ll_ifma != NULL) {
		if (ll_ifma->ifma_refcount == 1) {
			TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifma_link);
			if_freemulti(ll_ifma);
		} else
			ll_ifma->ifma_refcount--;
	}
	IF_ADDR_UNLOCK(ifp);

	/*
	 * Make sure the interface driver is notified
	 * in the case of a link layer mcast group being left.
	 */
	if (ifp->if_ioctl) {
		IFF_LOCKGIANT(ifp);
		(void) (*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
		IFF_UNLOCKGIANT(ifp);
	}

	return 0;
}

/*
 * Set the link layer address on an interface.
 *
 * At this time we only support certain types of interfaces,
 * and we don't allow the length of the address to change.
 *
 * Returns 0 on success, EINVAL if the interface has no link-level
 * address entry or 'len' differs from the current address length, and
 * ENODEV for interface types not handled below.
 */
int
if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
{
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;
	struct ifreq ifr;

	ifa = ifaddr_byindex(ifp->if_index);
	if (ifa == NULL)
		return (EINVAL);
	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
	if (sdl == NULL)
		return (EINVAL);
	if (len != sdl->sdl_alen)	/* don't allow length to change */
		return (EINVAL);
	switch (ifp->if_type) {
	case IFT_ETHER:			/* these types use struct arpcom */
	case IFT_FDDI:
	case IFT_XETHER:
	case IFT_ISO88025:
	case IFT_L2VLAN:
	case IFT_BRIDGE:
		bcopy(lladdr, IFP2ENADDR(ifp), len);
		/*
		 * XXX We also need to store the lladdr in LLADDR(sdl),
		 * which is done below. This is a pain because we must
		 * remember to keep the info in sync.
		 */
		/* FALLTHROUGH */
	case IFT_ARCNET:
		bcopy(lladdr, LLADDR(sdl), len);
		break;
	default:
		return (ENODEV);
	}
	/*
	 * If the interface is already up, we need
	 * to re-init it in order to reprogram its
	 * address filter.
	 */
	if ((ifp->if_flags & IFF_UP) != 0) {
		if (ifp->if_ioctl) {
			IFF_LOCKGIANT(ifp);
			/* Bounce the interface: first down ... */
			ifp->if_flags &= ~IFF_UP;
			ifr.ifr_flags = ifp->if_flags & 0xffff;
			ifr.ifr_flagshigh = ifp->if_flags >> 16;
			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
			/* ... then back up again. */
			ifp->if_flags |= IFF_UP;
			ifr.ifr_flags = ifp->if_flags & 0xffff;
			ifr.ifr_flagshigh = ifp->if_flags >> 16;
			(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
			IFF_UNLOCKGIANT(ifp);
		}
#ifdef INET
		/*
		 * Also send gratuitous ARPs to notify other nodes about
		 * the address change.
		 */
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr != NULL &&
			    ifa->ifa_addr->sa_family == AF_INET)
				arp_ifinit(ifp, ifa);
		}
#endif
	}
	return (0);
}

/*
 * The name argument must be a pointer to storage which will last as
 * long as the interface does.  For physical devices, the result of
 * device_get_name(dev) is a good choice and for pseudo-devices a
 * static string works well.
 *
 * The external name if_xname is built as "<name><unit>", or is just a
 * copy of 'name' when unit is IF_DUNIT_NONE; either way it is limited
 * to IFNAMSIZ bytes.
 */
void
if_initname(struct ifnet *ifp, const char *name, int unit)
{
	ifp->if_dname = name;
	ifp->if_dunit = unit;
	if (unit != IF_DUNIT_NONE)
		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
	else
		strlcpy(ifp->if_xname, name, IFNAMSIZ);
}

/*
 * printf() variant that prefixes the message with "<if_xname>: ".
 * Returns the sum of the values returned by the two underlying printf
 * calls.
 */
int
if_printf(struct ifnet *ifp, const char * fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("%s: ", ifp->if_xname);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}

/*
 * When an interface is marked IFF_NEEDSGIANT, its if_start() routine cannot
 * be called without Giant.  However, we often can't acquire the Giant lock
 * at those points; instead, we run it via a task queue that holds Giant via
 * if_start_deferred.
 *
 * XXXRW: We need to make sure that the ifnet isn't fully detached until any
 * outstanding if_start_deferred() tasks that will run after the free.  This
 * probably means waiting in if_detach().
*/ void if_start(struct ifnet *ifp) { NET_ASSERT_GIANT(); if ((ifp->if_flags & IFF_NEEDSGIANT) != 0 && debug_mpsafenet != 0) { if (mtx_owned(&Giant)) (*(ifp)->if_start)(ifp); else taskqueue_enqueue(taskqueue_swi_giant, &ifp->if_starttask); } else (*(ifp)->if_start)(ifp); } static void if_start_deferred(void *context, int pending) { struct ifnet *ifp; /* * This code must be entered with Giant, and should never run if * we're not running with debug.mpsafenet. */ KASSERT(debug_mpsafenet != 0, ("if_start_deferred: debug.mpsafenet")); GIANT_REQUIRED; ifp = context; (ifp->if_start)(ifp); } int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust) { int active = 0; IF_LOCK(ifq); if (_IF_QFULL(ifq)) { _IF_DROP(ifq); IF_UNLOCK(ifq); m_freem(m); return (0); } if (ifp != NULL) { ifp->if_obytes += m->m_pkthdr.len + adjust; if (m->m_flags & (M_BCAST|M_MCAST)) ifp->if_omcasts++; - active = ifp->if_flags & IFF_OACTIVE; + active = ifp->if_drv_flags & IFF_DRV_OACTIVE; } _IF_ENQUEUE(ifq, m); IF_UNLOCK(ifq); if (ifp != NULL && !active) if_start(ifp); return (1); } void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f) { KASSERT(if_com_alloc[type] == NULL, ("if_register_com_alloc: %d already registered", type)); KASSERT(if_com_free[type] == NULL, ("if_register_com_alloc: %d free already registered", type)); if_com_alloc[type] = a; if_com_free[type] = f; } void if_deregister_com_alloc(u_char type) { KASSERT(if_com_alloc[type] == NULL, ("if_deregister_com_alloc: %d not registered", type)); KASSERT(if_com_free[type] == NULL, ("if_deregister_com_alloc: %d free not registered", type)); if_com_alloc[type] = NULL; if_com_free[type] = NULL; } Index: stable/6/sys/net/if.h =================================================================== --- stable/6/sys/net/if.h (revision 149441) +++ stable/6/sys/net/if.h (revision 149442) @@ -1,363 +1,388 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. 
All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_H_ #define _NET_IF_H_ #include #ifdef _KERNEL #include #endif #if __BSD_VISIBLE /* * <net/if.h> does not depend on <sys/time.h> on most other systems. This * helps userland compatibility. (struct timeval ifi_lastchange) */ #ifndef _KERNEL #include #endif struct ifnet; #endif /* * Length of interface external name, including terminating '\0'. * Note: this is the same size as a generic device's external name. 
*/ #define IF_NAMESIZE 16 #if __BSD_VISIBLE #define IFNAMSIZ IF_NAMESIZE #define IF_MAXUNIT 0x7fff /* historical value */ #endif #if __BSD_VISIBLE /* * Structure used to query names of interface cloners. */ struct if_clonereq { int ifcr_total; /* total cloners (out) */ int ifcr_count; /* room for this many in user buffer */ char *ifcr_buffer; /* buffer for cloner names */ }; /* * Structure describing information about an interface * which may be of interest to management entities. */ struct if_data { /* generic interface information */ u_char ifi_type; /* ethernet, tokenring, etc */ u_char ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */ u_char ifi_addrlen; /* media address length */ u_char ifi_hdrlen; /* media header length */ u_char ifi_link_state; /* current link state */ u_char ifi_recvquota; /* polling quota for receive intrs */ u_char ifi_xmitquota; /* polling quota for xmit intrs */ u_char ifi_datalen; /* length of this data struct */ u_long ifi_mtu; /* maximum transmission unit */ u_long ifi_metric; /* routing metric (external only) */ u_long ifi_baudrate; /* linespeed */ /* volatile statistics */ u_long ifi_ipackets; /* packets received on interface */ u_long ifi_ierrors; /* input errors on interface */ u_long ifi_opackets; /* packets sent on interface */ u_long ifi_oerrors; /* output errors on interface */ u_long ifi_collisions; /* collisions on csma interfaces */ u_long ifi_ibytes; /* total number of octets received */ u_long ifi_obytes; /* total number of octets sent */ u_long ifi_imcasts; /* packets received via multicast */ u_long ifi_omcasts; /* packets sent via multicast */ u_long ifi_iqdrops; /* dropped on input, this interface */ u_long ifi_noproto; /* destined for unsupported protocol */ u_long ifi_hwassist; /* HW offload capabilities */ time_t ifi_epoch; /* uptime at attach or stat reset */ #ifdef __alpha__ u_int ifi_timepad; /* time_t is int, not long on alpha */ #endif struct timeval ifi_lastchange; /* time of last administrative change 
*/ }; +/* + * Interface flags are of two types: network stack owned flags, and driver + * owned flags. Historically, these values were stored in the same ifnet + * flags field, but with the advent of fine-grained locking, they have been + * broken out such that the network stack is responsible for synchronizing + * the stack-owned fields, and the device driver the device-owned fields. + * Both halves can perform lockless reads of the other half's field, subject + * to accepting the involved races. + * + * Both sets of flags come from the same number space, and should not be + * permitted to conflict, as they are exposed to user space via a single + * field. + * + * For historical reasons, the old flag names for driver flags are exposed to + * user space. + */ #define IFF_UP 0x1 /* interface is up */ #define IFF_BROADCAST 0x2 /* broadcast address valid */ #define IFF_DEBUG 0x4 /* turn on debugging */ #define IFF_LOOPBACK 0x8 /* is a loopback net */ #define IFF_POINTOPOINT 0x10 /* interface is point-to-point link */ #define IFF_SMART 0x20 /* interface manages own routes */ -#define IFF_RUNNING 0x40 /* resources allocated */ +#define IFF_DRV_RUNNING 0x40 /* resources allocated */ #define IFF_NOARP 0x80 /* no address resolution protocol */ #define IFF_PROMISC 0x100 /* receive all packets */ #define IFF_ALLMULTI 0x200 /* receive all multicast packets */ -#define IFF_OACTIVE 0x400 /* tx hardware queue is full */ +#define IFF_DRV_OACTIVE 0x400 /* tx hardware queue is full */ #define IFF_SIMPLEX 0x800 /* can't hear own transmissions */ #define IFF_LINK0 0x1000 /* per link layer defined bit */ #define IFF_LINK1 0x2000 /* per link layer defined bit */ #define IFF_LINK2 0x4000 /* per link layer defined bit */ #define IFF_ALTPHYS IFF_LINK2 /* use alternate physical connection */ #define IFF_MULTICAST 0x8000 /* supports multicast */ #define IFF_POLLING 0x10000 /* Interface is in polling mode. 
*/ #define IFF_PPROMISC 0x20000 /* user-requested promisc mode */ #define IFF_MONITOR 0x40000 /* user-requested monitor mode */ #define IFF_STATICARP 0x80000 /* static ARP */ #define IFF_NEEDSGIANT 0x100000 /* hold Giant over if_start calls */ +/* + * Old names for driver flags so that user space tools can continue to use + * the old names. + */ +#ifndef _KERNEL +#define IFF_RUNNING IFF_DRV_RUNNING +#define IFF_OACTIVE IFF_DRV_OACTIVE +#endif + /* flags set internally only: */ #define IFF_CANTCHANGE \ - (IFF_BROADCAST|IFF_POINTOPOINT|IFF_RUNNING|IFF_OACTIVE|\ + (IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_SMART|IFF_PROMISC|\ IFF_POLLING) /* * Values for if_link_state. */ #define LINK_STATE_UNKNOWN 0 /* link invalid/unknown */ #define LINK_STATE_DOWN 1 /* link is down */ #define LINK_STATE_UP 2 /* link is up */ /* * Some convenience macros used for setting ifi_baudrate. * XXX 1000 vs. 1024? --thorpej@netbsd.org */ #define IF_Kbps(x) ((x) * 1000) /* kilobits/sec. */ #define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */ #define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. */ /* Capabilities that interfaces can advertise. 
*/ #define IFCAP_RXCSUM 0x0001 /* can offload checksum on RX */ #define IFCAP_TXCSUM 0x0002 /* can offload checksum on TX */ #define IFCAP_NETCONS 0x0004 /* can be a network console */ #define IFCAP_VLAN_MTU 0x0008 /* VLAN-compatible MTU */ #define IFCAP_VLAN_HWTAGGING 0x0010 /* hardware VLAN tag support */ #define IFCAP_JUMBO_MTU 0x0020 /* 9000 byte MTU supported */ #define IFCAP_POLLING 0x0040 /* driver supports polling */ #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) #define IFQ_MAXLEN 50 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ /* * Message format for use in obtaining information about interfaces * from getkerninfo and the routing socket */ struct if_msghdr { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ struct if_data ifm_data;/* statistics and other data about if */ }; /* * Message format for use in obtaining information about interface addresses * from getkerninfo and the routing socket */ struct ifa_msghdr { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ int ifam_metric; /* value of ifa_metric */ }; /* * Message format for use in obtaining information about multicast addresses * from the routing socket */ struct ifma_msghdr { u_short ifmam_msglen; /* to skip over non-understood messages */ u_char ifmam_version; /* future binary compatibility */ u_char ifmam_type; /* message type */ int ifmam_addrs; /* like rtm_addrs */ int ifmam_flags; /* value of ifa_flags */ u_short ifmam_index; /* index for associated ifp */ }; /* * Message format announcing the arrival or departure of a 
network interface. */ struct if_announcemsghdr { u_short ifan_msglen; /* to skip over non-understood messages */ u_char ifan_version; /* future binary compatibility */ u_char ifan_type; /* message type */ u_short ifan_index; /* index for associated ifp */ char ifan_name[IFNAMSIZ]; /* if name, e.g. "en0" */ u_short ifan_what; /* what type of announcement */ }; #define IFAN_ARRIVAL 0 /* interface arrival */ #define IFAN_DEPARTURE 1 /* interface departure */ /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq { char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; short ifru_flags[2]; short ifru_index; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; caddr_t ifru_data; int ifru_cap[2]; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ #define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ #define ifr_flags ifr_ifru.ifru_flags[0] /* flags (low 16 bits) */ #define ifr_flagshigh ifr_ifru.ifru_flags[1] /* flags (high 16 bits) */ #define ifr_metric ifr_ifru.ifru_metric /* metric */ #define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ #define ifr_phys ifr_ifru.ifru_phys /* physical wire */ #define ifr_media ifr_ifru.ifru_media /* physical media */ #define ifr_data ifr_ifru.ifru_data /* for use by interface */ #define ifr_reqcap ifr_ifru.ifru_cap[0] /* requested capabilities */ #define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ #define ifr_index ifr_ifru.ifru_index /* interface index */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ ((ifr).ifr_addr.sa_len > sizeof(struct sockaddr) ? 
\ (sizeof(struct ifreq) - sizeof(struct sockaddr) + \ (ifr).ifr_addr.sa_len) : sizeof(struct ifreq)) struct ifaliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; }; struct ifmediareq { char ifm_name[IFNAMSIZ]; /* if name, e.g. "en0" */ int ifm_current; /* current media options */ int ifm_mask; /* don't care mask */ int ifm_status; /* media status */ int ifm_active; /* active options */ int ifm_count; /* # entries in ifm_ulist array */ int *ifm_ulist; /* media words */ }; struct ifdrv { char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ unsigned long ifd_cmd; size_t ifd_len; void *ifd_data; }; /* * Structure used to retrieve aux status data from interfaces. * Kernel suppliers to this interface should respect the formatting * needed by ifconfig(8): each line starts with a TAB and ends with * a newline. The canonical example to copy and paste is in if_tun.c. */ #define IFSTATMAX 800 /* 10 lines of text */ struct ifstat { char ifs_name[IFNAMSIZ]; /* if name, e.g. "en0" */ char ascii[IFSTATMAX + 1]; }; /* * Structure used in SIOCGIFCONF request. * Used to retrieve interface configuration * for machine (useful for programs which * must know all networks accessible). 
*/ struct ifconf { int ifc_len; /* size of associated buffer */ union { caddr_t ifcu_buf; struct ifreq *ifcu_req; } ifc_ifcu; #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */ }; /* * Structure for SIOC[AGD]LIFADDR */ struct if_laddrreq { char iflr_name[IFNAMSIZ]; u_int flags; #define IFLR_PREFIX 0x8000 /* in: prefix given out: kernel fills id */ u_int prefixlen; /* in/out */ struct sockaddr_storage addr; /* in/out */ struct sockaddr_storage dstaddr; /* out */ }; #endif /* __BSD_VISIBLE */ #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IFADDR); MALLOC_DECLARE(M_IFMADDR); #endif #endif #ifndef _KERNEL struct if_nameindex { unsigned int if_index; /* 1, 2, ... */ char *if_name; /* null terminated name: "le0", ... */ }; __BEGIN_DECLS void if_freenameindex(struct if_nameindex *); char *if_indextoname(unsigned int, char *); struct if_nameindex *if_nameindex(void); unsigned int if_nametoindex(const char *); __END_DECLS #endif #ifdef _KERNEL struct thread; /* XXX - this should go away soon. */ #include #endif #endif /* !_NET_IF_H_ */ Index: stable/6/sys/net/if_var.h =================================================================== --- stable/6/sys/net/if_var.h (revision 149441) +++ stable/6/sys/net/if_var.h (revision 149442) @@ -1,668 +1,672 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_VAR_H_ #define _NET_IF_VAR_H_ /* * Structures defining a network interface, providing a packet * transport mechanism (ala level 0 of the PUP protocols). * * Each interface accepts output datagrams of a specified maximum * length, and provides higher level routines with input datagrams * received from its medium. * * Output occurs when the routine if_output is called, with three parameters: * (*ifp->if_output)(ifp, m, dst, rt) * Here m is the mbuf chain to be sent and dst is the destination address. * The output routine encapsulates the supplied datagram if necessary, * and then transmits it on its medium. * * On input, each interface unwraps the data received by it, and either * places it on the input queue of an internetwork datagram routine * and posts the associated software interrupt, or passes the datagram to a raw * packet input routine. 
* * Routines exist for locating interfaces by their addresses * or for locating an interface on a certain network, as well as more general * routing and gateway routines maintaining information used to locate * interfaces. These routines live in the files if.c and route.c */ #ifdef __STDC__ /* * Forward structure declarations for function prototypes [sic]. */ struct mbuf; struct thread; struct rtentry; struct rt_addrinfo; struct socket; struct ether_header; struct carp_if; #endif #include /* get TAILQ macros */ #ifdef _KERNEL #include #include #endif /* _KERNEL */ #include /* XXX */ #include /* XXX */ #include /* XXX */ #include #define IF_DUNIT_NONE -1 #include TAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */ TAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */ TAILQ_HEAD(ifprefixhead, ifprefix); TAILQ_HEAD(ifmultihead, ifmultiaddr); /* * Structure defining a queue for a network interface. */ /* ifq_head and ifq_tail are the two ends of a list linked through m_nextpkt; ifq_len counts the queued packets, ifq_maxlen is the limit tested by _IF_QFULL, ifq_drops is the counter bumped by _IF_DROP, and ifq_mtx is the mutex taken by IF_LOCK. */ struct ifqueue { struct mbuf *ifq_head; struct mbuf *ifq_tail; int ifq_len; int ifq_maxlen; int ifq_drops; struct mtx ifq_mtx; }; /* * Structure defining a network interface. * * (Would like to call this struct ``if'', but C isn't PL/1.) */ struct ifnet { void *if_softc; /* pointer to driver state */ void *if_l2com; /* pointer to protocol bits */ TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */ char if_xname[IFNAMSIZ]; /* external name (name + unit) */ const char *if_dname; /* driver name */ int if_dunit; /* unit or IF_DUNIT_NONE */ struct ifaddrhead if_addrhead; /* linked list of addresses per if */ /* * if_addrhead is the list of all addresses associated to * an interface. * Some code in the kernel assumes that first element * of the list has type AF_LINK, and contains sockaddr_dl * addresses which store the link-level address and the name * of the interface. * However, access to the AF_LINK address through this * field is deprecated. Use ifaddr_byindex() instead.
*/ struct knlist if_klist; /* events attached to this if */ int if_pcount; /* number of promiscuous listeners */ struct carp_if *if_carp; /* carp interface structure */ struct bpf_if *if_bpf; /* packet filter structure */ u_short if_index; /* numeric abbreviation for this if */ short if_timer; /* time 'til if_watchdog called */ u_short if_nvlans; /* number of active vlans */ int if_flags; /* up/down, broadcast, etc. */ int if_capabilities; /* interface capabilities */ int if_capenable; /* enabled features */ void *if_linkmib; /* link-type-specific MIB data */ size_t if_linkmiblen; /* length of above data */ struct if_data if_data; struct ifmultihead if_multiaddrs; /* multicast addresses configured */ int if_amcount; /* number of all-multicast requests */ /* procedure handles */ int (*if_output) /* output routine (enqueue) */ (struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); void (*if_input) /* input routine (from h/w driver) */ (struct ifnet *, struct mbuf *); void (*if_start) /* initiate output routine */ (struct ifnet *); int (*if_ioctl) /* ioctl routine */ (struct ifnet *, u_long, caddr_t); void (*if_watchdog) /* timer routine */ (struct ifnet *); void (*if_init) /* Init routine */ (void *); int (*if_resolvemulti) /* validate/resolve multicast */ (struct ifnet *, struct sockaddr **, struct sockaddr *); void *if_spare1; /* spare pointer 1 */ void *if_spare2; /* spare pointer 2 */ void *if_spare3; /* spare pointer 3 */ int if_drv_flags; /* driver-managed status flags */ u_int if_spare_flags2; /* spare flags 2 */ struct ifaltq if_snd; /* output queue (includes altq) */ const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */ void *if_bridge; /* bridge glue */ struct lltable *lltables; /* list of L3-L2 resolution tables */ struct label *if_label; /* interface MAC label */ /* these are only used by IPv6 */ struct ifprefixhead if_prefixhead; /* list of prefixes per if */ /* AF_MAX pointer slots; presumably indexed by address family value -- confirm against if_attachdomain1(). */ void *if_afdata[AF_MAX]; int if_afdata_initialized; struct mtx
if_afdata_mtx; struct task if_starttask; /* task for IFF_NEEDSGIANT */ struct task if_linktask; /* task for link change events */ struct mtx if_addr_mtx; /* mutex to protect address lists */ }; /* Function type with the same signature as the if_init member of struct ifnet. */ typedef void if_init_f_t(void *); /* * XXX These aliases are terribly dangerous because they could apply * to anything. */ #define if_mtu if_data.ifi_mtu #define if_type if_data.ifi_type #define if_physical if_data.ifi_physical #define if_addrlen if_data.ifi_addrlen #define if_hdrlen if_data.ifi_hdrlen #define if_metric if_data.ifi_metric #define if_link_state if_data.ifi_link_state #define if_baudrate if_data.ifi_baudrate #define if_hwassist if_data.ifi_hwassist #define if_ipackets if_data.ifi_ipackets #define if_ierrors if_data.ifi_ierrors #define if_opackets if_data.ifi_opackets #define if_oerrors if_data.ifi_oerrors #define if_collisions if_data.ifi_collisions #define if_ibytes if_data.ifi_ibytes #define if_obytes if_data.ifi_obytes #define if_imcasts if_data.ifi_imcasts #define if_omcasts if_data.ifi_omcasts #define if_iqdrops if_data.ifi_iqdrops #define if_noproto if_data.ifi_noproto #define if_lastchange if_data.ifi_lastchange #define if_recvquota if_data.ifi_recvquota #define if_xmitquota if_data.ifi_xmitquota /* Expands to an if_output call whose rtentry argument is NULL. */ #define if_rawoutput(if, m, sa) if_output(if, m, sa, (struct rtentry *)NULL) /* for compatibility with other BSDs */ #define if_addrlist if_addrhead #define if_list if_link /* * Locks for address lists on the network interface. */ #define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_mtx, \ "if_addr_mtx", NULL, MTX_DEF) #define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_mtx) #define IF_ADDR_LOCK(if) mtx_lock(&(if)->if_addr_mtx) #define IF_ADDR_UNLOCK(if) mtx_unlock(&(if)->if_addr_mtx) #define IF_ADDR_LOCK_ASSERT(if) mtx_assert(&(if)->if_addr_mtx, MA_OWNED) /* * Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq) * are queues of messages stored on ifqueue structures * (defined above).
Entries are added to and deleted from these structures * by these macros, which should be called with ipl raised to splimp(). */ /* Each IF_* form below takes ifq_mtx around the matching _IF_* form; IF_POLL is the exception and expands to _IF_POLL with no locking. */ #define IF_LOCK(ifq) mtx_lock(&(ifq)->ifq_mtx) #define IF_UNLOCK(ifq) mtx_unlock(&(ifq)->ifq_mtx) #define IF_LOCK_ASSERT(ifq) mtx_assert(&(ifq)->ifq_mtx, MA_OWNED) #define _IF_QFULL(ifq) ((ifq)->ifq_len >= (ifq)->ifq_maxlen) #define _IF_DROP(ifq) ((ifq)->ifq_drops++) #define _IF_QLEN(ifq) ((ifq)->ifq_len) #define _IF_ENQUEUE(ifq, m) do { \ (m)->m_nextpkt = NULL; \ if ((ifq)->ifq_tail == NULL) \ (ifq)->ifq_head = m; \ else \ (ifq)->ifq_tail->m_nextpkt = m; \ (ifq)->ifq_tail = m; \ (ifq)->ifq_len++; \ } while (0) #define IF_ENQUEUE(ifq, m) do { \ IF_LOCK(ifq); \ _IF_ENQUEUE(ifq, m); \ IF_UNLOCK(ifq); \ } while (0) #define _IF_PREPEND(ifq, m) do { \ (m)->m_nextpkt = (ifq)->ifq_head; \ if ((ifq)->ifq_tail == NULL) \ (ifq)->ifq_tail = (m); \ (ifq)->ifq_head = (m); \ (ifq)->ifq_len++; \ } while (0) #define IF_PREPEND(ifq, m) do { \ IF_LOCK(ifq); \ _IF_PREPEND(ifq, m); \ IF_UNLOCK(ifq); \ } while (0) #define _IF_DEQUEUE(ifq, m) do { \ (m) = (ifq)->ifq_head; \ if (m) { \ if (((ifq)->ifq_head = (m)->m_nextpkt) == NULL) \ (ifq)->ifq_tail = NULL; \ (m)->m_nextpkt = NULL; \ (ifq)->ifq_len--; \ } \ } while (0) #define IF_DEQUEUE(ifq, m) do { \ IF_LOCK(ifq); \ _IF_DEQUEUE(ifq, m); \ IF_UNLOCK(ifq); \ } while (0) #define _IF_POLL(ifq, m) ((m) = (ifq)->ifq_head) #define IF_POLL(ifq, m) _IF_POLL(ifq, m) /* Dequeue and m_freem() every mbuf until the queue is empty. */ #define _IF_DRAIN(ifq) do { \ struct mbuf *m; \ for (;;) { \ _IF_DEQUEUE(ifq, m); \ if (m == NULL) \ break; \ m_freem(m); \ } \ } while (0) #define IF_DRAIN(ifq) do { \ IF_LOCK(ifq); \ _IF_DRAIN(ifq); \ IF_UNLOCK(ifq); \ } while(0) #ifdef _KERNEL /* interface address change event */ typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t); /* new interface arrival event */ typedef void (*ifnet_arrival_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_arrival_event,
ifnet_arrival_event_handler_t); /* interface departure event */ typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t); #define IF_AFDATA_LOCK_INIT(ifp) \ mtx_init(&(ifp)->if_afdata_mtx, "if_afdata", NULL, MTX_DEF) #define IF_AFDATA_LOCK(ifp) mtx_lock(&(ifp)->if_afdata_mtx) #define IF_AFDATA_TRYLOCK(ifp) mtx_trylock(&(ifp)->if_afdata_mtx) #define IF_AFDATA_UNLOCK(ifp) mtx_unlock(&(ifp)->if_afdata_mtx) #define IF_AFDATA_DESTROY(ifp) mtx_destroy(&(ifp)->if_afdata_mtx) /* Take or drop Giant only for interfaces whose if_flags has IFF_NEEDSGIANT set. */ #define IFF_LOCKGIANT(ifp) do { \ if ((ifp)->if_flags & IFF_NEEDSGIANT) \ mtx_lock(&Giant); \ } while (0) #define IFF_UNLOCKGIANT(ifp) do { \ if ((ifp)->if_flags & IFF_NEEDSGIANT) \ mtx_unlock(&Giant); \ } while (0) int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust); #define IF_HANDOFF(ifq, m, ifp) \ if_handoff((struct ifqueue *)ifq, m, ifp, 0) #define IF_HANDOFF_ADJ(ifq, m, ifp, adj) \ if_handoff((struct ifqueue *)ifq, m, ifp, adj) void if_start(struct ifnet *); /* Enqueue m with the queue locked: through ALTQ when it is enabled, otherwise directly, where a full queue frees m and sets err to ENOBUFS. Any nonzero err also bumps ifq_drops. */ #define IFQ_ENQUEUE(ifq, m, err) \ do { \ IF_LOCK(ifq); \ if (ALTQ_IS_ENABLED(ifq)) \ ALTQ_ENQUEUE(ifq, m, NULL, err); \ else { \ if (_IF_QFULL(ifq)) { \ m_freem(m); \ (err) = ENOBUFS; \ } else { \ _IF_ENQUEUE(ifq, m); \ (err) = 0; \ } \ } \ if (err) \ (ifq)->ifq_drops++; \ IF_UNLOCK(ifq); \ } while (0) #define IFQ_DEQUEUE_NOLOCK(ifq, m) \ do { \ if (TBR_IS_ENABLED(ifq)) \ (m) = tbr_dequeue_ptr(ifq, ALTDQ_REMOVE); \ else if (ALTQ_IS_ENABLED(ifq)) \ ALTQ_DEQUEUE(ifq, m); \ else \ _IF_DEQUEUE(ifq, m); \ } while (0) #define IFQ_DEQUEUE(ifq, m) \ do { \ IF_LOCK(ifq); \ IFQ_DEQUEUE_NOLOCK(ifq, m); \ IF_UNLOCK(ifq); \ } while (0) #define IFQ_POLL_NOLOCK(ifq, m) \ do { \ if (TBR_IS_ENABLED(ifq)) \ (m) = tbr_dequeue_ptr(ifq, ALTDQ_POLL); \ else if (ALTQ_IS_ENABLED(ifq)) \ ALTQ_POLL(ifq, m); \ else \ _IF_POLL(ifq, m); \ } while (0) #define IFQ_POLL(ifq, m) \ do { \ IF_LOCK(ifq); \ IFQ_POLL_NOLOCK(ifq, m); \ IF_UNLOCK(ifq); \ }
while (0) #define IFQ_PURGE_NOLOCK(ifq) \ do { \ if (ALTQ_IS_ENABLED(ifq)) { \ ALTQ_PURGE(ifq); \ } else \ _IF_DRAIN(ifq); \ } while (0) #define IFQ_PURGE(ifq) \ do { \ IF_LOCK(ifq); \ IFQ_PURGE_NOLOCK(ifq); \ IF_UNLOCK(ifq); \ } while (0) #define IFQ_SET_READY(ifq) \ do { ((ifq)->altq_flags |= ALTQF_READY); } while (0) #define IFQ_LOCK(ifq) IF_LOCK(ifq) #define IFQ_UNLOCK(ifq) IF_UNLOCK(ifq) #define IFQ_LOCK_ASSERT(ifq) IF_LOCK_ASSERT(ifq) #define IFQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0) #define IFQ_INC_LEN(ifq) ((ifq)->ifq_len++) #define IFQ_DEC_LEN(ifq) (--(ifq)->ifq_len) #define IFQ_INC_DROPS(ifq) ((ifq)->ifq_drops++) #define IFQ_SET_MAXLEN(ifq, len) ((ifq)->ifq_maxlen = (len)) +/* + * The IFF_DRV_OACTIVE test should really occur in the device driver, not in + * the handoff logic, as that flag is locked by the device driver. + */ #define IFQ_HANDOFF_ADJ(ifp, m, adj, err) \ do { \ int len; \ short mflags; \ \ len = (m)->m_pkthdr.len; \ mflags = (m)->m_flags; \ IFQ_ENQUEUE(&(ifp)->if_snd, m, err); \ if ((err) == 0) { \ (ifp)->if_obytes += len + (adj); \ if (mflags & M_MCAST) \ (ifp)->if_omcasts++; \ - if (((ifp)->if_flags & IFF_OACTIVE) == 0) \ + if (((ifp)->if_drv_flags & IFF_DRV_OACTIVE) == 0) \ if_start(ifp); \ } \ } while (0) #define IFQ_HANDOFF(ifp, m, err) \ IFQ_HANDOFF_ADJ(ifp, m, 0, err) /* Take the next mbuf from the driver-private list (ifq_drv_head). When that list is empty, lock the shared queue, dequeue one mbuf for the caller and then move further mbufs onto the driver list until it holds ifq_drv_maxlen entries or the shared queue runs dry. */ #define IFQ_DRV_DEQUEUE(ifq, m) \ do { \ (m) = (ifq)->ifq_drv_head; \ if (m) { \ if (((ifq)->ifq_drv_head = (m)->m_nextpkt) == NULL) \ (ifq)->ifq_drv_tail = NULL; \ (m)->m_nextpkt = NULL; \ (ifq)->ifq_drv_len--; \ } else { \ IFQ_LOCK(ifq); \ IFQ_DEQUEUE_NOLOCK(ifq, m); \ while ((ifq)->ifq_drv_len < (ifq)->ifq_drv_maxlen) { \ struct mbuf *m0; \ IFQ_DEQUEUE_NOLOCK(ifq, m0); \ if (m0 == NULL) \ break; \ m0->m_nextpkt = NULL; \ if ((ifq)->ifq_drv_tail == NULL) \ (ifq)->ifq_drv_head = m0; \ else \ (ifq)->ifq_drv_tail->m_nextpkt = m0; \ (ifq)->ifq_drv_tail = m0; \ (ifq)->ifq_drv_len++; \ } \ IFQ_UNLOCK(ifq); \ } \ } while (0) #define IFQ_DRV_PREPEND(ifq, m) \ do { \
(m)->m_nextpkt = (ifq)->ifq_drv_head; \ if ((ifq)->ifq_drv_tail == NULL) \ (ifq)->ifq_drv_tail = (m); \ (ifq)->ifq_drv_head = (m); \ (ifq)->ifq_drv_len++; \ } while (0) /* True only when both the driver-private list and the shared queue are empty. */ #define IFQ_DRV_IS_EMPTY(ifq) \ (((ifq)->ifq_drv_len == 0) && ((ifq)->ifq_len == 0)) /* Free every mbuf on the driver-private list, reset that list, then purge the shared queue with IFQ_PURGE. */ #define IFQ_DRV_PURGE(ifq) \ do { \ struct mbuf *m, *n = (ifq)->ifq_drv_head; \ while((m = n) != NULL) { \ n = m->m_nextpkt; \ m_freem(m); \ } \ (ifq)->ifq_drv_head = (ifq)->ifq_drv_tail = NULL; \ (ifq)->ifq_drv_len = 0; \ IFQ_PURGE(ifq); \ } while (0) /* * 72 was chosen below because it is the size of a TCP/IP * header (40) + the minimum mss (32). */ #define IF_MINMTU 72 #define IF_MAXMTU 65535 #endif /* _KERNEL */ /* * The ifaddr structure contains information about one address * of an interface. They are maintained by the different address families, * are allocated and attached when an address is set, and are linked * together so all addresses for an interface can be located. * * NOTE: a 'struct ifaddr' is always at the beginning of a larger * chunk of malloc'ed memory, where we store the three addresses * (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here.
*/ struct ifaddr { struct sockaddr *ifa_addr; /* address of interface */ struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */ #define ifa_broadaddr ifa_dstaddr /* broadcast address interface */ struct sockaddr *ifa_netmask; /* used to determine subnet */ struct if_data if_data; /* not all members are meaningful */ struct ifnet *ifa_ifp; /* back-pointer to interface */ TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */ void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */ (int, struct rtentry *, struct rt_addrinfo *); u_short ifa_flags; /* mostly rt_flags for cloning */ u_int ifa_refcnt; /* references to this structure */ int ifa_metric; /* cost of going out this interface */ int (*ifa_claim_addr) /* check if an addr goes to this if */ (struct ifaddr *, struct sockaddr *); struct mtx ifa_mtx; /* taken by IFA_LOCK; IFAREF and IFAFREE hold it while changing ifa_refcnt */ }; #define IFA_ROUTE RTF_UP /* route installed */ /* for compatibility with other BSDs */ #define ifa_list ifa_link /* Thin wrappers around the mtx(9) calls on ifa_mtx. */ #define IFA_LOCK_INIT(ifa) \ mtx_init(&(ifa)->ifa_mtx, "ifaddr", NULL, MTX_DEF) #define IFA_LOCK(ifa) mtx_lock(&(ifa)->ifa_mtx) #define IFA_UNLOCK(ifa) mtx_unlock(&(ifa)->ifa_mtx) #define IFA_DESTROY(ifa) mtx_destroy(&(ifa)->ifa_mtx) /* * The prefix structure contains information about one prefix * of an interface. They are maintained by the different address families, * are allocated and attached when a prefix or an address is set, * and are linked together so all prefixes for an interface can be located. */ struct ifprefix { struct sockaddr *ifpr_prefix; /* prefix of interface */ struct ifnet *ifpr_ifp; /* back-pointer to interface */ TAILQ_ENTRY(ifprefix) ifpr_list; /* queue macro glue */ u_char ifpr_plen; /* prefix length in bits */ u_char ifpr_type; /* protocol dependent prefix type */ }; /* * Multicast address structure. This is analogous to the ifaddr * structure except that it keeps track of multicast addresses. * Also, the reference count here is a count of requests for this * address, not a count of pointers to this structure.
*/ struct ifmultiaddr { TAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */ struct sockaddr *ifma_addr; /* address this membership is for */ struct sockaddr *ifma_lladdr; /* link-layer translation, if any */ struct ifnet *ifma_ifp; /* back-pointer to interface */ u_int ifma_refcount; /* reference count */ void *ifma_protospec; /* protocol-specific state, if any */ }; #ifdef _KERNEL /* Drop one reference with the ifaddr lock held. When the count reaches zero the mutex is destroyed and the structure is freed to M_IFADDR; otherwise the lock is simply released. Asserts that the count was positive on entry. */ #define IFAFREE(ifa) \ do { \ IFA_LOCK(ifa); \ KASSERT((ifa)->ifa_refcnt > 0, \ ("ifa %p !(ifa_refcnt > 0)", ifa)); \ if (--(ifa)->ifa_refcnt == 0) { \ IFA_DESTROY(ifa); \ free(ifa, M_IFADDR); \ } else \ IFA_UNLOCK(ifa); \ } while (0) /* Take one reference with the ifaddr lock held. */ #define IFAREF(ifa) \ do { \ IFA_LOCK(ifa); \ ++(ifa)->ifa_refcnt; \ IFA_UNLOCK(ifa); \ } while (0) extern struct mtx ifnet_lock; /* ifnet_lock is initialised with MTX_RECURSE; the read-lock macros below expand to the write-lock ones, so readers and writers share the same mutex. */ #define IFNET_LOCK_INIT() \ mtx_init(&ifnet_lock, "ifnet", NULL, MTX_DEF | MTX_RECURSE) #define IFNET_WLOCK() mtx_lock(&ifnet_lock) #define IFNET_WUNLOCK() mtx_unlock(&ifnet_lock) #define IFNET_RLOCK() IFNET_WLOCK() #define IFNET_RUNLOCK() IFNET_WUNLOCK() /* One ifindex_table slot: the three pointers returned by ifnet_byindex(), ifaddr_byindex() and ifdev_byindex(). */ struct ifindex_entry { struct ifnet *ife_ifnet; struct ifaddr *ife_ifnet_addr; struct cdev *ife_dev; }; #define ifnet_byindex(idx) ifindex_table[(idx)].ife_ifnet /* * Given the index, ifaddr_byindex() returns the one and only * link-level ifaddr for the interface. You are not supposed to use * it to traverse the list of addresses associated to the interface.
*/ #define ifaddr_byindex(idx) ifindex_table[(idx)].ife_ifnet_addr #define ifdev_byindex(idx) ifindex_table[(idx)].ife_dev extern struct ifnethead ifnet; extern struct ifindex_entry *ifindex_table; extern int ifqmaxlen; extern struct ifnet *loif; /* first loopback interface */ extern int if_index; int if_addmulti(struct ifnet *, struct sockaddr *, struct ifmultiaddr **); int if_allmulti(struct ifnet *, int); struct ifnet* if_alloc(u_char); void if_attach(struct ifnet *); int if_delmulti(struct ifnet *, struct sockaddr *); void if_detach(struct ifnet *); void if_purgeaddrs(struct ifnet *); void if_down(struct ifnet *); void if_free(struct ifnet *); void if_free_type(struct ifnet *, u_char); void if_initname(struct ifnet *, const char *, int); void if_link_state_change(struct ifnet *, int); int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3); int if_setlladdr(struct ifnet *, const u_char *, int); void if_up(struct ifnet *); /*void ifinit(void);*/ /* declared in systm.h for main() */ int ifioctl(struct socket *, u_long, caddr_t, struct thread *); int ifpromisc(struct ifnet *, int); struct ifnet *ifunit(const char *); struct ifaddr *ifa_ifwithaddr(struct sockaddr *); struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *); struct ifaddr *ifa_ifwithnet(struct sockaddr *); struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *); struct ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *); int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen); typedef void *if_com_alloc_t(u_char type, struct ifnet *ifp); typedef void if_com_free_t(void *com, u_char type); void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f); void if_deregister_com_alloc(u_char type); /* Applies LLADDR() to the sockaddr_dl stored as ifa_addr in the ifaddr that ifaddr_byindex() yields for ifp->if_index. There is no NULL check on that ifaddr. */ #define IF_LLADDR(ifp) \ LLADDR((struct sockaddr_dl *) ifaddr_byindex((ifp)->if_index)->ifa_addr) #ifdef DEVICE_POLLING /* Values for the cmd argument of a poll_handler_t. */ enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS, POLL_DEREGISTER }; typedef void poll_handler_t(struct ifnet *ifp, enum poll_cmd
cmd, int count); int ether_poll_register(poll_handler_t *h, struct ifnet *ifp); int ether_poll_deregister(struct ifnet *ifp); #endif /* DEVICE_POLLING */ #endif /* _KERNEL */ #endif /* !_NET_IF_VAR_H_ */ Index: stable/6/sys/net/rtsock.c =================================================================== --- stable/6/sys/net/rtsock.c (revision 149441) +++ stable/6/sys/net/rtsock.c (revision 149442) @@ -1,1272 +1,1273 @@ /*- * Copyright (c) 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
* * @(#)rtsock.c 8.7 (Berkeley) 10/12/95 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); /* NB: these are not modified */ static struct sockaddr route_dst = { 2, PF_ROUTE, }; static struct sockaddr route_src = { 2, PF_ROUTE, }; static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, }; static struct { int ip_count; /* attached w/ AF_INET */ int ip6_count; /* attached w/ AF_INET6 */ int ipx_count; /* attached w/ AF_IPX */ int any_count; /* total attached */ } route_cb; struct mtx rtsock_mtx; MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF); #define RTSOCK_LOCK() mtx_lock(&rtsock_mtx) #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx) #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED) static struct ifqueue rtsintrq; SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, ""); SYSCTL_INT(_net_route, OID_AUTO, netisr_maxqlen, CTLFLAG_RW, &rtsintrq.ifq_maxlen, 0, "maximum routing socket dispatch queue length"); struct walkarg { int w_tmemsize; int w_op, w_arg; caddr_t w_tmem; struct sysctl_req *w_req; }; static void rts_input(struct mbuf *m); static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo); static int rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w); static int rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo); static int sysctl_dumpentry(struct radix_node *rn, void *vw); static int sysctl_iflist(int af, struct walkarg *w); static int sysctl_ifmalist(int af, struct walkarg *w); static int route_output(struct mbuf *m, struct socket *so); static void rt_setmetrics(u_long which, const struct rt_metrics *in, struct rt_metrics_lite *out); static void rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out); static void rt_dispatch(struct mbuf *, const struct sockaddr *); static void 
rts_init(void) { int tmp; rtsintrq.ifq_maxlen = 256; if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp)) rtsintrq.ifq_maxlen = tmp; mtx_init(&rtsintrq.ifq_mtx, "rts_inq", NULL, MTX_DEF); netisr_register(NETISR_ROUTE, rts_input, &rtsintrq, NETISR_MPSAFE); } SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0) static void rts_input(struct mbuf *m) { struct sockproto route_proto; unsigned short *family; struct m_tag *tag; route_proto.sp_family = PF_ROUTE; tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL); if (tag != NULL) { family = (unsigned short *)(tag + 1); route_proto.sp_protocol = *family; m_tag_delete(m, tag); } else route_proto.sp_protocol = 0; raw_input(m, &route_proto, &route_src, &route_dst); } /* * It really doesn't make any sense at all for this code to share much * with raw_usrreq.c, since its functionality is so restricted. XXX */ static int rts_abort(struct socket *so) { int s, error; s = splnet(); error = raw_usrreqs.pru_abort(so); splx(s); return error; } /* pru_accept is EOPNOTSUPP */ static int rts_attach(struct socket *so, int proto, struct thread *td) { struct rawcb *rp; int s, error; if (sotorawcb(so) != NULL) return EISCONN; /* XXX panic? */ /* XXX */ MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK | M_ZERO); if (rp == NULL) return ENOBUFS; /* * The splnet() is necessary to block protocols from sending * error notifications (like RTM_REDIRECT or RTM_LOSING) while * this PCB is extant but incompletely initialized. * Probably we should try to do more of this work beforehand and * eliminate the spl. 
*/ s = splnet(); so->so_pcb = (caddr_t)rp; error = raw_attach(so, proto); rp = sotorawcb(so); if (error) { splx(s); so->so_pcb = NULL; free(rp, M_PCB); return error; } RTSOCK_LOCK(); switch(rp->rcb_proto.sp_protocol) { case AF_INET: route_cb.ip_count++; break; case AF_INET6: route_cb.ip6_count++; break; case AF_IPX: route_cb.ipx_count++; break; } rp->rcb_faddr = &route_src; route_cb.any_count++; RTSOCK_UNLOCK(); soisconnected(so); so->so_options |= SO_USELOOPBACK; splx(s); return 0; } static int rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { int s, error; s = splnet(); error = raw_usrreqs.pru_bind(so, nam, td); /* xxx just EINVAL */ splx(s); return error; } static int rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { int s, error; s = splnet(); error = raw_usrreqs.pru_connect(so, nam, td); /* XXX just EINVAL */ splx(s); return error; } /* pru_connect2 is EOPNOTSUPP */ /* pru_control is EOPNOTSUPP */ static int rts_detach(struct socket *so) { struct rawcb *rp = sotorawcb(so); int s, error; s = splnet(); if (rp != NULL) { RTSOCK_LOCK(); switch(rp->rcb_proto.sp_protocol) { case AF_INET: route_cb.ip_count--; break; case AF_INET6: route_cb.ip6_count--; break; case AF_IPX: route_cb.ipx_count--; break; } route_cb.any_count--; RTSOCK_UNLOCK(); } error = raw_usrreqs.pru_detach(so); splx(s); return error; } static int rts_disconnect(struct socket *so) { int s, error; s = splnet(); error = raw_usrreqs.pru_disconnect(so); splx(s); return error; } /* pru_listen is EOPNOTSUPP */ static int rts_peeraddr(struct socket *so, struct sockaddr **nam) { int s, error; s = splnet(); error = raw_usrreqs.pru_peeraddr(so, nam); splx(s); return error; } /* pru_rcvd is EOPNOTSUPP */ /* pru_rcvoob is EOPNOTSUPP */ static int rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { int s, error; s = splnet(); error = raw_usrreqs.pru_send(so, flags, m, nam, control, td); splx(s); return 
error; } /* pru_sense is null */ static int rts_shutdown(struct socket *so) { int s, error; s = splnet(); error = raw_usrreqs.pru_shutdown(so); splx(s); return error; } static int rts_sockaddr(struct socket *so, struct sockaddr **nam) { int s, error; s = splnet(); error = raw_usrreqs.pru_sockaddr(so, nam); splx(s); return error; } static struct pr_usrreqs route_usrreqs = { .pru_abort = rts_abort, .pru_attach = rts_attach, .pru_bind = rts_bind, .pru_connect = rts_connect, .pru_detach = rts_detach, .pru_disconnect = rts_disconnect, .pru_peeraddr = rts_peeraddr, .pru_send = rts_send, .pru_shutdown = rts_shutdown, .pru_sockaddr = rts_sockaddr, }; /*ARGSUSED*/ static int route_output(struct mbuf *m, struct socket *so) { #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) struct rt_msghdr *rtm = NULL; struct rtentry *rt = NULL; struct radix_node_head *rnh; struct rt_addrinfo info; int len, error = 0; struct ifnet *ifp = NULL; struct ifaddr *ifa = NULL; struct sockaddr_in jail; #define senderr(e) { error = e; goto flush;} if (m == NULL || ((m->m_len < sizeof(long)) && (m = m_pullup(m, sizeof(long))) == NULL)) return (ENOBUFS); if ((m->m_flags & M_PKTHDR) == 0) panic("route_output"); len = m->m_pkthdr.len; if (len < sizeof(*rtm) || len != mtod(m, struct rt_msghdr *)->rtm_msglen) { info.rti_info[RTAX_DST] = NULL; senderr(EINVAL); } R_Malloc(rtm, struct rt_msghdr *, len); if (rtm == NULL) { info.rti_info[RTAX_DST] = NULL; senderr(ENOBUFS); } m_copydata(m, 0, len, (caddr_t)rtm); if (rtm->rtm_version != RTM_VERSION) { info.rti_info[RTAX_DST] = NULL; senderr(EPROTONOSUPPORT); } rtm->rtm_pid = curproc->p_pid; bzero(&info, sizeof(info)); info.rti_addrs = rtm->rtm_addrs; if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) { info.rti_info[RTAX_DST] = NULL; senderr(EINVAL); } info.rti_flags = rtm->rtm_flags; if (info.rti_info[RTAX_DST] == NULL || info.rti_info[RTAX_DST]->sa_family >= AF_MAX || (info.rti_info[RTAX_GATEWAY] != NULL && 
info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) senderr(EINVAL); if (info.rti_info[RTAX_GENMASK]) { struct radix_node *t; t = rn_addmask((caddr_t) info.rti_info[RTAX_GENMASK], 0, 1); if (t != NULL && bcmp((char *)(void *)info.rti_info[RTAX_GENMASK] + 1, (char *)(void *)t->rn_key + 1, ((struct sockaddr *)t->rn_key)->sa_len - 1) == 0) info.rti_info[RTAX_GENMASK] = (struct sockaddr *)t->rn_key; else senderr(ENOBUFS); } /* * Verify that the caller has the appropriate privilege; RTM_GET * is the only operation the non-superuser is allowed. */ if (rtm->rtm_type != RTM_GET && (error = suser(curthread)) != 0) senderr(error); switch (rtm->rtm_type) { struct rtentry *saved_nrt; case RTM_ADD: if (info.rti_info[RTAX_GATEWAY] == NULL) senderr(EINVAL); saved_nrt = NULL; error = rtrequest1(RTM_ADD, &info, &saved_nrt); if (error == 0 && saved_nrt) { RT_LOCK(saved_nrt); rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &saved_nrt->rt_rmx); RT_REMREF(saved_nrt); saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK]; RT_UNLOCK(saved_nrt); } break; case RTM_DELETE: saved_nrt = NULL; error = rtrequest1(RTM_DELETE, &info, &saved_nrt); if (error == 0) { RT_LOCK(saved_nrt); rt = saved_nrt; goto report; } break; case RTM_GET: case RTM_CHANGE: case RTM_LOCK: rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family]; if (rnh == NULL) senderr(EAFNOSUPPORT); RADIX_NODE_HEAD_LOCK(rnh); rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST], info.rti_info[RTAX_NETMASK], rnh); if (rt == NULL) { /* XXX looks bogus */ RADIX_NODE_HEAD_UNLOCK(rnh); senderr(ESRCH); } RT_LOCK(rt); RT_ADDREF(rt); RADIX_NODE_HEAD_UNLOCK(rnh); switch(rtm->rtm_type) { case RTM_GET: report: RT_LOCK_ASSERT(rt); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_GENMASK] = rt->rt_genmask; if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { ifp = rt->rt_ifp; if (ifp) { info.rti_info[RTAX_IFP] = ifaddr_byindex(ifp->if_index)->ifa_addr; if 
(jailed(so->so_cred)) { bzero(&jail, sizeof(jail)); jail.sin_family = PF_INET; jail.sin_len = sizeof(jail); jail.sin_addr.s_addr = htonl(prison_getip(so->so_cred)); info.rti_info[RTAX_IFA] = (struct sockaddr *)&jail; } else info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; if (ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; rtm->rtm_index = ifp->if_index; } else { info.rti_info[RTAX_IFP] = NULL; info.rti_info[RTAX_IFA] = NULL; } } else if ((ifp = rt->rt_ifp) != NULL) { rtm->rtm_index = ifp->if_index; } len = rt_msg2(rtm->rtm_type, &info, NULL, NULL); if (len > rtm->rtm_msglen) { struct rt_msghdr *new_rtm; R_Malloc(new_rtm, struct rt_msghdr *, len); if (new_rtm == NULL) { RT_UNLOCK(rt); senderr(ENOBUFS); } bcopy(rtm, new_rtm, rtm->rtm_msglen); Free(rtm); rtm = new_rtm; } (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL); rtm->rtm_flags = rt->rt_flags; rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); rtm->rtm_addrs = info.rti_addrs; break; case RTM_CHANGE: /* * New gateway could require new ifaddr, ifp; * flags may also be different; ifp may be specified * by ll sockaddr when protocol address is ambiguous */ if (((rt->rt_flags & RTF_GATEWAY) && info.rti_info[RTAX_GATEWAY] != NULL) || info.rti_info[RTAX_IFP] != NULL || (info.rti_info[RTAX_IFA] != NULL && !sa_equal(info.rti_info[RTAX_IFA], rt->rt_ifa->ifa_addr))) { if ((error = rt_getifa(&info)) != 0) { RT_UNLOCK(rt); senderr(error); } } if (info.rti_info[RTAX_GATEWAY] != NULL && (error = rt_setgate(rt, rt_key(rt), info.rti_info[RTAX_GATEWAY])) != 0) { RT_UNLOCK(rt); senderr(error); } if ((ifa = info.rti_ifa) != NULL) { struct ifaddr *oifa = rt->rt_ifa; if (oifa != ifa) { if (oifa) { if (oifa->ifa_rtrequest) oifa->ifa_rtrequest( RTM_DELETE, rt, &info); IFAFREE(oifa); } IFAREF(ifa); rt->rt_ifa = ifa; rt->rt_ifp = info.rti_ifp; } } rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info); if 
(info.rti_info[RTAX_GENMASK])
				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
			/* FALLTHROUGH */
		case RTM_LOCK:
			/* We don't support locks anymore */
			break;
		}
		RT_UNLOCK(rt);
		break;

	default:
		senderr(EOPNOTSUPP);
	}

flush:
	/*
	 * Record the outcome in the reply header: the error code on
	 * failure, RTF_DONE otherwise.
	 */
	if (rtm) {
		if (error)
			rtm->rtm_errno = error;
		else
			rtm->rtm_flags |= RTF_DONE;
	}
	if (rt)		/* XXX can this be true? */
		RTFREE(rt);
    {
	struct rawcb *rp = NULL;
	/*
	 * Check to see if we don't want our own messages.
	 */
	if ((so->so_options & SO_USELOOPBACK) == 0) {
		if (route_cb.any_count <= 1) {
			/* Both the reply buffer and the request mbuf are released. */
			if (rtm)
				Free(rtm);
			m_freem(m);
			return (error);
		}
		/* There is another listener, so construct message */
		rp = sotorawcb(so);
	}
	if (rtm) {
		/*
		 * Copy the reply over the request mbuf, then make the packet
		 * length agree with rtm_msglen: drop the mbuf if it is too
		 * short, trim the tail if it is too long.
		 */
		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
		if (m->m_pkthdr.len < rtm->rtm_msglen) {
			m_freem(m);
			m = NULL;
		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
		Free(rtm);
	}
	if (m) {
		if (rp) {
			/*
			 * XXX ensure we don't get a copy by
			 * invalidating our protocol
			 */
			unsigned short family = rp->rcb_proto.sp_family;
			rp->rcb_proto.sp_family = 0;
			rt_dispatch(m, info.rti_info[RTAX_DST]);
			rp->rcb_proto.sp_family = family;
		} else
			rt_dispatch(m, info.rti_info[RTAX_DST]);
	}
    }
	return (error);
#undef	sa_equal
}

/*
 * Copy the metrics selected by the `which' bit mask from `in' to `out'.
 * A field is copied only when its RTV_* bit is set in `which'; all other
 * fields of `out' are left untouched.
 */
static void
rt_setmetrics(u_long which, const struct rt_metrics *in,
	struct rt_metrics_lite *out)
{
#define metric(f, e) if (which & (f)) out->e = in->e;
	/*
	 * Only these are stored in the routing entry since introduction
	 * of tcp hostcache. The rest is ignored.
	 */
	metric(RTV_MTU, rmx_mtu);
	metric(RTV_EXPIRE, rmx_expire);
#undef metric
}

/*
 * Fill `out' from `in': the whole of `out' is zeroed first, then the
 * rmx_mtu and rmx_expire fields are copied across.
 */
static void
rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
{
#define metric(e) out->e = in->e;
	bzero(out, sizeof(*out));
	metric(rmx_mtu);
	metric(rmx_expire);
#undef metric
}

/*
 * Extract the addresses of the passed sockaddrs.
 * Do a little sanity checking so as to avoid bad memory references.
 * This data is derived straight from userland.
*/ static int rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) { struct sockaddr *sa; int i; for (i = 0; i < RTAX_MAX && cp < cplim; i++) { if ((rtinfo->rti_addrs & (1 << i)) == 0) continue; sa = (struct sockaddr *)cp; /* * It won't fit. */ if (cp + sa->sa_len > cplim) return (EINVAL); /* * there are no more.. quit now * If there are more bits, they are in error. * I've seen this. route(1) can evidently generate these. * This causes kernel to core dump. * for compatibility, If we see this, point to a safe address. */ if (sa->sa_len == 0) { rtinfo->rti_info[i] = &sa_zero; return (0); /* should be EINVAL but for compat */ } /* accept it */ rtinfo->rti_info[i] = sa; cp += SA_SIZE(sa); } return (0); } static struct mbuf * rt_msg1(int type, struct rt_addrinfo *rtinfo) { struct rt_msghdr *rtm; struct mbuf *m; int i; struct sockaddr *sa; int len, dlen; switch (type) { case RTM_DELADDR: case RTM_NEWADDR: len = sizeof(struct ifa_msghdr); break; case RTM_DELMADDR: case RTM_NEWMADDR: len = sizeof(struct ifma_msghdr); break; case RTM_IFINFO: len = sizeof(struct if_msghdr); break; case RTM_IFANNOUNCE: case RTM_IEEE80211: len = sizeof(struct if_announcemsghdr); break; default: len = sizeof(struct rt_msghdr); } if (len > MCLBYTES) panic("rt_msg1"); m = m_gethdr(M_DONTWAIT, MT_DATA); if (m && len > MHLEN) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); m = NULL; } } if (m == NULL) return (m); m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.rcvif = NULL; rtm = mtod(m, struct rt_msghdr *); bzero((caddr_t)rtm, len); for (i = 0; i < RTAX_MAX; i++) { if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = SA_SIZE(sa); m_copyback(m, len, dlen, (caddr_t)sa); len += dlen; } if (m->m_pkthdr.len != len) { m_freem(m); return (NULL); } rtm->rtm_msglen = len; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; return (m); } static int rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w) { int i; int 
len, dlen, second_time = 0; caddr_t cp0; rtinfo->rti_addrs = 0; again: switch (type) { case RTM_DELADDR: case RTM_NEWADDR: len = sizeof(struct ifa_msghdr); break; case RTM_IFINFO: len = sizeof(struct if_msghdr); break; case RTM_NEWMADDR: len = sizeof(struct ifma_msghdr); break; default: len = sizeof(struct rt_msghdr); } cp0 = cp; if (cp0) cp += len; for (i = 0; i < RTAX_MAX; i++) { struct sockaddr *sa; if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = SA_SIZE(sa); if (cp) { bcopy((caddr_t)sa, cp, (unsigned)dlen); cp += dlen; } len += dlen; } len = ALIGN(len); if (cp == NULL && w != NULL && !second_time) { struct walkarg *rw = w; if (rw->w_req) { if (rw->w_tmemsize < len) { if (rw->w_tmem) free(rw->w_tmem, M_RTABLE); rw->w_tmem = (caddr_t) malloc(len, M_RTABLE, M_NOWAIT); if (rw->w_tmem) rw->w_tmemsize = len; } if (rw->w_tmem) { cp = rw->w_tmem; second_time = 1; goto again; } } } if (cp) { struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; rtm->rtm_msglen = len; } return (len); } /* * This routine is called to generate a message from the routing * socket indicating that a redirect has occured, a routing lookup * has failed, or that a protocol has detected timeouts to a particular * destination. */ void rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error) { struct rt_msghdr *rtm; struct mbuf *m; struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; if (route_cb.any_count == 0) return; m = rt_msg1(type, rtinfo); if (m == NULL) return; rtm = mtod(m, struct rt_msghdr *); rtm->rtm_flags = RTF_DONE | flags; rtm->rtm_errno = error; rtm->rtm_addrs = rtinfo->rti_addrs; rt_dispatch(m, sa); } /* * This routine is called to generate a message from the routing * socket indicating that the status of a network interface has changed. 
*/ void rt_ifmsg(struct ifnet *ifp) { struct if_msghdr *ifm; struct mbuf *m; struct rt_addrinfo info; if (route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); m = rt_msg1(RTM_IFINFO, &info); if (m == NULL) return; ifm = mtod(m, struct if_msghdr *); ifm->ifm_index = ifp->if_index; - ifm->ifm_flags = ifp->if_flags; + ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm->ifm_data = ifp->if_data; ifm->ifm_addrs = 0; rt_dispatch(m, NULL); } /* * This is called to generate messages from the routing socket * indicating a network interface has had addresses associated with it. * if we ever reverse the logic and replace messages TO the routing * socket indicate a request to configure interfaces, then it will * be unnecessary as the routing socket will automatically generate * copies of it. */ void rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) { struct rt_addrinfo info; struct sockaddr *sa = NULL; int pass; struct mbuf *m = NULL; struct ifnet *ifp = ifa->ifa_ifp; KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %u", cmd)); if (route_cb.any_count == 0) return; for (pass = 1; pass < 3; pass++) { bzero((caddr_t)&info, sizeof(info)); if ((cmd == RTM_ADD && pass == 1) || (cmd == RTM_DELETE && pass == 2)) { struct ifa_msghdr *ifam; int ncmd = cmd == RTM_ADD ? 
RTM_NEWADDR : RTM_DELADDR; info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr; info.rti_info[RTAX_IFP] = ifaddr_byindex(ifp->if_index)->ifa_addr; info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; if ((m = rt_msg1(ncmd, &info)) == NULL) continue; ifam = mtod(m, struct ifa_msghdr *); ifam->ifam_index = ifp->if_index; ifam->ifam_metric = ifa->ifa_metric; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_addrs = info.rti_addrs; } if ((cmd == RTM_ADD && pass == 2) || (cmd == RTM_DELETE && pass == 1)) { struct rt_msghdr *rtm; if (rt == NULL) continue; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_DST] = sa = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; if ((m = rt_msg1(cmd, &info)) == NULL) continue; rtm = mtod(m, struct rt_msghdr *); rtm->rtm_index = ifp->if_index; rtm->rtm_flags |= rt->rt_flags; rtm->rtm_errno = error; rtm->rtm_addrs = info.rti_addrs; } rt_dispatch(m, sa); } } /* * This is the analogue to the rt_newaddrmsg which performs the same * function but for multicast group memberhips. This is easier since * there is no route state to worry about. */ void rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) { struct rt_addrinfo info; struct mbuf *m = NULL; struct ifnet *ifp = ifma->ifma_ifp; struct ifma_msghdr *ifmam; if (route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_IFA] = ifma->ifma_addr; info.rti_info[RTAX_IFP] = ifp ? ifaddr_byindex(ifp->if_index)->ifa_addr : NULL; /* * If a link-layer address is present, present it as a ``gateway'' * (similarly to how ARP entries, e.g., are presented). 
*/ info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr; m = rt_msg1(cmd, &info); if (m == NULL) return; ifmam = mtod(m, struct ifma_msghdr *); ifmam->ifmam_index = ifp->if_index; ifmam->ifmam_addrs = info.rti_addrs; rt_dispatch(m, ifma->ifma_addr); } static struct mbuf * rt_makeifannouncemsg(struct ifnet *ifp, int type, int what, struct rt_addrinfo *info) { struct if_announcemsghdr *ifan; struct mbuf *m; if (route_cb.any_count == 0) return NULL; bzero((caddr_t)info, sizeof(*info)); m = rt_msg1(type, info); if (m != NULL) { ifan = mtod(m, struct if_announcemsghdr *); ifan->ifan_index = ifp->if_index; strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); ifan->ifan_what = what; } return m; } /* * This is called to generate routing socket messages indicating * IEEE80211 wireless events. * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way. */ void rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len) { struct mbuf *m; struct rt_addrinfo info; m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info); if (m != NULL) { /* * Append the ieee80211 data. Try to stick it in the * mbuf containing the ifannounce msg; otherwise allocate * a new mbuf and append. * * NB: we assume m is a single mbuf. */ if (data_len > M_TRAILINGSPACE(m)) { struct mbuf *n = m_get(M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return; } bcopy(data, mtod(n, void *), data_len); n->m_len = data_len; m->m_next = n; } else if (data_len > 0) { bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len); m->m_len += data_len; } if (m->m_flags & M_PKTHDR) m->m_pkthdr.len += data_len; mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len; rt_dispatch(m, NULL); } } /* * This is called to generate routing socket messages indicating * network interface arrival and departure. 
*/ void rt_ifannouncemsg(struct ifnet *ifp, int what) { struct mbuf *m; struct rt_addrinfo info; m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info); if (m != NULL) rt_dispatch(m, NULL); } static void rt_dispatch(struct mbuf *m, const struct sockaddr *sa) { struct m_tag *tag; /* * Preserve the family from the sockaddr, if any, in an m_tag for * use when injecting the mbuf into the routing socket buffer from * the netisr. */ if (sa != NULL) { tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short), M_NOWAIT); if (tag == NULL) { m_freem(m); return; } *(unsigned short *)(tag + 1) = sa->sa_family; m_tag_prepend(m, tag); } netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */ } /* * This is used in dumping the kernel table via sysctl(). */ static int sysctl_dumpentry(struct radix_node *rn, void *vw) { struct walkarg *w = vw; struct rtentry *rt = (struct rtentry *)rn; int error = 0, size; struct rt_addrinfo info; if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) return 0; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_GENMASK] = rt->rt_genmask; if (rt->rt_ifp) { info.rti_info[RTAX_IFP] = ifaddr_byindex(rt->rt_ifp->if_index)->ifa_addr; info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; } size = rt_msg2(RTM_GET, &info, NULL, w); if (w->w_req && w->w_tmem) { struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; rtm->rtm_flags = rt->rt_flags; rtm->rtm_use = rt->rt_rmx.rmx_pksent; rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); rtm->rtm_index = rt->rt_ifp->if_index; rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; rtm->rtm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); return (error); } return (error); } static int sysctl_iflist(int af, struct walkarg *w) { struct ifnet *ifp; struct ifaddr *ifa; 
struct rt_addrinfo info; int len, error = 0; bzero((caddr_t)&info, sizeof(info)); /* IFNET_RLOCK(); */ /* could sleep XXX */ TAILQ_FOREACH(ifp, &ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; ifa = ifaddr_byindex(ifp->if_index); info.rti_info[RTAX_IFP] = ifa->ifa_addr; len = rt_msg2(RTM_IFINFO, &info, NULL, w); info.rti_info[RTAX_IFP] = NULL; if (w->w_req && w->w_tmem) { struct if_msghdr *ifm; ifm = (struct if_msghdr *)w->w_tmem; ifm->ifm_index = ifp->if_index; - ifm->ifm_flags = ifp->if_flags; + ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm->ifm_data = ifp->if_data; ifm->ifm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len); if (error) goto done; } while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) { if (af && af != ifa->ifa_addr->sa_family) continue; if (jailed(curthread->td_ucred) && prison_if(curthread->td_ucred, ifa->ifa_addr)) continue; info.rti_info[RTAX_IFA] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; len = rt_msg2(RTM_NEWADDR, &info, NULL, w); if (w->w_req && w->w_tmem) { struct ifa_msghdr *ifam; ifam = (struct ifa_msghdr *)w->w_tmem; ifam->ifam_index = ifa->ifa_ifp->if_index; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_metric = ifa->ifa_metric; ifam->ifam_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, w->w_tmem, len); if (error) goto done; } } info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = info.rti_info[RTAX_BRD] = NULL; } done: /* IFNET_RUNLOCK(); */ /* XXX */ return (error); } int sysctl_ifmalist(int af, struct walkarg *w) { struct ifnet *ifp; struct ifmultiaddr *ifma; struct rt_addrinfo info; int len, error = 0; struct ifaddr *ifa; bzero((caddr_t)&info, sizeof(info)); /* IFNET_RLOCK(); */ /* could sleep XXX */ TAILQ_FOREACH(ifp, &ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; ifa = ifaddr_byindex(ifp->if_index); info.rti_info[RTAX_IFP] = ifa ? 
ifa->ifa_addr : NULL; + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (af && af != ifma->ifma_addr->sa_family) continue; if (jailed(curproc->p_ucred) && prison_if(curproc->p_ucred, ifma->ifma_addr)) continue; info.rti_info[RTAX_IFA] = ifma->ifma_addr; info.rti_info[RTAX_GATEWAY] = (ifma->ifma_addr->sa_family != AF_LINK) ? ifma->ifma_lladdr : NULL; len = rt_msg2(RTM_NEWMADDR, &info, NULL, w); if (w->w_req && w->w_tmem) { struct ifma_msghdr *ifmam; ifmam = (struct ifma_msghdr *)w->w_tmem; ifmam->ifmam_index = ifma->ifma_ifp->if_index; ifmam->ifmam_flags = 0; ifmam->ifmam_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, w->w_tmem, len); if (error) goto done; } } } done: /* IFNET_RUNLOCK(); */ /* XXX */ return (error); } static int sysctl_rtsock(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct radix_node_head *rnh; int i, lim, s, error = EINVAL; u_char af; struct walkarg w; name ++; namelen--; if (req->newptr) return (EPERM); if (namelen != 3) return ((namelen < 3) ? EISDIR : ENOTDIR); af = name[0]; if (af > AF_MAX) return (EINVAL); bzero(&w, sizeof(w)); w.w_op = name[1]; w.w_arg = name[2]; w.w_req = req; s = splnet(); switch (w.w_op) { case NET_RT_DUMP: case NET_RT_FLAGS: if (af == 0) { /* dump all tables */ i = 1; lim = AF_MAX; } else /* dump only one table */ i = lim = af; for (error = 0; error == 0 && i <= lim; i++) if ((rnh = rt_tables[i]) != NULL) { /* RADIX_NODE_HEAD_LOCK(rnh); */ error = rnh->rnh_walktree(rnh, sysctl_dumpentry, &w);/* could sleep XXX */ /* RADIX_NODE_HEAD_UNLOCK(rnh); */ } else if (af != 0) error = EAFNOSUPPORT; break; case NET_RT_IFLIST: error = sysctl_iflist(af, &w); break; case NET_RT_IFMALIST: error = sysctl_ifmalist(af, &w); break; } splx(s); if (w.w_tmem) free(w.w_tmem, M_RTABLE); return (error); } SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); /* * Definitions of protocols supported in the ROUTE domain. 
*/ extern struct domain routedomain; /* or at least forward */ static struct protosw routesw[] = { { SOCK_RAW, &routedomain, 0, PR_ATOMIC|PR_ADDR, 0, route_output, raw_ctlinput, 0, 0, raw_init, 0, 0, 0, &route_usrreqs } }; static struct domain routedomain = { PF_ROUTE, "route", 0, 0, 0, routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] }; DOMAIN_SET(route);